#' @title Relative Longitudinal Difference (RLD)
#'
#' @description
#' Identifies influential subjects in longitudinal data based on their
#' relative (percentage) change in response between consecutive time points.
#' A subject is flagged when its largest deviation from the time-point mean
#' relative change exceeds a threshold derived from \code{k} standard
#' deviations.
#'
#' @details
#' The function follows these steps:
#' \itemize{
#'   \item Sorts the data by subject and time and computes, within each
#'     subject, the relative change of the response with respect to the
#'     previous observation, expressed as a percentage.
#'   \item Computes, for every time point, the mean and standard deviation of
#'     the relative change across subjects.
#'   \item Computes, for every subject, the largest absolute deviation of its
#'     relative change from the corresponding time-point mean
#'     (\code{max_diff}) and the largest time-point standard deviation it was
#'     observed at (\code{max_sd}).
#'   \item Sets the threshold to \code{k} times the largest time-point
#'     standard deviation and flags subjects whose \code{max_diff} exceeds it.
#'   \item Separates the data into influential and non-influential subjects
#'     and builds two plots highlighting the influential subjects.
#' }
#'
#' The relative change is undefined for the first observation of a subject
#' and whenever the previous response is zero (or not finite); such values
#' are treated as missing and do not contribute to means, standard deviations
#' or deviations. Subjects without any usable relative change get a missing
#' \code{max_diff} and are never flagged. If no time point has an estimable
#' standard deviation (for example when every time point has a single
#' observation), the threshold cannot be computed: a warning is issued and no
#' subject is flagged.
#'
#' @param data A data frame containing the longitudinal data.
#' @param subject_id A character string giving the name of the column that
#'   holds the subject IDs.
#' @param time A character string giving the name of the column that holds
#'   the time points at which observations are measured (for example 0 for
#'   baseline, 1 for the first visit, and so on).
#' @param response A character string giving the name of the numeric column
#'   that holds the response values.
#' @param k A single numeric value (default = 2) used to define the threshold
#'   for detecting influential subjects.
#' @param verbose Logical; if TRUE, prints informative messages during
#'   execution.
#'
#' @return A list containing:
#' \item{influential_subjects}{IDs of influential subjects.}
#' \item{influential_data}{Data frame of influential subjects.}
#' \item{non_influential_data}{Data frame of non-influential subjects.}
#' \item{relative_change_plot}{Plot of max relative change per subject.}
#' \item{longitudinal_plot}{Plot of longitudinal data with influential subjects highlighted.}
#' \item{IS_table}{A data frame with one row per subject containing
#'   \code{max_diff}, \code{max_sd}, the Influence Score
#'   (\code{IS = max_diff / max_sd}) and the Partial Influence Score
#'   (\code{PIS = max_diff^2 / (n * max_sd^2)}, where \code{n} is the number
#'   of rows of \code{data}).}
#'
#' The returned data frames also carry the helper columns
#' \code{relative_change}, \code{mean_rc} and \code{sd_rc}. When no subject is
#' flagged, a warning is issued, \code{influential_subjects} is empty and the
#' two plots and \code{IS_table} are \code{NULL}.
#'
#' @examples
#' data(infsdata)
#' infsdata <- infsdata[1:5,]
#' result <- rld(infsdata, "subject_id", "time", "response", k = 2)
#' print(result$influential_subjects)
#' head(result$influential_data)
#' head(result$non_influential_data)
#'
#' @export
#'
#'
#' @seealso tvm, wlm, sld, slm



rld <- function(data, subject_id, time, response, k = 2, verbose = FALSE) {

  # ---- Input validation: fail early with a readable message ----
  if (!is.data.frame(data)) {
    stop("`data` must be a data frame.", call. = FALSE)
  }
  column_args <- list(subject_id = subject_id, time = time, response = response)
  for (arg_name in names(column_args)) {
    column <- column_args[[arg_name]]
    if (!is.character(column) || length(column) != 1L || is.na(column)) {
      stop("`", arg_name, "` must be a single character string naming a ",
           "column of `data`.", call. = FALSE)
    }
    if (!column %in% names(data)) {
      stop("Column \"", column, "\" (argument `", arg_name,
           "`) was not found in `data`.", call. = FALSE)
    }
  }
  if (!is.numeric(data[[response]])) {
    stop("Column \"", response, "\" must be numeric.", call. = FALSE)
  }
  if (!is.numeric(k) || length(k) != 1L || is.na(k)) {
    stop("`k` must be a single non-missing numeric value.", call. = FALSE)
  }

  # max() over the finite values only; returns NA (instead of -Inf plus a
  # warning) when there is nothing to take the maximum of.
  max_or_na <- function(x) {
    x <- x[is.finite(x)]
    if (length(x) == 0L) NA_real_ else max(x)
  }

  # Sort by subject and time so lag() refers to the previous visit
  data <- data[order(data[[subject_id]], data[[time]]), ]

  # Percentage change relative to the previous observation of the same subject
  data <- data |>
    dplyr::group_by(.data[[subject_id]]) |>
    dplyr::mutate(
      relative_change = (.data[[response]] - dplyr::lag(.data[[response]])) /
        dplyr::lag(.data[[response]]) * 100
    ) |>
    dplyr::ungroup()

  # A zero (or non-finite) previous response yields Inf/NaN; left in place it
  # would turn the whole time point's mean and SD into Inf/NaN, so treat the
  # undefined change as missing.
  data$relative_change[!is.finite(data$relative_change)] <- NA_real_

  # Mean and SD of relative change by time point
  rc_by_time <- data |>
    dplyr::group_by(.data[[time]]) |>
    dplyr::summarise(
      mean_rc = mean(.data$relative_change, na.rm = TRUE),
      sd_rc   = stats::sd(.data$relative_change, na.rm = TRUE),
      .groups = "drop"
    )

  data <- dplyr::left_join(data, rc_by_time, by = time)

  # Largest deviation from the time-point mean, and largest time-point SD,
  # per subject
  max_diff_per_subject <- data |>
    dplyr::group_by(.data[[subject_id]]) |>
    dplyr::summarise(
      max_diff = max_or_na(abs(.data$relative_change - .data$mean_rc)),
      max_sd   = max_or_na(.data$sd_rc),
      .groups  = "drop"
    )

  # Threshold: k times the largest time-point SD (NA if no SD is estimable)
  threshold <- k * max_or_na(max_diff_per_subject$max_sd)

  # Subjects with a missing max_diff, or a missing threshold, are not flagged
  is_influential <- !is.na(threshold) &
    !is.na(max_diff_per_subject$max_diff) &
    max_diff_per_subject$max_diff > threshold
  influential_subjects <- max_diff_per_subject[[subject_id]][is_influential]

  if (length(influential_subjects) == 0) {
    if (is.na(threshold)) {
      warning(
        "The threshold could not be computed because no time point has an ",
        "estimable standard deviation of relative change; ",
        "no subjects were flagged.",
        call. = FALSE
      )
    } else {
      warning("No influential subjects detected based on the given threshold.")
    }
    return(list(
      influential_subjects = integer(0),
      influential_data = data[0, ],
      non_influential_data = data,
      relative_change_plot = NULL,
      longitudinal_plot = NULL,
      IS_table = NULL
    ))
  }

  influential_data <- data |>
    dplyr::filter(.data[[subject_id]] %in% influential_subjects)

  non_influential_data <- data |>
    dplyr::filter(!.data[[subject_id]] %in% influential_subjects)

  if (verbose) {
    message("Influential subjects detected: ",
            paste(influential_subjects, collapse = ", "))
  }

  ## Plot 1: max deviation per subject with the threshold line.
  ## Subjects without a usable max_diff cannot be drawn and are left out.
  plot_data <- max_diff_per_subject[!is.na(max_diff_per_subject$max_diff), ]

  p1 <- ggplot2::ggplot(
    plot_data,
    ggplot2::aes(x = .data[[subject_id]], y = .data$max_diff)
  ) +
    ggplot2::geom_point(size = 3) +
    ggplot2::geom_point(
      data = plot_data |>
        dplyr::filter(.data[[subject_id]] %in% influential_subjects),
      color = "red", size = 4
    ) +
    ggplot2::geom_hline(
      yintercept = threshold,
      linetype = "dashed",
      color = "red"
    ) +
    ggplot2::labs(
      title = "Max Relative Difference per Subject (RLD Method)",
      x = subject_id,
      y = "Max Relative Difference"
    ) +
    ggplot2::theme_minimal()

  ## Plot 2: individual trajectories, influential subjects drawn in red
  p2 <- ggplot2::ggplot(
    data,
    ggplot2::aes(
      x = .data[[time]],
      y = .data[[response]],
      group = .data[[subject_id]]
    )
  ) +
    ggplot2::geom_line(alpha = 0.6) +
    ggplot2::geom_line(
      data = influential_data,
      color = "red",
      linewidth = 1.2
    ) +
    ggplot2::labs(
      title = "Longitudinal Data with Influential Subjects (RLD Method)",
      x = time,
      y = response
    ) +
    ggplot2::theme_minimal()

  # Influence scores per subject; n_obs is the total number of rows of data
  n_obs <- nrow(data)
  IS_table <- max_diff_per_subject |>
    dplyr::mutate(
      IS  = .data$max_diff / .data$max_sd,
      PIS = (.data$max_diff^2) / (n_obs * .data$max_sd^2)
    )

  list(
    influential_subjects = influential_subjects,
    influential_data = influential_data,
    non_influential_data = non_influential_data,
    relative_change_plot = p1,
    longitudinal_plot = p2,
    IS_table = IS_table
  )
}
# Register these names with utils::globalVariables() so that R CMD check does
# not report them as undefined global variables when they appear as unquoted
# column names (or, for `sd`, as an unqualified function call) in
# non-standard-evaluation code such as dplyr verbs.
# NOTE(review): other files in the package may rely on this declaration as
# well -- confirm before trimming the list.
utils::globalVariables(
  c(
    "relative_change",
    "sd",
    "sd_rc",
    "max_sd"
  )
)
