#' @title Simple Longitudinal Difference (SLD)
#'
#' @description
#' Detects influential subjects in a longitudinal dataset by analysing the
#' successive differences of each subject's responses. A subject is flagged
#' when its largest absolute successive difference exceeds a threshold built
#' from the mean and standard deviation of all successive differences. This
#' helps to detect abrupt changes in subject responses over time.
#'
#' @details
#' The function follows these steps:
#' \itemize{
#'   \item Sorts the data by subject and time and computes the successive
#'     differences of the response within each subject.
#'   \item Calculates the mean and standard deviation of these (signed)
#'     differences across all subjects.
#'   \item Defines the threshold as \code{mean + k * sd}.
#'   \item Flags subjects whose maximum absolute successive difference
#'     exceeds this threshold.
#'   \item Separates the data into influential and non-influential subjects.
#'   \item Visualizes the results using \code{ggplot2}.
#' }
#'
#' Subjects with a single observation have no successive difference; their
#' maximum difference (and therefore their IS and PIS) is \code{NA} and they
#' are never flagged as influential.
#'
#' The returned data frames are sorted by subject and time and carry an extra
#' column, \code{successive_diff}, holding the difference between the next
#' and the current response of the same subject (\code{NA} on each subject's
#' last row).
#'
#' @param data A data frame containing longitudinal data.
#' @param subject_id A character string giving the name of the column in
#'   \code{data} that holds the subject IDs.
#' @param time A character string giving the name of the column in
#'   \code{data} that holds the time points at which observations were
#'   measured.
#' @param response A character string giving the name of the column in
#'   \code{data} that holds the response variable.
#' @param k A single numeric value (default 2): the number of standard
#'   deviations added to the mean difference to define the threshold.
#' @param verbose Logical; if \code{TRUE}, prints a message listing the
#'   influential subjects that were detected.
#'
#' @return A list containing:
#' \item{influential_subjects}{A vector of subject IDs identified as influential.}
#' \item{influential_data}{A data frame containing data for influential subjects.}
#' \item{non_influential_data}{A data frame containing data for non-influential subjects.}
#' \item{successive_difference_plot}{A ggplot object visualizing maximum successive differences across subjects.}
#' \item{longitudinal_plot}{A ggplot object displaying longitudinal data with influential subjects highlighted.}
#' \item{IS_table}{A data frame with one row per subject containing the maximum successive difference, the Influence Score (IS) and the Partial Influence Score (PIS).}
#'
#' If no subject exceeds the threshold, a warning is issued,
#' \code{influential_subjects} is empty, \code{non_influential_data} holds
#' all of the data, and both plots and \code{IS_table} are \code{NULL}.
#'
#' @examples
#' data(infsdata)
#' infsdata <- infsdata[1:5,]
#' result <- sld(infsdata, "subject_id", "time", "response", k = 2)
#' print(result$influential_subjects)
#' head(result$influential_data)
#' head(result$non_influential_data)
#'
#' @seealso tvm, wlm, slm, rld
#'
#' @export
sld <- function(data, subject_id, time, response, k = 2, verbose = FALSE) {

  # Validate inputs up front: a misspelled column name would otherwise make
  # `data[[name]]` return NULL and silently empty the data during ordering.
  if (!is.data.frame(data)) {
    stop("`data` must be a data frame.", call. = FALSE)
  }
  column_args <- list(
    subject_id = subject_id,
    time = time,
    response = response
  )
  for (arg_name in names(column_args)) {
    column <- column_args[[arg_name]]
    if (!is.character(column) || length(column) != 1L || is.na(column)) {
      stop(
        "`", arg_name, "` must be a single character string naming a column.",
        call. = FALSE
      )
    }
    if (!column %in% names(data)) {
      stop(
        "Column '", column, "' (argument `", arg_name,
        "`) was not found in `data`.",
        call. = FALSE
      )
    }
  }
  if (!is.numeric(k) || length(k) != 1L || is.na(k)) {
    stop("`k` must be a single non-missing numeric value.", call. = FALSE)
  }

  # Largest absolute value of `x`, or NA when `x` has no non-missing entries
  # (plain max(..., na.rm = TRUE) would warn and return -Inf in that case).
  max_abs_or_na <- function(x) {
    x <- abs(x[!is.na(x)])
    if (length(x) == 0L) NA_real_ else max(x)
  }

  # Order data by subject and time so that diff() follows the time course
  data <- data[order(data[[subject_id]], data[[time]]), ]

  # Successive differences within each subject; the trailing NA keeps the
  # vector the same length as the group (the last row has no successor).
  data <- data |>
    dplyr::group_by(.data[[subject_id]]) |>
    dplyr::mutate(
      successive_diff = c(diff(.data[[response]]), NA_real_)
    ) |>
    dplyr::ungroup()

  # Overall mean and SD of the (signed) successive differences
  overall_mean_diff <- mean(data$successive_diff, na.rm = TRUE)
  overall_sd_diff   <- stats::sd(data$successive_diff, na.rm = TRUE)

  # Max absolute difference per subject (NA for single-observation subjects)
  max_diff_per_subject <- data |>
    dplyr::group_by(.data[[subject_id]]) |>
    dplyr::summarise(
      max_diff = max_abs_or_na(successive_diff),
      .groups = "drop"
    )

  # Threshold
  threshold <- overall_mean_diff + k * overall_sd_diff

  # Identify influential subjects; which() drops NA comparisons, so subjects
  # without any difference (or an undefined threshold) are never flagged.
  flagged_rows <- which(max_diff_per_subject$max_diff > threshold)
  influential_subjects <- max_diff_per_subject[[subject_id]][flagged_rows]

  if (length(influential_subjects) == 0) {
    warning("No influential subjects detected based on the given threshold.")
    return(list(
      influential_subjects = integer(0),
      influential_data = data[0, ],
      non_influential_data = data,
      successive_difference_plot = NULL,
      longitudinal_plot = NULL,
      IS_table = NULL
    ))
  }

  influential_data <- data |>
    dplyr::filter(.data[[subject_id]] %in% influential_subjects)

  non_influential_data <- data |>
    dplyr::filter(!.data[[subject_id]] %in% influential_subjects)

  if (verbose) {
    message(
      "Influential subjects detected: ",
      paste(influential_subjects, collapse = ", ")
    )
  }

  ## --- Plot 1: Longitudinal data --- ##
  # All trajectories in the default colour, influential ones redrawn in red.
  p1 <- ggplot2::ggplot(
    data,
    ggplot2::aes(
      x = .data[[time]],
      y = .data[[response]],
      group = .data[[subject_id]]
    )
  ) +
    ggplot2::geom_line(alpha = 0.8) +
    ggplot2::geom_line(
      data = influential_data,
      color = "red",
      linewidth = 1.2
    ) +
    ggplot2::labs(
      title = "Longitudinal Data with Influential Subjects (SLD Method)",
      x = time,
      y = response
    ) +
    ggplot2::theme_minimal()

  ## --- Plot 2: Max successive difference per subject --- ##
  # na.rm = TRUE: subjects with a single observation have no point to draw.
  p2 <- ggplot2::ggplot(
    max_diff_per_subject,
    ggplot2::aes(x = .data[[subject_id]], y = max_diff)
  ) +
    ggplot2::geom_point(size = 3, na.rm = TRUE) +
    ggplot2::geom_point(
      data = max_diff_per_subject[flagged_rows, ],
      color = "red",
      size = 4,
      na.rm = TRUE
    ) +
    ggplot2::geom_hline(
      yintercept = threshold,
      linetype = "dashed",
      color = "red"
    ) +
    ggplot2::labs(
      title = "Max Successive Difference per Subject (SLD Method)",
      x = subject_id,
      y = "Max Successive Difference"
    ) +
    ggplot2::theme_minimal()

  # Per-subject scores: IS standardises the max difference; PIS is the
  # squared deviation scaled by the number of rows and the variance.
  IS_table <- max_diff_per_subject |>
    dplyr::mutate(
      IS  = (max_diff - overall_mean_diff) / overall_sd_diff,
      PIS = (max_diff - overall_mean_diff)^2 /
        (nrow(data) * overall_sd_diff^2)
    )

  list(
    influential_subjects = influential_subjects,
    influential_data = influential_data,
    non_influential_data = non_influential_data,
    successive_difference_plot = p2,
    longitudinal_plot = p1,
    IS_table = IS_table
  )
}

# Register the names that sld() uses through non-standard evaluation
# (the `.data` pronoun and the columns created inside dplyr verbs).
utils::globalVariables(
  c(
    ".data",
    "successive_diff",
    "max_diff"
  )
)
