# WARNING - Generated by {fusen} from dev/dereplicate-spectra.Rmd: do not edit by hand

#' Process Bruker MALDI Biotyper spectra _à la_ Strejcek et al. (2018)
#'
#' @details
#' Based on the original implementation, the function performs the following tasks:
#'
#' 1. Square-root transformation
#' 2. Mass range trimming to 4-10 kDa as they were deemed most determinant by Strejcek et al. (2018)
#' 3. Signal smoothing using the Savitzky-Golay method and a half window size of 20
#' 4. Baseline correction with the SNIP procedure
#' 5. Normalization by Total Ion Current
#' 6. Peak detection using the SuperSmoother procedure and with a signal-to-noise ratio above 3
#' 7. Peak filtering. This step has been added to discard peaks with a negative signal-to-noise ratio probably due to being on the edge of the mass range.
#'
#'
#' @param spectra_list A list of [MALDIquant::MassSpectrum] objects.
#' @param rds_prefix A character indicating the prefix for the `.RDS` output files to be written in the `processed` directory. By default, no prefix are given and thus no files are written.
#'
#' @return A named list of three objects:
#' * `spectra`: a list the length of the spectra list of [MALDIquant::MassSpectrum] objects.
#' * `peaks`: a list the length of the spectra list of [MALDIquant::MassPeaks] objects.
#' * `metadata`: a tibble indicating the median signal-to-noise ratio (`SNR`) and peaks number for all spectra list (`peaks`), with spectra names in the `name` column.
#'
#' @seealso [import_biotyper_spectra] and [check_spectra] for the inputs and [merge_processed_spectra] for further analysis.
#' @export
#'
#' @references Strejcek M, Smrhova T, Junkova P & Uhlik O (2018). “Whole-Cell MALDI-TOF MS versus 16S rRNA Gene Analysis for Identification and Dereplication of Recurrent Bacterial Isolates.” *Frontiers in Microbiology* 9 <doi:10.3389/fmicb.2018.01294>.
#'
#' @note The original R code on which this function is based is accessible at: <https://github.com/strejcem/MALDIvs16S>
#' @examples
#' # Get an example directory of six Bruker MALDI Biotyper spectra
#' directory_biotyper_spectra <- system.file(
#'   "toy-species-spectra",
#'   package = "maldipickr"
#' )
#' # Import the six spectra
#' spectra_list <- import_biotyper_spectra(directory_biotyper_spectra)
#' # Transform the spectra signals according to Strejcek et al. (2018)
#' processed <- process_spectra(spectra_list)
#' # Overview of the list architecture that is returned
#' #  with the list of processed spectra, peaks identified and the
#' #  metadata table
#' str(processed, max.level = 2)
#' # A detailed view of the metadata with the median signal-to-noise
#' #  ratio (SNR) and the number of peaks
#' processed$metadata
process_spectra <- function(spectra_list, rds_prefix = NULL) {
  # It returns the list and write it for future processing as an RDS file.

  # 1. SQRT transformation
  # 2. Mass range trimming
  # 3. Signal smoothing
  # 4. Baseline Correction
  # 5. Normalization
  spectra <- spectra_list %>%
    MALDIquant::transformIntensity("sqrt") %>%
    MALDIquant::trim(range = c(4000, 10000)) %>%
    MALDIquant::smoothIntensity("SavitzkyGolay", halfWindowSize = 20) %>%
    MALDIquant::removeBaseline("SNIP", iterations = 50) %>%
    MALDIquant::calibrateIntensity(method = "TIC")

  # 6. Peak detection
  peaks <- spectra %>%
    MALDIquant::detectPeaks(
      method = "SuperSmoother",
      halfWindowSize = 20,
      SNR = 3
    )
  # Added extra step to remove the peaks with
  # negative SNR due to being on the edge of the masses trimmed
  peaks <- lapply(peaks, function(pk) {
    pk[MALDIquant::snr(pk) >= 3]
  })

  # Spectrum metadata table
  # Extract signal-to-noise ratios and peaks number
  snr_list <- lapply(peaks, MALDIquant::snr)
  metadata <- data.frame(
    "SNR" = vapply(snr_list, stats::median, FUN.VALUE = numeric(1)),
    "peaks" = lengths(snr_list)
  )


  # Add the spectra identifiers to all objects
  rownames(metadata) <- names(spectra) <- names(peaks) <- sapply(spectra, function(x) {
    # e.g., 230117_1750_1_B1
    gsub(
      "[-\\.]", "_",
      MALDIquant::metaData(x)[["fullName"]]
    )
  })
  # Aggregate the objects to a list
  processed_list <- list(
    "spectra" = spectra,
    "peaks" = peaks,
    # convert the data.frame to a tibble
    "metadata" = tibble::as_tibble(metadata, rownames = "name")
  )

  # Optional: writing objects to RDS files
  if (!is.null(rds_prefix)) {
    if (!dir.exists("processed")) {
      dir.create("processed")
    }
    saveRDS(processed_list,
      version = 2, compress = FALSE,
      file = paste0("processed/", rds_prefix, ".RDS")
    )
  }
  return(processed_list)
}
