#' Normalize CT values using NORMAgene
#'
#' Applies the least squares fit-based NORMAgene data-driven normalization algorithm originally described by Heckmann et al. (2011) to raw CT values provided via an input data frame appended with experimental meta-data, and returns a data frame containing normalized CT values with scaling factors and relative variability metrics attached as attributes.
#'
#' Users must explicitly specify how experimental conditions and replicate identifiers are handled to avoid accidental normalization of numeric metadata. Scaling factors can be retrieved from the output object using [correction_factors()]. Relative variability metrics can be retrieved from the output object using [relative_variability()]. For more information on the NORMAgene algorithm or relative variability metrics, see [NORMAgene-package].
#'
#' @param data
#' A data frame structured with biological replicates in rows, and experimental metadata and gene-wise raw CT values in columns.
#'
#' @param conditions
#' A single column name in `data` specifying experimental condition membership in the case of a multi-condition experiment, or `NULL` in the case of a single condition experiment. Normalization is applied within experimental conditions when specified, or across all replicates when `NULL`. This argument must be explicitly provided.
#'
#' @param replicates
#' A single column name in `data` containing replicate identifiers, or `NULL` if replicate identifiers are not present. If provided, replicate identifiers are used for naming of outputs only, and are not used in normalization calculations. This argument must be explicitly provided.
#'
#' @param ct_values
#' Optional character vector specifying column names in `data` containing CT values to be normalized. If `NULL`, all numeric columns except `conditions` and `replicates` are used. When supplied, it must be a non-empty character vector of numeric columns and must not include the `conditions` or `replicates` columns.
#'
#' @return
#' A data frame with the same organization as `data` containing normalized CT values and any provided experimental metadata. The per-replicate scaling factors used for normalization, as well as within gene and within experimental condition relative variability metrics, are attached as attributes.
#'
#' @references
#' Heckmann, LH., Sørensen, PB., Krogh, PH., & Sørensen, JG. (2011).
#' NORMA-Gene: a simple and robust method for qPCR normalization based on target gene data.
#' \emph{BMC Bioinformatics}, 12, 250.
#' \doi{10.1186/1471-2105-12-250}
#'
#' @seealso
#' [correction_factors()] \cr
#' [relative_variability()] \cr
#' [NORMAgene-package]
#'
#' @examples
#' # USE-CASE WITH MULTIPLE EXPERIMENTAL CONDITIONS
#'
#' # load example dataset containing raw CT values and
#' # metadata from a multi-condition experiment
#'
#' data(multi_cond_data)
#' raw_data<-multi_cond_data
#'
#' # normalize CT values via NORMAgene
#'
#' norm_data<-norma_gene(
#'   data = raw_data,
#'   conditions = "Diagnosis",
#'   replicates = "Sample_id"
#' )
#'
#' # retrieve relative variability metrics
#'
#' relative_variability(norm_data, type = "by_gene")
#' relative_variability(norm_data, type = "by_condition")
#'
#' # USE-CASE WITH A SINGLE EXPERIMENTAL CONDITION
#'
#' # load example dataset containing raw CT values and
#' # metadata from a single-condition experiment
#'
#' data(single_cond_data)
#' raw_data<-single_cond_data
#'
#' # normalize CT values via NORMAgene
#'
#' norm_data<-norma_gene(
#'   data = raw_data,
#'   conditions = NULL,
#'   replicates = "Sample_id"
#' )
#'
#' # retrieve relative variability metrics
#'
#' relative_variability(norm_data, type = "by_gene")
#' relative_variability(norm_data, type = "by_condition")
#'
#' @export

norma_gene<-function(data, conditions=NULL, replicates=NULL, ct_values=NULL){

  #----input parsing and validation----

  if(!is.data.frame(data)){
    stop("`data` must be a data.frame.")
  }

  #the NULL defaults exist only so that missing() can be used to force the
  #caller to make an explicit choice for both metadata arguments

  if(missing(conditions)){
    stop("`conditions` must be explicitly specified. ",
         "Use a column name (e.g. conditions = \"treatment\") ",
         "or explicitly set conditions = NULL.")
  }

  if(missing(replicates)){
    stop("`replicates` must be explicitly specified. ",
         "Use a column name (e.g. replicates = \"sample_id\") ",
         "or explicitly set replicates = NULL.")
  }

  #metadata columns are validated before they are used to build the exclusion
  #set below; the character check matters because a factor would pass `%in%`
  #by label but then be used as an integer code when indexing `data`

  is_col_name<-function(x){
    is.character(x)&&length(x)==1L&&!is.na(x)&&x%in%colnames(data)
  }

  if(!is.null(conditions)&&!is_col_name(conditions)){
    stop("`conditions` must be a single column name present in `data`.")
  }

  if(!is.null(replicates)&&!is_col_name(replicates)){
    stop("`replicates` must be a single column name present in `data`.")
  }

  #c() drops NULLs, so this is NULL when no metadata columns were given
  meta_cols<-c(conditions, replicates)

  #detect gene expression columns

  if(is.null(ct_values)){
    is_num<-vapply(data, is.numeric, logical(1))
    candidates<-names(data)[is_num]
    ct_values<-candidates[!candidates%in%meta_cols]

    if(length(ct_values)==0L){
      stop("No numeric gene expression columns found in `data`. ",
           "Specify gene expression columns explicitly using `ct_values`.")
    }
  }else{
    if(!all(ct_values%in%colnames(data))){
      stop("Some `ct_values` columns are not present in `data`.")
    }

    #an empty vector passes the all() checks vacuously, and a factor would be
    #used as integer codes when subsetting, so both are rejected explicitly
    if(!is.character(ct_values)||length(ct_values)==0L){
      stop("`ct_values` must be a non-empty character vector of column names ",
           "in `data`.")
    }

    #guard against normalizing numeric metadata as if it were a gene
    if(any(ct_values%in%meta_cols)){
      stop("`ct_values` must not include the `conditions` or `replicates` ",
           "columns.")
    }

    if(!all(vapply(data[,ct_values,drop=FALSE], is.numeric, logical(1)))){
      stop("All `ct_values` columns must be numeric.")
    }
  }

  expr_data<-data[,ct_values,drop=FALSE]

  #condition membership (NULL means all replicates are normalized together)

  cond_vec<-if(is.null(conditions)) NULL else factor(data[[conditions]])

  #replicate identifiers (used only to name the correction factors)

  rep_ids<-if(is.null(replicates)) NULL else as.character(data[[replicates]])

  #----call core function----

  res<-norma_gene_core(
    X=as.matrix(expr_data),
    conditions=cond_vec
  )

  #----attach replicate names----

  if(!is.null(rep_ids)){
    names(res$cor_fact)<-rep_ids
  }

  #----rebuild data.frame----

  #start from the input so metadata columns and column order are preserved,
  #then overwrite only the CT columns with their normalized values

  out<-data
  out[,ct_values]<-res$norm

  #----attach normalization metrics----

  attr(out, "rel_var")<-res$rel_var
  attr(out, "cor_fact")<-res$cor_fact

  #----return----

  #unique() keeps the class vector free of duplicates when the input is
  #itself the output of a previous norma_gene() call

  class(out)<-unique(c("norma_gene", class(out)))
  out
}
