#' NORMAgene core normalization engine
#'
#' Applies the least squares fit-based NORMAgene data-driven normalization algorithm originally described by Heckmann et al. (2011) to a matrix of raw CT values, and returns a list containing a matrix of normalized CT values along with associated scaling factors and relative variability metrics.
#'
#' This function implements the core normalization and variance calculations and is primarily intended for internal use; most users should call [norma_gene()] instead. For more information on the NORMAgene algorithm or relative variability metrics, see [NORMAgene-package].
#'
#' @param X
#' A numeric matrix of raw CT values with replicates in rows and genes in columns. Missing values (`NA`) are allowed; all non-missing entries must be finite and positive.
#'
#' @param conditions
#' A factor of length `nrow(X)` specifying experimental condition membership for replicates in the case of a multi-condition experiment, or `NULL` in the case of a single-condition experiment. Each condition must contain at least two replicates. Normalization is applied within experimental conditions when specified, or across all replicates (with a warning) when `NULL`.
#'
#' @return
#' A list with the following components:
#' \describe{
#'   \item{norm}{A numeric matrix of normalized CT values with identical row and column order as \code{X}. Row and column names are inherited from \code{X}.}
#'   \item{cor_fact}{A numeric vector of length \code{nrow(X)} containing the per-replicate scaling factors used for normalization. }
#'   \item{rel_var}{A list containing relative variability metrics:
#'     \describe{
#'       \item{by_gene}{A named numeric matrix of gene-level relative variability values, calculated both within experimental conditions and cumulatively across all experimental conditions.}
#'       \item{by_cond}{A named numeric vector of relative variability calculated within experimental conditions, as well as cumulatively across all experimental conditions, regardless of gene.}
#'     }
#'   }
#' }
#'
#' @references
#' Heckmann, LH., Sørensen, PB., Krogh, PH., & Sørensen, JG. (2011).
#' NORMA-Gene: a simple and robust method for qPCR normalization based on target gene data.
#' \emph{BMC Bioinformatics}, 12, 250.
#' \doi{10.1186/1471-2105-12-250}
#'
#' @seealso
#' [norma_gene()] \cr
#' [NORMAgene-package]
#'
#' @keywords internal

norma_gene_core<-function(X, conditions=NULL){

  #Core NORMAgene routine: log-transforms `X`, computes one additive (log-scale)
  #correction per replicate within each condition, and returns the normalized
  #matrix, the per-replicate scaling factors, and the ratios of post- to
  #pre-normalization standard deviations (relative variability).

  #----input parsing and validation----

  if(!is.matrix(X)||!is.numeric(X))
    stop("`X` must be a numeric matrix in the form of replicates x genes.")

  n<-nrow(X)
  p<-ncol(X)

  if(p<5L){
    warning("Normalization may be unstable with fewer than 5 genes.")
  }

  #no conditions supplied: place every replicate in one pseudo-condition
  if(is.null(conditions)){
    warning("`conditions` is NULL; all replicates treated as a single condition for normalization.")
    conditions<-factor(rep.int("All_replicates", n))
  }

  if(!is.factor(conditions))
    stop("`conditions` must be a factor.")

  if(length(conditions)!=n)
    stop("`conditions` must have length nrow(X).")

  rep_n<-table(conditions)
  if(any(rep_n<2L)){
    bad<-names(rep_n)[rep_n<2L]
    stop("At least two replicates per condition are required for normalization. ",
         "The following condition(s) have fewer than two replicates: ",
         paste(bad, collapse=", "))
  }

  #NA entries are tolerated (na.rm=TRUE) and handled as missing below

  if(any(is.infinite(X), na.rm=TRUE)){
    stop("All entries in `X` must be finite.")
  }

  if(any(X<=0, na.rm=TRUE)){
    stop("All entries in `X` must be positive.")
  }

  cond<-conditions
  cond_levels<-levels(cond)

  #fall back to generic gene labels when `X` has no column names
  gene_names<-colnames(X)
  if(is.null(gene_names))
    gene_names<-paste0("Gene", seq_len(p))

  #all fitting is done on the natural-log scale
  L<-log(X)

  #----build containers for normalization and variance calculation output----

  #normalized data and correction factors

  Z<-matrix(NA_real_, n, p, dimnames=dimnames(X))
  a_vec<-numeric(n)

  #Per-(condition,gene) residual-variance

  var_raw_cond<-matrix(NA_real_, length(cond_levels), p, dimnames=list(cond_levels, gene_names))
  var_trans_cond<-matrix(NA_real_, length(cond_levels), p, dimnames=list(cond_levels, gene_names))

  #DF-pooled per-condition variances

  var_raw_cond_pooled<-stats::setNames(numeric(length(cond_levels)), cond_levels)
  var_trans_cond_pooled<-stats::setNames(numeric(length(cond_levels)), cond_levels)

  #per-(cond,gene) counts for DF pooling

  ng_mat<-matrix(0L, nrow=length(cond_levels), ncol=p, dimnames=list(cond_levels, gene_names))

  #----normalization and variance calculation----

  for(lev in cond_levels){
    index<-which(cond==lev)
    Lsub<-L[index,,drop=FALSE]

    #TRUE where a usable (non-missing) log value exists
    present<-is.finite(Lsub)

    #per-gene count of observed replicates and per-gene mean log value
    n_g<-colSums(present)
    mu_g<-colMeans(Lsub, na.rm=TRUE)

    #a gene with no finite value in this condition has mu_g = NaN; `present`
    #already gives it zero weight in the product below, but 0*NaN is NaN and
    #would poison the correction of every replicate, so zero it out first
    mu_g0<-mu_g
    mu_g0[n_g==0]<-0

    #zero-filled copy so missing cells contribute nothing to row sums
    L0<-Lsub
    L0[!present]<-0

    #per replicate: sum of observed log values, sum of the gene means over the
    #same observed genes, and the number of observed genes
    sum1<-rowSums(L0)
    sum2<-present%*%mu_g0
    k_s<-rowSums(present)

    #log-scale correction = mean deviation of the replicate from the gene
    #means; replicates with no observed gene keep a correction of 0
    c_s<-numeric(length(index))
    c_s[k_s>0]<-(sum2[k_s>0]-sum1[k_s>0])/k_s[k_s>0]
    a_s<-exp(c_s)
    a_vec[index]<-a_s

    #c_s recycles down columns, so row i is shifted by c_s[i]
    Z[index,]<-exp(Lsub+c_s)

    #residuals about the gene means before normalization (missing cells -> 0)
    raw_res0<-sweep(Lsub, 2, mu_g, "-")
    raw_res0[!present]<-0

    #residuals after normalization (missing cells -> 0)
    trans_res0<-raw_res0+c_s
    trans_res0[!present]<-0

    ss_raw_g<-colSums(raw_res0^2)
    ss_trans_g<-colSums(trans_res0^2)

    #a variance needs at least two observations; other genes stay NA
    has2<-(n_g>1L)
    var_raw_cond[lev,has2]<-ss_raw_g[has2]/(n_g[has2]-1)
    var_trans_cond[lev,has2]<-ss_trans_g[has2]/(n_g[has2]-1)

    #variance pooled over all genes of this condition
    total_present<-sum(present)
    if(total_present>1L){
      var_raw_cond_pooled[lev]<-sum(raw_res0^2)/(total_present-1)
      var_trans_cond_pooled[lev]<-sum(trans_res0^2)/(total_present-1)
    }else{
      var_raw_cond_pooled[lev]<-NA_real_
      var_trans_cond_pooled[lev]<-NA_real_
    }

    ng_mat[lev,]<-n_g
  }

  #----relative variability calculation----

  #per gene and condition: sd after normalization / sd before normalization
  sdr_gene_cond<-t(sqrt(var_trans_cond/var_raw_cond))
  colnames(sdr_gene_cond)<-cond_levels
  rownames(sdr_gene_cond)<-gene_names

  #per gene, pooled across conditions with (n-1) weights
  df_gene_cond<-pmax(ng_mat-1L,0L)
  num_raw_gene<-colSums(df_gene_cond*var_raw_cond, na.rm=TRUE)
  num_trans_gene<-colSums(df_gene_cond*var_trans_cond, na.rm=TRUE)
  den_gene<-colSums(df_gene_cond, na.rm=TRUE)
  sdr_gene_all<-sqrt((num_trans_gene/den_gene)/(num_raw_gene/den_gene))
  sdr_gene_all[den_gene==0]<-NA_real_

  by_gene<-cbind(sdr_gene_cond, Cumulative=sdr_gene_all)

  #per condition, regardless of gene
  by_cond<-sqrt(var_trans_cond_pooled/var_raw_cond_pooled)

  #clamp at zero so a condition without any observation (count 0) cannot
  #contribute -1 and cancel the degrees of freedom of another condition
  df_cond<-pmax(rowSums(ng_mat)-1L,0L)
  num_raw_all<-sum(df_cond*var_raw_cond_pooled, na.rm=TRUE)
  num_trans_all<-sum(df_cond*var_trans_cond_pooled, na.rm=TRUE)
  den_all<-sum(df_cond, na.rm=TRUE)
  total<-sqrt((num_trans_all/den_all)/(num_raw_all/den_all))

  if(den_all==0){
    total<-NA_real_
  }

  by_cond<-c(by_cond, total)
  names(by_cond)<-c(cond_levels, "Cumulative")

  rel_var<-list(
    by_gene=by_gene,
    by_cond=by_cond
  )

  #----return----

  list(
    norm=Z,
    cor_fact=a_vec,
    rel_var=rel_var
  )
}
