#' The stochastic degree sequence model (sdsm) for backbone probabilities
#'
#' `sdsm` computes the probability of edge weights being
#'     above or below the observed edge weights in a bipartite projection
#'     using the stochastic degree sequence model.
#'     Once computed, use \code{\link{backbone.extract}} to return
#'     the backbone matrix for a given alpha value.
#'
#' @param B graph: Bipartite graph object of class matrix, sparse matrix, igraph, edgelist, or network object.
#' @param model String: A method used to compute probabilities for generating random bipartite graphs.
#'     Can be c("logit", "probit", "cauchit", "log", "cloglog", "scobit", "oldlogit","lpm", "rcn", "curveball", "polytope").
#' @param trials Integer: If 'model' = 'curveball', number of random bipartite graphs generated using curveball to compute probabilities. Must be at least 1000. Default is 1000.
#'
#' @details Specifically, the sdsm function compares an edge's observed weight in the projection \code{B*t(B)}
#'    to the distribution of weights expected in a projection obtained from a random bipartite network where
#'    both the row vertex degrees and column vertex degrees are approximately fixed.
#' @details If the 'model' parameter is one of c('logit', 'probit', 'cauchit', 'log', 'cloglog','scobit'),
#'     then this model is used as a 'link' function for a binary outcome model conditioned on the row degrees and column degrees,
#'     as described by \link[stats]{glm} and \link[stats]{family}.
#'     If the 'model' parameter is 'oldlogit', then a logit link function is used but the model is conditioned on the row degrees, column degrees, and their product.
#'     If 'model = lpm', a linear probability model is used. If 'model = rcn', the probabilities are given by (row degree * column degree)/(total number of edges).
#' @details If 'model' = 'curveball', the probabilities are computed by using the \link[backbone]{curveball} function `trials` times (`trials` must be at least 1000). The proportion of each cell being 1 is used as its probability.
#'     If 'model = polytope', the \link{polytope} function is used to find a matrix of probabilities that maximizes the entropy function, with same row and column sums.
#'
#' @details The "backbone" S3 class object returned is composed of two matrices, a summary dataframe and (optionally, if generated by using \link{fdsm}) a 'dyad_values' vector.
#' @return backbone, a list(positive, negative, summary). Here
#'     `positive` is a matrix of probabilities of edge weights being equal to or above the observed value in the projection,
#'     `negative` is a matrix of probabilities of edge weights being equal to or below the observed value in the projection, and
#'     `summary` is a data frame summary of the inputted matrix and the model used including: input class, model name, method, number of rows, mean, standard deviation, and skew of row sums, number of columns, mean, standard deviation, and skew of column sums, and running time.
#' @references \href{https://www.sciencedirect.com/science/article/abs/pii/S0378873314000343}{Neal, Z. P. (2014). The backbone of bipartite projections: Inferring relationships from co-authorship, co-sponsorship, co-attendance, and other co-behaviors. Social Networks, 39, Elsevier: 84-97. DOI: 10.1016/j.socnet.2014.06.001}
#' @export
#'
#' @examples
#'sdsm_probs <- sdsm(davis)
#'\dontrun{sdsm_probs2 <- sdsm(davis, model = "curveball", trials = 1000)}

sdsm <- function(B,
                 model = "polytope",
                 trials = 1000){

  #### Argument Checks ####
  # 'chi2' is accepted as an alias that is computed exactly like 'rcn'.
  valid.models <- c("logit", "probit", "log", "cloglog", "cauchit", "oldlogit",
                    "scobit", "lpm", "rcn", "chi2", "curveball", "polytope")
  if (!is.character(model) || length(model) != 1 || !(model %in% valid.models)) {
    stop("incorrect model type")
  }
  # 'trials' is only consumed by the curveball model, so only validate it there;
  # other models must not fail because of an unused argument.
  if (model == "curveball" && trials < 1000) {
    stop("trials must be at least 1000 to get reasonable approximations for curveball algorithm.")
  }
  if (!(methods::is(B, "matrix") ||
        methods::is(B, "sparseMatrix") ||
        methods::is(B, "igraph") ||
        methods::is(B, "network"))) {
    stop("input bipartite data must be a matrix, igraph, or network object.")
  }

  ### Run Time ###
  run.time.start <- Sys.time()
  message(paste0("Finding the distribution using SDSM with ", model, " model."))

  #### Class Conversion ####
  # class.convert returns a list: [[1]] the input's class, [[2]] the converted data.
  convert <- class.convert(B, "matrix")
  input.class <- convert[[1]]
  B <- convert[[2]]

  #### Bipartite Projection ####
  ### Always work on a sparse matrix so the projection uses sparse operations ###
  if (!methods::is(B, "sparseMatrix")) {
    B <- Matrix::Matrix(B, sparse = TRUE)
  }
  P <- Matrix::tcrossprod(B)

  ### Row and column sums, used for the model covariates and the summary ###
  row.sums <- Matrix::rowSums(B)
  col.sums <- Matrix::colSums(B)

  ### Create Positive and Negative Matrices to hold backbone ###
  Positive <- matrix(0, nrow(P), ncol(P))
  Negative <- matrix(0, nrow(P), ncol(P))

  #### Compute Probabilities for SDSM ####

  ### Vectorize the bipartite data and attach marginals if the model needs them ###
  if (model %in% c("logit", "probit", "log", "cloglog", "cauchit", "oldlogit",
                   "scobit", "lpm", "chi2", "rcn")) {
    ## Cells are vectorized column-major, so row sums cycle fastest ##
    A <- data.frame(value = as.vector(B))
    A$rowmarg <- rep(unname(row.sums), times = ncol(B))
    A$colmarg <- rep(unname(col.sums), each = nrow(B))
    A$rowcol <- A$rowmarg * A$colmarg
  }

  if (model %in% c("logit", "probit", "log", "cloglog", "cauchit", "oldlogit")) {
    ### Binomial Models ###
    # 'oldlogit' is a logit link that additionally conditions on the product
    # of the marginals; every other model name is itself the link name.
    if (model == "oldlogit") {
      link.name <- "logit"
      model.formula <- value ~ rowmarg + colmarg + rowcol
    } else {
      link.name <- model
      model.formula <- value ~ rowmarg + colmarg
    }
    model.family <- stats::binomial(link = link.name)
    # Use speedglm when it is installed, otherwise fall back to stats::glm.
    if (requireNamespace("speedglm", quietly = TRUE)) {
      model.estimates <- speedglm::speedglm(formula = model.formula, family = model.family, data = A)
    } else {
      model.estimates <- stats::glm(formula = model.formula, family = model.family, data = A)
    }
    probs <- as.vector(stats::predict(model.estimates, newdata = A, type = "response"))

  } else if (model == "scobit") {
    ### Scobit model ###
    params <- list(b0=0.1,b1=0.00005,b2=0.00005,a=0.01)
    model.estimates <- stats::optim(params,scobit_loglike_cpp,gr=scobit_loglike_gr_cpp,method="BFGS",x1=A$rowmarg,x2=A$colmarg,y=A$value)
    pars <- c(model.estimates$par[1],model.estimates$par[2],model.estimates$par[3])
    probs <- 1-1/(1+exp(pars[1]+pars[2]*A$rowmarg+pars[3]*A$colmarg))^model.estimates$par[4]

  } else if (model == "lpm") {
    ### Linear probability model ###
    model.estimates <- stats::lm(formula = value ~ rowmarg + colmarg, data = A)
    probs <- as.vector(stats::predict(model.estimates, newdata = A, type = "response"))
    probs[probs < 0] <- 0 #Truncate out-of-bounds estimates
    probs[probs > 1] <- 1

  } else if (model == "chi2" || model == "rcn") {
    ### RCN model ('chi2' alias): (row degree * column degree)/(total edges) ###
    probs <- as.vector((A$rowmarg * A$colmarg)/sum(A$value))
    probs[probs < 0] <- 0 #Truncate out-of-bounds estimates
    probs[probs > 1] <- 1

  } else if (model == "curveball") {
    ### Curveball model ###
    # Accumulate, cell by cell, how often each randomized matrix has a 1;
    # the proportion over all trials is used as the cell probability.
    probs <- numeric(nrow(B) * ncol(B))
    for (trial in seq_len(trials)) {
      Bstar <- curveball(B)
      probs <- probs + as.vector(Bstar)
    }
    probs <- probs/trials

  } else if (model == "polytope") {
    ### Polytopes model ###
    probs <- as.vector(polytope(B))
  }

  #### Assemble Probabilities ####
  prob.mat <- matrix(probs, nrow = nrow(B), ncol = ncol(B))  #Probability matrix
  rows <- dim(prob.mat)[1]

  #### Compute Null Edge Weight Distributions Using Poisson Binomial RNA ####

  # seq_len() so that a zero-row input skips the loop instead of iterating 1, 0.
  for (i in seq_len(rows)){
    ### Compute prob.mat[i,]*prob.mat[j,] for each j ###
    prob.imat <- sweep(prob.mat, MARGIN = 2, prob.mat[i,], `*`)

    ### One data frame column per row j: observed weight P[i,j] and its probabilities ###
    observed <- t(P[i,])
    pp.cols <- as.data.frame(t(prob.imat))

    ### Find cdf, below or equal to value for negative, above or equal to value for positive ###
    ### Using RNA approximation ###
    negative <- as.array(mapply(rna, kk = as.data.frame(observed), pp = pp.cols))
    positive <- as.array(1 - mapply(rna, kk = as.data.frame(observed - 1), pp = pp.cols))

    ### Set values in Positive & Negative matrices ###
    Positive[i,] <- positive
    Negative[i,] <- negative
  } #end for i in rows
  rownames(Positive) <- rownames(B)
  colnames(Positive) <- rownames(B)
  rownames(Negative) <- rownames(B)
  colnames(Negative) <- rownames(B)

  ### Run Time ###
  run.time.end <- Sys.time()
  total.time <- round(difftime(run.time.end, run.time.start, units = "secs"), 2)

  #### Compile Summary ####
  # Skew as the third central moment over (n * sd^3), rounded like the other stats.
  skew <- function(x) {
    round(sum((x - mean(x))^3) / (length(x) * stats::sd(x)^3), 5)
  }

  summary.labels <- c("Input Class", "Model", "Method", "Number of Rows", "Mean of Row Sums", "SD of Row Sums", "Skew of Row Sums", "Number of Columns", "Mean of Column Sums", "SD of Column Sums", "Skew of Column Sums", "Running Time (secs)")
  summary.values <- c(input.class[1], "Stochastic Degree Sequence Model", model,
                      dim(B)[1], round(mean(row.sums), 5), round(stats::sd(row.sums), 5), skew(row.sums),
                      dim(B)[2], round(mean(col.sums), 5), round(stats::sd(col.sums), 5), skew(col.sums),
                      as.numeric(total.time))
  model.summary <- data.frame(summary.labels, summary.values, row.names = 1)
  colnames(model.summary) <- "Model Summary"

  #### Return Backbone Object ####
  bb <- list(positive = Positive, negative = Negative, summary = model.summary)
  class(bb) <- "backbone"
  return(bb)

} #end sdsm function
