#' Diversity Metrics of Simulated and Original Data
#'
#' Estimates the average number of species and the Simpson diversity index per sampling unit,
#' as well as the total multivariate dispersion of both the original (pilot) and simulated datasets.
#'
#' @param data Data frame with species as columns and samples as rows. The first column should indicate the site to which the sample belongs, regardless of whether a single site was sampled.
#' @param dat.sim List of simulated data sets generated by \code{\link{simdata}}.
#' @param Par List of parameters generated by \code{\link{assempar}}.
#' @param transformation Mathematical transformation to reduce the weight of dominant species: one of "square root", "fourth root", "Log (X+1)", "P/A", or "none".
#' @param method Dissimilarity metric used for multivariate dispersion, passed to \code{\link[vegan]{vegdist}}.
#'
#' @details
#' The quality of the simulated data sets is evaluated by statistical similarity to the pilot data.
#' This includes: (i) the average number of species per sampling unit, (ii) the average Simpson diversity index,
#' and (iii) the multivariate dispersion (MVD), defined as the average dissimilarity of each sampling unit to the
#' group centroid in the dissimilarity space (Anderson 2006). For simulated datasets, the per-dataset mean and
#' standard deviation of (i) and (ii) are averaged across all simulated data sets, and the range (minimum and
#' maximum) of the MVD across simulated data sets is used to describe its variability.
#'
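#' @return A data frame with two rows (\code{"Pilot data"} and \code{"Simulated data"}) and six columns:
#' the mean and standard deviation of species richness (\code{S.mean}, \code{S.sd}) and of the Simpson
#' diversity index (\code{1-D.mean}, \code{1-D.sd}) per sampling unit, and the minimum and maximum MVD
#' (\code{MVDmin}, \code{MVDmax}). For the pilot data both MVD columns hold the same single value; for the
#' simulated data they hold the range of MVD across the simulated data sets.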
#'
#' @note It is desirable that simulated data resemble observed data in species richness and diversity per sampling unit.
#'
#' @references
#' Anderson, M. J. (2006). Distance-based tests for homogeneity of multivariate dispersions. Biometrics, 62, 245–253.
#'
#' Guerra-Castro, E.J., Cajas, J.C., Simões, N., Cruz-Motta, J.J., & Mascaró, M. (2021). SSP: an R package to estimate sampling effort in studies of ecological communities. Ecography 44(4), 561-573. doi: \doi{10.1111/ecog.05284}
#'
#' @seealso \code{\link[vegan]{vegdist}}, \code{\link[vegan]{diversity}}
#'
#' @examples
#' ## Single site: micromollusk from Cayo Nuevo (Yucatan, Mexico)
#' data(micromollusk)
#' par.mic <- assempar(data = micromollusk, type = "P/A", Sest.method = "average")
#' sim.mic <- simdata(par.mic, cases = 2, N = 10, sites = 1)
#' qua.mic <- datquality(data = micromollusk, dat.sim = sim.mic, Par = par.mic,
#'                       transformation = "none", method = "jaccard")
#' qua.mic
#'
#' ## See the full multi-site workflow in the vignette
#'
#' @importFrom vegan vegdist diversity specnumber
#' @export


datquality <- function(data, dat.sim, Par, transformation, method) {
    # Compare a pilot data set with its simulated counterparts in terms of
    # species richness, Simpson diversity and multivariate dispersion (MVD).
    #
    # data           data frame; first column is the site label, the remaining
    #                columns are species.
    # dat.sim        non-empty list of simulated data frames whose first
    #                Par$Sest columns are species.
    # Par            list with element Sest (number of species columns to use).
    # transformation one of "square root", "fourth root", "Log (X+1)", "P/A",
    #                "none".
    # method         dissimilarity name forwarded to vegdist().
    #
    # Returns a 2 x 6 data frame (rows "Pilot data", "Simulated data").

    # Fail early with a readable message instead of the "object 'D' not found"
    # error that an unsupported transformation used to trigger deep inside MVD.
    valid.trans <- c("square root", "fourth root", "Log (X+1)", "P/A", "none")
    if (!is.character(transformation) || length(transformation) != 1L ||
        !transformation %in% valid.trans) {
        stop("'transformation' must be one of: ",
             paste0("\"", valid.trans, "\"", collapse = ", "),
             call. = FALSE)
    }
    if (length(dat.sim) == 0L) {
        stop("'dat.sim' must contain at least one simulated data set",
             call. = FALSE)
    }
    if (ncol(data) < 2L) {
        stop("'data' must have a site column followed by at least one ",
             "species column", call. = FALSE)
    }

    # Definition of preliminary functions

    # Multivariate dispersion: sum of squared pairwise dissimilarities divided
    # by n, then by (n - 1), where n is the number of sampling units.
    MVD <- function(Y, transformation, method) {
        Y <- as.data.frame(Y)
        # A constant dummy species is appended before transforming, so it is
        # transformed together with the real species columns.
        Y$dummy <- 1
        Y.t <- switch(transformation,
                      "square root" = Y^0.5,
                      "fourth root" = Y^0.25,
                      "Log (X+1)" = log(Y + 1),
                      "P/A" = 1 * (Y > 0),
                      "none" = Y)
        D <- vegdist(Y.t, method = method)

        # D is a dist object (each pair counted once); n is a plain scalar
        # rather than the length-2 dim() vector used previously.
        n <- nrow(Y)
        ss <- sum(D^2) / n
        ss / (n - 1)
    }

    # Mean and standard deviation of species richness per sampling unit.
    alfa <- function(Y) {
        richness <- specnumber(Y)
        c(mean(richness), sd(richness))
    }

    # Mean and standard deviation of the Simpson index per sampling unit.
    simpson <- function(Y) {
        simp <- diversity(Y, index = "simpson")
        c(mean(simp), sd(simp))
    }

    # All five metrics for one community table, in output-column order.
    metrics <- function(Y) {
        c(alfa(Y), simpson(Y),
          MVD(Y, transformation = transformation, method = method))
    }

    # data frame to host results
    parameters <- as.data.frame(matrix(nrow = 2, ncol = 6))
    rownames(parameters) <- c("Pilot data", "Simulated data")
    colnames(parameters) <- c("S.mean", "S.sd", "1-D.mean", "1-D.sd",
                              "MVDmin", "MVDmax")

    # Diversity metrics of pilot data: drop the site column; drop = FALSE keeps
    # a data frame even when a single species column remains.
    pilot <- metrics(data[, -1, drop = FALSE])
    parameters[1, 1:5] <- pilot
    # The pilot data yield a single MVD, reported as both minimum and maximum.
    parameters[1, 6] <- pilot[5]

    # Diversity metrics of simulated data: one column of five metrics per
    # simulated data set (5 x length(dat.sim) matrix).
    sim.stats <- vapply(dat.sim, function(sim) {
        metrics(sim[, 1:Par$Sest, drop = FALSE])
    }, numeric(5))

    # drop = FALSE keeps the matrix shape when only one data set was simulated;
    # the former apply() call failed in that case.
    parameters[2, 1:4] <- rowMeans(sim.stats[1:4, , drop = FALSE])
    parameters[2, 5:6] <- range(sim.stats[5, ])

    # End of estimations
    parameters
}
