% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/BayesSUR.R
\docType{package}
\name{BayesSUR}
\alias{BayesSUR}
\alias{BayesSUR-package}
\title{Fitting BayesSUR models}
\usage{
BayesSUR(
  data = NULL,
  Y,
  X,
  X_0 = NULL,
  covariancePrior = "HIW",
  gammaPrior = "hotspot",
  betaPrior = "independent",
  nIter = 10000,
  burnin = 5000,
  nChains = 2,
  outFilePath = "",
  gammaSampler = "bandit",
  gammaInit = "R",
  mrfG = NULL,
  standardize = TRUE,
  standardize.response = TRUE,
  maxThreads = 1,
  output_gamma = TRUE,
  output_beta = TRUE,
  output_Gy = TRUE,
  output_sigmaRho = TRUE,
  output_pi = TRUE,
  output_tail = TRUE,
  output_model_size = TRUE,
  output_model_visit = FALSE,
  output_CPO = FALSE,
  output_Y = TRUE,
  output_X = TRUE,
  hyperpar = list(),
  tmpFolder = "tmp/"
)
}
\arguments{
\item{data}{a numeric matrix with variables on the columns and observations 
on the rows, if arguments \code{Y} and \code{X} (and possibly \code{X_0}) 
are vectors. Can be \code{NULL} if arguments \code{Y} and \code{X} (and 
possibly \code{X_0}) are numeric matrices}

\item{Y, X}{vectors of indices (with respect to the data matrix) for the 
outcomes (\code{Y}) and the predictors to select (\code{X}) respectively; 
if the \code{data} argument is \code{NULL}, these need to be numeric 
matrices containing the data instead, with variables on the columns and 
observations on the rows}

\item{X_0}{vectors of indices (with respect to the data matrix) for the 
fixed predictors that are not selected, i.e. always included in the model; 
if the data argument is not provided, this needs to be a numeric matrix 
containing the data instead, with variables on the columns and observations 
on the rows}

\item{covariancePrior}{string indicating the prior for the covariance \eqn{C}; 
it has to be either \code{HIW} for the hyper-inverse-Wishart (which will 
result in a sparse covariance matrix), \code{IW} for the inverse-Wishart 
prior (dense covariance) or \code{IG} for independent inverse-Gamma on all 
the diagonal elements and 0 otherwise. See the details for the model 
specification}

\item{gammaPrior}{string indicating the gamma prior to use, either 
\code{hotspot} (default) for the Hotspot prior of Bottolo (2011), \code{MRF} 
for the Markov Random Field prior or \code{hierarchical} for a simpler 
hierarchical prior. See the details for the model specification}

\item{betaPrior}{string indicating the prior for regression coefficients; it 
has to be either \code{independent} for independent spike-and-slab priors 
(only slab part for \code{X_0} if specified), or \code{reGroup} for weakly 
normal priors for mandatory variables (random effects) and spike-and-slab 
priors for other variables of Zhao (2021b)}

\item{nIter}{number of iterations for the MCMC procedure. Default is 10000}

\item{burnin}{number of iterations to discard at the start of the chain. 
Default is 5000}

\item{nChains}{number of parallel tempered chains to run (default 2). The 
temperature is adapted during the burnin phase}

\item{outFilePath}{path to where the output files are to be written}

\item{gammaSampler}{string indicating the type of sampler for gamma, either 
\code{bandit} for the Thompson sampling inspired sampler or \code{MC3} for 
the usual MC^3 sampler. See Russo et al. (2018) or Madigan and York (1995) 
for details}

\item{gammaInit}{gamma initialisation to either all-zeros (\code{0}), all 
ones (\code{1}), MLE-informed (\code{MLE}) or (default) randomly (\code{R})}

\item{mrfG}{either a matrix or a path to the file containing (the edge list 
of) the G matrix for the MRF prior on gamma (if necessary)}

\item{standardize}{logical flag for X variable standardization. Default is 
\code{standardize=TRUE}. Coefficients are returned on the standardized scale}

\item{standardize.response}{logical flag for Y standardization. Default is 
\code{standardize.response=TRUE}}

\item{maxThreads}{maximum threads used for parallelization. Default is 1. 
Reproducibility of results with \code{set.seed()} is only guaranteed if 
\code{maxThreads=1}}

\item{output_gamma}{allow (\code{TRUE}) or suppress (\code{FALSE}) the 
output for gamma. See the return value below for more information}

\item{output_beta}{allow (\code{TRUE}) or suppress (\code{FALSE}) the output 
for beta. See the return value below for more information}

\item{output_Gy}{allow (\code{TRUE}) or suppress (\code{FALSE}) the output 
for Gy. See the return value below for more information}

\item{output_sigmaRho}{allow (\code{TRUE}) or suppress (\code{FALSE}) the 
output for sigmaRho. See the return value below for more information}

\item{output_pi}{allow (\code{TRUE}) or suppress (\code{FALSE}) the output 
for pi. See the return value below for more information}

\item{output_tail}{allow (\code{TRUE}) or suppress (\code{FALSE}) the output 
for tail (hotspot tail probability). See the return value below for more 
information}

\item{output_model_size}{allow (\code{TRUE}) or suppress (\code{FALSE}) the 
output for model_size. See the return value below for more information}

\item{output_model_visit}{allow (\code{TRUE}) or suppress (\code{FALSE}) the 
output for all visited models over the MCMC iterations. Default is 
\code{FALSE}. See the return value below for more information}

\item{output_CPO}{allow (\code{TRUE}) or suppress (\code{FALSE}) the output 
for (scaled) conditional predictive ordinates (\code{*_CPO_out.txt}),
CPO with joint posterior predictive of the response variables 
(\code{*_CPOsumy_out.txt}) and widely applicable information criterion 
(\code{*_WAIC_out.txt}). See the return value below for more information}

\item{output_Y}{allow (\code{TRUE}) or suppress (\code{FALSE}) the output 
for responses dataset Y}

\item{output_X}{allow (\code{TRUE}) or suppress (\code{FALSE}) the output 
for predictors dataset X}

\item{hyperpar}{a list of named hyperparameters to use instead of the default 
values. Valid names are mrf_d, mrf_e, a_sigma, b_sigma, a_tau, b_tau, nu, 
a_eta, b_eta, a_o, b_o, a_pi, b_pi, a_w and b_w. Their default values are 
a_w=2, b_w=5, a_omega=2, b_omega=1, a_o=2, b_o=p-2, a_pi=2, b_pi=1, nu=s+2, 
a_tau=0.1, b_tau=10, a_eta=0.1, b_eta=1, a_sigma=1, b_sigma=1, mrf_d=-3 and 
mrf_e=0.03. See the vignette for more information}

\item{tmpFolder}{the path to a temporary folder where intermediate data 
files are stored (will be erased at the end of the chain). It is specified 
relative to \code{outFilePath}}
}
\value{
An object of class \code{BayesSUR} is saved as 
\code{obj_BayesSUR.RData} in the output file, including the following 
components:
\itemize{
\item status - the running status
\item input - a list of all input parameters by the user
\item output - a list of all the output filenames:
\itemize{
\item "\code{*_logP_out.txt}" - each row contains the log-likelihoods of the parameters at the \eqn{1000t}-th iteration, i.e., Tau, Eta, JunctionTree, SigmaRho, O, Pi, Gamma, W, Beta and the data conditional log-likelihood, depending on the model.
\item "\code{*_gamma_out.txt}" - posterior mean of the latent indicator matrix.
\item "\code{*_pi_out.txt}" - posterior mean of the predictor effects (propensity) by decomposing the probability of the latent indicator.
\item "\code{*_hotspot_tail_p_out.txt}" - posterior mean of the hotspot tail probability. Only available for the hotspot prior on the gamma.
\item "\code{*_beta_out.txt}" - posterior mean of the coefficients matrix.
\item "\code{*_Gy_out.txt}" - posterior mean of the response graph. Only available for the HIW prior on the covariance.
\item "\code{*_sigmaRho_out.txt}" - posterior mean of the transformed parameters. Not available for the IG prior on the covariance.
\item "\code{*_model_size_out.txt}" - each row contains the model sizes of the multiple response variables at the \eqn{1000t}-th iteration.
\item "\code{*_model_visit_gy_out.txt}" - contains each row for the nonzero indices of the vectorized estimated graph matrix for each iteration.
\item "\code{*_model_visit_gamma_out.txt}" - contains each row for the nonzero indices of the vectorized estimated gamma matrix for each iteration.
\item "\code{*_CPO_out.txt}" - the (scaled) conditional predictive ordinates (CPO).
\item "\code{*_CPOsumy_out.txt}" - the (scaled) conditional predictive ordinates (CPO) with joint posterior predictive of the response variables.
\item "\code{*_WAIC_out.txt}" - the widely applicable information criterion (WAIC).
\item "\code{*_Y.txt}" - responses dataset.
\item "\code{*_X.txt}" - predictors dataset.
\item "\code{*_X0.txt}" - fixed predictors dataset.
}
\item call - the matched call.
}
}
\description{
Main function of the package. Fits a range of models introduced in the 
package vignette \code{BayesSUR.pdf}. Returns an object of S3 class 
\code{BayesSUR}. There are three options for the prior on the residual 
covariance matrix (i.e., independent inverse-Gamma, inverse-Wishart and 
hyper-inverse Wishart) and three options for the prior on the latent 
indicator variable (i.e., independent Bernoulli, hotspot and Markov random 
field). So there are nine models in total. See details for their combinations.
}
\details{
The arguments \code{covariancePrior} and \code{gammaPrior} specify 
the model HRR, dSUR or SSUR with different gamma priors. Let 
\eqn{\gamma_{jk}} be the latent indicator variable of each coefficient and 
\eqn{C} be the covariance matrix of the response variables. The nine models 
specified through the arguments \code{covariancePrior} and 
\code{gammaPrior} are as follows.
\tabular{cccc}{
                \tab \eqn{\gamma_{jk}}~Bernoulli \tab \eqn{\gamma_{jk}}~hotspot \tab \eqn{\gamma}~MRF \cr
  \eqn{C}~indep \tab HRR-B                       \tab HRR-H                     \tab HRR-M           \cr
  \eqn{C}~IW   \tab dSUR-B                      \tab dSUR-H                    \tab dSUR-M          \cr
  \eqn{C}~HIW   \tab SSUR-B                      \tab SSUR-H                    \tab SSUR-M
}
}
\examples{
data("exampleEQTL", package = "BayesSUR")
hyperpar <- list(a_w = 2, b_w = 5)
set.seed(9173)
fit <- BayesSUR(
  Y = exampleEQTL[["blockList"]][[1]],
  X = exampleEQTL[["blockList"]][[2]],
  data = exampleEQTL[["data"]], outFilePath = tempdir(),
  nIter = 5, burnin = 0, nChains = 1, gammaPrior = "hotspot",
  hyperpar = hyperpar, tmpFolder = "tmp/", output_CPO = TRUE
)

## check output
# show the summary information
summary(fit)

# show the estimated beta, gamma and graph of responses Gy
plot(fit, estimator = c("beta", "gamma", "Gy"), type = "heatmap")

\dontrun{
## Set up temporary work directory for saving a pdf figure
# td <- tempdir()
# oldwd <- getwd()
# setwd(td)

## Produce authentic math formulas in the graph
# plot(fit, estimator = c("beta", "gamma", "Gy"), type = "heatmap", fig.tex = TRUE)
# system(paste(getOption("pdfviewer"), "ParamEstimator.pdf"))
# setwd(oldwd)
}

}
\references{
Russo D, Van Roy B, Kazerouni A, Osband I, Wen Z (2018). \emph{A tutorial on Thompson sampling.} Foundations and Trends in Machine Learning, 11: 1-96.

Madigan D, York J (1995). \emph{Bayesian graphical models for discrete data.} International Statistical Review, 63: 215–232.

Bottolo L, Banterle M, Richardson S, Ala-Korpela M, Jarvelin MR, Lewin A (2020). \emph{A computationally efficient Bayesian seemingly unrelated regressions model for high-dimensional quantitative trait loci discovery.} Journal of the Royal Statistical Society: Series C, 70: 886-908.

Zhao Z, Banterle M, Bottolo L, Richardson S, Lewin A, Zucknick M (2021a). \emph{BayesSUR: An R package for high-dimensional multivariate Bayesian variable and covariance selection in linear regression.} Journal of Statistical Software, 100: 1–32.

Zhao Z, Banterle M, Lewin A, Zucknick M (2021b). \emph{Structured Bayesian variable selection for multiple correlated response variables and high-dimensional predictors.} arXiv:2101.05899.
}
