% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gpboost.R
\name{gpb_shared_params}
\alias{gpb_shared_params}
\title{Shared parameter docs}
\arguments{
\item{callbacks}{List of callback functions that are applied at each iteration.}

\item{data}{a \code{gpb.Dataset} object, used for training. Some functions, such as \code{\link{gpb.cv}},
may allow you to pass other types of data like \code{matrix} and then separately supply
\code{label} as a keyword argument.}

\item{early_stopping_rounds}{int. Activates early stopping. Requires at least one validation dataset
and one metric. When this parameter is non-null,
training will stop if the evaluation of any metric on any validation set
fails to improve for \code{early_stopping_rounds} consecutive boosting rounds.
If training stops early, the returned model will have attribute \code{best_iter}
set to the iteration number of the best iteration.}

\item{eval}{evaluation function(s). This can be a character vector, function, or list with a mixture of
            strings and functions.

            \itemize{
                \item{\bold{a. character vector}:
                    If you provide a character vector to this argument, it should contain strings with valid
                    evaluation metrics.
                    See \href{https://github.com/fabsig/GPBoost/blob/master/docs/Parameters.rst#metric-parameters}{
                    the "metric" section of the parameter documentation}
                    for a list of valid metrics.
                }
                \item{\bold{b. function}:
                     You can provide a custom evaluation function. This
                     should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named
                     list with three elements:
                     \itemize{
                         \item{\code{name}: A string with the name of the metric, used for printing
                             and storing results.
                         }
                         \item{\code{value}: A single number indicating the value of the metric for the
                             given predictions and true values
                         }
                         \item{
                             \code{higher_better}: A boolean indicating whether higher values indicate a better fit.
                             For example, this would be \code{FALSE} for metrics like MAE or RMSE.
                         }
                     }
                }
                \item{\bold{c. list}:
                    If a list is given, it should only contain character vectors and functions.
                    These should follow the requirements from the descriptions above.
                }
            }}

\item{eval_freq}{evaluation output frequency, only has an effect when verbose > 0}

\item{valids}{a list of \code{gpb.Dataset} objects, used for validation}

\item{record}{Boolean, TRUE will record iteration message to \code{booster$record_evals}}

\item{colnames}{feature names, if not null, will use this to overwrite the names in dataset}

\item{categorical_feature}{categorical features. This can either be a character vector of feature
names or an integer vector with the indices of the features (e.g.
\code{c(1L, 10L)} to say "the first and tenth columns").}

\item{init_model}{path of model file or \code{gpb.Booster} object, will continue training from this model}

\item{nrounds}{number of boosting iterations (= number of trees). This is the most important tuning parameter for boosting}

\item{obj}{(character) The distribution of the response variable (=label) conditional on fixed and random effects.
This only needs to be set when doing independent boosting without random effects / Gaussian processes.}

\item{params}{list of "tuning" parameters. 
See \href{https://github.com/fabsig/GPBoost/blob/master/docs/Parameters.rst}{the parameter documentation} for more information. 
A few key parameters:
           \itemize{
               \item{\code{learning_rate}: The learning rate, also called shrinkage or damping parameter 
               (default = 0.1). An important tuning parameter for boosting. Lower values usually 
               lead to higher predictive accuracy but more boosting iterations are needed }
               \item{\code{num_leaves}: Number of leaves in a tree. Tuning parameter for 
               tree-boosting (default = 31)}
               \item{\code{max_depth}: Maximal depth of a tree. Tuning parameter for tree-boosting (default = no limit)}
               \item{\code{min_data_in_leaf}: Minimal number of samples per leaf. Tuning parameter for 
               tree-boosting (default = 20)}
               \item{\code{lambda_l2}: L2 regularization (default = 0)}
               \item{\code{lambda_l1}: L1 regularization (default = 0)}
               \item{\code{max_bin}: Maximal number of bins that feature values will be bucketed in (default = 255)}
               \item{\code{reuse_learning_rates_gp_model}: If TRUE, the learning rates for the covariance and potential 
               auxiliary parameters are kept at the values from the previous boosting iteration and 
               not re-initialized when optimizing them. Applies only to Gaussian process boosting (GPBoost algorithm) (default = FALSE)}
               \item{\code{train_gp_model_cov_pars}: If TRUE, the covariance parameters of the Gaussian process 
               are estimated in every boosting iteration, 
               otherwise the gp_model parameters are not estimated. In the latter case, you need to 
               either estimate them beforehand or provide the values via 
               the 'init_cov_pars' parameter when creating the gp_model (default = TRUE).}
               \item{\code{use_gp_model_for_validation}: If TRUE, the Gaussian process is also used 
               (in addition to the tree model) for calculating predictions on the validation data 
               (default = TRUE)}
               \item{\code{leaves_newton_update}: Set this to TRUE to do a Newton update step for the tree leaves 
               after the gradient step. Applies only to Gaussian process boosting (GPBoost algorithm) (default = FALSE)}
               \item{\code{num_threads}: Number of threads. For the best speed, set this to
                            the number of real CPU cores (\code{parallel::detectCores(logical = FALSE)}),
                            not the number of threads (most CPUs use hyper-threading to generate 2 threads
                            per CPU core).}
           }}

\item{verbose}{verbosity for output. If <= 0, this will also disable the printing of evaluation results during training}

\item{gp_model}{A \code{GPModel} object that contains the random effects (Gaussian process and / or grouped random effects) model}

\item{use_gp_model_for_validation}{Boolean. If TRUE, the \code{gp_model} 
(Gaussian process and/or random effects) is also used (in addition to the tree model) for calculating 
predictions on the validation data. If FALSE, the \code{gp_model} (random effects part) is ignored 
for making predictions and only the tree ensemble is used for making predictions for calculating the validation / test error.}

\item{train_gp_model_cov_pars}{Boolean. If TRUE, the covariance parameters 
of the \code{gp_model} (Gaussian process and/or random effects) are estimated in every 
boosting iteration, otherwise the \code{gp_model} parameters are not estimated. 
In the latter case, you need to either estimate them beforehand or provide the values via 
the \code{init_cov_pars} parameter when creating the \code{gp_model}}

\item{reuse_learning_rates_gp_model}{Boolean. If TRUE, the learning rates for the covariance and potential 
auxiliary parameters are kept at the values from the previous boosting iteration and 
not re-initialized when optimizing them. Applies only to Gaussian process boosting (GPBoost algorithm)}
}
\description{
Parameter docs shared by \code{gpb.train}, \code{gpb.cv}, and \code{gpboost}
}
\section{Early Stopping}{


         "early stopping" refers to stopping the training process if the model's performance on a given
         validation set does not improve for several consecutive iterations.

         If multiple arguments are given to \code{eval}, their order will be preserved. If you enable
         early stopping by setting \code{early_stopping_rounds} in \code{params}, by default all
         metrics will be considered for early stopping.

         If you want to only consider the first metric for early stopping, pass
         \code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric}
         in \code{params}, that metric will be considered the "first" one. If you omit \code{metric},
         a default metric will be used based on your choice for the parameter \code{obj} (keyword argument)
         or \code{objective} (passed into \code{params}).
}

\keyword{internal}
