#' @title Out of Range Imputation
#'
#' @usage NULL
#' @name mlr_pipeops_imputeoor
#' @format [`R6Class`][R6::R6Class] object inheriting from [`PipeOpImpute`]/[`PipeOp`].
#'
#' @description
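#' Impute `factor` and `ordered` features by adding a new level `".MISSING"`; missing values in `character` features are imputed with the string `".MISSING"` as well.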
#'
#' Impute numeric, integer, POSIXct or Date features by constant values shifted below the minimum or above the maximum by
#' using \eqn{min(x) - offset - multiplier * diff(range(x))} or
#' \eqn{max(x) + offset + multiplier * diff(range(x))}.
#'
#' This type of imputation is especially sensible in the context of tree-based methods, see also
#' Ding & Simonoff (2010).
#'
#' [`Learner`][mlr3::Learner]s expect input [`Task`][mlr3::Task]s to have the same `factor` (or `ordered`) levels during
#' training as well as prediction. This `PipeOp` modifies the levels of `factor` and `ordered` features,
#' and since it may occur that a `factor` or `ordered` feature contains missing values only during prediction, but not
#' during training, the output `Task` could also have different levels during the two stages.
#'
#' To avoid problems with the `Learner`s' expectation, controlling the `PipeOp`'s handling of this edge-case is necessary.
#' For this, use the `create_empty_level` hyperparameter inherited from [`PipeOpImpute`].\cr
#' If `create_empty_level` is set to `TRUE`, then an unseen level `".MISSING"` is added to the feature during
#' training and missing values are imputed as `".MISSING"` during prediction.
#' However, empty factor levels during training can be a problem for many [`Learners`][mlr3::Learner].\cr
#' If `create_empty_level` is set to `FALSE`, then no empty level is introduced during training, but columns that
#' have missing values only during prediction will *not* be imputed. This is why it may still be necessary to use
#' [`po("imputesample", affect_columns = selector_type(types = c("factor", "ordered")))`][mlr_pipeops_imputesample]
#' (or another imputation method) after this imputation method.
#' Note that setting `create_empty_level` to `FALSE` is the same as setting it to `TRUE` and using [`PipeOpFixFactors`]
#' after this `PipeOp`.
#'
#' @section Construction:
#' ```
#' PipeOpImputeOOR$new(id = "imputeoor", param_vals = list())
#' ```
#'
#' * `id` :: `character(1)`\cr
#'   Identifier of resulting object, default `"imputeoor"`.
#' * `param_vals` :: named `list`\cr
#'   List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default `list()`.
#'
#' @section Input and Output Channels:
#' Input and output channels are inherited from [`PipeOpImpute`].
#'
#' The output is the input [`Task`][mlr3::Task] with all affected features having missing values imputed as described above.
#'
#' @section State:
#' The `$state` is a named `list` with the `$state` elements inherited from [`PipeOpImpute`].
#'
#' The `$state$model` contains either `".MISSING"` used for `character` and `factor` (also
#' `ordered`) features or `numeric(1)` indicating the constant value used for imputation of
#' `integer`, `numeric`, `POSIXct` or `Date` features.
#'
#' @section Parameters:
#' The parameters are the parameters inherited from [`PipeOpImpute`], as well as:
#' * `min` :: `logical(1)` \cr
#'   Should `integer`, `numeric`, `POSIXct` and `Date` features be shifted below the minimum? Initialized to `TRUE`. If `FALSE`
#'   they are shifted above the maximum. See also the description above.
#' * `offset` :: `numeric(1)` \cr
#'   Numerical non-negative offset as used in the description above for `integer`, `numeric`, `POSIXct` and `Date`
#'   features. Initialized to `1`.
#' * `multiplier` :: `numeric(1)` \cr
#'   Numerical non-negative multiplier as used in the description above for `integer`, `numeric`, `POSIXct` and `Date`
#'   features. Initialized to `1`.
#'
#' @section Internals:
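#' Adds an explicit new level to `factor` and `ordered` features, but not to `character` features.
#' For `integer`, `numeric`, `POSIXct` and `Date` features the imputation value is computed from the `range()` of the
#' non-missing values, converted to `numeric`; for `integer` features the result is rounded and clamped to the representable `integer` range.
#' `integer` and `numeric` features that are entirely `NA` are imputed as `0`. `factor` and `ordered` features that are
#' entirely `NA` are imputed as `".MISSING"`, `character` features that are entirely `NA` are imputed as `""`.
#' For `POSIXct` and `Date` features the value `0` is transformed into the respective data type.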
#'
#' @section Fields:
#' Only fields inherited from [`PipeOp`].
#'
#' @section Methods:
#' Only methods inherited from [`PipeOpImpute`]/[`PipeOp`].
#'
#' @references
#' `r format_bib("ding_2010")`
#'
#' @examples
#' library("mlr3")
#' set.seed(2409)
#' data = tsk("pima")$data()
#' data$y = factor(c(NA, sample(letters, size = 766, replace = TRUE), NA))
#' data$z = ordered(c(NA, sample(1:10, size = 767, replace = TRUE)))
#' task = TaskClassif$new("task", backend = data, target = "diabetes")
#' task$missings()
#' po = po("imputeoor")
#' new_task = po$train(list(task = task))[[1]]
#' new_task$missings()
#' new_task$data()
#'
#' # recommended use when missing values are expected during prediction on
#' # factor columns that had no missing values during training
#' gr = po("imputeoor", create_empty_level = FALSE) %>>%
#'   po("imputesample", affect_columns = selector_type(types = c("factor", "ordered")))
#' t1 = as_task_classif(data.frame(l = as.ordered(letters[1:3]), t = letters[1:3]), target = "t")
#' t2 = as_task_classif(data.frame(l = as.ordered(c("a", NA, NA)), t = letters[1:3]), target = "t")
#' gr$train(t1)[[1]]$data()
#'
#' # missing values during prediction are sampled randomly
#' gr$predict(t2)[[1]]$data()
#' @family PipeOps
#' @family Imputation PipeOps
#' @template seealso_pipeopslist
#' @include PipeOpImpute.R
#' @export
PipeOpImputeOOR = R6Class("PipeOpImputeOOR",
  inherit = PipeOpImpute,
  public = list(
    # Constructor.
    #
    # Declares the hyperparameters `min`, `offset` and `multiplier` and passes them, together with the list of
    # supported feature types, on to the `PipeOpImpute` constructor.
    #
    # * `id`: identifier of the resulting object, default "imputeoor".
    # * `param_vals`: named list of hyperparameter settings, default `list()`.
    initialize = function(id = "imputeoor", param_vals = list()) {
      # Named `param_set` (not `ps`) so the local variable does not shadow the `ps()` constructor function.
      param_set = ps(
        min = p_lgl(init = TRUE, tags = c("train", "predict")),
        offset = p_dbl(init = 1, lower = 0, tags = c("train", "predict")),
        multiplier = p_dbl(init = 1, lower = 0, tags = c("train", "predict"))
      )
      # this is one of the few imputers that handles 'character' features!
      super$initialize(id, param_set = param_set, param_vals = param_vals, empty_level_control = "param",
        feature_types = c("character", "factor", "integer", "numeric", "ordered", "POSIXct", "Date"))
    }
  ),
  private = list(
    # Compute the imputation value for a single feature.
    #
    # * `feature`: the feature column.
    # * `type`: the feature type as a string.
    # * `context`: unused here.
    #
    # Returns ".MISSING" for `factor`, `ordered` and `character` features. For all other types returns a single
    # number lying outside the observed range: `min - offset - multiplier * (max - min)` if the `min` hyperparameter
    # is `TRUE`, otherwise `max + offset + multiplier * (max - min)`. The value is an `integer` for `integer`
    # features and a plain `numeric` otherwise.
    .train_imputer = function(feature, type, context) {
      if (type %in% c("factor", "ordered", "character")) {
        return(".MISSING")  # early exit
      }
      pv = self$param_set$values
      # as.numeric avoids integer overflow in the arithmetic below; it also drops any POSIXct / Date class,
      # so everything from here on is plain double arithmetic.
      featrange = as.numeric(range(feature, na.rm = TRUE))
      lower = featrange[[1L]]
      upper = featrange[[2L]]
      shift = pv$offset + pv$multiplier * (upper - lower)
      oor = if (pv$min) {
        lower - shift
      } else {
        upper + shift
      }

      if (type == "integer") {
        oor = round(oor)
        # make sure we get an integer. this is faster than pmin(pmax(...)).
        # The lower bound is -integer.max because the value below that is reserved for NA_integer_.
        oor[oor > .Machine$integer.max] = .Machine$integer.max
        oor[oor < -.Machine$integer.max] = -.Machine$integer.max
        oor = as.integer(oor)
      }

      oor
    },

    # Fallback imputation value, selected by `type` only (`feature` and `context` are not used).
    # The roxygen documentation of this class describes these as the values used for features that are entirely `NA`.
    #
    # The POSIXct / Date epoch values are built with `structure()` instead of `as.POSIXct(0)` / `as.Date(0)`:
    # calling those converters on a number without `origin` is an error before R 4.3.0. The objects constructed
    # here are the same ones that R >= 4.3.0 returns for `as.POSIXct(0)` and `as.Date(0)`.
    .train_nullmodel = function(feature, type, context) {
      switch(type,
        factor = ".MISSING",
        integer = 0L,
        # NOTE(review): "logical" is not among the `feature_types` declared in `initialize()`; kept unchanged.
        logical = c(TRUE, FALSE),
        numeric = 0,
        ordered = ".MISSING",
        character = "",
        POSIXct = structure(0, class = c("POSIXct", "POSIXt"), tzone = ""),
        Date = structure(0, class = "Date")
      )
    }
  )
)

# Register the class in `mlr_pipeops` under the key "imputeoor"; the roxygen examples of this class
# construct it via `po("imputeoor")`.
mlr_pipeops$add("imputeoor", PipeOpImputeOOR)

