#' Filter National Forest Inventory Data 
#' 
#' @description
#' The `filter_nfi()` function provides hierarchical and non-hierarchical filtering approaches for the complex structure of National Forest Inventory data based on user-provided condition expressions (`expr_texts`).
#' This function enables effective filtering while maintaining the relationship between plot data (parent data) and other data (child data).
#' 
#' @details
#' This function parses expressions targeting specific columns in the dataframes within the provided list.
#' 
#' Expression requirements:
#' - Each expression in `expr_texts` must start with a valid dataframe name in the list (e.g., "plot", "tree", "cwd") and combine the dataframe name, dollar sign, and condition (e.g. c("plot$OWN_CD == '5'")).
#' - Separate expressions must be provided for each dataframe being filtered (e.g. c("plot$OWN_CD == '5'", "tree$FAMILY == 'Pinaceae' | tree$WDY_PLNTS_TYP_CD == '1'")).
#'
#' Hierarchical filtering (`hier = TRUE`):
#' - Filters applied to plot table affect all connected child data (tree, CWD, stump, etc.).
#' - Filters applied to child data only operate within that dataframe and do not affect other dataframes.
#' - Example: If only coniferous forest subplots are selected in the plot table, the child data will retain only the tree, CWD, stump, etc., associated with those subplots.
#'
#' Non-hierarchical filtering (`hier = FALSE`):
#' - Filters applied to the parent dataframe (plot table) do not affect the child data.
#' - Filtering results from child data affect all other parent and child data.
#' - Example: If only certain species are selected in the tree table, the plot table, CWD table, stump table, etc., will be filtered based on the remaining subplots from this selection.
#'
#' @param data : A `list` generated by \code{\link{read_nfi}}. Each dataframe should have a 'SUB_PLOT' column.
#' @param expr_texts : A character vector; expressions specifying filtering conditions. Each expression should combine the dataframe name, dollar sign, and condition, with separate expressions for each data frame (e.g., c("plot$OWN_CD == '5'", "tree$FAMILY == 'Pinaceae'")). Conditions must be valid R expressions.
#' @param hier : A logical flag (default TRUE); indicates whether to apply hierarchical filtering (`TRUE`) or non-hierarchical filtering (`FALSE`). Hierarchical filtering ensures that connected dataframes are filtered based on the results of filters applied to the parent frame.
#' 
#' @return A `list` of dataframes.
#' 
#' @examples
#' 
#' data("nfi_donghae")
#' 
#' # Applying hierarchical filtering to select only privately owned forest subplots.
#' # Ensures all child tables' subplots match the filtered plot table's subplots.
#' # Expected results after filtering:
#' # all(nfi_donghae$tree$SUB_PLOT %in% nfi_donghae$plot$SUB_PLOT)  result: TRUE
#' # all(nfi_donghae$plot$SUB_PLOT %in% nfi_donghae$tree$SUB_PLOT)  result: FALSE
#' nfi_donghae <- filter_nfi(nfi_donghae, c("plot$OWN_CD == '5'"), hier = TRUE)
#' 
#' \donttest{ 
#' # Non-hierarchical filtering to select only privately owned forest subplots.
#' # Child tables remain unfiltered and may not correspond to the plot table subplots.
#' # Expected results after filtering:
#' # all(nfi_donghae$tree$SUB_PLOT %in% nfi_donghae$plot$SUB_PLOT)  result: FALSE
#' nfi_donghae <- filter_nfi(nfi_donghae, c("plot$OWN_CD == '5'"), hier = FALSE)
#' 
#' # Non-Hierarchical Filtering with only woody plants.
#' # Other tables remain filtered and correspond to the tree table.
#' # Expected results after filtering:
#' # all(nfi_donghae$plot$SUB_PLOT %in% nfi_donghae$tree$SUB_PLOT)  result: TRUE
#' nfi_donghae <- filter_nfi(nfi_donghae, c("tree$WDY_PLNTS_TYP_CD == '1'"), hier = FALSE)
#' 
#' # Combining multiple filters across different dataframes
#' nfi_donghae <- filter_nfi(nfi_donghae, 
#'                     c("plot$OWN_CD == '5'", 
#'                     "tree$FAMILY == 'Pinaceae' | tree$WDY_PLNTS_TYP_CD == '1'"))
#' }
#' 
#' @export
# 

filter_nfi <- function(data, expr_texts, hier=TRUE){

  # Filters the data frames in `data` one expression at a time. Each expression
  # targets exactly one data frame, identified by its leading "<name>$<column>"
  # reference. Depending on `hier`, the surviving (SUB_PLOT, CYCLE) keys are
  # propagated to the other data frames with a semi-join.

  # Capture the caller's label for `data` up front: `data` is reassigned inside
  # the loop, after which substitute() would yield the whole object rather than
  # the argument expression used in the error message.
  data_label <- paste(deparse(substitute(data)), collapse = "")

  # Every word-like token in the expressions is a candidate variable name.
  variable_names <- unique(unlist(
    stringr::str_extract_all(expr_texts, "\\b\\w+\\b")
  ))

  env_c <- rlang::caller_env(n = 1)

  # Resolve each token from the caller's environment. Tokens that do not exist
  # there, or that resolve to functions, yield NULL and are dropped below.
  values <- lapply(variable_names, function(var_name) {
    if (!exists(var_name, envir = env_c)) {
      return(NULL)
    }
    var <- get(var_name, envir = env_c)
    if (is.function(var)) NULL else var
  })

  named_values <- purrr::set_names(values, variable_names)
  named_values <- Filter(
    function(x) !(is.null(x) || all(vapply(x, is.null, logical(1)))),
    named_values
  )

  # Evaluation environment for the conditions: the caller's values plus `data`,
  # with this function's frame as parent.
  env <- rlang::env(!!!named_values, data = data, environment())

  data_names <- paste(names(data), collapse = ", ")

  usage_msg <- paste0("Each value in param 'expr_texts' must start with ", data_names ,
                      ". and param 'expr_texts' requires separate expressions for each item in ", data_label,
                      ". For example: c('plot$OWN_CD == \"5\"', 'tree$FAMILY == \"Pinaceae\" | tree$WDY_PLNTS_TYP_CD == \"1\"')")

  key_cols <- c("SUB_PLOT", "CYCLE")

  for (expr_text in expr_texts) {

    ## Identify the target data frame ------------------------------------------
    refs <- regmatches(expr_text, gregexpr("\\b\\w+\\$\\w+\\b", expr_text, perl = TRUE))[[1]]

    # An expression without any "<name>$<column>" reference cannot be routed to
    # a data frame; fail with the usage message instead of an obscure error.
    if (length(refs) == 0) {
      stop(usage_msg)
    }

    # Only the first reference decides the target; it must name an existing
    # data frame and one of its columns.
    df_name <- sub("(\\w+)\\$(\\w+)", "\\1", refs[1])
    col_name <- sub("(\\w+)\\$(\\w+)", "\\2", refs[1])

    if (!(df_name %in% names(data) && col_name %in% names(data[[df_name]]))) {
      stop(usage_msg)
    }
    extracted_df <- df_name

    ## Build and evaluate the condition ----------------------------------------
    # Strip the "<name>$" prefix so bare column names resolve against the data
    # mask. The lookbehind keeps "x$<name>$y" intact; the word boundary keeps
    # longer identifiers that merely end in <name> intact.
    modified_text <- gsub(paste0("(?<!\\$)\\b", extracted_df, "\\$"), "", expr_text, perl = TRUE)
    modified_expression <- rlang::parse_exprs(modified_text)[[1]]

    target <- data[[extracted_df]]
    keep <- rlang::eval_tidy(modified_expression, data = target, env = env)
    # `!!` injects the mask by value so it cannot collide with a column name.
    kept <- dplyr::filter(target, !!keep)

    ## Propagate to the other data frames --------------------------------------
    # hier = TRUE : only filters on 'plot' cascade to the other tables.
    # hier = FALSE: only filters on non-plot tables cascade to the other tables.
    is_plot <- extracted_df == "plot"
    propagate <- if (hier) is_plot else !is_plot

    if (propagate) {
      keys <- kept[, key_cols]
      # Select the other tables by name (not position) so the list order and
      # names are preserved wherever the target sits in `data`.
      others <- names(data) != extracted_df
      data[others] <- lapply(data[others], function(df) {
        dplyr::semi_join(df, keys, by = key_cols)
      })
    }

    # The target table is simply its filtered rows; re-applying the mask to a
    # semi-joined copy would mismatch in length whenever the join drops rows.
    data[[extracted_df]] <- kept
  }

  # Return the filtered data
  data
}
