#' @importFrom dplyr rename
#' 
#' @title get_groups
#' 
#' @author Johannes De Groeve
#' @description get groups of interconnected shapes
#'
#' @param filename path where files or qs2/rds was exported, reconstruct object of class tabs or recvect object 
#' @param col character varying, which column to use to identify groups
#' @param unnest boolean, unnest groups to list all shape names and ids in a column
#'
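#' @return data frame (tibble) with the groups of interconnected shapes: one row per group with list-columns of member names and ids if unnest=FALSE, one row per member shape if unnest=TRUE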
#' 
#' 
#' @export
#' 
#' @examples
#' # load data samples
#' sporades <- sporades()
#' topo <- sporades$topo
#' labs <- sporades$labs
#' correction <- sporades$correction
#' curve <- sporades$curve
#' 
#' # subset first and last period 
#' curve <- curve[[c(1,dim(curve)[3])]]
#' 
#' # reconstruct
#' rec <- reconstruct(topo=topo,
#'                    region=labs,
#'                    curve=curve,
#'                    correction=correction,
#'                    reclabs='name'
#'                    )
#' groups <- get_groups(rec,unnest=FALSE)
#' ids <- get_groups(rec,unnest=TRUE)
#' 
get_groups <- function(filename,
                       unnest=TRUE,
                       col=c('recnames','refnames')){
  
  col <- match.arg(col)
  
  # Resolve the input to the reconstructed shapes: a path is imported first,
  # a SpatVectorCollection is used as is, anything else is expected to carry
  # the shapes in its `recvect` element.
  if(is.character(filename)){
    recvect <- import(filename)$recvect
  } else if(inherits(filename,'SpatVectorCollection')){
    recvect <- filename
  } else {
    recvect <- filename$recvect
  }
  
  # convert to a single object whose attributes are accessed as columns below
  recvect <- vect(recvect)
  
  # Period with the largest total area. `n = 1, with_ties = FALSE` guarantees
  # a single period: with ties kept, the comparison below would recycle a
  # vector of periods against the rows and select the wrong shapes.
  periodmaxarea <- recvect %>% 
    as.data.frame() %>%
    dplyr::group_by(.data$period) %>% 
    dplyr::summarize(area=sum(.data$area,na.rm=TRUE)) %>% 
    dplyr::slice_max(order_by=.data$area, n=1, with_ties=FALSE) %>% 
    dplyr::select("period") %>% 
    unlist() %>% 
    as.vector()
  
  # subset the shapes at the period for which the area was the largest 
  groups <- recvect[which(recvect$period == periodmaxarea),c('recname','recid',col)]
  groups <- as_tibble(groups)
  
  # Parse the JSON variable in `col` once per shape and extract the ids and
  # names of the shapes it lists (stored as list-columns).
  parsed <- lapply(as.vector(unlist(groups[,col])), jsonlite::fromJSON)
  groups$recids <- lapply(parsed, function(p) p$id)
  groups$recnames <- lapply(parsed, function(p) p$name)
  
  # The shape at the maximum-area period identifies the group; the parsed
  # ids/names become the members of that group.
  groups <- groups %>% 
    dplyr::rename(group='recname',
                  group_id='recid',
                  recid='recids',
                  recname='recnames')
  
  # unnest groups (islands within each group): one row per member shape
  if(unnest){
    n_members <- lengths(groups$recname)
    groups <- data.frame(group = rep(groups$group, n_members),
                         group_id = rep(groups$group_id, n_members),
                         recname = unlist(groups$recname),
                         recid = unlist(groups$recid)) %>%
      dplyr::group_by(.data$group) %>%
      # within each group, list the shape that names the group first
      dplyr::arrange(dplyr::desc(.data$recname == .data$group), .by_group = TRUE) %>%
      dplyr::ungroup() 
  }
  
  return(groups)
}

#' @title regroup
#' 
#' @author Johannes De Groeve
#' @description regroup shapes by recname identifier or by interconnected cluster (group)
#' 
#' @param filename path where files or qs2/rds was exported, reconstruct object of class tabs or recvect object
#' @param by character varying, restructure by interconnected cluster ('group'), or by recname ('recname')
#' @param extant boolean, only currently existing shapes (TRUE) or all shapes (FALSE) are returned
#' @param group boolean, if TRUE, all shapes of the group will be returned.
#' @param recname character varying, by specifying a recname of interest, only shapes are returned for its group or recname
#' 
#' @return SpatVectorCollection of regrouped shapes, one named element per group or recname
#' 
#' @export
#' 
#' @examples
#' # load data samples
#' sporades <- sporades()
#' topo <- sporades$topo
#' labs <- sporades$labs
#' correction <- sporades$correction
#' curve <- sporades$curve
#' 
#' # subset every fourth period 
#' curve <- curve[[seq(1,dim(curve)[3],4)]]
#' 
#' # reconstruct
#' rec <- reconstruct(topo=topo,
#'                    region=labs,
#'                    curve=curve,
#'                    correction=correction,
#'                    reclabs='name'
#'                    )
#'                    
#' # regroup by group and return all shapes within group
#' recv <- regroup(filename=rec,
#'                 by='group',
#'                 group=TRUE,
#'                 extant=FALSE)
#' # regroup by group and only return main group shape
#' recv <- regroup(filename=rec,
#'                 by='group',
#'                 group=FALSE,
#'                 extant=FALSE)
#' # regroup by recname and return all shapes within group
#' recv <- regroup(filename=rec,
#'                 by='recname',
#'                 group=TRUE,
#'                 extant=FALSE)
#' # regroup by recname and only return the shapes of each recname
#' recv <- regroup(filename=rec,
#'                 by='recname',
#'                 group=FALSE,
#'                 extant=FALSE)
#' # regroup by recname and return all shapes within group
#' recv <- regroup(filename=rec,
#'                 by='recname',
#'                 recname='Nisída Ágios Geórgios',
#'                 group=TRUE,
#'                 extant=FALSE)
#' # regroup by recname and only return the shapes of the selected recname
#' recv <- regroup(filename=rec,
#'                 by='recname',
#'                 recname='Nisída Ágios Geórgios',
#'                 group=FALSE,
#'                 extant=FALSE)
#' 
#' # only extant (present day existing shapes)
#' 
#' # regroup by group and return all shapes within group
#' recv <- regroup(filename=rec,
#'                 by='group',
#'                 group=TRUE,
#'                 extant=TRUE)
#' # regroup by group and only return main group shapes
#' recv <- regroup(filename=rec,
#'                 by='group',
#'                 group=FALSE,
#'                 extant=TRUE)
#' # regroup by recname and return all shapes within group
#' recv <- regroup(filename=rec,
#'                 by='recname',
#'                 group=TRUE,
#'                 extant=TRUE)
#' # regroup by recname and only return the shapes of each recname
#' recv <- regroup(filename=rec,
#'                 by='recname',
#'                 group=FALSE,
#'                 extant=TRUE)
#' # regroup by recname and return all shapes within group
#' recv <- regroup(filename=rec,
#'                 by='recname',
#'                 recname='Nisída Ágios Geórgios',
#'                 group=TRUE,
#'                 extant=TRUE)
#' # regroup by recname and only return the shapes of the selected recname
#' recv <- regroup(filename=rec,
#'                 by='recname',
#'                 recname='Nisída Ágios Geórgios',
#'                 group=FALSE,
#'                 extant=TRUE)
#'                 
regroup <- function(filename,
                    by=c('group','recname'), # restructure by group or by recname 
                    extant=FALSE, # only present shapes are returned
                    group=TRUE,  # if TRUE, all shapes part of the defined group in by are returned
                    recname=NULL){
  
  by <- match.arg(by)
  
  # Resolve the input to the reconstructed shapes. A path is imported only
  # once; the imported object is reused for get_groups() below instead of
  # re-reading the file for every call.
  if(is.character(filename)){
    input <- import(filename)
    recvect <- input$recvect
  } else {
    input <- filename
    if(inherits(filename,'SpatVectorCollection')){
      recvect <- filename
    } else {
      # if reconstruct
      recvect <- filename$recvect  
    }
  }
  
  recvect <- vect(recvect)
  
  # get groups: one row per group (`groups`) and one row per member (`ids`)
  groups <- get_groups(input,unnest=FALSE,col='recnames')
  ids <- get_groups(input,unnest=TRUE,col='recnames')
  
  if(!group){ # only the shapes part of the defined group in by are returned
    
    # Match on the parsed JSON fields of `recnames` rather than on the raw
    # JSON text: a text search treats the key as a regular expression and
    # also matches substrings (e.g. id 1 inside id 10).
    field <- if (by == "group") "name" else "id"
    key_values <- if (by == "group") groups$group else ids$recid
    members <- lapply(recvect$recnames, function(js) {
      # rows whose JSON cannot be parsed simply match nothing
      tryCatch(jsonlite::fromJSON(js)[[field]], error = function(e) NULL)
    })
    recvect_groups <- lapply(key_values, function(x) {
      recvect[vapply(members, function(m) x %in% m, logical(1)), ]
    })
    names(recvect_groups) <- if (by == "group") groups$group else ids$recname
    
  } else { # returns all shapes part of a group (any shape that was interconnected)
    if(by=='group'){
      # shapes over time for all shapes within main groups 
      recvect_groups <- lapply(groups$group_id,function(x) {
        member_ids <- ids$recid[which(ids$group_id==x)]
        recvect[which(recvect$recid %in% member_ids),]
      })
      names(recvect_groups) <- groups$group
    } else { # by == "recname" restructures shapes per shape 
      # shapes over time for all shapes sharing a group with the shape;
      # %in% keeps this correct when a recid occurs in more than one row
      recvect_groups <- lapply(ids$recid,function(x) {
        group_ids <- ids$group_id[which(ids$recid==x)]
        member_ids <- ids$recid[which(ids$group_id %in% group_ids)]
        recvect[which(recvect$recid %in% member_ids),]
      })
      names(recvect_groups) <- ids$recname
    }
  }
  
  if(!is.null(recname)){ # subset the recnames of interest
    recvect_groups <- recvect_groups[names(recvect_groups) %in% recname]
  }
  
  # Keep list structure
  if(!is.list(recvect_groups)){
    recvect_groups <- list(recvect_groups)
  }

  if(extant){ # subset present day polygons only 
    # drop shapes whose recname follows the S-<2 letters><7 digits>-<number> pattern
    recvect_groups <- lapply(recvect_groups, function(x) { 
      x[!grepl("^S-[A-Z]{2}[0-9]{7}-[0-9]+$",x$recname),]
    })
  }
  
  # only keep groups that have records
  has_records <- vapply(recvect_groups, function(x) nrow(x) != 0, logical(1))
  recvect_groups <- recvect_groups[has_records]
  
  # return as SpatVectorCollection
  return(svc(recvect_groups))
}


 