% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/PAM.hm.R
\name{PAM.hm}
\alias{PAM.hm}
\title{Main function to produce a heatmap using PAM clustering.}
\usage{
PAM.hm(
  x,
  project.folder = ".",
  nsheets = 1,
  dec = ".",
  header = TRUE,
  symbolcol = 1,
  sample.names = NULL,
  cluster.number = 4,
  trim = NULL,
  winsorize.mat = TRUE,
  cols = "BlueWhiteRed",
  dendrograms = "Both",
  autoadj = TRUE,
  pdf.height = 10,
  pdf.width = 10,
  labelheight = 0.25,
  labelwidth = 0.2,
  r.cex = 0.5,
  c.cex = 1,
  medianCenter = NULL,
  log = FALSE,
  do.log = FALSE,
  log.base = 2,
  metric = "manhattan",
  na.strings = "NA",
  makeFolder = TRUE,
  do.pdf = FALSE,
  do.png = FALSE,
  save.objects = FALSE
)
}
\arguments{
\item{x}{(\code{character}, \code{data.frame}, \code{numeric}). The name(s) of the input file(s) (character vector)
or a data object such as a \code{data.frame} or \code{numeric matrix}. See 'Details'.}

\item{project.folder}{(\code{character}). Name of the root folder inside which the results will be created if any files are to be saved. See 'Details'.}

\item{nsheets}{(\code{integer}). Number of sheets to be read if file is of type ".xls" or ".xlsx". All sheets starting from 1 up to the
given number in the respective data file will be read. If more than one file is read this must be an integer vector with the
numbers of sheets in exactly the same order as the files.}

\item{dec}{(\code{character}). The decimal separator for numbers.}

\item{header}{(\code{logical}). Does the input file have a header row?}

\item{symbolcol}{(\code{character}). The name of the column with identifiers used as labels.}

\item{sample.names}{(\code{character}). A vector of names used for plot titles and output files.}

\item{cluster.number}{(\code{character} or \code{integer}). A vector of numbers used for PAM clustering (corresponds to argument
\code{k} in \code{\link[cluster]{pam}}). If a character vector, this is broken down to a numeric vector accepting
comma-separated strings in the form of, e.g., "4" and "2-5". The clustering algorithm then iterates through all given numbers.
See 'Details'.}

\item{trim}{(\code{numeric}). Value to "cut off" data distribution. Values at both ends of the distribution,
larger or smaller, respectively, will be made equal to \code{+/-trim}, i.e., data will be symmetrical around 0.
\code{NULL} means no trimming which is the default. If \code{trim} is \code{-1} (or any negative value) and
\code{winsorize.mat} is \code{TRUE} the matrix will be \emph{winsorized} and then the smaller of the two largest
absolute values at both ends of the distribution rounded to three digits will be used. If \code{winsorize.mat}
is \code{FALSE} the largest possible absolute integer, i.e., the smaller of the two extreme integers is used.
Trimming is disabled for only positive or only negative values.}

\item{winsorize.mat}{(\code{logical}). Should the matrix be \emph{winsorized} (cleaned of outliers) before plotting?
Defaults to \code{TRUE}. See 'Details'.}

\item{cols}{(\code{character}). Name of the colour palette.}

\item{dendrograms}{(\code{character}). Which dendrograms are to be plotted? One of "Vertical", "Horizontal",
"None" or "Both". Defaults to "Both".}

\item{autoadj}{(\code{logical}). Should label sizes and pdf dimensions be adjusted automatically? See 'Details'.}

\item{pdf.height}{(\code{numeric}). Height of the PDF device.}

\item{pdf.width}{(\code{numeric}). Width of the PDF device.}

\item{labelheight}{(\code{numeric} or \code{lcm(numeric)}). Relative or absolute height (using \code{lcm}, see \code{layout}) of the labels.}

\item{labelwidth}{(\code{numeric} or \code{lcm(numeric)}). Relative or absolute width (using \code{lcm}, see \code{layout}) of the labels.}

\item{r.cex}{(\code{numeric}). Font size for row labels.}

\item{c.cex}{(\code{numeric}). Font size for column labels.}

\item{medianCenter}{(\code{character}). If not \code{NULL}, how should data be median-centered? One of "grand",
"row" or "column". Defaults to \code{NULL}, no median-centering.}

\item{log}{(\code{logical}). Is the data on log-scale? (The log-base is given in argument \code{log.base}).}

\item{do.log}{(\code{logical}). Should data be log-transformed? (The log-base is given in argument \code{log.base}).}

\item{log.base}{(\code{numeric}). The log-base used for \code{log} and \code{do.log}.}

\item{metric}{(\code{character}). The metric to be used for calculating dissimilarities between observations.
The currently available options are "euclidean" and "manhattan". Euclidean distances are root sum-of-squares of differences,
and manhattan distances are the sum of absolute differences. Defaults to "manhattan".}

\item{na.strings}{(\code{character}). Character vector of strings to interpret as missing values when reading data files
with \code{read.table} or \code{readxl::read_excel}. By default, \code{readxl} treats blank cells as missing data.}

\item{makeFolder}{(\code{logical}). Should the results folder be created?}

\item{do.pdf}{(\code{logical}). Should images be saved to PDFs?}

\item{do.png}{(\code{logical}). Should images be saved to PNGs?}

\item{save.objects}{(\code{logical}). Should R objects be saved to disk?}
}
\value{
A list: Invisibly returns the results object from the PAM clustering.
}
\description{
This is the main wrapper function to be called by end users. It accepts a numeric matrix
    (or an object that can be coerced to a numeric matrix) or a number of data file formats and produces one or
    more PDFs with the plots.
}
\details{
Argument \code{x} can be a \code{data.frame} or numeric matrix to be used directly for plotting the heatmap.
    If it is a \code{data.frame}, argument \code{symbolcol} sets the respective columns for symbols to be used as
    labels and the column where the numeric data starts.

    Matrices will be coerced to data frames.

    The read function accepts txt, tsv, csv, xls and xlsx files.

    If PDF, PNG or R object files are to be saved, i.e., if the corresponding arguments are \code{TRUE}, a results
    folder will be created using time and date to create a unique name. The folder will be created in the directory
    set by argument \code{project.folder}. The reasoning behind that behaviour is that during development the
    heatmap was used as data analysis tool testing various \code{cluster.number} values with numerous files and
    comparing the results.

    The \code{cluster.number} argument defines the numbers of clusters when doing PAM. After processing it is passed
    one-by-one to argument \code{k} in \code{\link[cluster]{pam}}. The numbers can be defined in the form
    \code{c("2","4-7", "9")}, for example, depending on the experimental setup. An integer vector is coerced to
    character.

    If \code{autoadj} is \code{TRUE} character expansion (cex) for rows and columns, pdf width and height and
    label width and height are adjusted automatically based on the dimensions of the data matrix and length
    (number of characters) of the labels.

    The default behavior regarding outliers is to \emph{winsorize} the matrix before plotting, i.e., shrink outliers
    to the unscattered part of the data by replacing extreme values at both ends of the distribution with less
    extreme values. This is done for the same reason as trimming but the data will not be symmetrical around 0.
}
\examples{
# Generate a random 20x6 matrix and plot it using default values
set.seed(1234)                                  # for reproducibility
mat <- matrix(rnorm(120), nrow = 20)            # standard normal
PAM.hm(mat, cluster.number = 3)

## Plot with more than one cluster number
PAM.hm(mat, cluster.number = 2:4)               # integer vector
PAM.hm(mat, cluster.number = c("2", "4-5"))     # character vector

# Using the 'trim' argument
## Introduce outlier to the matrix and plot w/o trimming or winsorization
mat[1] <- mat[1] * 10
PAM.hm(mat, cluster.number = 3, trim = NULL, winsorize = FALSE)

## calculate a trim value by getting the largest possible absolute integer and
## plot again
tr <- min(abs(ceiling(c(min(mat, na.rm = TRUE), max(mat, na.rm = TRUE)))),
    na.rm = TRUE)
PAM.hm(mat, cluster.number = 3, trim = tr, winsorize = FALSE)
## Note that the outlier is still visible but since it is less extreme
## it does not distort the colour scheme.

# An example reading data from an Excel file
# The function readxl::read_excel is used internally to read Excel files.
# The example uses their example data.
readxl_datasets <- readxl::readxl_example("datasets.xlsx")
PAM.hm(readxl_datasets, cluster.number = 4, symbolcol = 5)

}
\references{
Kaufman, L., & Rousseeuw, P. J. (Eds.). (1990). \emph{Finding Groups in Data: An Introduction to Cluster Analysis.}
    John Wiley & Sons, Inc. \doi{10.1002/9780470316801}
}
\seealso{
\code{\link[utils]{read.delim}}

\code{\link[readxl]{read_excel}}

\code{\link[cluster]{pam}}
}
