% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/seeker.R
\name{seeker}
\alias{seeker}
\title{Process RNA-seq data end to end}
\usage{
seeker(params, parentDir = ".", dryRun = FALSE)
}
\arguments{
\item{params}{Named list of parameters with components:
\itemize{
\item \code{study}: String used to name the output directory within \code{parentDir}.
\item \code{metadata}: Named list with components:
\itemize{
\item \code{run}: Logical indicating whether to fetch metadata. See
\code{\link[=fetchMetadata]{fetchMetadata()}}. If \code{TRUE}, saves a file
\code{parentDir}/\code{study}/metadata.csv. If \code{FALSE}, expects that file to
already exist. The unmodified fetched or found metadata is saved to a
file \code{parentDir}/\code{study}/metadata_original.csv. Following components
are only checked if \code{run} is \code{TRUE}.
\item \code{bioproject}: String indicating the study's bioproject accession.
\item \code{include}: Optional named list for specifying which rows of metadata to
include for further processing, with components:
\itemize{
\item \code{colname}: String indicating column in metadata
\item \code{values}: Vector indicating values within \code{colname}
}
\item \code{exclude}: Optional named list for specifying which rows of metadata to
exclude from further processing (superseding \code{include}), with components:
\itemize{
\item \code{colname}: String indicating column in metadata
\item \code{values}: Vector indicating values within \code{colname}
}
}
\item \code{fetch}: Named list with components:
\itemize{
\item \code{run}: Logical indicating whether to fetch files from SRA. See \code{\link[=fetch]{fetch()}}.
If \code{TRUE}, saves files to \code{parentDir}/\code{study}/fetch_output. Whether
\code{TRUE} or \code{FALSE}, expects metadata to have a column "run_accession", and
updates metadata with column "fastq_fetched" containing paths to files in
\code{parentDir}/\code{study}/fetch_output. Following components are only checked
if \code{run} is \code{TRUE}.
\item \code{keep}: Logical indicating whether to keep fastq.gz files when all
processing steps have completed. \code{NULL} indicates \code{TRUE}.
\item \code{overwrite}: Logical indicating whether to overwrite files that already
exist. \code{NULL} indicates to use the default in \code{\link[=fetch]{fetch()}}.
\item \code{keepSra}: Logical indicating whether to keep the ".sra" files. \code{NULL}
indicates to use the default in \code{\link[=fetch]{fetch()}}.
\item \code{prefetchCmd}: String indicating command for prefetch, which downloads
".sra" files. \code{NULL} indicates to use the default in \code{\link[=fetch]{fetch()}}.
\item \code{prefetchArgs}: Character vector indicating arguments to pass to
prefetch. \code{NULL} indicates to use the default in \code{\link[=fetch]{fetch()}}.
\item \code{fasterqdumpCmd}: String indicating command for fasterq-dump, which
uses ".sra" files to create ".fastq" files. \code{NULL} indicates to use the
default in \code{\link[=fetch]{fetch()}}.
\item \code{fasterqdumpArgs}: Character vector indicating arguments to pass to
fasterq-dump. \code{NULL} indicates to use the default in \code{\link[=fetch]{fetch()}}.
\item \code{pigzCmd}: String indicating command for pigz, which converts ".fastq"
files to ".fastq.gz" files. \code{NULL} indicates to use the default in
\code{\link[=fetch]{fetch()}}.
\item \code{pigzArgs}: Character vector indicating arguments to pass to pigz. \code{NULL}
indicates to use the default in \code{\link[=fetch]{fetch()}}.
}
\item \code{trimgalore}: Named list with components:
\itemize{
\item \code{run}: Logical indicating whether to perform quality/adapter trimming of
reads. See \code{\link[=trimgalore]{trimgalore()}}. If \code{TRUE}, expects metadata to have a column
"fastq_fetched" containing paths to fastq files in
\code{parentDir}/\code{study}/fetch_output, saves trimmed files to
\code{parentDir}/\code{study}/trimgalore_output, and updates metadata with column
"fastq_trimmed". If \code{FALSE}, expects nothing and does nothing. Following
components are only checked if \code{run} is \code{TRUE}.
\item \code{keep}: Logical indicating whether to keep trimmed fastq files when all
processing steps have completed. \code{NULL} indicates \code{TRUE}.
\item \code{cmd}: Name or path of the command-line interface. \code{NULL} indicates to
use the default in \code{\link[=trimgalore]{trimgalore()}}.
\item \code{args}: Additional arguments to pass to the command-line interface.
\code{NULL} indicates to use the default in \code{\link[=trimgalore]{trimgalore()}}.
\item \code{pigzCmd}: String indicating command for pigz, which converts ".fastq"
files to ".fastq.gz" files. \code{NULL} indicates to use the default in
\code{\link[=trimgalore]{trimgalore()}}.
}
\item \code{fastqc}: Named list with components:
\itemize{
\item \code{run}: Logical indicating whether to perform QC on reads. See \code{\link[=fastqc]{fastqc()}}.
If \code{TRUE} and \code{trimgalore$run} is \code{TRUE}, expects metadata to have a
column "fastq_trimmed" containing paths to fastq files in
\code{parentDir}/\code{study}/trimgalore_output. If \code{TRUE} and \code{trimgalore$run} is
\code{FALSE}, expects metadata to have a column "fastq_fetched" containing
paths to fastq files in \code{parentDir}/\code{study}/fetch_output. If \code{TRUE},
saves results to \code{parentDir}/\code{study}/fastqc_output. If \code{FALSE}, expects
nothing and does nothing. Following components are only checked if \code{run} is
\code{TRUE}.
\item \code{keep}: Logical indicating whether to keep fastqc files when all
processing steps have completed. \code{NULL} indicates \code{TRUE}.
\item \code{cmd}: Name or path of the command-line interface. \code{NULL} indicates to
use the default in \code{\link[=fastqc]{fastqc()}}.
\item \code{args}: Additional arguments to pass to the command-line interface.
\code{NULL} indicates to use the default in \code{\link[=fastqc]{fastqc()}}.
}
\item \code{salmon}: Named list with components:
\itemize{
\item \code{run}: Logical indicating whether to quantify transcript abundances. See
\code{\link[=salmon]{salmon()}}. If \code{TRUE} and \code{trimgalore$run} is \code{TRUE}, expects metadata to
have a column "fastq_trimmed" containing paths to fastq files in
\code{parentDir}/\code{study}/trimgalore_output. If \code{TRUE} and \code{trimgalore$run} is
\code{FALSE}, expects metadata to have a column "fastq_fetched" containing
paths to fastq files in \code{parentDir}/\code{study}/fetch_output. If \code{TRUE},
saves results to \code{parentDir}/\code{study}/salmon_output and
\code{parentDir}/\code{study}/salmon_meta_info.csv. If \code{FALSE}, expects nothing and
does nothing. Following components are only checked if \code{run} is \code{TRUE}.
\item \code{indexDir}: Directory that contains salmon index.
\item \code{sampleColname}: String indicating column in metadata containing sample
ids. \code{NULL} indicates "sample_accession", which should work for data
from SRA and ENA.
\item \code{keep}: Logical indicating whether to keep quantification results when
all processing steps have completed. \code{NULL} indicates \code{TRUE}.
\item \code{cmd}: Name or path of the command-line interface. \code{NULL} indicates to
use the default in \code{\link[=salmon]{salmon()}}.
\item \code{args}: Additional arguments to pass to the command-line interface.
\code{NULL} indicates to use the default in \code{\link[=salmon]{salmon()}}.
}
\item \code{multiqc}: Named list with components:
\itemize{
\item \code{run}: Logical indicating whether to aggregate results of various
processing steps. See \code{\link[=multiqc]{multiqc()}}. If \code{TRUE}, saves results to
\code{parentDir}/\code{study}/multiqc_output. If \code{FALSE}, expects nothing and does nothing.
Following components are only checked if \code{run} is \code{TRUE}.
\item \code{cmd}: Name or path of the command-line interface. \code{NULL} indicates to
use the default in \code{\link[=multiqc]{multiqc()}}.
\item \code{args}: Additional arguments to pass to the command-line interface.
\code{NULL} indicates to use the default in \code{\link[=multiqc]{multiqc()}}.
}
\item \code{tximport}: Named list with components:
\itemize{
\item \code{run}: Logical indicating whether to summarize transcript- or gene-level
estimates for downstream analysis. See \code{\link[=tximport]{tximport()}}. If \code{TRUE}, expects
metadata to have a column \code{sampleColname} of sample ids, and expects a
directory \code{parentDir}/\code{study}/salmon_output containing directories of
quantification results, and saves results to
\code{parentDir}/\code{study}/tximport_output.qs. If \code{FALSE}, expects nothing and
does nothing. Following components are only checked if \code{run} is \code{TRUE}.
\item \code{tx2gene}: Optional named list with components:
\itemize{
\item \code{organism}: String indicating organism and thereby ensembl gene dataset.
See \code{\link[=getTx2gene]{getTx2gene()}}.
\item \code{version}: Optional number indicating ensembl version. \code{NULL} indicates
the latest version. See \code{\link[=getTx2gene]{getTx2gene()}}.
\item \code{filename}: Optional string indicating name of pre-existing text file
in \code{parentDir}/\code{params$study} containing mapping between transcripts
(first column) and genes (second column), with column names in the
first row. If \code{filename} is specified, \code{organism} and \code{version} must not
be specified.
}

If not \code{NULL}, saves a file \code{parentDir}/\code{study}/tx2gene.csv.gz.
\item \code{countsFromAbundance}: String indicating whether or how to estimate
counts using estimated abundances. See \code{\link[tximport:tximport]{tximport::tximport()}}.
\item \code{ignoreTxVersion}: Logical indicating whether to ignore the version suffix on
transcript ids. \code{NULL} indicates to use \code{TRUE}. See
\code{\link[tximport:tximport]{tximport::tximport()}}.
}
}

\code{params} can be derived from a yaml file, see
\code{vignette("introduction", package = "seeker")}. The yaml representation
of \code{params} will be saved to \code{parentDir}/\code{params$study}/params.yml.}

\item{parentDir}{Directory in which to store the output, which will be a
directory named according to \code{params$study}.}

\item{dryRun}{Logical indicating whether to check the validity of inputs
without actually fetching or processing any data.}
}
\value{
Path to the output directory \code{parentDir}/\code{params$study}, invisibly.
}
\description{
This function selectively performs various steps to process RNA-seq data.
}
\seealso{
\code{\link[=fetchMetadata]{fetchMetadata()}}, \code{\link[=fetch]{fetch()}}, \code{\link[=trimgalore]{trimgalore()}}, \code{\link[=fastqc]{fastqc()}},
\code{\link[=salmon]{salmon()}}, \code{\link[=multiqc]{multiqc()}}, \code{\link[=tximport]{tximport()}}, \code{\link[=installSysDeps]{installSysDeps()}}, \code{\link[=seekerArray]{seekerArray()}}
}
