% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/stopwords.R
\name{selectFeatures}
\alias{selectFeatures}
\alias{selectFeatures.dfm}
\title{select features from an object}
\usage{
selectFeatures(x, features, ...)

\method{selectFeatures}{dfm}(x, features = NULL, selection = c("keep",
  "remove"), valuetype = c("glob", "regex", "fixed"),
  case_insensitive = TRUE, verbose = TRUE, ...)
}
\arguments{
\item{x}{object whose features will be selected}

\item{features}{one of: a character vector of features to be selected, a
\link{dfm} whose features will be used for selection, or a dictionary class
object whose values (not keys) will provide the features to be selected.
For \link{dfm} objects, see details in the Value section below.}

\item{...}{supplementary arguments passed to the underlying functions in
\code{\link[stringi]{stri_detect_regex}}.  (This is how
\code{case_insensitive} is passed, but you may wish to pass others.)}

\item{selection}{whether to keep or remove the features}

\item{valuetype}{how to interpret feature vector: \code{"fixed"} for words as
is; \code{"regex"} for regular expressions; or \code{"glob"} for
"glob"-style wildcards}

\item{case_insensitive}{ignore the case of dictionary values if \code{TRUE}}

\item{verbose}{if \code{TRUE} print message about how many features were
removed}
}
\value{
A dfm after the feature selection has been applied.

  When \code{features} is a \link{dfm-class} object, then the returned object
  will be identical in its feature set to the dfm supplied as the
  \code{features} argument.  This means that any features in \code{x} not in
  \code{features} will be discarded, and that any features found in the
  dfm supplied as \code{features} but not found in \code{x} will be added
  with all zero counts.  This is useful when you have trained a model on one dfm, and
  need to project this onto a test set whose features must be identical.
}
\description{
This function selects or discards features from a variety of objects,
such as tokenized texts, a dfm, or a list of collocations.  The most common
usage for \code{selectFeatures} will be to eliminate stop words from a text
or text-based object, or to select only features from a list of regular
expressions.
}
\note{
This function selects features based on their labels.  To select
  features based on the values of the document-feature matrix, use
  \code{\link{trim}}.
}
\examples{
# Build a dfm from two short texts, passing toLower = FALSE and
# verbose = FALSE to dfm()
myDfm <- dfm(c("My Christmas was ruined by your opposition tax plan.",
               "Does the United_States or Sweden have more progressive taxation?"),
             toLower = FALSE, verbose = FALSE)
# Dictionary with three keys; as documented for the features argument, its
# values (not its keys) supply the features.  The notintext entry holds a
# word that occurs in neither text.
mydict <- dictionary(list(countries = c("United_States", "Sweden", "France"),
                          wordsEndingInY = c("by", "my"),
                          notintext = "blahblah"))
# Select using the dictionary values, all other arguments at their defaults
selectFeatures(myDfm, mydict)
# Same call, but with case-insensitive matching switched off
selectFeatures(myDfm, mydict, case_insensitive = FALSE)
# Keep, then remove, features using the two patterns as regular expressions
selectFeatures(myDfm, c("s$", ".y"), "keep", valuetype = "regex")
selectFeatures(myDfm, c("s$", ".y"), "remove", valuetype = "regex")
# Keep, then remove, features using the English stopword list as fixed strings
selectFeatures(myDfm, stopwords("english"), "keep", valuetype = "fixed")
selectFeatures(myDfm, stopwords("english"), "remove", valuetype = "fixed")

# Selecting on a dfm: a dfm (dfm1) is passed as the features argument, so the
# result should take on the feature set of dfm1 (see the Value section)
textVec1 <- c("This is text one.", "This, the second text.", "Here: the third text.")
textVec2 <- c("Here are new words.", "New words in this text.")
# Show the features of each dfm while creating it
features(dfm1 <- dfm(textVec1))
features(dfm2a <- dfm(textVec2))
# Project dfm2a onto the features of dfm1 and print the result
(dfm2b <- selectFeatures(dfm2a, dfm1))
# Compare the two feature sets; expected to be TRUE per the documented behaviour
identical(features(dfm1), features(dfm2b))
}
\seealso{
\code{\link{removeFeatures}}, \code{\link{trim}}
}

