% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/01_prepare.R
\name{fst_rm_stop_punct}
\alias{fst_rm_stop_punct}
\title{Remove stopwords and punctuation from CoNLL-U dataframe}
\usage{
fst_rm_stop_punct(
  data,
  stopword_list = "nltk",
  language = "fi",
  manual = FALSE,
  manual_list = ""
)
}
\arguments{
\item{data}{A dataframe of text in CoNLL-U format.}

\item{stopword_list}{The stopword list to use, known as 'source' in
`stopwords::stopwords`. The default is `"nltk"`. Use `"manual"` to indicate
that a manual list will be provided, or `"none"` if you don't want to remove
stopwords.}

\item{language}{Two-letter ISO code of the language for the stopword list.}

\item{manual}{An optional boolean to indicate that a manual list will be
provided. `stopword_list = "manual"` can be used in addition to, or instead
of, this argument.}

\item{manual_list}{A manual list of stopwords.}
}
\value{
A dataframe of text in CoNLL-U format without stopwords and
 punctuation.
}
\description{
Removes stopwords and punctuation from a dataframe containing survey
text data which is already in CoNLL-U format.
}
\examples{
\dontrun{
# Prepare the input: fst_rm_stop_punct expects a dataframe in CoNLL-U format.
# The result is deliberately not named c, which would mask base::c().
conllu_df <- fst_format(child, question = 'q7', id = 'fsd_id')

# Call with all defaults.
fst_rm_stop_punct(conllu_df)
# Pick a different stopword list, by argument name or by position.
fst_rm_stop_punct(conllu_df, stopword_list = "snowball")
fst_rm_stop_punct(conllu_df, "stopwords-iso")

# Manual stopwords supplied as a character vector.
mlist <- c('en', 'et', 'ei', 'emme', 'ette', 'eivät', 'minä', 'minun')
# NOTE(review): mlist2 is not passed to any call below; presumably
# manual_list also accepts a single comma-separated string - confirm.
mlist2 <- "en, et, ei, emme, ette, eivät, minä, minun"
# The manual list can be requested through either argument.
fst_rm_stop_punct(conllu_df, manual = TRUE, manual_list = mlist)
fst_rm_stop_punct(conllu_df, stopword_list = "manual", manual_list = mlist)

# Delete the udpipe model file from the working directory, if it exists.
unlink("finnish-ftb-ud-2.5-191206.udpipe")
}
}
