% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/text.R
\name{vectorize.words}
\alias{vectorize.words}
\title{Word vectorization}
\usage{
vectorize.words(
  corpus = NULL,
  ndim = 50,
  maxwords = NULL,
  mincount = 5,
  minphrasecount = NULL,
  window = 5,
  maxcooc = 10,
  maxiter = 10,
  epsilon = 0.01,
  lang = "en",
  stopwords = lang,
  ...
)
}
\arguments{
\item{corpus}{The corpus of documents (a vector of characters).}

\item{ndim}{The number of dimensions of the vector space.}

\item{maxwords}{The maximum number of words.}

\item{mincount}{Minimum word count to be considered as frequent.}

\item{minphrasecount}{Minimum count for a collocation of words to be considered frequent.}

\item{window}{Window for term co-occurrence matrix construction.}

\item{maxcooc}{Maximum number of co-occurrences to use in the weighting function.}

\item{maxiter}{The maximum number of iterations to fit the GloVe model.}

\item{epsilon}{Defines the early stopping strategy when fitting the GloVe model.}

\item{lang}{The language of the documents (NULL if no stemming).}

\item{stopwords}{Stopwords, or the language of the documents. NULL if stop words should not be removed.}

\item{...}{Other parameters.}
}
\value{
The vectorized words.
}
\description{
Vectorize words from a corpus of documents.
}
\examples{
\dontrun{
text = loadtext ("http://mattmahoney.net/dc/text8.zip")
words = vectorize.words (text, minphrasecount = 50)
query.words (words, origin = "paris", sub = "france", add = "germany")
query.words (words, origin = "berlin", sub = "germany", add = "france")
query.words (words, origin = "new_zealand")
}
}
\seealso{
\code{\link{query.words}}, \code{\link[stopwords]{stopwords}}, \code{\link[text2vec]{vectorizers}}
}
