% Generated by roxygen2 (4.0.2): do not edit by hand
\name{stri_split}
\alias{stri_split}
\alias{stri_split_charclass}
\alias{stri_split_coll}
\alias{stri_split_fixed}
\alias{stri_split_regex}
\title{Split a String By Pattern Matches}
\usage{
stri_split(str, ..., regex, fixed, coll, charclass)

stri_split_fixed(str, pattern, n_max = -1L, omit_empty = FALSE,
  tokens_only = FALSE, simplify = FALSE)

stri_split_regex(str, pattern, n_max = -1L, omit_empty = FALSE,
  tokens_only = FALSE, simplify = FALSE, opts_regex = NULL)

stri_split_coll(str, pattern, n_max = -1L, omit_empty = FALSE,
  tokens_only = FALSE, simplify = FALSE, opts_collator = NULL)

stri_split_charclass(str, pattern, n_max = -1L, omit_empty = FALSE,
  tokens_only = FALSE, simplify = FALSE)
}
\arguments{
\item{str}{character vector with strings to search in}

\item{...}{additional arguments passed to the underlying functions;
\code{stri_split} only}

\item{pattern,regex,fixed,coll,charclass}{character vector defining search patterns;
for more details refer to \link{stringi-search}}

\item{n_max}{integer vector, maximal number of strings to return}

\item{omit_empty}{logical vector; determines whether empty
tokens should be removed from the result (\code{TRUE}),
kept as empty strings (\code{FALSE}, the default),
or replaced with \code{NA}s (\code{NA})}

\item{tokens_only}{single logical value;
may affect the result if \code{n_max} is positive, see Details}

\item{simplify}{single logical value;
if \code{TRUE}, then a character matrix is returned;
otherwise (the default), a list of character vectors is given, see Value}

\item{opts_regex}{a named list with \pkg{ICU} Regex settings
as generated with \code{\link{stri_opts_regex}}; \code{NULL}
for default settings;
\code{stri_split_regex} only}

\item{opts_collator}{a named list with \pkg{ICU} Collator's settings
as generated with \code{\link{stri_opts_collator}}; \code{NULL}
for default settings;
\code{stri_split_coll} only}
}
\value{
If \code{simplify == FALSE} (the default),
then the functions return a list of character vectors.

Otherwise, \code{\link{stri_list2matrix}} with \code{byrow=TRUE} argument
is called on the resulting object.
In such a case, a character matrix with an appropriate number of rows
(according to the length of \code{str}, \code{pattern}, etc.)
is returned.
}
\description{
Splits each element of \code{str} into substrings.
\code{pattern} indicates delimiters that separate the input into tokens.
The input data between the matches become the fields themselves.
}
\details{
Vectorized over \code{str}, \code{pattern}, \code{n_max}, and \code{omit_empty}.

If \code{n_max} is negative (default), then all pieces are extracted.
Otherwise, if \code{tokens_only} is \code{FALSE} (this is the default,
for compatibility with the \pkg{stringr} package), then \code{n_max - 1}
tokens are extracted (if possible) and the \code{n_max}-th string
gives the (non-split) remainder (see Examples).
On the other hand, if \code{tokens_only} is \code{TRUE},
then only full tokens (up to \code{n_max} pieces) are extracted.

\code{omit_empty} is applied during splitting: if it is set to \code{TRUE},
then tokens of zero length are ignored. Thus, empty strings will never
appear in the resulting vector.
On the other hand, if \code{omit_empty} is \code{NA}, then
empty tokens are substituted with missing strings.

Empty search patterns are not supported. If you would like to split a
string into individual characters, use e.g.
\code{\link{stri_split_boundaries}(str,
\link{stri_opts_brkiter}(type="character"))} for THE Unicode way.

\code{stri_split} is a convenience function.
It calls either \code{stri_split_regex},
\code{stri_split_fixed}, \code{stri_split_coll},
or \code{stri_split_charclass},
depending on the argument used.
Unless you are a very lazy person, please call the underlying functions
directly for better performance.
}
\examples{
\donttest{
stri_split_fixed("a_b_c_d", "_")
stri_split_fixed("a_b_c__d", "_")
stri_split_fixed("a_b_c__d", "_", omit_empty=TRUE)
stri_split_fixed("a_b_c__d", "_", n_max=2, tokens_only=FALSE) # "a" & remainder
stri_split_fixed("a_b_c__d", "_", n_max=2, tokens_only=TRUE) # "a" & "b" only
stri_split_fixed("a_b_c__d", "_", n_max=4, omit_empty=TRUE, tokens_only=TRUE)
stri_split_fixed("a_b_c__d", "_", n_max=4, omit_empty=FALSE, tokens_only=TRUE)
stri_split_fixed("a_b_c__d", "_", omit_empty=NA)
stri_split_fixed(c("ab_c", "d_ef_g", "h", ""), "_", n_max=1, tokens_only=TRUE, omit_empty=TRUE)
stri_split_fixed(c("ab_c", "d_ef_g", "h", ""), "_", n_max=2, tokens_only=TRUE, omit_empty=TRUE)
stri_split_fixed(c("ab_c", "d_ef_g", "h", ""), "_", n_max=3, tokens_only=TRUE, omit_empty=TRUE)

stri_list2matrix(stri_split_fixed(c("ab,c", "d,ef,g", ",h", ""), ",", omit_empty=TRUE))
stri_split_fixed(c("ab,c", "d,ef,g", ",h", ""), ",", omit_empty=TRUE, simplify=TRUE)
stri_split_fixed(c("ab,c", "d,ef,g", ",h", ""), ",", omit_empty=FALSE, simplify=TRUE)
stri_split_fixed(c("ab,c", "d,ef,g", ",h", ""), ",", omit_empty=NA, simplify=TRUE)

stri_split_regex(c("ab,c", "d,ef  ,  g", ",  h", ""),
   "\\\\p{WHITE_SPACE}*,\\\\p{WHITE_SPACE}*", omit_empty=NA, simplify=TRUE)

stri_split_charclass("Lorem ipsum dolor sit amet", "\\\\p{WHITE_SPACE}")
stri_split_charclass(" Lorem  ipsum dolor", "\\\\p{WHITE_SPACE}", n_max=3,
   omit_empty=c(FALSE, TRUE))

stri_split_regex("Lorem ipsum dolor sit amet",
   "\\\\p{Z}+") # see also stri_split_charclass
}
}
\seealso{
Other search_split: \code{\link{stri_split_boundaries}};
  \code{\link{stri_split_lines}},
  \code{\link{stri_split_lines1}};
  \code{\link{stringi-search}}
}

