% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/pubchem.R
\name{get_cid}
\alias{get_cid}
\title{Retrieve PubChem Compound ID (CID)}
\usage{
get_cid(
  query,
  from = "name",
  domain = c("compound", "substance", "assay"),
  match = c("all", "first", "ask", "na"),
  verbose = TRUE,
  arg = NULL,
  first = NULL,
  ...
)
}
\arguments{
\item{query}{character; search term, one or more compounds.}

\item{from}{character; type of input. See details for more information.}

\item{domain}{character; query domain, can be one of \code{"compound"},
\code{"substance"}, \code{"assay"}.}

\item{match}{character; how should multiple hits be handled? \code{"all"}
returns all matches, \code{"first"} returns only the first match,
\code{"ask"} enters an interactive mode and the user is asked for input,
\code{"na"} returns NA if multiple hits are found.}

\item{verbose}{logical; should a verbose output be printed on the console?}

\item{arg}{character; optional arguments like \code{"name_type=word"} to match
individual words.}

\item{first}{deprecated. Use \code{match} instead.}

\item{...}{currently unused.}
}
\value{
a tibble.
}
\description{
Retrieve compound IDs (CIDs) from PubChem.
}
\details{
Valid values for the \code{from} argument depend on the
\code{domain}:
\itemize{
\item{\code{compound}: \code{"name"}, \code{"smiles"}, \code{"inchi"},
\code{"inchikey"}, \code{"formula"}, \code{"sdf"}, \code{<xref>},
\code{<structure search>}, \code{<fast search>}.}
\item{\code{substance}: \code{"name"}, \code{"sid"},
\code{<xref>}, \code{"sourceid/<source id>"} or \code{"sourceall"}.}
\item{\code{assay}: \code{"aid"}, \code{<assay target>}.}
}

\code{<structure search>} is assembled as "\{\code{substructure} |
\code{superstructure} | \code{similarity} | \code{identity}\}/\{\code{smiles} |
\code{inchi} | \code{sdf} | \code{cid}\}", e.g.
\code{from = "substructure/smiles"}.

\code{<xref>} is assembled as "\code{xref}/\{\code{RegistryID} |
\code{RN} | \code{PubMedID} | \code{MMDBID} | \code{ProteinGI} |
\code{NucleotideGI} | \code{TaxonomyID} | \code{MIMID} | \code{GeneID} |
\code{ProbeID} | \code{PatentID}\}", e.g. \code{from = "xref/RN"} will query
by CAS RN.

\code{<fast search>} is either \code{fastformula} or it is assembled as
"\{\code{fastidentity} | \code{fastsimilarity_2d} | \code{fastsimilarity_3d} |
\code{fastsubstructure} | \code{fastsuperstructure}\}/\{\code{smiles} |
\code{smarts} | \code{inchi} | \code{sdf} | \code{cid}\}", e.g.
\code{from = "fastidentity/smiles"}.

\code{<source id>} is any valid PubChem Data Source ID. When
\code{from = "sourceid/<source id>"}, the query is the ID of the substance in
the depositor's database.

If \code{from = "sourceall"} the query is one or more valid PubChem
depositor names. Depositor names are not case sensitive.

Depositor names and Data Source IDs can be found at
\url{https://pubchem.ncbi.nlm.nih.gov/sources/}.

\code{<assay target>} is assembled as "\code{target}/\{\code{gi} |
\code{proteinname} | \code{geneid} | \code{genesymbol} | \code{accession}\}",
e.g. \code{from = "target/geneid"} will query by GeneID.
}
\note{
Please respect the Terms and Conditions of the National Library of
Medicine, \url{https://www.nlm.nih.gov/databases/download.html}, the data
usage policies of the National Center for Biotechnology Information,
\url{https://www.ncbi.nlm.nih.gov/home/about/policies/},
\url{https://pubchemdocs.ncbi.nlm.nih.gov/programmatic-access}, and the data
usage policies of the individual data sources,
\url{https://pubchem.ncbi.nlm.nih.gov/sources/}.
}
\examples{
\donttest{
# might fail if API is not available
get_cid("Triclosan")
get_cid("Triclosan", arg = "name_type=word")
# from SMILES
get_cid("CCCC", from = "smiles")
# from InChI
get_cid("InChI=1S/CH5N/c1-2/h2H2,1H3", from = "inchi")
# from InChIKey
get_cid("BPGDAMSIGCZZLK-UHFFFAOYSA-N", from = "inchikey")
# from formula
get_cid("C26H52NO6P", from = "formula")
# from CAS RN
get_cid("56-40-6", from = "xref/rn")
# similarity
get_cid(5564, from = "similarity/cid")
get_cid("CCO", from = "similarity/smiles")
# from SID
get_cid("126534046", from = "sid", domain = "substance")
# sourceid
get_cid("VCC957895", from = "sourceid/23706", domain = "substance")
# sourceall
get_cid("Optopharma Ltd", from = "sourceall", domain = "substance")
# from AID (CIDs of substances tested in the assay)
get_cid(170004, from = "aid", domain = "assay")
# from GeneID (CIDs of substances tested on the gene)
get_cid(25086, from = "target/geneid", domain = "assay")

# multiple inputs
get_cid(c("Triclosan", "Aspirin"))

}
}
\references{
Wang, Y., J. Xiao, T. O. Suzek, et al. 2009 PubChem: A Public
Information System for
Analyzing Bioactivities of Small Molecules. Nucleic Acids Research 37:
623–633.

Kim, Sunghwan, Paul A. Thiessen, Evan E. Bolton, et al. 2016
PubChem Substance and Compound Databases. Nucleic Acids Research 44(D1):
D1202–D1213.

Kim, S., Thiessen, P. A., Bolton, E. E., & Bryant, S. H. (2015).
PUG-SOAP and PUG-REST: web services for programmatic access to chemical
information in PubChem. Nucleic acids research, gkv396.

Eduard Szöcs, Tamás Stirling, Eric R. Scott, Andreas Scharmüller,
Ralf B. Schäfer (2020). webchem: An R Package to Retrieve Chemical
Information from the Web. Journal of Statistical Software, 93(13).
\doi{10.18637/jss.v093.i13}.
}
