% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cdisc_validate.R
\name{detect_cdisc_domain}
\alias{detect_cdisc_domain}
\title{Detect CDISC Domain Type}
\usage{
detect_cdisc_domain(df, name_hint = NULL)
}
\arguments{
\item{df}{A data frame to analyze.}

\item{name_hint}{Optional character string giving the dataset name (e.g., "DM",
"ADLB", or a filename like "adlb.xpt"). If the hint matches a known CDISC
domain, that candidate receives a strong confidence boost, which makes
detection much more accurate when the filename is available.}
}
\value{
A list containing:
\item{standard}{Character: "SDTM", "ADaM", or "Unknown"}
\item{domain}{Character: domain code (e.g., "DM", "AE") or dataset name (e.g., "ADSL"), or NA}
\item{confidence}{Numeric between 0 and 1 indicating match quality}
\item{message}{Character: human-readable explanation}
}
\description{
Detects whether a data frame looks like an SDTM domain or ADaM dataset by comparing
column names against known CDISC standards. Calculates a confidence score based on
the percentage of expected variables present.

Auto-detection is a convenience for exploratory use. For anything important --
validation reports, regulatory submissions, scripted pipelines -- do not rely on
this function; instead pass \code{domain} and \code{standard} explicitly to the
validation functions that accept them. Datasets that share common columns
(STUDYID, USUBJID, etc.) can match multiple domains, and a warning is issued
when the top two candidates score within 10 percentage points of each other.
}
\examples{
\donttest{
# Build a one-row data frame whose column names resemble an SDTM DM domain.
# Detection compares column names against known CDISC variables, so the
# values below are only placeholders.
dm <- data.frame(
  STUDYID = "STUDY001",
  USUBJID = "SUBJ001",
  SUBJID = "001",
  DMSEQ = 1,
  RACE = "WHITE",
  ETHNIC = "NOT HISPANIC OR LATINO",
  ARMCD = "ARM01",
  ARM = "Treatment A",
  stringsAsFactors = FALSE
)

# name_hint is omitted here, so it takes its default of NULL.
# The returned list has the elements standard, domain, confidence, and message.
result <- detect_cdisc_domain(dm)
print(result)
}
}
