% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sql_cooc.R
\name{sql_cooc}
\alias{sql_cooc}
\title{Compute co-occurrence matrix on SQL file}
\usage{
sql_cooc(
  input_path,
  output_path,
  min_code_freq = 5,
  exclude_code_pattern = NULL,
  exclude_dict_pattern = NULL,
  codes_dict_fpaths = NULL,
  n_batch = 300,
  n_cores = 1,
  autoindex = FALSE,
  overwrite_output = FALSE,
  verbose = TRUE,
  verbose_max = verbose,
  ...
)
}
\arguments{
\item{input_path}{Input SQL file path.
Must contain monthly counts table 'df_monthly',
with columns 'Patient', 'Month', 'Parent_Code', 'Count'.
Also requires an index on column 'Patient' and a table of
the unique codes 'df_uniq_codes'; both are created
automatically if parameter autoindex is TRUE
(can increase input file size by 40\%).}

\item{output_path}{Output SQL file path for co-occurrence sparse matrix.
Can overwrite with overwrite_output parameter.}

\item{min_code_freq}{Filter output matrix based on code frequency.}

\item{exclude_code_pattern}{Pattern of code prefixes to exclude.
Will be used in SQL appended by '\%' and in grep
prefixed by '^'.
For example, 'AB'.}

\item{exclude_dict_pattern}{Used in combination with codes_dict_fpaths.
Pattern of code prefixes to exclude, except if
they are found in the files given by codes_dict_fpaths.
Will be used in SQL appended by '\%' and/or in
grep prefixed by '^'.
For example, 'C[0-9]'.}

\item{codes_dict_fpaths}{Used in combination with exclude_dict_pattern.
Filepaths to define codes to avoid excluding using
exclude_dict_pattern.
First column of each file must define the code
identifiers.}

\item{n_batch}{Number of patients per batch.}

\item{n_cores}{Number of cores.}

\item{autoindex}{If table 'df_uniq_codes' not found in input_path,
index table 'df_monthly' on column 'Patient', and write
unique values of 'Parent_Code' to table 'df_uniq_codes'.}

\item{overwrite_output}{Should output_path be overwritten?}

\item{verbose}{Prints batch progress.}

\item{verbose_max}{Prints memory usage at each batch.}

\item{...}{Passed to build_df_cooc}
}
\value{
None, side-effect is output SQL file creation.
}
\description{
Performs out-of-memory co-occurrence for large databases that would not fit
in RAM with the classic call to build_df_cooc.
Patients are batched using the n_batch parameter.
Co-occurrence sparse matrix output is written to a new SQL file.
Depending on the number of codes considered, n_batch and n_cores may need
to be adjusted.
See vignette "Co-occurrence and PMI-SVD" for more details.
}
\examples{

# Toy monthly counts table with the four columns expected in df_monthly:
# Patient, Month, Parent_Code and Count.
df_ehr = data.frame(Patient = c(1, 1, 2, 1, 2, 1, 1, 3, 4),
                    Month = c(1, 1, 1, 2, 2, 3, 3, 4, 4),
                    Parent_Code = c('C1', 'C2', 'C2', 'C1', 'C1', 'C1',
                                    'C2', 'C3', 'C4'),
                    Count = 1:9)

library(RSQLite)

# Write the toy data to a temporary SQLite file as table df_monthly.
test_db_path = tempfile()
test_db = dbConnect(SQLite(), test_db_path)
dbWriteTable(test_db, 'df_monthly', df_ehr, overwrite = TRUE)

# Close the input connection before calling sql_cooc on the file path.
dbDisconnect(test_db)

# Compute the co-occurrence matrix into a second temporary file.
# autoindex = TRUE is used because the input file was just created and has
# neither the Patient index nor the df_uniq_codes table.
output_db_path = tempfile()
sql_cooc(test_db_path, output_db_path, autoindex = TRUE)

# Read the result back from the output file.
# NOTE(review): this queries a table named df_monthly in the OUTPUT file;
# confirm that this is the table sql_cooc writes the co-occurrence matrix to.
test_db = dbConnect(SQLite(), output_db_path)
spm_cooc = dbGetQuery(test_db, 'select * from df_monthly;')
dbDisconnect(test_db)

spm_cooc

}
