% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/db_joins.R
\name{joins}
\alias{joins}
\alias{inner_join.tbl_sql}
\alias{left_join.tbl_sql}
\alias{right_join.tbl_sql}
\alias{full_join.tbl_sql}
\alias{semi_join.tbl_sql}
\alias{anti_join.tbl_sql}
\title{SQL Joins}
\usage{
\method{inner_join}{tbl_sql}(x, y, by = NULL, ...)

\method{left_join}{tbl_sql}(x, y, by = NULL, ...)

\method{right_join}{tbl_sql}(x, y, by = NULL, ...)

\method{full_join}{tbl_sql}(x, y, by = NULL, ...)

\method{semi_join}{tbl_sql}(x, y, by = NULL, ...)

\method{anti_join}{tbl_sql}(x, y, by = NULL, ...)
}
\arguments{
\item{x, y}{A pair of lazy data frames backed by database queries.}

\item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character
vector of variables to join by.

If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all
variables in common across \code{x} and \code{y}. A message lists the variables so
that you can check they're correct; suppress the message by supplying \code{by}
explicitly.

To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}}
specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}.

To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with
multiple expressions. For example, \code{join_by(a == b, c == d)} will match
\code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between
\code{x} and \code{y}, you can shorten this by listing only the variable names, like
\code{join_by(a, c)}.

\code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap
joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on
these types of joins.

For simple equality joins, you can alternatively specify a character vector
of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a}
to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y},
use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}.

To perform a cross-join, generating all combinations of \code{x} and \code{y}, see
\code{\link[dplyr:cross_join]{cross_join()}}.}

\item{...}{Other parameters passed on to methods.}
}
\value{
Another \code{tbl_lazy}. Use \code{\link[dplyr:show_query]{show_query()}} to see the generated
query, and use \code{\link[dbplyr:collect.tbl_sql]{collect()}} to execute the query
and return data to R.
}
\description{
Overloads the dplyr \verb{*_join()} functions to accept an \code{na_by} argument.
By default, joining using SQL does not match on \code{NA} / \code{NULL}.
dbplyr \verb{*_join}s have the option \code{na_matches = "na"} to match on \code{NA} / \code{NULL}, but this is very inefficient in some
cases.
This function does the matching more efficiently:
If columns contain \code{NA} / \code{NULL}, their names can be passed via the \code{na_by} argument and
are then matched efficiently, as if \code{na_matches = "na"} had been set.
If no \code{na_by} argument is given, the function defaults to using \verb{dplyr::*_join}.
}
\examples{
\dontshow{if (requireNamespace("RSQLite", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
  library(dplyr, warn.conflicts = FALSE)
  library(dbplyr, warn.conflicts = FALSE)

  band_db <- tbl_memdb(dplyr::band_members)
  instrument_db <- tbl_memdb(dplyr::band_instruments)

  left_join(band_db, instrument_db) |>
    show_query()

  # Can join with local data frames by setting copy = TRUE
  left_join(band_db, dplyr::band_instruments, copy = TRUE)

  # Unlike R, joins in SQL don't usually match NAs (NULLs)
  db <- memdb_frame(x = c(1, 2, NA))
  label <- memdb_frame(x = c(1, NA), label = c("one", "missing"))
  left_join(db, label, by = "x")

  # But you can activate R's usual behaviour with the na_matches argument
  left_join(db, label, by = "x", na_matches = "na")

  # By default, joins are equijoins, but you can use `sql_on` to
  # express richer relationships
  db1 <- memdb_frame(x = 1:5)
  db2 <- memdb_frame(x = 1:3, y = letters[1:3])

  left_join(db1, db2) |> show_query()
  left_join(db1, db2, sql_on = "LHS.x < RHS.x") |> show_query()
\dontshow{\}) # examplesIf}
}
\seealso{
\link[dplyr:mutate-joins]{dplyr::mutate-joins} which this function wraps.

\link[dbplyr:join.tbl_sql]{dbplyr::join.tbl_sql} which this function wraps.

\link[dplyr:explain]{dplyr::show_query}
}
