% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/association-scores.R
\name{association-score-functions}
\alias{association-score-functions}
\alias{defaultAssociationScoreFunctions}
\alias{pmi}
\alias{mi2}
\alias{mi3}
\alias{logDice}
\alias{ll}
\title{Association score functions}
\usage{
defaultAssociationScoreFunctions()

pmi(O1, O2, O, N, E, window_size)

mi2(O1, O2, O, N, E, window_size)

mi3(O1, O2, O, N, E, window_size)

logDice(O1, O2, O, N, E, window_size)

ll(O1, O2, O, N, E, window_size)
}
\arguments{
\item{O1}{observed absolute frequency of node}

\item{O2}{observed absolute frequency of collocate}

\item{O}{observed absolute frequency of collocation}

\item{N}{corpus size}

\item{E}{expected absolute frequency of collocation (already adjusted to window size)}

\item{window_size}{total window size around node (left neighbour count + right neighbour count)}
}
\value{
The association score.
}
\description{
Functions to calculate different collocation association scores between
a node (target word) and words in a window around it.
The functions are primarily used by \code{\link[=collocationScoreQuery]{collocationScoreQuery()}}.

\strong{pmi}: pointwise mutual information

\strong{mi2}: pointwise mutual information squared (Daille 1994), also referred to as mutual dependency
(Thanopoulos et al. 2002)

\strong{mi3}: pointwise mutual information cubed (Daille 1994), also referred to as log-frequency biased mutual dependency
(Thanopoulos et al. 2002)

\strong{logDice}: log-Dice coefficient, a heuristic measure that is popular in lexicography (Rychlý 2008)

\strong{ll}: log-likelihood (Dunning 1993) using Stefan Evert's (2004) simplified implementation
}
\examples{
\dontrun{

new("KorAPConnection", verbose = TRUE) \%>\%
collocationScoreQuery("Perlen", c("verziertes", "Säue"),
  scoreFunctions = append(defaultAssociationScoreFunctions(),
     list(localMI = function(O1, O2, O, N, E, window_size) {
                       O * log2(O/E)
                    })))
}

}
\references{
Daille, B. (1994): Approche mixte pour l’extraction automatique de terminologie: statistiques lexicales et filtres linguistiques. PhD thesis, Université Paris 7.

Thanopoulos, A., Fakotakis, N., Kokkinakis, G. (2002): Comparative evaluation of collocation extraction metrics. In: Proc. of LREC 2002: 620–625.

Rychlý, Pavel (2008): A lexicographer-friendly association score. In Proceedings of Recent Advances in Slavonic Natural Language Processing, RASLAN, 6–9. \url{https://www.fi.muni.cz/usr/sojka/download/raslan2008/13.pdf}.

Dunning, T. (1993): Accurate methods for the statistics of surprise and coincidence. Comput. Linguist. 19, 1 (March 1993), 61–74.

Evert, Stefan (2004): The Statistics of Word Cooccurrences: Word Pairs and Collocations. PhD dissertation, IMS, University of Stuttgart. Published in 2005, URN urn:nbn:de:bsz:93-opus-23714.
Free PDF available from \url{https://purl.org/stefan.evert/PUB/Evert2004phd.pdf}.
}
\seealso{
Other collocation analysis functions: 
\code{\link{collocationAnalysis,KorAPConnection-method}},
\code{\link{collocationScoreQuery,KorAPConnection-method}},
\code{\link{synsemanticStopwords}()}
}
\concept{association-score-functions}
\concept{collocation analysis functions}
