%This file is part of the source code for
%SPGS: an R package for identifying statistical patterns in genomic sequences.
%Copyright (C) 2015  Universidad de Chile and INRIA-Chile
%
%This program is free software; you can redistribute it and/or modify
%it under the terms of the GNU General Public License as published by
%the Free Software Foundation; either version 2 of the License, or
%(at your option) any later version.
%
%This program is distributed in the hope that it will be useful,
%but WITHOUT ANY WARRANTY; without even the implied warranty of
%MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%GNU General Public License for more details.
%
%A copy of Version 2 of the GNU Public License is available in the 
%share/licenses/gpl-2 file in the R installation directory or from 
%http://www.R-project.org/Licenses/GPL-2.

\name{chargaff.gibbs.test}
\alias{chargaff.gibbs.test}
\title{Test of CSPR for Dinucleotides Under Gibbs Distribution}
\description{
Performs a test of Chargaff's second parity rule (CSPR) for dinucleotides under a Gibbsian assumption on the DNA sequence, as proposed in Hart and Martínez (2012).
}
 \usage{
chargaff.gibbs.test(x, maxLag=200)
}
\arguments{
  \item{x}{either a character vector representing a DNA sequence in which each element contains a single nucleotide, or
a DNA sequence stored using the SeqFastadna class from the \pkg{\link[seqinr]{seqinr}} package.}
\item{maxLag}{The maximum number of lags (cylinder lengths) to use in computing
variances.  The default value is \samp{200}.}
}
\details{
This function performs a test of Chargaff's second parity rule for dinucleotides 
assuming the DNA sequence was generated by a Gibbs distribution.  Under the null 
hypothesis, the test statistic \eqn{\eta}{eta} is asymptotically 
\eqn{\chi^2}{chi-squared} distributed with 5 degrees of freedom.

The test is set up as follows:

\eqn{H_0}{H0}:  the sequence complies with CSPR for dinucleotides \cr
\eqn{H_1}{H1}:  the sequence does not comply with CSPR for dinucleotides
}
\value{
A list with class \code{"htest"} containing the following components:

\item{statistic}{the value of the test statistic.}
\item{p.value}{the p-value of the test.}
\item{method}{a character string indicating what type of test was performed.}
\item{data.name}{a character string giving the name of the data.}
\item{FHat}{the 5-element vector \eqn{n\hat F}{nF^} used in calculating the test statistic.}
\item{pairs}{the stochastic matrix of dinucleotide counts used to derive \eqn{n\hat F}{nF^}.}
\item{v}{the asymptotic covariance matrix of \eqn{n\hat F}{nF^}.}
\item{n}{the length of the DNA sequence.}
\item{cutoff}{the actual number of lags used by the algorithm to calculate covariances.}
\item{maxCutoff}{the value specified for the \code{maxLag} parameter when the test was performed.}
}
\references{
Hart, A.G. and Martínez, S. (2012)
A Gibbs approach to Chargaff's second parity rule.
\emph{J. Stat. Phys.} \bold{146(2)}, 408--422.
}
\author{
Andrew Hart and Servet Martínez
}
\seealso{
\code{\link{chargaff0.test}}, \code{\link{chargaff1.test}}, 
\code{\link{chargaff2.test}}, \code{\link{agct.test}}, 
\code{\link{ag.test}}
}
\examples{
#Demonstration on real bacterial sequence
data(nanoarchaeum)
chargaff.gibbs.test(nanoarchaeum)

#Simulate synthetic DNA sequence that does not satisfy Chargaff's second parity rule
trans.mat <- matrix(c(.4, .1, .4, .1, .2, .1, .6, .1, .4, .1, .3, .2, .1, .2, .4, .3), 
ncol=4, byrow=TRUE)
seq <- simulateMarkovChain(500000, trans.mat, states=c("a", "c", "g", "t"))
chargaff.gibbs.test(seq)
}
\keyword{htest}

