calc_BD <- function(bial, populations, outgroup, keep.site.info=FALSE, dxy.table=FALSE){

# Patterson's D
# f

# pop 1: populations[[1]] :
# pop 2: populations[[2]] : 
# pop 3: populations[[3]] : archaic population

if(length(populations)!=3){
stop("This statistic requires 3 populations; the third population is the archaic population")
}

if(outgroup[1]==FALSE || length(outgroup[1])==0){
stop("This statistic needs an outgroup ! (set.outgroup)")
}

# calculate frequencies of the derived alleles.
freqs  <- jointfreqdist(bial,populations,outgroup,keep.all.sites=TRUE)
freqs  <- freqs$jfd
#print(freqs)


# calc the B_d
p      <- freqs[1,]
q      <- freqs[2,]

# dxy for each site and each pop vs archaic pop 3

dxy_pop13 <- apply(bial,2,function(x){
	pop1 <- x[populations[[1]]]
	pop3 <- x[populations[[3]]]
	ones <- sum(pop1==1,na.rm=TRUE) * sum(pop3==0,na.rm=TRUE)
	zero <- sum(pop1==0,na.rm=TRUE) * sum(pop3==1,na.rm=TRUE)
	dxy  <- (ones + zero)/(sum(!is.na(pop1))*sum(!is.na(pop3)))
	return(dxy)		
})

dxy_pop23 <- apply(bial,2,function(x){
	pop2 <- x[populations[[2]]]
	pop3 <- x[populations[[3]]]
	ones <- sum(pop2==1,na.rm=TRUE) * sum(pop3==0,na.rm=TRUE)
	zero <- sum(pop2==0,na.rm=TRUE) * sum(pop3==1,na.rm=TRUE)
	dxy  <- (ones + zero)/(sum(!is.na(pop2))*sum(!is.na(pop3)))
	return(dxy)		
})

dxy_pop12 <- apply(bial,2,function(x){
	pop1 <- x[populations[[1]]]
	pop2 <- x[populations[[2]]]
	ones <- sum(pop1==1,na.rm=TRUE) * sum(pop2==0,na.rm=TRUE)
	zero <- sum(pop1==0,na.rm=TRUE) * sum(pop2==1,na.rm=TRUE)
	dxy  <- (ones + zero)/(sum(!is.na(pop1))*sum(!is.na(pop2)))
	return(dxy)		
})


dxy_pop123 <- apply(bial,2,function(x){
	pop1 <- x[c(populations[[1]],populations[[2]])]
	pop2 <- x[populations[[3]]]
	ones <- sum(pop1==1,na.rm=TRUE) * sum(pop2==0,na.rm=TRUE)
	zero <- sum(pop1==0,na.rm=TRUE) * sum(pop2==1,na.rm=TRUE)
	dxy  <- (ones + zero)/(sum(!is.na(pop1))*sum(!is.na(pop2)))
	return(dxy)		
})


if(dxy.table[[1]]!=FALSE){
#print("Using table")
ids       <- match(dxy_pop12,as.numeric(names(dxy.table[[1]])))
dxy_pop12 <- dxy.table[[1]][ids]

ids       <- match(dxy_pop13,as.numeric(names(dxy.table[[2]])))
dxy_pop13 <- dxy.table[[2]][ids]

ids       <- match(dxy_pop23,as.numeric(names(dxy.table[[3]])))
dxy_pop23 <- dxy.table[[3]][ids]
}else{
#print("Not using table")
}

d13   <- dxy_pop13 #/(dxy_pop13+dxy_pop23)
d23   <- dxy_pop23 #/(dxy_pop13+dxy_pop23)	
d12   <- dxy_pop12
d123  <- dxy_pop123

#d12   <- abs(p-q)
#d13   <- abs(p-freqs[3,])
#d23   <- abs(q-freqs[3,])

alpha   <- (d12-d23)^2
beta    <- (d12-d13)^2
theta   <- (d13-d23)^2 

root <- freqs[3,]
root[root!=0] <- 1

# ---------------------------------
# google docs version
BABA    <- ( (p     + alpha )     * ((1-q) + beta) ) * freqs[3,]
ABBA    <- ( ((1-p) + alpha )     * (q     + beta) ) * freqs[3,] 

#BABA  <- p*d23 
#ABBA  <- q*d13  

#BABA    <- ( (p     + d23^2 )     * ((1-q) + d13^2) ) * freqs[3,]
#ABBA    <- ( ((1-p) + d23^2 )     * (q     + d13^2) ) * freqs[3,] 


#surprisingly very good
# BABA    <- p*(1-q)*freqs[3,] + alpha*beta
# ABBA    <- (1-p)*q*freqs[3,] + alpha*beta

#BABA <- p*(1-q)*freqs[3,]
#ABBA <- q*(1-p)*freqs[3,]


DENOM   <- ABBA + BABA 

##################### 
#BABA  <-  (p*beta ) * freqs[3,]
#ABBA  <-  (q*alpha) * freqs[3,]

#DENOM <- d12*freqs[3,] + d12*(1-freqs[3,])
#DENOM  <- BABA + ABBA
#DENOM <- p*d23 + q*d13

#####################

########################################################################
# with abs()
##################### works good

#BABA  <- (p*beta    + (1-q)*alpha  )*freqs[3,]
#ABBA  <- (q*alpha   + (1-p)*beta   )*freqs[3,]

#BABA  <- p*d23
#ABBA  <- q*d13 

#BABA  <- (p*d23 + (d23-q))
#ABBA  <- (q*d13 + (d13-p))


#ABBA  <- freqs[3,]*d12 +      q*d13 - p*2*d23
#BABA  <- freqs[3,]*d12 +      p*d23 - q*2*d13

#PP <-  dbinom(p*8,8,(freqs[3,]*8 + p*8)/16) 
#QQ <-  dbinom(q*8,8,(freqs[3,]*8 + q*8)/16)

#PQ <-  dbinom(p*8,8,(freqs[3,]*8 + q*8)/16)
#QP <-  dbinom(q*8,8,(freqs[3,]*8 + p*8)/16)

#BABA <- PP*(1-QQ)*freqs[3,]
#ABBA <- QQ*(1-PP)*freqs[3,]

#BABA  <- XBABA*(1-XABBA)*freqs[3,]
#ABBA  <- XABBA*(1-XBABA)*freqs[3,]

#CP  <- dbinom(freqs[3,]*8,8,(freqs[3,]*8 + p*8)/16)
#CQ  <- dbinom(freqs[3,]*8,8,(freqs[3,]*8 + q*8)/16)

#BABA <- (PP*(1-QQ))*freqs[3,]
#ABBA <- (QQ*(1-PP))*freqs[3,]

#DENOM <- d12*freqs[3,]  

#DENOM <- ABBA + BABA 

#DENOM  <- p*(d12-d23)^2 + q*(d12-d13)^2

#DENOM <- (p + alpha) * (q + beta)

#DENOM <- sqrt(theta)  #+ alpha*beta

#####################

### two proprtion Z: 
#N1    <- 2 
#N2    <- 2 

#PP    <- (BABA*N1 + ABBA*N2)/(N1+N2)

#DENOM <- sqrt(PP*(1-PP)*(1/N1 + 1/N2))
###################### 

# cool with abs()
# DENOM <- sqrt(p*beta + q*alpha)

# cool with abs()
# DENOM <- (alpha+beta)*freqs[3,]

# no
#DENOM <- (p*beta+q*alpha)*freqs[3,]

#ok
#DENOM <- sqrt((alpha+beta)*freqs[3,])
#print(DENOM)

#no with ()^2
#DENOM <- sqrt(p*beta + q*alpha)

#no
#DENOM <- sqrt((1-BABA)*ABBA + BABA*(1-ABBA))

#no
#DENOM <- (1-BABA)*ABBA + BABA*(1-ABBA)

# no, nothing with ABBA BABA
#DENOM <- sqrt(BABA*alpha + ABBA*beta)


#works with abs(...)
#DENOM <- freqs[3,]*d12 + alpha*beta 
#no
#DENOM  <- abs(p*d23 - q*d13)
#
#maxp  <- rbind(freqs[3,],p)
#maxp  <- apply(maxp,2,max)
  
#maxq  <- rbind(freqs[3,],q)
#maxq  <- apply(maxq,2,max)

#DENOM <- sqrt(maxp * maxq)

#root <- freqs[3,]
#root[root!=0] <- 1

# good
#BABA <- p*(1-q)*freqs[3,] + alpha*beta
#ABBA <- (1-p)*q*freqs[3,] + alpha*beta

# This is Patt-D
#BABA <- p*d23 - q*d13 + freqs[3,]*d12
#ABBA <- q*d13 - p*d23 + freqs[3,]*d12


#good, very good with abs(d12-d23) ...
#BABA    <- (p*(1-q) + p*alpha      + (1-q)*beta ) * freqs[3,]
#ABBA    <- ((1-p)*q + (1-p)*alpha  + q*beta     ) * freqs[3,] 

#no
#BABA    <- (p*d23      + (1-q)*d13 ) * freqs[3,]
#ABBA    <- ((1-p)*d23  + q*d13     ) * freqs[3,] 

#BABA    <- ( (p     + beta  )     * ((1-q) + alpha ) ) * freqs[3,]
#ABBA    <- ( ((1-p) + alpha )     * (q     + beta  ) ) * freqs[3,] 


#BABA <- (p*(1-q))*freqs[3,]
#ABBA <- ((1-p)*q)*freqs[3,]

#DENOM <- ((1-p)*(1-q) + p*q) * (freqs[3,])

# ok
#BABA <- (p*(1-q) + p*(1-freqs[3,]) + (1-q)*freqs[3,]) * freqs[3,]  
#ABBA <- ((1-p)*q + (1-p)*freqs[3,] + q*(1-freqs[3,])) * freqs[3,]

#BABA    <- (p*(1-q) + (1-q)*alpha +       p *beta  ) * freqs[3,]
#ABBA    <- ((1-p)*q +     q*alpha +    (1-p)*beta  ) * freqs[3,] 


#no
#BABA <- freqs[3,]*d12 + p*d23
#ABBA <- freqs[3,]*d12 + q*d13


#NEUTRAL  <- (#p*q*freqs[3,]
	    #+ (1-p)*(1-q)*freqs[3,]
#              p*q*(1-freqs[3,])
	    #+ p*(1-q)*(1-freqs[3,])
	    #+ (1-p)*q*(1-freqs[3,])
#	    )
	

#BABA <- p*(1-q)*freqs[3,] #+ NEUTRAL 
#ABBA <- q*(1-p)*freqs[3,] #+ NEUTRAL


#BABA    <- ( (p     + beta )     * ((1-q) + alpha) ) * freqs[3,]
#ABBA    <- ( ((1-p) + beta )     * (q     + alpha) ) * freqs[3,]

# not symmetric
#BABA    <- ( (p     + beta  )     * ((1-q) + alpha) ) * freqs[3,]
#ABBA    <- ( ((1-p) + alpha )     * (q     + beta ) ) * freqs[3,]

# extreme single SNP values, but gives perfect accumulated Bd fraction
# BABA    <-  p *  d23
# ABBA    <-  q *  d13 

# questionable for single SNPs - +
#BABA    <- ( (p     * alpha  )     + ((1-q) * beta) ) * freqs[3,]
#ABBA    <- ( ((1-p) * alpha  )     + (q     * beta) ) * freqs[3,]

#######################################################################################

#print("-----")
#cat("d12", d12,"\n")
#cat("d13", d13, "\n")
#cat("d23", d23, "\n")
#print("----------")
#cat("alpha:", alpha,"\n")
#cat("beta:" , beta, "\n")
#print("----")

#cat("alpha-beta:", alpha-beta,"\n")

#print("p1")
#print(p)

#print("p2")
#print(q)

#print("ABBA-BABA")
#print(ABBA-BABA)

#print("ABBA+BABA")
#print(ABBA+BABA)

#print("ABBA")
#print(ABBA)

#print("BABA")
#print(BABA)

#print("DENOM")
#print(DENOM)
######################################################

#print(BABA)
sum_ABBA <- sum(ABBA,na.rm=TRUE)
sum_BABA <- sum(BABA,na.rm=TRUE) 

# 
D     <- (sum_ABBA - sum_BABA)/sum(DENOM, na.rm=TRUE)#(sum_ABBA + sum_BABA) #/valid.sites

#if(keep.site.info){
 D_site <- (ABBA - BABA)/DENOM #(ABBA + BABA)
 ABBA_site <- ABBA
 BABA_site <- BABA

 x      <- D_site
 Bd_dir <- (sum((d12-d23), na.rm=TRUE) + sum((d12-d13), na.rm=TRUE))

#D <- mean(D_site, na.rm=TRUE)

#}else{
# D_site      <- NULL
# ABBA_site   <- NULL
# BABA_site   <- NULL
#}

# calc f_BD
freqs23    <- freqs[2:3,,drop=FALSE]
maxfreqs23 <- apply(freqs23,2,max)


# f_d denominator
maxBABA <- (d23 * p     + d13 * (1-maxfreqs23))  *maxfreqs23
maxABBA <- (d23 * (1-p) + d13 * maxfreqs23)      *maxfreqs23

sum_maxABBA <- sum(maxABBA, na.rm=TRUE)
sum_maxBABA <- sum(maxBABA, na.rm=TRUE)

f <- (sum_ABBA - sum_BABA)/(sum_maxABBA - sum_maxBABA)

return(list(Bd_dir=Bd_dir, D=D, f=f, D_site=D_site, ABBA=ABBA_site, BABA=BABA_site))

}
