# Example 1: overdispersion


n	 <- 10000

mu.wh <- sample(c(4,12),size=n,replace=T,prob=c(.75,.25))
y.wh  <- rpois(n,mu.wh)
y.eq  <- rpois(n,6)


par(mfcol=c(1,2))
hist(y.wh)
hist(y.eq)

mean(y.eq)
mean(y.wh)

var(y.eq)
var(y.wh)

var(mu.wh)
mean(mu.wh)

var(mu.wh) + mean(mu.wh)



----------------------------------------------------------------
# Example 2: sampling distribution


#	1. poisson sampling

mu.11 <- 60 ; mu.12 <- 40 ; mu.21 <- 150 ; mu.22 <- 50

n.11 <- rpois(1,mu.11)
n.12 <- rpois(1,mu.12)
n.21 <- rpois(1,mu.21)
n.22 <- rpois(1,mu.22)

x <- c(rep("1",n.11),rep("1",n.12),rep("2",n.21),rep("2",n.22))
y <- c(rep("1",n.11),rep("2",n.12),rep("1",n.21),rep("2",n.22))

table(x,y)


#	2. multinomial sampling

pi.11 <- 60  / 300 ; pi.12 <- 40  / 300 
pi.21 <- 150 / 300 ; pi.22 <- 50  / 300

n.vec <- rmultinom(1,300,prob=c(pi.11,pi.12,pi.21,pi.22))
n.11 <- n.vec[1,1]
n.12 <- n.vec[2,1]
n.21 <- n.vec[3,1]
n.22 <- n.vec[4,1]

x <- c(rep("1",n.11),rep("1",n.12),rep("2",n.21),rep("2",n.22))
y <- c(rep("1",n.11),rep("2",n.12),rep("1",n.21),rep("2",n.22))

table(x,y)
sum(table(x,y))

#	3. product multinomial sampling

pi.11 <- 60  / 100  ;  pi.12 <- 40  / 100
pi.21 <- 150 / 200  ;  pi.22 <- 50  / 200

n.vec <- rmultinom(1,100,prob=c(pi.11,pi.12))
n.11 <- n.vec[1,1]
n.12 <- n.vec[2,1]

n.vec <- rmultinom(1,200,prob=c(pi.21,pi.22))
n.21 <- n.vec[1,1]
n.22 <- n.vec[2,1]

x <- c(rep("1",n.11),rep("1",n.12),rep("2",n.21),rep("2",n.22))
y <- c(rep("1",n.11),rep("2",n.12),rep("1",n.21),rep("2",n.22))

table(x,y)
apply(table(x,y),1,sum)

#	4. hypergeometric sampling

n.11 <-  rhyper(1,100,200,210)
n.12 <-  100 - n.11
n.21 <-  210 - n.11
n.22 <-  200 - n.21

x <- c(rep("1",n.11),rep("1",n.12),rep("2",n.21),rep("2",n.22))
y <- c(rep("1",n.11),rep("2",n.12),rep("1",n.21),rep("2",n.22))

table(x,y)
apply(table(x,y),1,sum)
apply(table(x,y),2,sum)


----------------------------------------------------------------


#  Example 3: Mendel's theories -- chi squared test for goodness of fit

n.1  <- 6022
n.2  <- 2001

mu.1 <- (n.1 + n.2) * .75
mu.2 <- (n.1 + n.2) * .25

X.sq <- (n.1 - mu.1)^2 / mu.1 +  (n.2 - mu.2)^2 / mu.2  

X.sq
1-pchisq(X.sq,1)

chisq.test(c(n.1,n.2),p=c(.75,.25))


#	Test statistic distribution for Binomial sampling


n1.vec	<- rbinom(10000,8000,0.25)
n2.vec	<- 8000 - n1.vec

X.sq.vec	<-  (n1.vec  - 8000*0.25)^2 / (8000*0.25) +  (n2.vec  - 8000*0.75)^2 / (8000*0.75) 

summary(X.sq.vec)


plot(ecdf(X.sq.vec))
lines(seq(0,15,length = 500),pchisq(seq(0,15,length = 500),1),col=2)








#   Example 4: Florida death penalty example

vic.race<-c("white","black") 
def.race<-vic.race 
death.penalty<-c("yes", "no") 
datalabel<-list(defendant=def.race,death=death.penalty,victim=vic.race) 
table.2.6<- expand.grid(defendant=def.race,death=death.penalty,victim=vic.race) 
data<-c(53, 11, 414, 37, 0, 4, 16, 139) 
table.2.6<-cbind(table.2.6,count=data) 
xtabs(count~defendant+death+victim ,data=table.2.6) 

tbl.1 <- xtabs(count~defendant+death+victim ,data=table.2.6)[,,1] 
tbl.2 <- xtabs(count~defendant+death+victim ,data=table.2.6)[,,2]
tbl.m <- apply(xtabs(count~defendant+death+victim ,data=table.2.6),c(1,2),sum) 

(tbl.1[1,1]*tbl.1[2,2]) / (tbl.1[1,2]*tbl.1[2,1])  # strata 1  Odds Ratio
(tbl.2[1,1]*tbl.2[2,2]) / (tbl.2[1,2]*tbl.2[2,1])  # strata 2  Odds Ratio

(tbl.m[1,1]*tbl.m[2,2]) / (tbl.m[1,2]*tbl.m[2,1])   # marginal Odds Ratio


----------------------------------------------------------------

#   Example 5:  Agresti table 2.8 Example


income<-c("<15000","15000-25000","25000-40000",">40000") 
jobsat<-c("VD","LD","MS","VS") 
table.2.8<-expand.grid(income=income,jobsat=jobsat) 
data<-c(1,2,1,0,3,3,6,1,10,10,14,9,6,7,12,11) 
table.2.8<-cbind(table.2.8,count=data) 
(temp<-xtabs(count~income+jobsat,table.2.8)) 

row.mat <- outer(1:4,rep(1,4))
col.mat <- t(row.mat)

D <- 0
C <- 0
T <- 0

for(i in 1:4)
for(j in 1:4)
{
	C <- C + temp[i,j] * sum(temp * (row.mat > i) * (col.mat > j))  
	D <- D + temp[i,j] * sum(temp * (row.mat > i) * (col.mat < j))  
	T <- T + temp[i,j] * ( (temp[i,j] + 1) / 2 +
				  sum(temp * as.numeric( (row.mat == i & col.mat > j) | (col.mat == j & row.mat > i) ) )  )
}

D ; C  ; (C-D) / (C+D)

C + D + T

sum(temp)
97 * 96 / 2