# Excercise 3 Question 1:


row.p <- c(.6,.4)
col.p <- c(.4,.3,.3)
pi.tbl <- outer(row.p,col.p)
n <- 1000

pois.tbl    <- array(dim=c(2,3))  # poisson sampling
multi.tbl   <- array(dim=c(2,3))  # multinomial sampling
pmulti.tbl  <- array(dim=c(2,3))  # product multinomial sampling
mhyper.tbl  <- array(dim=c(2,3))  # multi-hypergeometric sampling


pois.X2    <- rep(NA,1000)
multi.X2   <- rep(NA,1000)
pmulti.X2  <- rep(NA,1000)
mhyper.X2  <- rep(NA,1000)

for(i in 1:1000)
{
pois.tbl   <- array(rpois(12,pi.tbl*n),dim=c(2,3))  

multi.tbl  <- array(table(sample(1:6,n,replace=T,prob=pi.tbl)),dim=c(2,3))  

pmulti.tbl <- rbind(table(sample(1:3,n*row.p[1],replace=T,prob=col.p)),
                    table(sample(1:3,n*row.p[2],replace=T,prob=col.p)))

mhyper.tbl[1,] <- table(sample(c(rep(1,n*col.p[1]),rep(2,n*col.p[2]),rep(3,n*col.p[3])),
			            size=n*row.p[1],replace=F))
mhyper.tbl[2,] <- n*col.p - mhyper.tbl[1,]

pois.X2[i]   <- chisq.test(pois.tbl)$stat
multi.X2[i]  <- chisq.test(multi.tbl)$stat
pmulti.X2[i] <- chisq.test(pmulti.tbl)$stat
mhyper.X2[i] <- chisq.test(mhyper.tbl)$stat
}


#	Distribution plots

boxplot(list(pois.X2,multi.X2,pmulti.X2,mhyper.X2))

plot(density(pois.X2))
lines(density(multi.X2),col=2)
lines(density(pmulti.X2),col=3)
lines(density(mhyper.X2),col=4)

chisq.2 <- rchisq(10000,2)
chisq.5 <- rchisq(10000,5)

lines(density(chisq.2),col=2,lty=2)
lines(density(chisq.5),col=3,lty=2)



#	Kolmogorov-smirnov test

ks.test(chisq.2,"pchisq", 2)
ks.test(chisq.5,"pchisq", 2)

ks.test(chisq.2,"pchisq", 5)
ks.test(chisq.5,"pchisq", 5)

ks.test(pois.X2,"pchisq", 2)
ks.test(multi.X2,"pchisq", 2)
ks.test(pmulti.X2,"pchisq", 2)
ks.test(mhyper.X2,"pchisq", 2)

ks.test(pois.X2,"pchisq", 5)
ks.test(multi.X2,"pchisq", 5)
ks.test(pmulti.X2,"pchisq", 5)
ks.test(mhyper.X2,"pchisq", 5)


# Excercise 3 Question 2:



Wald.ci<-function(Table, aff.response, alpha=.05){ 
# Gives two-sided Wald CI's for odds ratio, difference in proportions and relative risk. 
# Table is a 2x2 table of counts with rows giving the treatment populations 
# aff.response is a string like "c(1,1)" giving the cell of the beneficial response and the 
# treatment category 
# alpha is significance level 
pow<-function(x, a=-1) x^a 
z.alpha<-qnorm(1-alpha/2) 
if(is.character(aff.response)) 
where<-eval(parse(text=aff.response)) 
else where<-aff.response 
Next<-as.numeric(where==1) + 1 
# OR 
odds.ratio<-Table[where[1],where[2]]*Table[Next[1],Next[2]]/(Table[where[1],Next[2]]*Table[Next[1],where[2]]) 
se.OR<-sqrt(sum(pow(Table))) 
ci.OR<-exp(log(odds.ratio) + c(-1,1)*z.alpha*se.OR) 
# difference of proportions 
p1<-Table[where[1],where[2]]/(n1<-Table[where[1],Next[2]] + Table[where[1],where[2]]) 
p2<-Table[Next[1],where[2]]/(n2<-Table[Next[1],where[2]]+Table[Next[1],Next[2]]) 
se.diff<-sqrt(p1*(1-p1)/n1 + p2*(1-p2)/n2) 
ci.diff<-(p1-p2) + c(-1,1)*z.alpha*se.diff 
# relative risk 
RR<-p1/p2 
se.RR<-sqrt((1-p1)/(p1*n1) + (1-p2)/(p2*n2)) 
ci.RR<-exp(log(RR) + c(-1,1)*z.alpha*se.RR) 
list(OR=list(odds.ratio=odds.ratio, CI=ci.OR), proportion.difference=list(diff=p1-p2, CI=ci.diff), relative.risk=list(relative.risk=RR,CI=ci.RR)) 
} 




# 	Multiple odds ratio

religion.counts<-c(178,570,138,138,648,252,108,442,252)
table.3.2<-cbind(expand.grid(list(Highest.Degree=c("<HS","HS or JH", "Bachelor or Grad"),
Religious.Beliefs=c("Fund", "Mod", "Lib"))),count=religion.counts) 

(table.3.2.array<-tapply(table.3.2$count,table.3.2[,1:2], sum) )


r1 <- 1; r2 <- 2; c1 <- 1; c2 <- 2  
table.3.2.array[c(r1,r2),c(c1,c2)]
Wald.ci(table.3.2.array[c(r1,r2),c(c1,c2)],aff.response=c(1,1))
Wald.ci(table.3.2.array[c(r1,r2),c(c1,c2)],aff.response=c(1,1))$OR$CI


#  [1,1] <-> others
table.3.2.array
r1 <- 1; r2 <- 2; c1 <- 1; c2 <- 2 ; table.3.2.array[c(r1,r2),c(c1,c2)] ; Wald.ci(table.3.2.array[c(r1,r2),c(c1,c2)],aff.response=c(1,1))$OR$CI
r1 <- 1; r2 <- 2; c1 <- 1; c2 <- 3 ; table.3.2.array[c(r1,r2),c(c1,c2)] ; Wald.ci(table.3.2.array[c(r1,r2),c(c1,c2)],aff.response=c(1,1))$OR$CI
r1 <- 1; r2 <- 3; c1 <- 1; c2 <- 2 ; table.3.2.array[c(r1,r2),c(c1,c2)] ; Wald.ci(table.3.2.array[c(r1,r2),c(c1,c2)],aff.response=c(1,1))$OR$CI
r1 <- 1; r2 <- 3; c1 <- 1; c2 <- 3 ; table.3.2.array[c(r1,r2),c(c1,c2)] ; Wald.ci(table.3.2.array[c(r1,r2),c(c1,c2)],aff.response=c(1,1))$OR$CI

#  [2,1] <-> others
table.3.2.array
r1 <- 2; r2 <- 3; c1 <- 1; c2 <- 2 ; table.3.2.array[c(r1,r2),c(c1,c2)] ; Wald.ci(table.3.2.array[c(r1,r2),c(c1,c2)],aff.response=c(1,1))$OR$CI
r1 <- 2; r2 <- 3; c1 <- 1; c2 <- 3 ; table.3.2.array[c(r1,r2),c(c1,c2)] ; Wald.ci(table.3.2.array[c(r1,r2),c(c1,c2)],aff.response=c(1,1))$OR$CI

#  [1,2] <-> others
table.3.2.array
r1 <- 1; r2 <- 2; c1 <- 2; c2 <- 3 ; table.3.2.array[c(r1,r2),c(c1,c2)] ; Wald.ci(table.3.2.array[c(r1,r2),c(c1,c2)],aff.response=c(1,1))$OR$CI
r1 <- 1; r2 <- 3; c1 <- 2; c2 <- 3 ; table.3.2.array[c(r1,r2),c(c1,c2)] ; Wald.ci(table.3.2.array[c(r1,r2),c(c1,c2)],aff.response=c(1,1))$OR$CI

#  [2,2] <-> others
table.3.2.array
r1 <- 2; r2 <- 3; c1 <- 2; c2 <- 3 ; table.3.2.array[c(r1,r2),c(c1,c2)] ; Wald.ci(table.3.2.array[c(r1,r2),c(c1,c2)],aff.response=c(1,1))$OR$CI


#	Partitioning the Deviance  


table.3.2<-cbind(expand.grid(list(Highest.Degree=c("<HS","HS or JH", "Bachelor or Grad"),
			Religious.Beliefs=c("Fund", "Mod", "Lib"))),count=c(178,570,138,138,648,252,108,442,252))

(dev.0 <- glm(count~Highest.Degree+Religious.Beliefs, family="poisson", data=table.3.2)$deviance )

table.3.2.1 <-cbind(expand.grid(list(Highest.Degree=c("<HS","HS or JH"),Religious.Beliefs=c("Fund", "Mod"))),count=c(178,570,138,648))
table.3.2.2<-cbind(expand.grid(list(Highest.Degree=c("<HS","HS or JH"),Religious.Beliefs=c("Fund or Mod", "Lib"))),count=c(316,1218,108,442))
table.3.2.3<-cbind(expand.grid(list(Highest.Degree=c("<JH", "Bachelor or Grad"),Religious.Beliefs=c("Fund", "Mod"))),count=c(748,138,786,252))
table.3.2.4<-cbind(expand.grid(list(Highest.Degree=c("<JH", "Bachelor or Grad"),Religious.Beliefs=c("Fund or Mod", "Lib"))),count=c(1534,390,550,252))

tapply(table.3.2$count,table.3.2[,1:2], sum) 

tapply(table.3.2.2$count,table.3.2.2[,1:2], sum) 
tapply(table.3.2.3$count,table.3.2.3[,1:2], sum) 
tapply(table.3.2.4$count,table.3.2.4[,1:2], sum) 

(dev.1 <- glm(count~Highest.Degree+Religious.Beliefs, family="poisson", data=table.3.2.1)$deviance )
(dev.2 <- glm(count~Highest.Degree+Religious.Beliefs, family="poisson", data=table.3.2.2)$deviance )
(dev.3 <- glm(count~Highest.Degree+Religious.Beliefs, family="poisson", data=table.3.2.3)$deviance )
(dev.4 <- glm(count~Highest.Degree+Religious.Beliefs, family="poisson", data=table.3.2.4)$deviance )


dev.1 + dev.2 + dev.3 + dev.4

dev.0