#	Table 3.1 data

Drug<-c("Placebo","Aspirin") 
Infarction<-c("yes","no") 
table.3.1<-expand.grid(drug=Drug,infarction=Infarction) 
Data<-c(28,18,656,658) # note capital D (see comment below) 
table.3.1<-cbind(table.3.1,count=Data) 
( temp <- tapply(table.3.1$count,table.3.1[,1:2], sum)  ) # turn data frame into an array 



# A:	Normal approximation CIs

# A1.  odds ratio CI

( or.tilde  <- (temp[1,1]+.5)*(temp[2,2]+.5)/ ((temp[1,2]+.5)* (temp[2,1]+.5))  )
( or.hat    <- temp[1,1] * temp[2,2] / (temp[1,2] * temp[2,1]) )
( log.or.se <- (1/temp[1,1] + 1/temp[2,2] + 1/temp[1,2] + 1/temp[2,1])^.5 )

( ci.ul <- log(or.hat) + qnorm(.975)*log.or.se )
( ci.ll <- log(or.hat) - qnorm(.975)*log.or.se )

exp(ci.ul)
exp(ci.ll)

# A2.  difference of prop CI

( p.2.in.1 <- temp[1,2]  / ( temp[1,1] + temp[1,2]))
( p.2.in.2 <- temp[2,2]  / ( temp[2,1] + temp[2,2]))

p.2.in.1 - p.2.in.2
(pd.se <- (p.2.in.1*(1-p.2.in.1)/(temp[1,1]+temp[1,2]) + p.2.in.2*(1-p.2.in.2)/(temp[2,1]+temp[2,2]) )^.5)

p.2.in.1 - p.2.in.2 + qnorm(.975) * pd.se
p.2.in.1 - p.2.in.2 - qnorm(.975) * pd.se

# A3.  logit CI

p.2.in.1 / (1-p.2.in.1)
temp[1,2]  / temp[1,1]

(   logit    <- log(p.2.in.1 / (1-p.2.in.1))  )
(   logit.se <- 1/(p.2.in.1 / (1-p.2.in.1))   )

logit + qnorm(.975)*logit.se
logit -qnorm(.975)*logit.se



# B: CI constructing function written ny LAURA THOMPSON


Wald.ci<-function(Table, aff.response, alpha=.05){ 
# Gives two-sided Wald CI's for odds ratio, difference in proportions and relative risk. 
# Table is a 2x2 table of counts with rows giving the treatment populations 
# aff.response is a string like "c(1,1)" giving the cell of the beneficial response and the 
# treatment category 
# alpha is significance level 
pow<-function(x, a=-1) x^a 
z.alpha<-qnorm(1-alpha/2) 
if(is.character(aff.response)) 
where<-eval(parse(text=aff.response)) 
else where<-aff.response 
Next<-as.numeric(where==1) + 1 
# OR 
odds.ratio<-Table[where[1],where[2]]*Table[Next[1],Next[2]]/(Table[where[1],Next[2]]*Table[Next[1],where[2]]) 
se.OR<-sqrt(sum(pow(Table))) 
ci.OR<-exp(log(odds.ratio) + c(-1,1)*z.alpha*se.OR) 
# difference of proportions 
p1<-Table[where[1],where[2]]/(n1<-Table[where[1],Next[2]] + Table[where[1],where[2]]) 
p2<-Table[Next[1],where[2]]/(n2<-Table[Next[1],where[2]]+Table[Next[1],Next[2]]) 
se.diff<-sqrt(p1*(1-p1)/n1 + p2*(1-p2)/n2) 
ci.diff<-(p1-p2) + c(-1,1)*z.alpha*se.diff 
# relative risk 
RR<-p1/p2 
se.RR<-sqrt((1-p1)/(p1*n1) + (1-p2)/(p2*n2)) 
ci.RR<-exp(log(RR) + c(-1,1)*z.alpha*se.RR) 
list(OR=list(odds.ratio=odds.ratio, CI=ci.OR), proportion.difference=list(diff=p1-p2, CI=ci.diff), relative.risk=list(relative.risk=RR,CI=ci.RR)) 
} 

temp
Wald.ci(temp,aff.response=c(1,1))



# C:  Profile likelihood CIs:

n11 <- temp[1,1]
n21 <- temp[2,1]
n12 <- temp[1,2]
n22 <- temp[2,2]

rr <- 1

p1g1 <- seq(0.001,.099,length=99)
p1g2 <- seq(0.001,.099,length=99)

log.lik <- array(dim=c(99,99))

for(i in 1:99)
for(j in 1:99)
	log.lik[i,j] <-  n11*log(p1g1[i]) + n12*log(1-p1g1[i]) + n21*log(p1g2[j]) + n22*log(1-p1g2[j])

contour(p1g1,p1g2,log.lik,xlab="Prob infarc. given placebo",ylab="Prob infarc. given aspirin",
			main="Table 3.1: log likelihood -- product mult. sampling")
points(n11 / (n11+n12), n21 / (n21+n22),col=2,pch=4)

p1g1.mle 	<- n11 / (n11+n12)
p1g2.mle 	<- n21 / (n21+n22)

max(log.lik)
(log.lik.mle <- n11*log(p1g1.mle) + n12*log(1-p1g1.mle) + n21*log(p1g2.mle) + n22*log(1-p1g2.mle))

contour(p1g1,p1g2,log.lik - log.lik.mle,xlab="Prob infarc. given placebo",ylab="Prob infarc. given aspirin")
contour(p1g1,p1g2,log.lik - log.lik.mle,xlab="Prob infarc. given placebo",ylab="Prob infarc. given aspirin",levels=-10:-1)

points(p1g1.mle,p1g2.mle,col=2,pch=4)

# Add line RR = mle 

abline(0,p1g2.mle / p1g1.mle,col=3)

# Profile likelihood for RR = mle

prof.log.lik <- n11*log(p1g1) + n12*log(1-p1g1) + n21*log(     p1g1 / (p1g1.mle/p1g2.mle)) +
                                                  n22*log( 1 - p1g1 / (p1g1.mle/p1g2.mle))

plot(p1g1,prof.log.lik - log.lik.mle,xlab="Pr(infarc.) given placebo",
			ylab="Profile log-likelihood",main="RR = mle",type="l")
p1g1[prof.log.lik == max(prof.log.lik)]
max(prof.log.lik)

# Add line RR = 1 

contour(p1g1,p1g2,log.lik - log.lik.mle,xlab="Pr(infarc.) given placebo",
			ylab="Pr(infarc.) given aspirin",
			main="Table 3.1,  prod mult. sampling,  log.lik - max(log.lik)",
			levels=c(-qchisq(0.95,1)/2,-4,-6,-10,-20))
points(p1g1.mle,p1g2.mle,col=2,pch=4)
abline(0,1,col=4)

prof.log.lik <- n11*log(p1g1) + n12*log(1-p1g1) + n21*log(p1g1 / 1) +  n22*log( 1 - p1g1 / 1)
plot(p1g1,prof.log.lik - log.lik.mle,xlab="Prob infarc. given placebo",
			ylab="Profile log-likelihood",main="RR = 1",type="l")
p1g1[prof.log.lik == max(prof.log.lik)]
max(prof.log.lik)

r 	<- seq(0.80,3,by = 0.01)
d.ll 	<- r * 0

for(i in 1:length(r))
{
	prof.log.lik <- n11*log(p1g1) + n12*log(1-p1g1) + n21*log(p1g1/r[i])+ n22*log(1 - p1g1/r[i])
	d.ll[i] <- max(prof.log.lik) - log.lik.mle 
}	

plot(r,d.ll)
abline(-qchisq(0.95,1)/2,0)
(r.ll <- min(r[d.ll  > -qchisq(0.95,1)/2]))
(r.ul <- max(r[d.ll  > -qchisq(0.95,1)/2]))

contour(p1g1,p1g2,log.lik - log.lik.mle,xlab="prob.  infarc. given placebo",
			ylab="prob.  infarc. given aspirin",
			main="Table 3.1,  prod mult. sampling, relative risk 0.95 CI",
			levels=c(-qchisq(0.95,1)/2,-4,-6,-10,-20))
points(p1g1.mle,p1g2.mle,col=2,pch=4)
abline(0,1/r.ll,col=4)
abline(0,1/r.ul,col=4)





#	D:  odds ratio CI simulation

#	real parameter values:

p.11 <- .08 ; p.12 <- .42 ; p.21 <- .05 ; p.22 <- .45
( or.real <- (p.11 * p.22) / (p.12 * p.21) )  

ind      <- rep(0,1000)
p11.hat  <- rep(0,1000)
or.hat   <- rep(0,1000)
or.til   <- rep(0,1000)
or.ci    <- array(dim=c(1000,2))

for(i in 1:1000)
{
	Data <- table(sample(1:4,size=100,replace=T,prob=c(p.11,p.21,p.12,p.22))) # multinomial sampling
#	Data <- table(sample(1:4,size=1000,replace=T,prob=c(p.11,p.21,p.12,p.22))) # multinomial sampling
	temp <- tapply(Data,table.3.1[,1:2], sum)    

	a <- Wald.ci(temp,aff.response=c(1,1))$OR$CI
	or.hat[i] <- temp[1,1]*temp[2,2] / (temp[1,2]*temp[2,1])
	or.til[i] <- (temp[1,1]+.5)*(temp[2,2]+.5) / ((temp[1,2]+.5)*(temp[2,1]+.5))
	if(a[1] < or.real && or.real < a[2]) ind[i] <- 1
	if(a[1] < or.real && or.real < a[2]) ind[i] <- 1
	or.ci[i,] <- a
	p11.hat[i] <- temp[1,1] / 1000
}

# D1:  validity of CI

t.test(ind)

par(mfcol=c(1,1))

plot(1:100,or.hat[1:100],ylim=range(or.ci[1:100,]),xlab="sample",ylab="Odds ratio",pch="-")
arrows(1:100,or.ci[1:100,1],1:100,or.ci[1:100,2],code=3,angle=90,length=1/50)
abline(or.real,0)

ord <- order(or.hat[1:100])
plot(1:100,or.hat[ord],ylim=range(or.ci[1:100,]),xlab="ordered samples",ylab="Odds ratio",pch="-")
arrows(1:100,or.ci[ord,1],1:100,or.ci[ord,2],code=3,angle=90,length=1/50)
abline(or.real,0)
mean(ind[1:100])

plot(1:100,log(or.hat[ord]),ylim=range(log(or.ci[1:100,])),xlab="ordered samples",ylab="Log(odds ratio)",pch="-")
arrows(1:100,log(or.ci[ord,1]),1:100,log(or.ci[ord,2]),code=3,angle=90,length=1/50)
abline(log(or.real),0)

plot(or.til,or.hat)
abline(0,1)

sum(abs(or.hat-1))
sum(abs(or.til-1))

par(mfcol=c(2,1))
hist(or.hat)
hist(log(or.hat))