# Table 3.1 data Drug<-c("Placebo","Aspirin") Infarction<-c("yes","no") table.3.1<-expand.grid(drug=Drug,infarction=Infarction) Data<-c(28,18,656,658) # note capital D (see comment below) table.3.1<-cbind(table.3.1,count=Data) ( temp <- tapply(table.3.1$count,table.3.1[,1:2], sum) ) # turn data frame into an array # A: Normal approximation CIs # A1. odds ratio CI ( or.tilde <- (temp[1,1]+.5)*(temp[2,2]+.5)/ ((temp[1,2]+.5)* (temp[2,1]+.5)) ) ( or.hat <- temp[1,1] * temp[2,2] / (temp[1,2] * temp[2,1]) ) ( log.or.se <- (1/temp[1,1] + 1/temp[2,2] + 1/temp[1,2] + 1/temp[2,1])^.5 ) ( ci.ul <- log(or.hat) + qnorm(.975)*log.or.se ) ( ci.ll <- log(or.hat) - qnorm(.975)*log.or.se ) exp(ci.ul) exp(ci.ll) # A2. difference of prop CI ( p.2.in.1 <- temp[1,2] / ( temp[1,1] + temp[1,2])) ( p.2.in.2 <- temp[2,2] / ( temp[2,1] + temp[2,2])) p.2.in.1 - p.2.in.2 (pd.se <- (p.2.in.1*(1-p.2.in.1)/(temp[1,1]+temp[1,2]) + p.2.in.2*(1-p.2.in.2)/(temp[2,1]+temp[2,2]) )^.5) p.2.in.1 - p.2.in.2 + qnorm(.975) * pd.se p.2.in.1 - p.2.in.2 - qnorm(.975) * pd.se # A3. logit CI p.2.in.1 / (1-p.2.in.1) temp[1,2] / temp[1,1] ( logit <- log(p.2.in.1 / (1-p.2.in.1)) ) ( logit.se <- 1/(p.2.in.1 / (1-p.2.in.1)) ) logit + qnorm(.975)*logit.se logit -qnorm(.975)*logit.se # B: CI constructing function written ny LAURA THOMPSON Wald.ci<-function(Table, aff.response, alpha=.05){ # Gives two-sided Wald CI's for odds ratio, difference in proportions and relative risk. # Table is a 2x2 table of counts with rows giving the treatment populations # aff.response is a string like "c(1,1)" giving the cell of the beneficial response and the # treatment category # alpha is significance level pow<-function(x, a=-1) x^a z.alpha<-qnorm(1-alpha/2) if(is.character(aff.response)) where<-eval(parse(text=aff.response)) else where<-aff.response Next<-as.numeric(where==1) + 1 # OR odds.ratio<-Table[where[1],where[2]]*Table[Next[1],Next[2]]/(Table[where[1],Next[2]]*Table[Next[1],where[2]]) se.OR<-sqrt(sum(pow(Table))) ci.OR<-exp(log(odds.ratio) + c(-1,1)*z.alpha*se.OR) # difference of proportions p1<-Table[where[1],where[2]]/(n1<-Table[where[1],Next[2]] + Table[where[1],where[2]]) p2<-Table[Next[1],where[2]]/(n2<-Table[Next[1],where[2]]+Table[Next[1],Next[2]]) se.diff<-sqrt(p1*(1-p1)/n1 + p2*(1-p2)/n2) ci.diff<-(p1-p2) + c(-1,1)*z.alpha*se.diff # relative risk RR<-p1/p2 se.RR<-sqrt((1-p1)/(p1*n1) + (1-p2)/(p2*n2)) ci.RR<-exp(log(RR) + c(-1,1)*z.alpha*se.RR) list(OR=list(odds.ratio=odds.ratio, CI=ci.OR), proportion.difference=list(diff=p1-p2, CI=ci.diff), relative.risk=list(relative.risk=RR,CI=ci.RR)) } temp Wald.ci(temp,aff.response=c(1,1)) # C: Profile likelihood CIs: n11 <- temp[1,1] n21 <- temp[2,1] n12 <- temp[1,2] n22 <- temp[2,2] rr <- 1 p1g1 <- seq(0.001,.099,length=99) p1g2 <- seq(0.001,.099,length=99) log.lik <- array(dim=c(99,99)) for(i in 1:99) for(j in 1:99) log.lik[i,j] <- n11*log(p1g1[i]) + n12*log(1-p1g1[i]) + n21*log(p1g2[j]) + n22*log(1-p1g2[j]) contour(p1g1,p1g2,log.lik,xlab="Prob infarc. given placebo",ylab="Prob infarc. given aspirin", main="Table 3.1: log likelihood -- product mult. sampling") points(n11 / (n11+n12), n21 / (n21+n22),col=2,pch=4) p1g1.mle <- n11 / (n11+n12) p1g2.mle <- n21 / (n21+n22) max(log.lik) (log.lik.mle <- n11*log(p1g1.mle) + n12*log(1-p1g1.mle) + n21*log(p1g2.mle) + n22*log(1-p1g2.mle)) contour(p1g1,p1g2,log.lik - log.lik.mle,xlab="Prob infarc. given placebo",ylab="Prob infarc. given aspirin") contour(p1g1,p1g2,log.lik - log.lik.mle,xlab="Prob infarc. given placebo",ylab="Prob infarc. given aspirin",levels=-10:-1) points(p1g1.mle,p1g2.mle,col=2,pch=4) # Add line RR = mle abline(0,p1g2.mle / p1g1.mle,col=3) # Profile likelihood for RR = mle prof.log.lik <- n11*log(p1g1) + n12*log(1-p1g1) + n21*log( p1g1 / (p1g1.mle/p1g2.mle)) + n22*log( 1 - p1g1 / (p1g1.mle/p1g2.mle)) plot(p1g1,prof.log.lik - log.lik.mle,xlab="Pr(infarc.) given placebo", ylab="Profile log-likelihood",main="RR = mle",type="l") p1g1[prof.log.lik == max(prof.log.lik)] max(prof.log.lik) # Add line RR = 1 contour(p1g1,p1g2,log.lik - log.lik.mle,xlab="Pr(infarc.) given placebo", ylab="Pr(infarc.) given aspirin", main="Table 3.1, prod mult. sampling, log.lik - max(log.lik)", levels=c(-qchisq(0.95,1)/2,-4,-6,-10,-20)) points(p1g1.mle,p1g2.mle,col=2,pch=4) abline(0,1,col=4) prof.log.lik <- n11*log(p1g1) + n12*log(1-p1g1) + n21*log(p1g1 / 1) + n22*log( 1 - p1g1 / 1) plot(p1g1,prof.log.lik - log.lik.mle,xlab="Prob infarc. given placebo", ylab="Profile log-likelihood",main="RR = 1",type="l") p1g1[prof.log.lik == max(prof.log.lik)] max(prof.log.lik) r <- seq(0.80,3,by = 0.01) d.ll <- r * 0 for(i in 1:length(r)) { prof.log.lik <- n11*log(p1g1) + n12*log(1-p1g1) + n21*log(p1g1/r[i])+ n22*log(1 - p1g1/r[i]) d.ll[i] <- max(prof.log.lik) - log.lik.mle } plot(r,d.ll) abline(-qchisq(0.95,1)/2,0) (r.ll <- min(r[d.ll > -qchisq(0.95,1)/2])) (r.ul <- max(r[d.ll > -qchisq(0.95,1)/2])) contour(p1g1,p1g2,log.lik - log.lik.mle,xlab="prob. infarc. given placebo", ylab="prob. infarc. given aspirin", main="Table 3.1, prod mult. sampling, relative risk 0.95 CI", levels=c(-qchisq(0.95,1)/2,-4,-6,-10,-20)) points(p1g1.mle,p1g2.mle,col=2,pch=4) abline(0,1/r.ll,col=4) abline(0,1/r.ul,col=4) # D: odds ratio CI simulation # real parameter values: p.11 <- .08 ; p.12 <- .42 ; p.21 <- .05 ; p.22 <- .45 ( or.real <- (p.11 * p.22) / (p.12 * p.21) ) ind <- rep(0,1000) p11.hat <- rep(0,1000) or.hat <- rep(0,1000) or.til <- rep(0,1000) or.ci <- array(dim=c(1000,2)) for(i in 1:1000) { Data <- table(sample(1:4,size=100,replace=T,prob=c(p.11,p.21,p.12,p.22))) # multinomial sampling # Data <- table(sample(1:4,size=1000,replace=T,prob=c(p.11,p.21,p.12,p.22))) # multinomial sampling temp <- tapply(Data,table.3.1[,1:2], sum) a <- Wald.ci(temp,aff.response=c(1,1))$OR$CI or.hat[i] <- temp[1,1]*temp[2,2] / (temp[1,2]*temp[2,1]) or.til[i] <- (temp[1,1]+.5)*(temp[2,2]+.5) / ((temp[1,2]+.5)*(temp[2,1]+.5)) if(a[1] < or.real && or.real < a[2]) ind[i] <- 1 if(a[1] < or.real && or.real < a[2]) ind[i] <- 1 or.ci[i,] <- a p11.hat[i] <- temp[1,1] / 1000 } # D1: validity of CI t.test(ind) par(mfcol=c(1,1)) plot(1:100,or.hat[1:100],ylim=range(or.ci[1:100,]),xlab="sample",ylab="Odds ratio",pch="-") arrows(1:100,or.ci[1:100,1],1:100,or.ci[1:100,2],code=3,angle=90,length=1/50) abline(or.real,0) ord <- order(or.hat[1:100]) plot(1:100,or.hat[ord],ylim=range(or.ci[1:100,]),xlab="ordered samples",ylab="Odds ratio",pch="-") arrows(1:100,or.ci[ord,1],1:100,or.ci[ord,2],code=3,angle=90,length=1/50) abline(or.real,0) mean(ind[1:100]) plot(1:100,log(or.hat[ord]),ylim=range(log(or.ci[1:100,])),xlab="ordered samples",ylab="Log(odds ratio)",pch="-") arrows(1:100,log(or.ci[ord,1]),1:100,log(or.ci[ord,2]),code=3,angle=90,length=1/50) abline(log(or.real),0) plot(or.til,or.hat) abline(0,1) sum(abs(or.hat-1)) sum(abs(or.til-1)) par(mfcol=c(2,1)) hist(or.hat) hist(log(or.hat))