# Question 1 # answer sampling scheme for noninformative student (ans.tru <- sample(LETTERS[1:4],100,replace=T) ) (ans.vec <- sample(LETTERS[1:4],100,replace=T)) ans.tbl <- outer(ans.vec,LETTERS[1:4],"==") dimnames(ans.tbl) <- list(paste("Question",1:100),LETTERS[1:4]) ans.tbl ans.tbl + 0 # indicator table X(i,j) ans.vec == ans.tru sum(ans.vec == ans.tru) apply(ans.tbl,2,sum) # A: sampling distribution ans.right <- rep(NA,10000) ans.ABCD <- array(dim=c(10000,4)) for(i in 1:10000) { ans.vec <- sample(LETTERS[1:4],100,replace=T) ans.tbl <- outer(ans.vec,LETTERS[1:4],"==") ans.right[i] <- sum(ans.vec == ans.tru) ans.ABCD[i,] <- apply(ans.tbl,2,sum) } dimnames(ans.ABCD) <- list(paste("rep",1:10000),LETTERS[1:4]) summary(ans.right) hist(ans.right) sum(ans.right==50) var(ans.right) mean(ans.right) sqrt(var(ans.right)) # B: theoretic results 100 * 0.25 100 * 0.25 * 0.75 sqrt(100 * 0.25 * 0.75) dbinom(50,100,.25) 1-pbinom(49,100,.25) # C + D summary(ans.ABCD) var(ans.ABCD) cor(ans.ABCD) 100 * .25 100 * .25 * .75 100 * ( - .25 * .25) 100 * ( - .25 * .25) / (100 * .25 * .75) # Question 2 Weather <-c("Sharav","Pleasant","Rain") Umbrella <-c("umb=0","umb=1") Near.acc <-c("acc=0","acc=1") p.weath <- c(.2,.6,.2) p.umb.weath <- c(.10,.05,.80) p.acc.weath <- c(.20,.10,.70) daily.umb.acc <- array(dim=c(2,2,365)) dimnames(daily.umb.acc) <- list(Umbrella,Near.acc,paste("Day",1:365)) daily.weath <- rep(NA,365) for(i in 1:365) { w <- sample(1:3,1,prob=p.weath) p.1p <- p.umb.weath[w] p.p1 <- p.acc.weath[w] p.tbl <- outer(c(1-p.1p,p.1p),c(1-p.p1,p.p1)) daily.umb.acc[,,i] <- rpois(4,100*p.tbl) daily.weath[i] <- w } Weather[daily.weath[1:10]] daily.umb.acc[,,1:10] table(daily.weath) (tbl.sharv <- apply(daily.umb.acc[,,daily.weath == 1],c(1,2),sum) ) # Sharav days table (tbl.plsnt <- apply(daily.umb.acc[,,daily.weath == 2],c(1,2),sum) ) # Pleasant days table (tbl.rain <- apply(daily.umb.acc[,,daily.weath == 3],c(1,2),sum) ) # Rain days table tbl.sharv + tbl.plsnt + tbl.rain library(vcd) # load vcd package oddsratio(tbl.sharv,log=F) oddsratio(tbl.plsnt,log=F) oddsratio(tbl.rain,log=F) oddsratio(tbl.sharv + tbl.plsnt + tbl.rain,log=F) # Question 3 Seat.belt <-c("No","Yes") Fatality <-c("Yes","No") Lvls <- expand.grid(seat.belt=Seat.belt,fatality=Fatality) Count <-c(1601,510,162527,412368) (tbl <- tapply(Count,Lvls,sum)) (p.1.in.1 <- tbl[1,1] / (tbl[1,1] + tbl[1,2])) (p.1.in.2 <- tbl[2,1] / (tbl[2,1] + tbl[2,2])) ( df.1 <- p.1.in.1 - p.1.in.2 ) # diff in prop ( rr.1 <- p.1.in.1 / p.1.in.2 ) # relative risk ( or.1 <- (tbl[1,1]*tbl[2,2]) / (tbl[1,2]*tbl[2,1]) ) # odds ratio data.matrix <- data.frame(seat.belt = c(rep(0,tbl[1,1]),rep(0,tbl[1,2]),rep(1,tbl[2,1]),rep(1,tbl[2,2])), fatality = c(rep(0,tbl[1,1]),rep(1,tbl[1,2]),rep(0,tbl[2,1]),rep(1,tbl[2,2]))) dim(data.matrix) table(data.matrix) (n <- sum(table(data.matrix))) # Emulate prospective sampling i.0 <- sample((1:n)[data.matrix$seat.belt == 0],size = 10000, replace = F) i.1 <- sample((1:n)[data.matrix$seat.belt == 1],size = 10000, replace = F) (tbl.2 <- table(data.matrix[c(i.0,i.1),])) (p.1.in.1 <- tbl.2[1,1] / (tbl.2[1,1] + tbl.2[1,2])) (p.1.in.2 <- tbl.2[2,1] / (tbl.2[2,1] + tbl.2[2,2])) ( df.2 <- p.1.in.1 - p.1.in.2 ) # diff in prop ( rr.2 <- p.1.in.1 / p.1.in.2 ) # relative risk ( or.2 <- (tbl.2[1,1]*tbl.2[2,2]) / (tbl.2[1,2]*tbl.2[2,1]) ) # odds ratio # Emulate retrospective sampling i.0 <- sample((1:n)[data.matrix$fatality == 0],size = 1000, replace = F) i.1 <- sample((1:n)[data.matrix$fatality == 1],size = 1000, replace = F) (tbl.2 <- table(data.matrix[c(i.0,i.1),])) (p.1.in.1 <- tbl.2[1,1] / (tbl.2[1,1] + tbl.2[1,2])) (p.1.in.2 <- tbl.2[2,1] / (tbl.2[2,1] + tbl.2[2,2])) ( df.2 <- p.1.in.1 - p.1.in.2 ) # diff in prop ( rr.2 <- p.1.in.1 / p.1.in.2 ) # relative risk ( or.2 <- (tbl.2[1,1]*tbl.2[2,2]) / (tbl.2[1,2]*tbl.2[2,1]) ) # odds ratio