# Question 1

# 	answer sampling scheme for noninformative student

(ans.tru <- sample(LETTERS[1:4],100,replace=T) )

(ans.vec <- sample(LETTERS[1:4],100,replace=T)) 
ans.tbl <- outer(ans.vec,LETTERS[1:4],"==")
dimnames(ans.tbl) <- list(paste("Question",1:100),LETTERS[1:4])

ans.tbl
ans.tbl + 0  # indicator table X(i,j)

ans.vec == ans.tru
sum(ans.vec == ans.tru)

apply(ans.tbl,2,sum)

#	A: sampling distribution

ans.right <- rep(NA,10000)
ans.ABCD  <- array(dim=c(10000,4))

for(i in 1:10000)
{
ans.vec <- sample(LETTERS[1:4],100,replace=T) 
ans.tbl <- outer(ans.vec,LETTERS[1:4],"==")

ans.right[i] <- sum(ans.vec == ans.tru)
ans.ABCD[i,] <- apply(ans.tbl,2,sum)
}

dimnames(ans.ABCD) <- list(paste("rep",1:10000),LETTERS[1:4])

summary(ans.right)
hist(ans.right)
sum(ans.right==50)

var(ans.right)
mean(ans.right)
sqrt(var(ans.right))

# B: theoretic results

100  * 0.25 
100  * 0.25 * 0.75
sqrt(100  * 0.25 * 0.75)

dbinom(50,100,.25)
1-pbinom(49,100,.25)

# C + D

summary(ans.ABCD)
var(ans.ABCD)
cor(ans.ABCD)


100 * .25
100 * .25 * .75
100 * ( - .25 * .25)

100 * ( - .25 * .25)  / (100 * .25 * .75)


# Question 2


Weather       <-c("Sharav","Pleasant","Rain") 
Umbrella      <-c("umb=0","umb=1") 
Near.acc 	  <-c("acc=0","acc=1") 

p.weath	<- c(.2,.6,.2)
p.umb.weath <-  c(.10,.05,.80)
p.acc.weath <-  c(.20,.10,.70)

daily.umb.acc <- array(dim=c(2,2,365))
dimnames(daily.umb.acc) <- list(Umbrella,Near.acc,paste("Day",1:365))
daily.weath   <- rep(NA,365)

for(i in 1:365)
{

	w <- sample(1:3,1,prob=p.weath)
	p.1p <- p.umb.weath[w]
	p.p1 <- p.acc.weath[w]

	p.tbl <- outer(c(1-p.1p,p.1p),c(1-p.p1,p.p1))

	daily.umb.acc[,,i] <- rpois(4,100*p.tbl)
	daily.weath[i]     <- w
}

Weather[daily.weath[1:10]]

daily.umb.acc[,,1:10]

table(daily.weath)

(tbl.sharv <- apply(daily.umb.acc[,,daily.weath == 1],c(1,2),sum) )   #  Sharav   days table
(tbl.plsnt <- apply(daily.umb.acc[,,daily.weath == 2],c(1,2),sum) )   #  Pleasant days table
(tbl.rain  <- apply(daily.umb.acc[,,daily.weath == 3],c(1,2),sum) )   #  Rain  days table
tbl.sharv + tbl.plsnt + tbl.rain

library(vcd)  # load vcd package

oddsratio(tbl.sharv,log=F)
oddsratio(tbl.plsnt,log=F)
oddsratio(tbl.rain,log=F)
oddsratio(tbl.sharv + tbl.plsnt + tbl.rain,log=F)



# Question 3

Seat.belt <-c("No","Yes") 
Fatality  <-c("Yes","No") 

Lvls   <- expand.grid(seat.belt=Seat.belt,fatality=Fatality) 
Count  <-c(1601,510,162527,412368)
(tbl <- tapply(Count,Lvls,sum))

(p.1.in.1 <- tbl[1,1]  / (tbl[1,1] + tbl[1,2]))
(p.1.in.2 <- tbl[2,1]  / (tbl[2,1] + tbl[2,2]))

( df.1 <- p.1.in.1  -  p.1.in.2 )  	# diff in prop
( rr.1 <- p.1.in.1  /  p.1.in.2 )  	# relative risk
( or.1 <- (tbl[1,1]*tbl[2,2]) / (tbl[1,2]*tbl[2,1]) )	# odds ratio


data.matrix <- data.frame(seat.belt  = c(rep(0,tbl[1,1]),rep(0,tbl[1,2]),rep(1,tbl[2,1]),rep(1,tbl[2,2])),
				  fatality   = c(rep(0,tbl[1,1]),rep(1,tbl[1,2]),rep(0,tbl[2,1]),rep(1,tbl[2,2])))

dim(data.matrix)
table(data.matrix)
(n <- sum(table(data.matrix)))


#	Emulate prospective sampling 

i.0 <- sample((1:n)[data.matrix$seat.belt == 0],size = 10000, replace = F)
i.1 <- sample((1:n)[data.matrix$seat.belt == 1],size = 10000, replace = F)

(tbl.2 <- table(data.matrix[c(i.0,i.1),]))

(p.1.in.1 <- tbl.2[1,1]  / (tbl.2[1,1] + tbl.2[1,2]))
(p.1.in.2 <- tbl.2[2,1]  / (tbl.2[2,1] + tbl.2[2,2]))

( df.2 <- p.1.in.1  -  p.1.in.2 )  	# diff in prop
( rr.2 <- p.1.in.1  /  p.1.in.2 )  	# relative risk
( or.2 <- (tbl.2[1,1]*tbl.2[2,2]) / (tbl.2[1,2]*tbl.2[2,1]) )	# odds ratio

#	Emulate retrospective sampling 

i.0 <- sample((1:n)[data.matrix$fatality == 0],size = 1000, replace = F)
i.1 <- sample((1:n)[data.matrix$fatality == 1],size = 1000, replace = F)

(tbl.2 <- table(data.matrix[c(i.0,i.1),]))

(p.1.in.1 <- tbl.2[1,1]  / (tbl.2[1,1] + tbl.2[1,2]))
(p.1.in.2 <- tbl.2[2,1]  / (tbl.2[2,1] + tbl.2[2,2]))

( df.2 <- p.1.in.1  -  p.1.in.2 )  	# diff in prop
( rr.2 <- p.1.in.1  /  p.1.in.2 )  	# relative risk
( or.2 <- (tbl.2[1,1]*tbl.2[2,2]) / (tbl.2[1,2]*tbl.2[2,1]) )	# odds ratio