# Question 2 date() cov.prp <- rep(0,100) for(j in 1:100) { for(i in 1:1000) { x <- rnorm(4,mean=0,sd=1) y <- rnorm(6,mean=0,sd=6) a <- t.test(x,y)$conf.int # a <- t.test(x,y,var.equal=T)$conf.int if(a[1] < 0 & 0 < a[2]) cov.prp[j] <- cov.prp[j]+1/1000 } } t.test(cov.prp) date() # Question 3 x <- c(4,5,3,4,3,4,5,4,4) y <- c(9,7,5,3) var(x) var(y) t.test(x,y) t.test(x,y, var.equal = T) # Sample posterior distribution of theta based on non-informative prior (s.2.x <- var(x)) (n.x <- length(x)) (y.bar.x <- mean(x)) (s.2.y <- var(y)) (n.y <- length(y)) (y.bar.y <- mean(y)) sigma.2.x <- s.2.x / ( rchisq(100000,n.x-1) / (n.x-1)) mu.x <- rnorm(100000,mean=y.bar.x,sd = sqrt(sigma.2.x/n.x)) sigma.2.y <- s.2.y / ( rchisq(100000,n.y-1) / (n.y-1)) mu.y <- rnorm(100000,mean=y.bar.y,sd = sqrt(sigma.2.y/n.y)) theta.smp <- mu.x - mu.y boxplot(list(mu.x,mu.y,theta.smp),names=c("mu.X","mu.Y","Theta")) boxplot(list(mu.x,mu.y,theta.smp),names=c("mu.X","mu.Y","Theta"),ylim=c(-10,15)) t.test(x) quantile(mu.x,probs = c(0.025,0.5,0.975)) t.test(y) quantile(mu.y,probs = c(0.025,0.5,0.975)) t.test(x,y) quantile(theta.smp,probs = c(0.025,0.5,0.975)) sort(theta.smp)[c(2500,97500)] hist(theta.smp) range(theta.smp) quantile(theta.smp,probs = c(0.001,0.999)) sum(theta.smp > 60) sum(theta.smp < -40) theta.ser <- seq(-40,60,by = 0.01) k <- length(theta.ser) theta.cdf <- (rank(c(theta.ser,theta.smp))[1:k] - (1:k)) / 10^5 par(mfcol=c(1,1)) plot(theta.ser,theta.cdf,type="l") plot(theta.ser,theta.cdf,type="l",xlim=c(-10,15)) abline(h=c(0.025,0.5,0.975),lty=2,col=3) approx(theta.cdf,theta.ser,xout = c(0.025,0.5,0.975))$y t.test(x,y) # Use some theory to produce posterior distribution of theta based on non-informative prior # i.e. use (mu - y.bar) / sqrt(s.2 / n) ~ t epsilon <- 10^(-8) qt(c(epsilon,1-epsilon),df=n.x-1) qt(c(epsilon,1-epsilon),df=n.x-1) * sqrt(s.2.x / n.x) + y.bar.x mu.ser.x <- seq(-5,15,by = 0.001) p.mu.ser.x <- pt( (mu.ser.x - y.bar.x) / sqrt(s.2.x / n.x), df = n.x -1) mu.ser.x.mid <- mu.ser.x[-1] - 0.001 / 2 p.mu.ser.x.mid <- diff(p.mu.ser.x) plot(mu.ser.x.mid,p.mu.ser.x.mid,type="l") plot(mu.ser.x.mid,p.mu.ser.x.mid,type="l",xlim=c(2,7)) # compute P(theta < a) = p(mu.x - mu.y < a) # = sum_{b} p(mu.x = b, mu.x - mu.y < a) # = sum_{b} p(mu.x = b, b - a < mu.y) length(theta.ser) p.theta.ser <- rep(NA,10001) date() for(i in 1:10001) { p.mu.y.ser <- 1 - pt( ((mu.ser.x.mid - theta.ser[i]) - y.bar.y) / sqrt(s.2.y / n.y), df = n.y-1) p.theta.ser[i] <- sum(p.mu.ser.x.mid * p.mu.y.ser) } date() plot(theta.ser,p.theta.ser,type="l") lines(theta.ser,theta.cdf,col=2) approx(p.theta.ser,theta.ser,xout = c(0.025,0.5,0.975))$y approx(theta.cdf,theta.ser,xout = c(0.025,0.5,0.975))$y