#  Question 2


date()
cov.prp <- rep(0,100)
for(j in 1:100)
{
	for(i in 1:1000)
	{

		x <- rnorm(4,mean=0,sd=1)
		y <- rnorm(6,mean=0,sd=6)

		a <- t.test(x,y)$conf.int
#		a <- t.test(x,y,var.equal=T)$conf.int
		if(a[1] < 0 & 0 < a[2])
			cov.prp[j] <- cov.prp[j]+1/1000
	}
}
t.test(cov.prp)
date()




# Question 3


x <- c(4,5,3,4,3,4,5,4,4)
y <- c(9,7,5,3)

var(x)
var(y)

t.test(x,y)
t.test(x,y, var.equal = T)


# Sample posterior distribution of theta based on non-informative prior

(s.2.x   <- var(x))
(n.x     <- length(x))
(y.bar.x <- mean(x))

(s.2.y   <- var(y))
(n.y     <- length(y))
(y.bar.y <- mean(y))


sigma.2.x <- s.2.x / ( rchisq(100000,n.x-1) / (n.x-1))
mu.x  <- rnorm(100000,mean=y.bar.x,sd = sqrt(sigma.2.x/n.x)) 	

sigma.2.y <- s.2.y / ( rchisq(100000,n.y-1) / (n.y-1))
mu.y  <- rnorm(100000,mean=y.bar.y,sd = sqrt(sigma.2.y/n.y)) 	

theta.smp <- mu.x - mu.y

boxplot(list(mu.x,mu.y,theta.smp),names=c("mu.X","mu.Y","Theta"))
boxplot(list(mu.x,mu.y,theta.smp),names=c("mu.X","mu.Y","Theta"),ylim=c(-10,15))

t.test(x)
quantile(mu.x,probs = c(0.025,0.5,0.975))

t.test(y)
quantile(mu.y,probs = c(0.025,0.5,0.975))

t.test(x,y)
quantile(theta.smp,probs = c(0.025,0.5,0.975))
sort(theta.smp)[c(2500,97500)]

hist(theta.smp)
range(theta.smp)

quantile(theta.smp,probs = c(0.001,0.999))
sum(theta.smp > 60)
sum(theta.smp < -40)

theta.ser  <- seq(-40,60,by = 0.01)
k       <- length(theta.ser)
theta.cdf  <- (rank(c(theta.ser,theta.smp))[1:k] - (1:k)) / 10^5

par(mfcol=c(1,1))
plot(theta.ser,theta.cdf,type="l")
plot(theta.ser,theta.cdf,type="l",xlim=c(-10,15))
abline(h=c(0.025,0.5,0.975),lty=2,col=3)
approx(theta.cdf,theta.ser,xout = c(0.025,0.5,0.975))$y
t.test(x,y)



# Use some theory to produce posterior distribution of theta based on non-informative prior

#   i.e.   use  (mu - y.bar) / sqrt(s.2 / n) ~ t

epsilon <- 10^(-8) 
qt(c(epsilon,1-epsilon),df=n.x-1) 
qt(c(epsilon,1-epsilon),df=n.x-1) * sqrt(s.2.x / n.x) + y.bar.x

mu.ser.x   <- seq(-5,15,by = 0.001)
p.mu.ser.x <- pt( (mu.ser.x - y.bar.x) / sqrt(s.2.x / n.x), df = n.x -1)
mu.ser.x.mid   <- mu.ser.x[-1] - 0.001 / 2
p.mu.ser.x.mid <- diff(p.mu.ser.x)

plot(mu.ser.x.mid,p.mu.ser.x.mid,type="l")
plot(mu.ser.x.mid,p.mu.ser.x.mid,type="l",xlim=c(2,7))


# compute P(theta < a)	= p(mu.x - mu.y < a)
#				= sum_{b}  p(mu.x = b, mu.x - mu.y < a)
#                      	= sum_{b}  p(mu.x = b, b - a < mu.y)


length(theta.ser)
p.theta.ser <- rep(NA,10001)

date()
for(i in 1:10001)
{
	p.mu.y.ser <- 1 - pt( ((mu.ser.x.mid - theta.ser[i]) - y.bar.y) / sqrt(s.2.y / n.y), df = n.y-1)
	p.theta.ser[i] <- sum(p.mu.ser.x.mid * p.mu.y.ser)
}
date()

plot(theta.ser,p.theta.ser,type="l")
lines(theta.ser,theta.cdf,col=2)


approx(p.theta.ser,theta.ser,xout = c(0.025,0.5,0.975))$y
approx(theta.cdf,theta.ser,xout = c(0.025,0.5,0.975))$y