###############################
		#					#
		#	    Question 1		#
		#					#
		###############################




y <- c(1.73,1,1.73,2,3,4.36,2.65,2.65,3.32)
x <- c(0.25,0.22,0.36,0.23,0.37,0.46,0.27,0.44,0.31)


plot(x,y)

lm.1 <- lm(y~x)  			#  full model = intercept + slope 
lm.2 <- lm(y~x+0)			#  no intercept model

summary(lm.1)

abline(lm.1,col=2)
abline(lm.2,col=3)


sum(x*y) / sum(x^2)		# estimate of beta
summary(lm.2)

sum(lm.1$res)			# sum of full model residuals
sum(lm.2$res)			# sum of no-intercept model residuals

MSE <- sum(lm.2$res^2) / (9-1)  # MSE
MSE  
sqrt(MSE)				  # check if result same as R output

MSE / sum(x^2)			  # beta variance estimator	
vcov(lm.2)				  # R estimated coeff. covariance matrix
sqrt(vcov(lm.2))			  # beta std  - matching R output	
 	





		###############################
		#					#
		#	    Question 2		#
		#					#
		###############################



y.hat.nrm		<- rep(NA,1000)
epsilon.nrm	<- rep(NA,1000)
SSE			<- rep(NA,1000)
SSR			<- rep(NA,1000)

x			<- 1:10


#	Simulation 1: beta_0 = 2 and beta_1 = 1.5


for(i in 1:1000)
{
	error.vec	<- rnorm(10,sd = 3)
	y.1		<- 2 + 1.5*x + error.vec
	lm.1		<- lm(y.1 ~ x)

	y.hat.nrm[i]	<- sum(lm.1$fit^2)
	epsilon.nrm[i]	<- sum(error.vec^2)
	SSE[i]		<- sum(lm.1$res^2)
	SSR[i]		<- sum((lm.1$fit-mean(y.1))^2)
}


boxplot(list(y.hat.nrm,epsilon.nrm,SSE,SSR))


x.vec <- seq(0,300,length=5000)
#x.vec <- seq(0,80,length=5000)

plot(x.vec,pchisq(x.vec,df=10),col=3,lty=2,type="l",ylim=c(0,1))
lines(x.vec,pchisq(x.vec,df=8),col=4,lty=2,type="l")

lines(ecdf(y.hat.nrm/9),col=2)
lines(ecdf(epsilon.nrm/9),col=3)
lines(ecdf(SSE/9),col=4)
lines(ecdf(SSR/9),col=5)


#	y.hat.nrm and SSR have non-central chi-squared distribution

lines(x.vec,pchisq(x.vec,df=2,ncp = sum((2+1.5*x)^2)/9),col=2,lty=2)
lines(x.vec,pchisq(x.vec,df=1,ncp = sum( ((2+1.5*x) - mean(2+1.5*x))^2)/9),col=5,lty=2)


# K-S tests showing that epsilon.nrm/9 has 10-df chi-squared distribution
  
ks.test(epsilon.nrm/9,"pchisq",df=9)
ks.test(epsilon.nrm/9,"pchisq",df=10)
ks.test(epsilon.nrm/9,"pchisq",df=11)


#	Simulation 2: beta_0 = 2 and beta_1 = 0  -- an example in which SSR is central chi-squared

for(i in 1:1000)
{
	error.vec	<- rnorm(10,sd = 3)
	y.2		<- 2 + 0 *x + error.vec
	lm.2		<- lm(y.2 ~ x)

	y.hat.nrm[i]	<- sum(lm.2$fit^2)
	epsilon.nrm[i]	<- sum(error.vec^2)
	SSE[i]		<- sum(lm.2$res^2)
	SSR[i]		<- sum((lm.2$fit-mean(y.2))^2)
}

x.vec <- seq(0,40,length=5000)
plot(x.vec,pchisq(x.vec,df=10),col=3,lty=2,type="l",ylim=c(0,1))
lines(x.vec,pchisq(x.vec,df=8),col=4,lty=2,type="l")
lines(x.vec,pchisq(x.vec,df=1),col=5,lty=2,type="l")

lines(ecdf(y.hat.nrm/9),col=2)
lines(ecdf(epsilon.nrm/9),col=3)
lines(ecdf(SSE/9),col=4)
lines(ecdf(SSR/9),col=5)


#	Simulation 3: beta_0 = 0 and beta_1 = 0  -- an example in which y.hat.nrn. is central chi-squared

for(i in 1:1000)
{
	error.vec	<- rnorm(10,sd = 3)
	y.3		<- 0 + 0 *x + error.vec
	lm.3		<- lm(y.3 ~ x)

	y.hat.nrm[i]	<- sum(lm.3$fit^2)
	epsilon.nrm[i]	<- sum(error.vec^2)
	SSE[i]		<- sum(lm.3$res^2)
	SSR[i]		<- sum((lm.3$fit-mean(y.3))^2)
}




plot(x.vec,pchisq(x.vec,df=10),col=3,lty=2,type="l",ylim=c(0,1))
lines(x.vec,pchisq(x.vec,df=8),col=4,lty=2,type="l")
lines(x.vec,pchisq(x.vec,df=2),col=2,lty=2,type="l")
lines(x.vec,pchisq(x.vec,df=1),col=5,lty=2,type="l")

lines(ecdf(y.hat.nrm/9),col=2)
lines(ecdf(epsilon.nrm/9),col=3)
lines(ecdf(SSE/9),col=4)
lines(ecdf(SSR/9),col=5)



#	Simulation 4: linear model in which SSE is non-central chi-squared

for(i in 1:1000)
{
	error.vec	<- rnorm(10,sd = 3)
	y.4		<- 1 + 0.2 * x^2 + error.vec
	lm.4		<- lm(y.4 ~ x)

	SSE[i]		<- sum(lm.4$res^2)
}


plot(x.vec,pchisq(x.vec,df=8),col=4,lty=2,type="l",ylim=c(0,1))

lines(ecdf(SSE/9),col=4)


# Question: what is the non-centrality parameter?



		###############################
		#					#
		#	    Question 3		#
		#					#
		###############################


A.mat		<- cbind(c(1,2),c(0,3),c(-2,-4))
mu.vec	<- c(3,1)


A.mat %*% t(A.mat)

#Z.smp	<- array(rnorm(3*10^5),dim=c(10^5,3))
Z.smp	<- array(rnorm(3*10^2),dim=c(10^2,3))

W.smp	<- A.mat %*% t(Z.smp) + mu.vec 

cov(t(W.smp))
apply(W.smp,1,mean)

V.1	<- -Z.smp[,1]
V.2	<- ifelse(abs(Z.smp[,1])<= 1, Z.smp[,1], - Z.smp[,1])

hist(V.2)

plot(V.1,V.2)
abline(0,1,col=2)

plot(V.1,V.1 + V.2)
abline(0,1,col=2)
abline(h = 0,col=2)


hist(V.1 + V.2)