# 	Targil 7, Question 1


y <- c(1.73,1,1.73,2,3,4.36,2.65,2.65,3.32)
x <- c(0.25,0.22,0.36,0.23,0.37,0.46,0.27,0.44,0.31)

lm.1 <- lm(y~x)  			#  full model = intercept + slope 
lm.2 <- lm(y~x+0)			#  no intercept model

sum(x*y) / sum(x^2)		# estimate of beta
summary(lm.2)

sum(lm.1$res)			# sum of full model residuals
sum(lm.2$res)			# sum of no-intercept model residuals

MSE <- sum(lm.2$res^2) / (9-1)  # MSE
MSE  
sqrt(MSE)				  # check if result same as R output

MSE / sum(x^2)			  # beta variance estimator	
vcov(lm.2)				  # R estimated coeff. covariance matrix
sqrt(vcov(lm.2))			  # beta std  - matching R output	
 	




# 	Targil 7, Question 2


#la.rent <- read.table("D:/Courses/regression/data sets/LARENT.txt",header=F)

rent.price <- la.rent[,1]
area.bath  <- la.rent[,6]
dist.beach <- la.rent[,8]

lm.1 <- lm(rent.price ~ area.bath + dist.beach)

# aleph

SSE <- sum((rent.price  - lm.1$fitted)^2)
SSR <- sum((lm.1$fitted - mean(rent.price))^2)
SST <- sum((rent.price  - mean(rent.price))^2)

SSE ; SSR ; SSE + SSR
SST

# bet 

MSE <- SSE / (26-3)
MSR <- SSR / 2

MSR / MSE
qf(0.95,2,23)

summary(lm.1)

# gimel 

y <- rent.price
x.mat <- cbind(intercept=rep(1,26),area.bath,dist.beach)

solve( t(x.mat) %*% x.mat ) %*% t(x.mat) %*% y
MSE * solve( t(x.mat) %*% x.mat )

vcov(lm.1)




#   Targil 7:  Question 4


x.1 <-  la.rent[,2]
x.2 <-  la.rent[,3]
x.3 <-  la.rent[,4]

ssr.0  <- rep(NA,1000)
ssr.1  <- rep(NA,1000)
ssr.2  <- rep(NA,1000)

nrm2.0 <- rep(NA,1000)
nrm2.1 <- rep(NA,1000)
nrm2.2 <- rep(NA,1000)

for(i in 1:1000)
{
	y <- rnorm(26) 
	y.pred <- lm(y ~ x.1 + x.2 + x.3)$fit
	ssr.0[i] <- sum((y.pred-mean(y))^2)
	nrm2.0[i] <- sum(y.pred^2)
}

for(i in 1:1000)
{
	y <- rnorm(26) + .25
	y.pred <- lm(y ~ x.1 + x.2 + x.3)$fit
	ssr.1[i] <- sum((y.pred-mean(y))^2)
	nrm2.1[i] <- sum(y.pred^2)
}

for(i in 1:1000)
{
	y <- rnorm(26)  - 1.8  + 0.005*x.1
	y.pred <- lm(y ~ x.1 + x.2 + x.3)$fit
	ssr.2[i] <- sum((y.pred-mean(y))^2)
	nrm2.2[i] <- sum(y.pred^2)
}


boxplot(list(ssr.0,ssr.1,ssr.2,nrm2.0,nrm2.1,nrm2.2))

#  SSR cdfs


x.vec <- seq(0,15,length=5000)
q.vec <- ppoints(1000)

plot(x.vec,pchisq(x.vec,df=3),col=3,lty=2,type="l",ylim=c(0,1))
lines(x.vec,pchisq(x.vec,df=4),col=4,lty=2,type="l",ylim=c(0,1))

lines(sort(ssr.0),q.vec,col=2)
lines(sort(ssr.1),q.vec,col=5)
lines(sort(ssr.2),q.vec,col=6)

#  NRM^2 cdfs

plot(x.vec,pchisq(x.vec,df=3),col=3,lty=2,type="l",ylim=c(0,1))
lines(x.vec,pchisq(x.vec,df=4),col=4,lty=2,type="l",ylim=c(0,1))

lines(sort(nrm2.0),q.vec,col=2)
lines(sort(nrm2.1),q.vec,col=5)
lines(sort(nrm2.2),q.vec,col=6)



# K-S tests


ks.test(ssr.0,"pchisq",df=3)
ks.test(ssr.0,"pchisq",df=4)

ks.test(ssr.1,"pchisq",df=3)
ks.test(ssr.1,"pchisq",df=4)

ks.test(ssr.2,"pchisq",df=3)
ks.test(ssr.2,"pchisq",df=4)

ks.test(nrm2.0,"pchisq",df=3)
ks.test(nrm2.0,"pchisq",df=4)

ks.test(nrm2.1,"pchisq",df=3)
ks.test(nrm2.1,"pchisq",df=4)

ks.test(nrm2.2,"pchisq",df=3)
ks.test(nrm2.2,"pchisq",df=4)


# What is the distriubtion of the non chisquare sum-of-squares?


lines(x.vec,pchisq(x.vec,df=4,ncp=sum(rep(.25,26)^2)),col=5,lty=2,type="l",ylim=c(0,1))
lines(x.vec,pchisq(x.vec,df=4,ncp=sum( (-1.8 + 0.005*x.1)^2)),col=6,lty=2,type="l",ylim=c(0,1))



# non-central chisquare K-S for nrm2.1

mn.vec <- rep(.25,26)
ks.test(nrm2.1,"pchisq",df=4,ncp=sum(mn.vec^2))


# non-central chisquare K-S for nrm2.2 and ssr.2

mn.vec <- -1.8 + 0.005*x.1
ks.test(nrm2.2,"pchisq",df=4,ncp=sum(mn.vec^2))

ks.test(ssr.2,"pchisq",df=3,ncp=sum(mn.vec^2))
ks.test(ssr.2,"pchisq",df=3,ncp=sum((mn.vec-mean(mn.vec))^2))