# Targil 7, Question 1 y <- c(1.73,1,1.73,2,3,4.36,2.65,2.65,3.32) x <- c(0.25,0.22,0.36,0.23,0.37,0.46,0.27,0.44,0.31) lm.1 <- lm(y~x) # full model = intercept + slope lm.2 <- lm(y~x+0) # no intercept model sum(x*y) / sum(x^2) # estimate of beta summary(lm.2) sum(lm.1$res) # sum of full model residuals sum(lm.2$res) # sum of no-intercept model residuals MSE <- sum(lm.2$res^2) / (9-1) # MSE MSE sqrt(MSE) # check if result same as R output MSE / sum(x^2) # beta variance estimator vcov(lm.2) # R estimated coeff. covariance matrix sqrt(vcov(lm.2)) # beta std - matching R output # Targil 7, Question 2 #la.rent <- read.table("D:/Courses/regression/data sets/LARENT.txt",header=F) rent.price <- la.rent[,1] area.bath <- la.rent[,6] dist.beach <- la.rent[,8] lm.1 <- lm(rent.price ~ area.bath + dist.beach) # aleph SSE <- sum((rent.price - lm.1$fitted)^2) SSR <- sum((lm.1$fitted - mean(rent.price))^2) SST <- sum((rent.price - mean(rent.price))^2) SSE ; SSR ; SSE + SSR SST # bet MSE <- SSE / (26-3) MSR <- SSR / 2 MSR / MSE qf(0.95,2,23) summary(lm.1) # gimel y <- rent.price x.mat <- cbind(intercept=rep(1,26),area.bath,dist.beach) solve( t(x.mat) %*% x.mat ) %*% t(x.mat) %*% y MSE * solve( t(x.mat) %*% x.mat ) vcov(lm.1) # Targil 7: Question 4 x.1 <- la.rent[,2] x.2 <- la.rent[,3] x.3 <- la.rent[,4] ssr.0 <- rep(NA,1000) ssr.1 <- rep(NA,1000) ssr.2 <- rep(NA,1000) nrm2.0 <- rep(NA,1000) nrm2.1 <- rep(NA,1000) nrm2.2 <- rep(NA,1000) for(i in 1:1000) { y <- rnorm(26) y.pred <- lm(y ~ x.1 + x.2 + x.3)$fit ssr.0[i] <- sum((y.pred-mean(y))^2) nrm2.0[i] <- sum(y.pred^2) } for(i in 1:1000) { y <- rnorm(26) + .25 y.pred <- lm(y ~ x.1 + x.2 + x.3)$fit ssr.1[i] <- sum((y.pred-mean(y))^2) nrm2.1[i] <- sum(y.pred^2) } for(i in 1:1000) { y <- rnorm(26) - 1.8 + 0.005*x.1 y.pred <- lm(y ~ x.1 + x.2 + x.3)$fit ssr.2[i] <- sum((y.pred-mean(y))^2) nrm2.2[i] <- sum(y.pred^2) } boxplot(list(ssr.0,ssr.1,ssr.2,nrm2.0,nrm2.1,nrm2.2)) # SSR cdfs x.vec <- seq(0,15,length=5000) q.vec <- ppoints(1000) plot(x.vec,pchisq(x.vec,df=3),col=3,lty=2,type="l",ylim=c(0,1)) lines(x.vec,pchisq(x.vec,df=4),col=4,lty=2,type="l",ylim=c(0,1)) lines(sort(ssr.0),q.vec,col=2) lines(sort(ssr.1),q.vec,col=5) lines(sort(ssr.2),q.vec,col=6) # NRM^2 cdfs plot(x.vec,pchisq(x.vec,df=3),col=3,lty=2,type="l",ylim=c(0,1)) lines(x.vec,pchisq(x.vec,df=4),col=4,lty=2,type="l",ylim=c(0,1)) lines(sort(nrm2.0),q.vec,col=2) lines(sort(nrm2.1),q.vec,col=5) lines(sort(nrm2.2),q.vec,col=6) # K-S tests ks.test(ssr.0,"pchisq",df=3) ks.test(ssr.0,"pchisq",df=4) ks.test(ssr.1,"pchisq",df=3) ks.test(ssr.1,"pchisq",df=4) ks.test(ssr.2,"pchisq",df=3) ks.test(ssr.2,"pchisq",df=4) ks.test(nrm2.0,"pchisq",df=3) ks.test(nrm2.0,"pchisq",df=4) ks.test(nrm2.1,"pchisq",df=3) ks.test(nrm2.1,"pchisq",df=4) ks.test(nrm2.2,"pchisq",df=3) ks.test(nrm2.2,"pchisq",df=4) # What is the distriubtion of the non chisquare sum-of-squares? lines(x.vec,pchisq(x.vec,df=4,ncp=sum(rep(.25,26)^2)),col=5,lty=2,type="l",ylim=c(0,1)) lines(x.vec,pchisq(x.vec,df=4,ncp=sum( (-1.8 + 0.005*x.1)^2)),col=6,lty=2,type="l",ylim=c(0,1)) # non-central chisquare K-S for nrm2.1 mn.vec <- rep(.25,26) ks.test(nrm2.1,"pchisq",df=4,ncp=sum(mn.vec^2)) # non-central chisquare K-S for nrm2.2 and ssr.2 mn.vec <- -1.8 + 0.005*x.1 ks.test(nrm2.2,"pchisq",df=4,ncp=sum(mn.vec^2)) ks.test(ssr.2,"pchisq",df=3,ncp=sum(mn.vec^2)) ks.test(ssr.2,"pchisq",df=3,ncp=sum((mn.vec-mean(mn.vec))^2))