#la.rent <- read.table("C:/Data/courses/regression/data sets/LARENT.txt",header=F) rent.price <- la.rent[,1] area.bath <- la.rent[,6] dist.beach <- la.rent[,8] dist.ucla <- la.rent[,9] vec.1 <- rep(1,26) lm.0 <- lm(rent.price ~ vec.1 + 0) lm.1 <- lm(rent.price ~ area.bath + dist.beach + dist.ucla ) lm.2 <- lm(rent.price ~ vec.1 + area.bath + dist.beach + dist.ucla + 0) summary(lm.1) anova(lm.1) # Excercise 8, question - 1 anova(lm.0) anova(lm.1) anova(lm.2) # ANOVA table of lm.2 has an additional row (the 1st row) sum((rent.price - lm.1$fitted)^2) # SSE for lm.1 sum((rent.price - lm.2$fitted)^2) # SSE for lm.2 # == SS last row of ANOVA table sum((mean(rent.price) - lm.1$fitted)^2) # SSR for lm.1 sum((mean(rent.price) - lm.2$fitted)^2) # SSR for lm.2 sum(anova(lm.2)[[2]][2:4]) # == sum of SS in rows 2-4 in ANOVA table of lm.2 sum((mean(rent.price) - rent.price)^2) # SSR for lm.1 sum(anova(lm.1)[[2]]) # == sum of all SS in ANOVA table sum(rent.price^2) # SSR for lm.2 sum(anova(lm.2)[[2]]) # == sum of all SS in ANOVA table # Excercise 8, question - 2 lm.3 <- lm(rent.price ~ area.bath + dist.ucla + dist.beach) summary(lm.3) anova(lm.3) lm.4 <- lm(rent.price ~ area.bath) anova(lm.4,lm.1) anova(lm.4,lm.1)$RSS anova(lm.4,lm.1)$RSS[1] - anova(lm.4,lm.1)$RSS[2] ( (anova(lm.4,lm.1)$RSS[1] - anova(lm.4,lm.1)$RSS[2]) / 2) / ( anova(lm.4,lm.1)$RSS[2] / 22) # Excercise 8, question - 3 rent.price[20] # Model x-matrices x.mat0 <- cbind(vec.1) x.mat1 <- cbind(vec.1,area.bath,dist.beach,dist.ucla) # Model coeff estimator vectors lm.0$coeff lm.1$coeff lm.2$coeff vcov(lm.0) vcov(lm.1) vcov(lm.2) # ==> models lm.1 & lm.2 are equivalent # Model 20th observation prediction t(x.mat0[20,]) %*% lm.0$coeff t(x.mat1[20,]) %*% lm.1$coeff mn.20.0 <- t(x.mat0[20,]) %*% lm.0$coeff mn.20.1 <- t(x.mat1[20,]) %*% lm.1$coeff # Model 20th observation variance t(x.mat0[20,]) %*% vcov(lm.0) %*% x.mat0[20,] t(x.mat1[20,]) %*% vcov(lm.1) %*% x.mat1[20,] sd.20.0 <- sqrt(t(x.mat0[20,]) %*% vcov(lm.0) %*% x.mat0[20,]) sd.20.1 <- sqrt(t(x.mat1[20,]) %*% vcov(lm.1) %*% x.mat1[20,]) # R - model 20th observation fit and sd lm.0$fit[20] lm.1$fit[20] lm.2$fit[20] sd.20.0 ; sd.20.1 predict(lm.0,se.fit=T) predict(lm.1,se.fit=T) predict(lm.2,se.fit=T) # Model 20th confidence interval for mean of Y given X mn.20.0 ; mn.20.0 + sd.20.0*qt(0.975,25) ; mn.20.0 - sd.20.0*qt(0.975,25) mn.20.1 ; mn.20.1 + sd.20.1*qt(0.975,22) ; mn.20.1 - sd.20.1*qt(0.975,22) # R - Model 20th confidence interval predict(lm.0,interval="confidence")[20,] predict(lm.1,interval="confidence")[20,] rent.price[20] # Model 20th "confidence interval" for new observation predict(lm.1,interval="confidence")[20,] predict(lm.1,interval="prediction")[20,] predict(lm.1,se.fit=T) sd.pred.20.1 <- sqrt(predict(lm.1,se.fit=T)$residual.scale^2 + sd.20.1^2) # residual.scal^2 == MSE mn.20.1 + sd.pred.20.1*qt(0.975,22) ; mn.20.1 - sd.pred.20.1*qt(0.975,22) # Scatter plots of fitted + confidence bands in relation to fitted and index. conf.1 <- predict(lm.1,interval="confidence") pred.1 <- predict(lm.1,interval="prediction") ord <- order(lm.1$fit) plot(lm.1$fit,rent.price) lines(lm.1$fit[ord],conf.1[ord,1],col=3) lines(lm.1$fit[ord],conf.1[ord,2],col=2) lines(lm.1$fit[ord],conf.1[ord,3],col=2) lines(lm.1$fit[ord],pred.1[ord,2],col=4) lines(lm.1$fit[ord],pred.1[ord,3],col=4) conf.0 <- predict(lm.0,interval="confidence") pred.0 <- predict(lm.0,interval="prediction") plot(rent.price,ylim=range(pred.0)) lines(conf.0[,1],col=3) lines(conf.0[,2],col=2) lines(conf.0[,3],col=2) lines(pred.0[,2],col=4) lines(pred.0[,3],col=4) # Scatter plots of fitted + confidence intervals in relation to fitted and explanatory variables. plot(lm.1$fit,rent.price) points(lm.1$fit,pred.1[,1],col=3) segments(pred.1[,1],pred.1[,2],pred.1[,1],pred.1[,3],col=2) plot(area.bath,rent.price) points(area.bath,pred.1[,1],col=3) segments(area.bath,pred.1[,2],area.bath,pred.1[,3],col=2) conf.0 <- predict(lm.0,interval="confidence") pred.0 <- predict(lm.0,interval="prediction") plot(lm.0$fit,rent.price) points(lm.0$fit,conf.0[,1],col=3) segments(conf.0[,1],conf.0[,2],conf.0[,1],conf.0[,3],col=2) plot(lm.0$fit,rent.price) points(lm.0$fit,pred.0[,1],col=3) segments(pred.0[,1],pred.0[,2],pred.0[,1],pred.0[,3],col=2) plot(area.bath,rent.price) points(area.bath,pred.0[,1],col=3) segments(area.bath,pred.0[,2],area.bath,pred.0[,3],col=2)