# LA rent example
#
# Fits linear models for rent.price and re-derives the main pieces of the
# lm() / summary() / anova() output by hand with matrix algebra and sums of
# squares, so the printed values can be compared side by side.

la.rent <- read.table("D:/Courses/regression/data sets/LARENT.txt")
rent.price <- la.rent[, 1]
area.bath <- la.rent[, 6]
dist.beach <- la.rent[, 8]
dist.ucla <- la.rent[, 9]

# Number of observations. Degrees of freedom below are derived from it rather
# than from a hard-coded sample size.
n.obs <- length(rent.price)

############################################
# Two-predictor model: coefficients and standard errors by hand

lm.12 <- lm(rent.price ~ area.bath + dist.beach)
summary(lm.12)

y <- rent.price
x.mat <- cbind(intercept = rep(1, n.obs), area.bath, dist.beach)

# Least-squares estimate (X'X)^{-1} X'y
solve(t(x.mat) %*% x.mat) %*% t(x.mat) %*% y

# Residual mean square on n - 3 df (three estimated coefficients), then the
# coefficient standard errors from the diagonal of MSE * (X'X)^{-1}
MSE <- sum(residuals(lm.12)^2) / (n.obs - 3)
sqrt(diag(MSE * solve(t(x.mat) %*% x.mat)))

############################################
# Confidence interval for the mean response at observation 3

predict(lm.12, interval = "confidence")[1:4, ]

(lambda.3 <- model.matrix(lm.12)[3, ])
# lambda.3 is a row of the lm.12 design matrix, so it must be multiplied by
# the lm.12 coefficients (lm.1 is not fitted until later in the script).
(yhat.3 <- t(lambda.3) %*% coef(lm.12))
(se.3 <- sqrt(
  MSE * t(lambda.3) %*% solve(t(x.mat) %*% x.mat) %*% lambda.3
))
yhat.3 - se.3 * qt(0.975, n.obs - 3)
yhat.3 + se.3 * qt(0.975, n.obs - 3)

############################################
# Sums of squares, overall F test and R-squared for lm.12

summary(lm.12)
(SST <- sum((y - mean(y))^2))
(SSE <- sum((y - fitted(lm.12))^2))
(SSR <- sum((fitted(lm.12) - mean(y))^2))
(SSE + SSR)
# NOTE: the name F masks R's built-in shorthand F (FALSE) for the rest of the
# session; the name is kept so later references to F keep working.
(F <- (SSR / 2) / (SSE / (n.obs - 3)))
1 - pf(F, 2, n.obs - 3)

# R-squared
1 - SSE / SST
# adjusted R-squared
1 - (SSE / (n.obs - 3)) / (SST / (n.obs - 1))

############################################
# One-predictor model: compare the F statistic with its square root

lm.1 <- lm(rent.price ~ area.bath)
summary(lm.1)
summary(lm.1)$fstatistic
sqrt(summary(lm.1)$fstatistic[1])
# Two-sided t p-value and F p-value for t^2; the statistic 1.2 and the 13 df
# are fixed illustrative values and are not computed from the data.
2 * (1 - pt(1.2, 13))
1 - pf(1.2^2, 1, 13)

############################################
# Partial F test: lm.1 (reduced) against lm.123 (full)

lm.123 <- lm(rent.price ~ area.bath + dist.beach + dist.ucla)
summary(lm.123)
(SSE.1 <- sum((y - fitted(lm.1))^2))
(SSE.123 <- sum((y - fitted(lm.123))^2))
# Numerator df = difference in residual df between the two models;
# denominator df = residual df of the full model (n - 4).
(F <- ((SSE.1 - SSE.123) / ((n.obs - 2) - (n.obs - 4))) /
  (SSE.123 / (n.obs - 4)))
1 - pf(F, 2, n.obs - 4)
anova(lm.1, lm.123)

############################################
# Sequential sums of squares: intercept only -> lm.1 -> lm.12 -> lm.123

lm.123 <- lm(rent.price ~ area.bath + dist.beach + dist.ucla)
lm.12 <- lm(rent.price ~ area.bath + dist.beach)
lm.1 <- lm(rent.price ~ area.bath)
lm.int <- lm(rent.price ~ 1)
summary(lm.int)
mean(y)
sd(y)
(SSE.int <- sum((y - fitted(lm.int))^2))
var(y) * (n.obs - 1)

SSE.1 <- sum((y - fitted(lm.1))^2)
SSE.12 <- sum((y - fitted(lm.12))^2)
SSE.123 <- sum((y - fitted(lm.123))^2)

# Reduction in residual SS as each predictor is added in turn
SSE.int - SSE.1
SSE.1 - SSE.12
SSE.12 - SSE.123
# Residual SS of the full model, for comparison with the anova() table
SSE.123
anova(lm.123)

# Number of observations. Degrees of freedom below are derived from it rather
# than from a hard-coded sample size.
n.obs <- length(rent.price)

# Sequential F ratios: each one-df reduction in SS over the full-model MSE
(SSE.int - SSE.1) / (SSE.123 / (n.obs - 4))
(SSE.1 - SSE.12) / (SSE.123 / (n.obs - 4))
(SSE.12 - SSE.123) / (SSE.123 / (n.obs - 4))

############################################
# Same model as lm.12, but with the intercept supplied as an explicit column
# of ones and R's own intercept removed (+ 0). The sums of squares are then
# taken about zero instead of about the mean.

vec.1 <- rep(1, n.obs)
lm.12.noint <- lm(rent.price ~ vec.1 + area.bath + dist.beach + 0)
summary(lm.12)
summary(lm.12.noint)
anova(lm.12)
anova(lm.12.noint)
sum((mean(y) * vec.1)^2)
(SST.noint <- sum(y^2))
(SSR.noint <- sum(fitted(lm.12.noint)^2))
(SSE.noint <- sum(residuals(lm.12.noint)^2))
SSR.noint + SSE.noint
SSR.noint / SST.noint
1 - (SSE.noint / (n.obs - 3)) / (SST.noint / n.obs)
(SSR.noint / 3) / (SSE.noint / (n.obs - 3))

############################################
# Partial F test: lm.1 (reduced) against lm.123 (full). This uses SSE.1 and
# lm.1 as they stand before the next section reassigns them.

(SSE.123 <- sum((y - fitted(lm.123))^2))
# NOTE: the name F masks R's built-in shorthand F (FALSE) for the rest of the
# session; the name is kept so later references to F keep working.
(F <- ((SSE.1 - SSE.123) / ((n.obs - 2) - (n.obs - 4))) /
  (SSE.123 / (n.obs - 4)))
anova(lm.1, lm.123)
1 - pf(F, 2, n.obs - 4)

# Geometric interpretation of R^2
# NOTE: lm.1 and SSE.1 are reassigned here to the two-predictor model.
lm.1 <- lm(rent.price ~ area.bath + dist.beach)
summary(lm.1)

# X.1 and X.2 are linearly independent combinations of area.bath and
# dist.beach, so lm.1a spans the same column space as lm.1; the fitted values
# and residual SS printed below allow that to be checked.
X.1 <- 0.5 * area.bath - 3 * dist.beach
X.2 <- 1.5 * area.bath - 14.5 * dist.beach
lm.1a <- lm(rent.price ~ X.1 + X.2)
summary(lm.1a)
fitted(lm.1)[1:4]
fitted(lm.1a)[1:4]
(SSE.1 <- sum(residuals(lm.1)^2))
(SSE.1a <- sum(residuals(lm.1a)^2))
(SST <- sum((rent.price - mean(rent.price))^2))
SST - SSE.1
(SSR <- sum((fitted(lm.1) - mean(rent.price))^2))
SSR / SST

# Regression without intercept
lm.3 <- lm(rent.price ~ area.bath + dist.beach + 0)
summary(lm.3)
# NOTE: x.mat is reassigned here without the intercept column.
x.mat <- cbind(area.bath, dist.beach)
solve(t(x.mat) %*% x.mat) %*% t(x.mat) %*% y

# Intercept as an explicit column of ones, with sums of squares about zero
vec.1 <- rep(1, n.obs)
lm.1b <- lm(rent.price ~ vec.1 + area.bath + dist.beach + 0)
summary(lm.1b)
(SSE.1b <- sum(residuals(lm.1b)^2))
(SST.1b <- sum(rent.price^2))
SST.1b - SSE.1b
(SSR.1b <- sum(fitted(lm.1b)^2))
SSR.1b / SST.1b
anova(lm.1)
anova(lm.1b)
sum(rep(mean(rent.price)^2, n.obs))
sum(anova(lm.1b)[, 2])

lm.123 <- lm(rent.price ~ area.bath + dist.beach + dist.ucla)
summary(lm.123)