setwd("C:/Users/user/R Scripts/Linear Models/Data Files") library(leaps) library(MASS) par(pty = "s", oma = c(0., 0., 4., 0.)) cars <- read.table("Cars.dat", col.names = c("S", "C", "tt", "G", "disp", "hp", "cb", "drat", "wt", "qmt", "mpg")) # print(cars) attach(cars) S <- as.factor(S) tt <- as.factor(tt) n<-length(mpg) par(mfrow = c(2., 2.)) plot(disp, mpg) plot(hp, mpg) plot(drat, mpg) plot(wt, mpg) # print(cor(cbind(log(disp), log(wt), log(hp), log(drat)))) # print(1. - 1./diag(solve(cor(cbind(log(disp), log(wt), log(hp), log( # drat)))))) mtext("Data on the original scale", 3., out = T) ldisp <- log(disp) lhp <- log(hp) lwt <- log(wt) ldrat <- log(drat) plot(ldisp, mpg) plot(lhp, mpg) plot(ldrat, mpg) plot(lwt, mpg) mtext("Data on the log scale", 3., out = T) #------------------------------------------------------------------- # MAIN EFFECTS LINEAR MODEL #------------------------------------------------------------------ # # car.lm <- lm(mpg ~ S + C + tt + G + disp + hp + cb + drat + wt, model = T) print(summary(car.lm, correlation = F)) par(mfrow = c(2., 2.)) plot(car.lm, which=c(1:2,4)) boxcox(car.lm) #------------------------------------------------------------------ # LOG-MAIN EFFECTS LINEAR MODEL #------------------------------------------------------------------- # # lcar.lm <- lm(mpg ~ S + C + tt + G + ldisp + lhp + cb + ldrat + lwt, model = T) print(summary(lcar.lm, correlation = F)) par(mfrow = c(2., 2.)) plot(lcar.lm,which=c(1:2,4)) boxcox(lcar.lm) #------------------------------------------------------------------- # MAIN EFFECTS LOG-LOG LINEAR MODEL #------------------------------------------------------------------- # # lmpg <- log(mpg) # llcar.lm <- lm(lmpg ~ S + C + tt + G + ldisp + lhp + cb + ldrat + lwt, # model = T) llcar.lm <- lm(lmpg ~ S + C + tt + G + ldisp + lhp + cb + ldrat + lwt, model = T) print(summary(llcar.lm, correlation = F)) par(mfrow = c(2., 2.)) plot(llcar.lm,which=c(1:2,4)) #------------------------------------------------------------------ # ALL POSSIBLE REGRESSIONS #------------------------------------------------------------------- all.car<-regsubsets(lmpg ~ S + C + tt + G + ldisp + lhp + cb + ldrat + lwt, data=cars,method="exhaustive",nbest=1,nvmax=10) all.car.summary <- summary(all.car) rss=all.car.summary$rss r2=all.car.summary$rsq adjr2=all.car.summary$adjr2 cp=all.car.summary$cp bic=all.car.summary$bic ric=n*log(rss/n)+2*log(10)*(2:10) print(round(cbind(all.car.summary$which[,-1],rss,r2,adjr2,cp,bic,ric),3)) lcar.all<-lm(lmpg~lhp+lwt,model=T) print(summary(lcar.all, correlation = F)) par(pty = "s", oma = c(0., 0., 4., 0.)) par(mfrow = c(2., 2.)) plot(lcar.all,which=c(1:2,4)) print(anova(llcar.lm,lcar.all)) #------------------------------------------------------------------- # BACKWARD ELIMINATION #------------------------------------------------------------------- # # llcar.lm <- lm(lmpg ~ S + C + tt + G + ldisp + lhp + cb + ldrat + lwt, model = T) # scale<-summary(llcar.lm)$sigma^2 scale<-0 step.car <- step(llcar.lm, trace = T, scale = scale) print(step.car$anova) print(summary(step.car)) print(anova(step.car,lcar.all)) print(anova(step.car,llcar.lm)) #------------------------------------------------------------------ # FORWARD SELECTION #------------------------------------------------------------------- # # # step.car <- step(init.lm, scope = list(upper = llcar.lm), direction = # "forward", trace = F) init.lm <- lm(log(mpg) ~ 1., model = T) step.car <- step(init.lm, scope = list(upper = update(llcar.lm, ~ . + lhp * lwt)), direction = "forward", trace = F, scale = scale) # print(summary(step.car, correlation = F)) # print(step.car$anova) #------------------------------------------------------------------- # STEPWISE PROCEDURE #------------------------------------------------------------------- # print(summary(step.car, correlation = F)) step.car <- step(llcar.lm, scope = list(lower = init.lm, upper = ~ .^2.), direction = "both", , scale = scale , trace = F) print(step.car$anova) step1.car <- update(step.car, ~ + .^2) print(anova(step1.car, step.car)) print(summary(step.car, correlation = F)) par(mfrow = c(2., 2.)) plot(step.car,which=c(1:2,4)) #------------------------------------------------------------------- # MAIN EFFECTS LOG-LOG MODEL WITH OMITTED INFLUENCIAL OBSERVATION #------------------------------------------------------------------- # # par(mfrow=c(2,2)) weights <- rep(1., length(mpg)) weights[4.] <- 0. all1.car<-regsubsets(lmpg ~ S + C + tt + G + ldisp + lhp + cb + ldrat + lwt, data=cars,method="exhaustive",nbest=1,nvmax=10,weights=weights) all1.car.summary <- summary(all1.car) rss=all1.car.summary$rss r2=all1.car.summary$rsq adjr2=all1.car.summary$adjr2 cp=all1.car.summary$cp bic=all1.car.summary$bic ric=n*log(rss/n)+2*log(10)*(2:10) print(round(cbind(all1.car.summary$which[,-1],rss,r2,adjr2,cp,bic,ric),3)) lcar1.all<-lm(lmpg~lhp+lwt,model=T,subset=(weights>0)) print(summary(lcar1.all, correlation = F)) par(pty = "s", oma = c(0., 0., 4., 0.)) par(mfrow = c(2., 2.)) plot(lcar1.all,which=c(1:2,4)) # llcar1.lm <- update(llcar.lm, subset = (weights > 0.)) # step1.car <- step(llcar1.lm, trace = F) # print(step1.car$anova) # print(summary(step1.car, correlation = F)) #--------------------------------------------------------------------- # LASSO #-------------------------------------------------------------------- # library(glmnet) par(mfrow=c(2,2)) grid <- seq(0,.5,.01) lcar.lasso0 <- glmnet(model.matrix(llcar.lm),lmpg,alpha=1,lambda=grid) plot(lcar.lasso0,xvar=c("lambda"),label=TRUE) lcar.lasso.cv <- cv.glmnet(model.matrix(llcar.lm),lmpg,alpha=1,nfolds=n) plot(lcar.lasso.cv) mtext("Lasso",3, out=T) lambda.cv <- lcar.lasso.cv$lambda.min cat("lambda.cv =", lambda.cv,"\n\n") lcar.lasso <- glmnet(model.matrix(llcar.lm),lmpg,alpha=1,lambda=lambda.cv) print(round(predict(lcar.lasso,s=lambda.cv,type="coefficients"),5)) lcar.lasso.pred <- predict(lcar.lasso,s=lambda.cv,newx=model.matrix(llcar.lm)) rss.lasso <- sum((lmpg-lcar.lasso.pred)^2) cv.lasso <- min(lcar.lasso.cv$cvm) cat("RSS =",round(rss.lasso,5), "CV =", round(cv.lasso,5), "\n\n") #---------------------------------------------------------------------- # SLOPE #---------------------------------------------------------------------- library(SLOPE) par(mfrow=c(2,2)) lcar.slope <- SLOPE(cbind(S,C,tt,G,ldisp,lhp,cb,ldrat,lwt),lmpg) #---------------------------------------------------------------------- # PRINCIPLE COMPONENT REGRESSION #---------------------------------------------------------------------- par(mfrow=c(2,2)) library(pls) lcar.pcr <- pcr(lmpg~S + C + tt + G + ldisp + lhp + cb + ldrat + lwt,scale=T,validation="LOO") # lcar.pcr <- pcr(lmpg~ldisp+lhp+ldrat+lwt,scale=T) print(lcar.pcr$projection) print(summary(lcar.pcr)) rss.pcr <- sum((lcar.pcr$residuals[,,3])^2) cv.pcr=lcar.pcr$validation$PRESS[3]/n cat("3 components: RSS =",round(rss.pcr,5),"CV =", round(cv.pcr,5),"\n\n") plot(lcar.pcr$fitted[,,3],lcar.pcr$residuals[,,3],xlab="Residuals",ylab="Fitted values") qqnorm(lcar.pcr$residuals[,,3]) mtext("Principal component regression", side=3,cex=1.5,outer=T,line=-1) #---------------------------------------------------------------------- # PARTIAL LEAST SQUARES #---------------------------------------------------------------------- # par(mfrow=c(2,2)) lcar.plsr <- plsr(lmpg~S + C + tt + G + ldisp + lhp + cb +ldrat + lwt,scale=T, validation="LOO") # lcar.pr <- plsr(lmpg~ldisp+lhp+ldrat+lwt,scale=T) print(lcar.plsr$projection) print(summary(lcar.plsr)) rss.plsr <- sum((lcar.plsr$residuals[,,3])^2) cv.plsr=lcar.plsr$validation$PRESS[3]/n cat("3 components: RSS =",round(rss.plsr,5),"CV =", round(cv.plsr,5),"\n\n") plot(lcar.plsr$fitted[,,3],lcar.plsr$residuals[,,3],xlab="Residuals",ylab="Fitted values") qqnorm(lcar.plsr$residuals[,,3]) mtext("Partial least squares", side=3,outer=T,line=-36,cex=1.5)