setwd("C:/Users/user/R Scripts/Linear Models/Data Files")

      library(leaps)
      library(MASS)
      

	par(pty = "s", oma = c(0., 0., 4., 0.))
	cars <- read.table("Cars.dat", col.names = c("S", "C", "tt", "G", 
		"disp", "hp", "cb", "drat", "wt", "qmt", "mpg"))
	#	print(cars)
	attach(cars)
	S <- as.factor(S)
	tt <- as.factor(tt)
      n<-length(mpg)
	par(mfrow = c(2., 2.))
	plot(disp, mpg)
	plot(hp, mpg)
	plot(drat, mpg)
	plot(wt, mpg)
#	print(cor(cbind(log(disp), log(wt), log(hp), log(drat))))
#	print(1. - 1./diag(solve(cor(cbind(log(disp), log(wt), log(hp), log(
#		drat))))))
	mtext("Data on the original scale", 3., out = T)
	ldisp <- log(disp)
	lhp <- log(hp)
	lwt <- log(wt)
	ldrat <- log(drat)
	plot(ldisp, mpg)
	plot(lhp, mpg)
	plot(ldrat, mpg)
	plot(lwt, mpg)
	mtext("Data on the log scale", 3., out = T)
	#-------------------------------------------------------------------
	#                 MAIN EFFECTS LINEAR MODEL
	#------------------------------------------------------------------
	#
	#	
	car.lm <- lm(mpg ~ S + C + tt + G + disp + hp + cb + drat + wt, model
		 = T)
	print(summary(car.lm, correlation = F))
	par(mfrow = c(2., 2.))
	plot(car.lm, which=c(1:2,4))
	boxcox(car.lm)
	#------------------------------------------------------------------
	#                 LOG-MAIN EFFECTS LINEAR MODEL
	#-------------------------------------------------------------------
	#
	#
	lcar.lm <- lm(mpg ~ S + C + tt + G + ldisp + lhp + cb + 
		ldrat + lwt, model = T)
	print(summary(lcar.lm, correlation = F))
	par(mfrow = c(2., 2.))
	plot(lcar.lm,which=c(1:2,4))
	boxcox(lcar.lm)
	#-------------------------------------------------------------------
	#                 MAIN EFFECTS LOG-LOG LINEAR MODEL
	#-------------------------------------------------------------------
	#
	#
	lmpg <- log(mpg)
	#	llcar.lm <- lm(lmpg ~ S + C + tt + G + ldisp + lhp + cb + ldrat + lwt, 
	#		model = T)
	llcar.lm <- lm(lmpg ~ S + C + tt + G + ldisp + lhp + cb +
		ldrat + lwt, model = T)
	print(summary(llcar.lm, correlation = F))
	par(mfrow = c(2., 2.))
	plot(llcar.lm,which=c(1:2,4))
      #------------------------------------------------------------------
      #                  ALL POSSIBLE REGRESSIONS
      #-------------------------------------------------------------------

      all.car<-regsubsets(lmpg ~ S + C + tt + G + ldisp + lhp + cb +
                ldrat + lwt, data=cars,method="exhaustive",nbest=1,nvmax=10)
      all.car.summary <- summary(all.car)
      rss=all.car.summary$rss
      r2=all.car.summary$rsq
      adjr2=all.car.summary$adjr2
      cp=all.car.summary$cp
      bic=all.car.summary$bic
      ric=n*log(rss/n)+2*log(10)*(2:10)
      print(round(cbind(all.car.summary$which[,-1],rss,r2,adjr2,cp,bic,ric),3))

      lcar.all<-lm(lmpg~lhp+lwt,model=T)
	print(summary(lcar.all, correlation = F))
	par(pty = "s", oma = c(0., 0., 4., 0.))
	par(mfrow = c(2., 2.))
	plot(lcar.all,which=c(1:2,4))
  
      print(anova(llcar.lm,lcar.all))

	#-------------------------------------------------------------------
	#                      BACKWARD ELIMINATION
	#-------------------------------------------------------------------
	#
	#
	llcar.lm <- lm(lmpg ~ S + C + tt + G + ldisp + lhp + cb +
		ldrat + lwt, model = T)
	#	scale<-summary(llcar.lm)$sigma^2
		scale<-0
	step.car <- step(llcar.lm, trace = T, scale = scale)
	print(step.car$anova)
		print(summary(step.car))	
      print(anova(step.car,lcar.all))
      print(anova(step.car,llcar.lm))
	#------------------------------------------------------------------
	#                      FORWARD SELECTION
	#-------------------------------------------------------------------
	#
	#
	#		step.car <- step(init.lm, scope = list(upper = llcar.lm), direction = 
	#		"forward", trace = F)	
	init.lm <- lm(log(mpg) ~ 1., model = T)
	step.car <- step(init.lm, scope = list(upper = update(llcar.lm,  ~ . +
		lhp * lwt)), direction = "forward", trace = F, scale = scale)
	#	print(summary(step.car, correlation = F))	#
	print(step.car$anova)
	#-------------------------------------------------------------------
	#                      STEPWISE PROCEDURE
	#-------------------------------------------------------------------
	#
	print(summary(step.car, correlation = F))
	step.car <- step(llcar.lm, scope = list(lower = init.lm, upper =  ~
		.^2.), direction = "both",  , scale = scale
		, trace = F)
	print(step.car$anova)
		step1.car <- update(step.car,  ~  + .^2)
		print(anova(step1.car, step.car))
	print(summary(step.car, correlation = F))
	par(mfrow = c(2., 2.))
	plot(step.car,which=c(1:2,4))
	#-------------------------------------------------------------------
	#   MAIN EFFECTS LOG-LOG MODEL WITH OMITTED INFLUENCIAL OBSERVATION
	#-------------------------------------------------------------------
	#
	#
      par(mfrow=c(2,2))
	weights <- rep(1., length(mpg))
	weights[4.] <- 0.
      all1.car<-regsubsets(lmpg ~ S + C + tt + G + ldisp + lhp + cb +
                ldrat + lwt, data=cars,method="exhaustive",nbest=1,nvmax=10,weights=weights)
      all1.car.summary <- summary(all1.car)
      rss=all1.car.summary$rss
      r2=all1.car.summary$rsq
      adjr2=all1.car.summary$adjr2
      cp=all1.car.summary$cp
      bic=all1.car.summary$bic
      ric=n*log(rss/n)+2*log(10)*(2:10)
      print(round(cbind(all1.car.summary$which[,-1],rss,r2,adjr2,cp,bic,ric),3))

      lcar1.all<-lm(lmpg~lhp+lwt,model=T,subset=(weights>0))
	print(summary(lcar1.all, correlation = F))
	par(pty = "s", oma = c(0., 0., 4., 0.))
	par(mfrow = c(2., 2.))
	plot(lcar1.all,which=c(1:2,4))



#	llcar1.lm <- update(llcar.lm, subset = (weights > 0.))
#	step1.car <- step(llcar1.lm, trace = F)
#	print(step1.car$anova)
#	print(summary(step1.car, correlation = F))



      #---------------------------------------------------------------------
      #                   LASSO
      #--------------------------------------------------------------------
      #
      library(glmnet)
      par(mfrow=c(2,2)) 
      grid <- seq(0,.5,.01)
      lcar.lasso0 <- glmnet(model.matrix(llcar.lm),lmpg,alpha=1,lambda=grid)
      plot(lcar.lasso0,xvar=c("lambda"),label=TRUE)
      lcar.lasso.cv <- cv.glmnet(model.matrix(llcar.lm),lmpg,alpha=1,nfolds=n)
      plot(lcar.lasso.cv)
      mtext("Lasso",3, out=T)
      lambda.cv <- lcar.lasso.cv$lambda.min
      cat("lambda.cv =", lambda.cv,"\n\n")
      lcar.lasso <- glmnet(model.matrix(llcar.lm),lmpg,alpha=1,lambda=lambda.cv)
      print(round(predict(lcar.lasso,s=lambda.cv,type="coefficients"),5))

      lcar.lasso.pred <- predict(lcar.lasso,s=lambda.cv,newx=model.matrix(llcar.lm)) 
      rss.lasso <- sum((lmpg-lcar.lasso.pred)^2)
      cv.lasso <- min(lcar.lasso.cv$cvm)
      cat("RSS =",round(rss.lasso,5), "CV =", round(cv.lasso,5), "\n\n")

      #----------------------------------------------------------------------
      #                       SLOPE
      #----------------------------------------------------------------------     
      library(SLOPE)
      par(mfrow=c(2,2))
      lcar.slope <- SLOPE(cbind(S,C,tt,G,ldisp,lhp,cb,ldrat,lwt),lmpg)
 
  

      #----------------------------------------------------------------------
      #                     PRINCIPLE COMPONENT REGRESSION
      #----------------------------------------------------------------------
      par(mfrow=c(2,2))
      library(pls)
      lcar.pcr <- pcr(lmpg~S + C + tt + G + ldisp + lhp + cb + ldrat + lwt,scale=T,validation="LOO")
#     lcar.pcr <- pcr(lmpg~ldisp+lhp+ldrat+lwt,scale=T)
      print(lcar.pcr$projection)
      print(summary(lcar.pcr))
      rss.pcr <- sum((lcar.pcr$residuals[,,3])^2)
      cv.pcr=lcar.pcr$validation$PRESS[3]/n
      cat("3 components:  RSS =",round(rss.pcr,5),"CV =", round(cv.pcr,5),"\n\n")
      plot(lcar.pcr$fitted[,,3],lcar.pcr$residuals[,,3],xlab="Residuals",ylab="Fitted values")
      qqnorm(lcar.pcr$residuals[,,3])
      mtext("Principal component regression", side=3,cex=1.5,outer=T,line=-1)

      
      #----------------------------------------------------------------------
      #                        PARTIAL LEAST SQUARES
      #----------------------------------------------------------------------
 #    par(mfrow=c(2,2))  
      lcar.plsr <- plsr(lmpg~S + C + tt + G + ldisp + lhp + cb +ldrat + lwt,scale=T, validation="LOO")
 #     lcar.pr <- plsr(lmpg~ldisp+lhp+ldrat+lwt,scale=T)
      print(lcar.plsr$projection)
      print(summary(lcar.plsr))
      rss.plsr <- sum((lcar.plsr$residuals[,,3])^2)
      cv.plsr=lcar.plsr$validation$PRESS[3]/n
      cat("3 components:  RSS =",round(rss.plsr,5),"CV =", round(cv.plsr,5),"\n\n")
            plot(lcar.plsr$fitted[,,3],lcar.plsr$residuals[,,3],xlab="Residuals",ylab="Fitted values")
      qqnorm(lcar.plsr$residuals[,,3])
      mtext("Partial least squares", side=3,outer=T,line=-36,cex=1.5)