library(ElemStatLearn)   # prostate data
library(leaps)           # regsubsets()
library(glmnet)          # ridge and lasso
library(boot)            # cv.glm()

# 1. Analyze prostate data
xp <- scale(prostate[, 1:8])   # standardize the predictors before fitting
full.model <- lm(prostate[, 9] ~ xp, subset = prostate$train)
summary(full.model)
nobs(full.model)
logLik(full.model)
AIC(full.model)
BIC(full.model)

# Reproduce logLik/AIC/BIC by hand: n = 67 training observations,
# 10 parameters (intercept + 8 slopes + sigma^2)
SSE <- sum(full.model$residuals^2)        # model SSE
-(67/2) * log(2*pi*SSE/67) - 67/2         # model log-likelihood at the MLE
2*10 - 2*logLik(full.model)               # model AIC
log(67)*10 - 2*logLik(full.model)         # model BIC

train <- data.frame(cbind(xp, lpsa = prostate[, 9])[prostate[, 10] == TRUE, ])
test  <- data.frame(cbind(xp, lpsa = prostate[, 9])[prostate[, 10] != TRUE, ])

# Best-subset selection: best model of each size 1..8
regfit.all <- regsubsets(lpsa ~ ., train, nbest = 1)
reg.summary <- summary(regfit.all)
t(reg.summary$which)
round(reg.summary$adjr2, 3)
round(reg.summary$rss, 3)   # training-set RSS

# 10-fold CV error for each subset size, repeated 100 times
MSE.mat <- array(dim = c(100, 8))
MSE.train <- rep(NA, 8)
for (i in 1:8) {
  col.ind <- match(names(coef(regfit.all, i)[-1]),
                   names(coef(regfit.all, 8)[-1]))
  train.sub <- data.frame(cbind(xp[, col.ind, drop = FALSE],
                                lpsa = prostate[, 9])[prostate[, 10] == TRUE, ])
  glm.fit <- glm(lpsa ~ ., data = train.sub)
  MSE.train[i] <- summary(glm.fit)$disp   # dispersion = SSE/(n - p), i.e. training MSE up to the df correction
  for (j in 1:100) MSE.mat[j, i] <- cv.glm(train.sub, glm.fit, K = 10)$delta[1]
}
boxplot(split(MSE.mat, rep(1:8, each = 100)), ylim = c(0.4, 0.8))
lines(1:8, MSE.train, type = "b", col = 3)

# Test-set error for each subset size
MSE.test <- rep(NA, 8)
for (i in 1:8) {
  col.ind <- match(names(coef(regfit.all, i)[-1]),
                   names(coef(regfit.all, 8)[-1]))
  test.sub <- cbind(1, xp[, col.ind, drop = FALSE])[!prostate[, 10], ]
  MSE.test[i] <- mean((prostate[!prostate[, 10], 9] -
                         test.sub %*% cbind(coef(regfit.all, i)))^2)
}
MSE.test
lines(MSE.test, col = 2, type = "b")   # overlay test error on the CV boxplot

############################################################################
# 3. Ridge Regression
x.train <- model.matrix(lpsa ~ ., train)[, -1]
y.train <- train$lpsa
x.test <- model.matrix(lpsa ~ ., test)[, -1]
y.test <- test$lpsa
grid <- 10^seq(10, -4, length = 100)
ridge.mod <- glmnet(x.train, y.train, alpha = 0, lambda = grid)
coef(ridge.mod)[, 100]   # smallest lambda on the grid: close to least squares
full.model$coef

# Use CV to choose lambda for ridge regression
# set.seed(1)   # uncomment for reproducible CV folds
cv.out <- cv.glmnet(x.train, y.train, alpha = 0)
plot(cv.out)
(bestlam <- cv.out$lambda.min)
ridge.pred <- predict(ridge.mod, s = bestlam, newx = x.test)
mean((ridge.pred - y.test)^2)   # ridge test MSE

############################################################################
# 4. Lasso
lasso.mod <- glmnet(x.train, y.train, alpha = 1, lambda = grid)
plot(lasso.mod)   # coefficient paths
# set.seed(1)   # uncomment for reproducible CV folds
cv.out <- cv.glmnet(x.train, y.train, alpha = 1)
plot(cv.out)
(bestlam <- cv.out$lambda.min)
lasso.pred <- predict(lasso.mod, s = bestlam, newx = x.test)
mean((lasso.pred - y.test)^2)   # lasso test MSE
predict(lasso.mod, type = "coefficients", s = bestlam)
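
############################################################################
# Optional: the one-standard-error rule (a sketch, not part of the original
# analysis). cv.glmnet also reports lambda.1se, the largest lambda whose CV
# error is within one standard error of the minimum; for the lasso it
# typically selects a sparser model than lambda.min. Assumes the lasso
# objects cv.out, lasso.mod, x.test, and y.test from above.
(lam.1se <- cv.out$lambda.1se)
lasso.pred.1se <- predict(lasso.mod, s = lam.1se, newx = x.test)
mean((lasso.pred.1se - y.test)^2)   # often slightly above the lambda.min MSE
predict(lasso.mod, type = "coefficients", s = lam.1se)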
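
############################################################################
# Optional: side-by-side test-set comparison (a sketch; assumes MSE.test,
# ridge.pred, lasso.pred, and y.test created above are still in the
# workspace). Note "best.subset" reports the best of the eight subset
# sizes, so it is a mildly optimistic estimate of test error.
round(c(best.subset = min(MSE.test),
        ridge       = mean((ridge.pred - y.test)^2),
        lasso       = mean((lasso.pred - y.test)^2)), 4)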