#		Question 1

# load MASS package


library(MASS)



y <- c(1.73,1,1.73,2,3,4.36,2.65,2.65,3.32)
x <- c(0.25,0.22,0.36,0.23,0.37,0.46,0.27,0.44,0.31)


y.2 <- c(3,1,3,4,9,19,7,7,11)

boxcox(y.2 ~ x,lambda=seq(-1,2,length=101))  # MASS package function which computes the likelihood 
								# profile for the box-cox transform


#   Question 2

# population.data <- read.table("E:/Courses/regression/data sets/population.txt",header=T)

attach(population.data)
year.sq <- year^2
lm.1 <- lm(pop ~ year + year.sq)


#  2nd order polynomial fit

summary(lm.1)
par(mfcol=c(1,1))
plot(year,pop)
lines(year,lm.1$fit)



#	leverage points    

par(mfcol=c(1,2))
plot(hat(model.matrix(lm.1)),ylab="",main="leverages")
abline(h=2*3/23)
plot(cooks.distance(lm.1),ylab="",main="Cooks distance")
abline(h=1/2)



#	Residual plots

par(mfcol=c(1,3))
plot(lm.1$res,ylab="",main="residuals")
plot(rstandard(lm.1),ylab="",main="standardized res.")
plot(rstudent(lm.1),ylab="",main="studentized res.")

par(mfcol=c(1,3))
boxplot(lm.1$res,ylab="",main="residuals")
boxplot(rstudent(lm.1),ylab="",main="studentized res.")
qqnorm(rstudent(lm.1),ylab="",main="studentized res.")



#    Detect error correlation

par(mfcol=c(1,1))
plot(rstudent(lm.1),type="b")
abline(0,0)

library(tseries)
runs.test(factor(lm.1$res>0))   	

(np	<- sum(lm.1$res > 0))
(nm	<- sum(lm.1$res < 0))

(runs.mn	<- 2*np*nm / (np+nm) + 1)	
(runs.sd	<- sqrt( 2*np*nm*(2*np*nm - np - nm)/((np+nm)^2*(np+nm-1)) ))
 (6 - runs.mn) / runs.sd	

library(lmtest)
dwtest(lm.1,alternative = "two.sided")	




#  Question 3


x <- rnorm(9)
y <- 2 + 3 * x  + rnorm(9)

lm.1 <- lm(y~x)
summary(lm.1)


#  Add mean(x) mean(y) observations to the data as the last (10th) observation

x.mnx <- c(x,mean(x))
y.mny <- c(y,mean(y))

lm.2 <- lm(y.mny~x.mnx)
summary(lm.2)


# Study leverage of 10th observation

par(mfcol=c(1,2))
plot(hat(model.matrix(lm.2)),ylab="",main="leverages")
abline(h=0.2)
plot(cooks.distance(lm.2),ylab="",main="Cooks distance")
abline(h=1/2)

round(cooks.distance(lm.2),6)

x <- model.matrix(lm.2)
h <- x %*% solve(t(x)%*%x) %*% t(x)
h[10,]