# Question 1 # load MASS package library(MASS) y <- c(1.73,1,1.73,2,3,4.36,2.65,2.65,3.32) x <- c(0.25,0.22,0.36,0.23,0.37,0.46,0.27,0.44,0.31) y.2 <- c(3,1,3,4,9,19,7,7,11) boxcox(y.2 ~ x,lambda=seq(-1,2,length=101)) # MASS package function which computes the likelihood # profile for the box-cox transform # Question 2 # population.data <- read.table("E:/Courses/regression/data sets/population.txt",header=T) attach(population.data) year.sq <- year^2 lm.1 <- lm(pop ~ year + year.sq) # 2nd order polynomial fit summary(lm.1) par(mfcol=c(1,1)) plot(year,pop) lines(year,lm.1$fit) # leverage points par(mfcol=c(1,2)) plot(hat(model.matrix(lm.1)),ylab="",main="leverages") abline(h=2*3/23) plot(cooks.distance(lm.1),ylab="",main="Cooks distance") abline(h=1/2) # Residual plots par(mfcol=c(1,3)) plot(lm.1$res,ylab="",main="residuals") plot(rstandard(lm.1),ylab="",main="standardized res.") plot(rstudent(lm.1),ylab="",main="studentized res.") par(mfcol=c(1,3)) boxplot(lm.1$res,ylab="",main="residuals") boxplot(rstudent(lm.1),ylab="",main="studentized res.") qqnorm(rstudent(lm.1),ylab="",main="studentized res.") # Detect error correlation par(mfcol=c(1,1)) plot(rstudent(lm.1),type="b") abline(0,0) library(tseries) runs.test(factor(lm.1$res>0)) (np <- sum(lm.1$res > 0)) (nm <- sum(lm.1$res < 0)) (runs.mn <- 2*np*nm / (np+nm) + 1) (runs.sd <- sqrt( 2*np*nm*(2*np*nm - np - nm)/((np+nm)^2*(np+nm-1)) )) (6 - runs.mn) / runs.sd library(lmtest) dwtest(lm.1,alternative = "two.sided") # Question 3 x <- rnorm(9) y <- 2 + 3 * x + rnorm(9) lm.1 <- lm(y~x) summary(lm.1) # Add mean(x) mean(y) observations to the data as the last (10th) observation x.mnx <- c(x,mean(x)) y.mny <- c(y,mean(y)) lm.2 <- lm(y.mny~x.mnx) summary(lm.2) # Study leverage of 10th observation par(mfcol=c(1,2)) plot(hat(model.matrix(lm.2)),ylab="",main="leverages") abline(h=0.2) plot(cooks.distance(lm.2),ylab="",main="Cooks distance") abline(h=1/2) round(cooks.distance(lm.2),6) x <- model.matrix(lm.2) h <- x %*% solve(t(x)%*%x) %*% t(x) h[10,]