#new.housing <- read.table("D:/Courses/regression/data sets/new housing.txt",header=T) attach(new.housing) lm.1 <- lm(housing ~ intrate + gnp + pop) summary(lm.1) # Residual plots par(mfcol=c(1,3)) plot(lm.1$res,ylab="",main="residuals") plot(rstandard(lm.1),ylab="",main="standardized res.") plot(rstudent(lm.1),ylab="",main="studentized res.") # qqnorm plots par(mfcol=c(1,3)) qqnorm(lm.1$res,main="residuals") qqnorm(rstandard(lm.1),main="standardized res.") qqnorm(rstudent(lm.1),main="studentized res.") x <- c(0,seq(2,8,length=18),10) y <- rnorm(20,sd=0.5) y[1] <- 0 y[20] <- 1 y[9] <- 1.5 lm.1 <- lm(y~x) lm.n.20 <- lm(y~x,subset=c(rep(T,19),F)) par(mfcol=c(1,1)) plot(x,y) ; abline(lm.1) # Residual plots par(mfcol=c(1,3)) plot(lm.1$res,ylab="",main="residuals") plot(rstandard(lm.1),ylab="",main="standardized res.") plot(rstudent(lm.1),ylab="",main="studentized res.") # leverage and Cook's distance plot par(mfcol=c(1,2)) plot(hat(model.matrix(lm.1)),ylab="",main="leverages") abline(h=2*2/20) plot(cooks.distance(lm.1),ylab="",main="Cook's distance") abline(h=0.5) # computation for the 20th observation lm.1 <- lm(y~x) lm.n.20 <- lm(y~x,subset=c(rep(T,19),F)) cbind(res=lm.1$res,stand=rstandard(lm.1),student=rstudent(lm.1), leverage=hat(model.matrix(lm.1)),cook.dist=cooks.distance(lm.1))[15:20,] y[20] - lm.1$fit[20] h.20 <- hat(model.matrix(lm.1))[20]; h.20 sigma <- summary(lm.1)$sigma ; sigma sigma.n.20 <- summary(lm.n.20)$sigma ; sigma.n.20 (y[20] - lm.1$fit[20]) / (sigma * sqrt(1-h.20)) # = r.20 (y[20] - lm.1$fit[20]) / (sigma.n.20 * sqrt(1-h.20)) pred.n.20 <- predict.lm(lm.n.20,newdata=data.frame(x=x)) sum( (lm.1$fit-pred.n.20)^2) / (2 * sigma^2) 1/2 * r.20^2 * ( h.20 / (1-h.20)) par(mfcol=c(1,1)) plot(x,y) identify(x,y) abline(lm.1) abline(lm(y~x,subset=c(rep(T,19),F)),lty=2) abline(lm(y~x,subset=c(rep(T,8),F,rep(T,11),F)),lty=3) abline(lm(y~x,subset=c(F,rep(T,19))),lty=4) # dwtest -- lmtest package # runs.test -- tseries package #z.vec <- rnorm(20) #pos.ar <- z.vec #neg.ar <- z.vec #for(i in 2:20) #{ #neg.ar[i] <- sqrt(0.5) * z.vec[i] - sqrt(0.5) * neg.ar[i-1] #pos.ar[i] <- sqrt(0.5) * z.vec[i] + sqrt(0.5) * pos.ar[i-1] #} par(mfcol=c(1,2)) plot(pos.ar,type="b",ylab="Residuals",xlab="Time",main="Positive AR") ; abline(0,0) plot(neg.ar,type="b",ylab="Residuals",xlab="Time",main="Negative AR") ; abline(0,0) runs.test(factor(pos.ar>0)) runs.test(factor(neg.ar>0)) dwtest(lm(pos.ar ~ rep(1,20)+0),alternative = "two.sided") dwtest(lm(neg.ar ~ rep(1,20)+0),alternative = "two.sided") # load faraway package library(faraway) data(state) statedata <- data.frame(state.x77,row.names=state.abb,check.names=T) lm.11 <- lm(Life.Exp ~ ., data=statedata) ; summary(lm.11) library(leaps) x <- model.matrix(lm.11)[,-1] y <- statedata$Life leap.1 <- leaps(x,y,nbest=3) leap.2 <- leaps(x,y,method="adjr2",nbest=3) leap.1 maxadjr(leap.2,best=8) Cpplot(leap.1) lm.2 <- lm(Life.Exp ~ ., data=statedata) extractAIC(lm.2) 50 * log(sum(lm.2$res^2)/50) + 16 step(lm.2,direction="backward")