# Excercise 9 solution # Question 1 # air.freight <- read.table("D:/Courses/regression/data sets/AIRFREIG.txt",header=F) x <- air.freight[,2] y <- air.freight[,3] n <- length(x) plot(x,y,xlab=" num. of times the carton was transferred",ylab=" num. broken ampules") lm.1 <- lm(y~x) abline(lm.1) #library(alr3) #pure.error.anova(lm.1) lm.2 <- lm(y ~ x + factor(x)) summary(lm.2) anova(lm.2) lm.3 <- lm(y ~ factor(x)) summary(lm.3) anova(lm.1,lm.2) anova(lm.1,lm.3) # Question 2 # bee.data <- read.table("D:/Courses/regression/data sets/kaveret.txt",header=F) num.bees <- bee.data[,2] hour <- bee.data[,3] num.sqrt <- sqrt(num.bees) hour.p2 <- hour^2 hour.p3 <- hour^3 hour.p4 <- hour^4 hour.p5 <- hour^5 hour.p6 <- hour^6 lm.3 <- lm(num.sqrt ~ hour + hour.p2) lm.4 <- lm(num.sqrt ~ hour + hour.p2 + hour.p3 + hour.p4 + hour.p5 + hour.p6) lm.5 <- lm(num.sqrt ~ factor(hour)) # Alef anova(lm.3,lm.4) anova(lm.4) anova(lm.4)[[2]] sum(anova(lm.4)[[2]][3:6]) # Bet x.3 <- model.matrix(lm.3) x.5 <- model.matrix(lm.5) dim(x.3) ; x.3[1:10,] dim(x.5) ; x.5[1:10,] h.5 <- x.5 %*% solve(t(x.5)%*%x.5) %*% t(x.5) dim(h.5) lbl <- paste("h-",hour,sep="") dimnames(h.5) <- list(lbl,lbl) hour[1:8] round(h.5[1:8,1:8],4) table(hour) ; round(1/table(hour),4) pure.error.anova(lm.5) # Excercise 9 Question 3 # furnace.data <- read.table("D:/Courses/regression/data sets/furnace.txt",header=T) attach(furnace.data) ylm <- range(pressure) xlm <- range(temp) # Alef par(mfcol=c(1,1)) plot(temp[furnace=="A"],pressure[furnace=="A"],pch="A",ylim=ylm,xlim=xlm) points(temp[furnace=="B"],pressure[furnace=="B"],pch="B") i.b <- ifelse(furnace=="A",rep(1,19),rep(0,19)) i.a <- ifelse(furnace=="B",rep(1,19),rep(0,19)) temp.a <- ifelse(furnace=="A",temp,rep(0,19)) temp.b <- ifelse(furnace=="B",temp,rep(0,19)) lm.1 <- lm(pressure ~ temp) lm.1a <- lm(pressure ~ temp,subset=(furnace=="A")) lm.1b <- lm(pressure ~ temp,subset=(furnace=="B")) abline(lm.1a,col=2) abline(lm.1b,col=3) # Bet summary(lm.1) summary(lm.1a) summary(lm.1b) # Gimel + Dalet lm.31 <- lm(pressure ~ i.b + temp + temp.b) lm.32 <- lm(pressure ~ i.a + i.b + temp.a + temp.b + 0) summary(lm.31) summary(lm.32) summary(lm.1a) summary(lm.1b) # Heh + Vav lm.21 <- lm(pressure ~ temp + temp.b) lm.22 <- lm(pressure ~ i.b + temp) summary(lm.22) anova(lm.21,lm.31) anova(lm.22,lm.31) summary(lm.31) summary(lm(pressure ~ temp * factor(furnace))) #--------------------------------------------------------------- # Haashara #la.rent <- read.table("D:/Courses/regression/data sets/LARENT.txt",header=F) rent.price <- la.rent[,1] area.bath <- la.rent[,6] dist.beach <- la.rent[,8] dist.ucla <- la.rent[,9] vec.1 <- rep(1,26) lm.0 <- lm(rent.price ~ vec.1 + 0) lm.1 <- lm(rent.price ~ area.bath + dist.beach + dist.ucla ) lm.2 <- lm(rent.price ~ vec.1 + area.bath + dist.beach + dist.ucla + 0) x.mat0 <- cbind(vec.1) x.mat1 <- cbind(vec.1,area.bath,dist.beach,dist.ucla) mn.20.0 <- t(x.mat0[20,]) %*% lm.0$coeff mn.20.1 <- t(x.mat1[20,]) %*% lm.1$coeff sd.20.0 <- sqrt(t(x.mat0[20,]) %*% vcov(lm.0) %*% x.mat0[20,]) sd.20.1 <- sqrt(t(x.mat1[20,]) %*% vcov(lm.1) %*% x.mat1[20,]) # Model 20th "confidence interval" for new observation predict(lm.1,interval="confidence")[20,] predict(lm.1,interval="prediction")[20,] predict(lm.1,se.fit=T) sd.pred.20.1 <- sqrt(predict(lm.1,se.fit=T)$residual.scale^2 + sd.20.1^2) # residual.scal^2 == MSE mn.20.1 + sd.pred.20.1*qt(0.975,22) ; mn.20.1 - sd.pred.20.1*qt(0.975,22) # Scatter plots of fitted + confidence bands in relation to fitted and index. conf.1 <- predict(lm.1,interval="confidence") pred.1 <- predict(lm.1,interval="prediction") ord <- order(lm.1$fit) plot(lm.1$fit,rent.price) lines(lm.1$fit[ord],conf.1[ord,1],col=3) lines(lm.1$fit[ord],conf.1[ord,2],col=2) lines(lm.1$fit[ord],conf.1[ord,3],col=2) lines(lm.1$fit[ord],pred.1[ord,2],col=4) lines(lm.1$fit[ord],pred.1[ord,3],col=4) conf.0 <- predict(lm.0,interval="confidence") pred.0 <- predict(lm.0,interval="prediction") plot(rent.price,ylim=range(pred.0)) lines(conf.0[,1],col=3) lines(conf.0[,2],col=2) lines(conf.0[,3],col=2) lines(pred.0[,2],col=4) lines(pred.0[,3],col=4) # Scatter plots of fitted + confidence intervals in relation to fitted and explanatory variables. plot(lm.1$fit,rent.price) points(lm.1$fit,pred.1[,1],col=3) segments(pred.1[,1],pred.1[,2],pred.1[,1],pred.1[,3],col=2) plot(area.bath,rent.price) points(area.bath,pred.1[,1],col=3) segments(area.bath,pred.1[,2],area.bath,pred.1[,3],col=2) conf.0 <- predict(lm.0,interval="confidence") pred.0 <- predict(lm.0,interval="prediction") plot(lm.0$fit,rent.price) points(lm.0$fit,conf.0[,1],col=3) segments(conf.0[,1],conf.0[,2],conf.0[,1],conf.0[,3],col=2) plot(lm.0$fit,rent.price) points(lm.0$fit,pred.0[,1],col=3) segments(pred.0[,1],pred.0[,2],pred.0[,1],pred.0[,3],col=2) plot(area.bath,rent.price) points(area.bath,pred.0[,1],col=3) segments(area.bath,pred.0[,2],area.bath,pred.0[,3],col=2)