#################################### # Grade example par(mfcol=c(1,1)) x1 <- c(70,80,82,75,90,99,77,81,92,83) y1 <- c(77,79,82,73,85,90,76,88,93,89) plot(x1,y1,xlab="Midterm grades",ylab="Final grades") x1 <- c(5,6,7,8,10,11,13,14,16,18,20,21,24,25,27) y1 <- c(4,6,7,9,13,14,17,19,20,20,21,20,21,19,20) plot(x1,y1,xlab="Age",ylab="Steroid levels") #################################### # Galton's made up data z1 <- rnorm(300) z2 <- rnorm(300) z0 <- rnorm(300) x <- 175 + sqrt(30)*z1 + sqrt(70)*z0 y <- 175 + sqrt(30)*z2 + sqrt(70)*z0 plot(x,y,xlab="Fathers' height",ylab="Childrens height") abline(0,1,lty=2,lwd = 2,col = 2) abline(lm(y~x),lwd = 2, col = 3) var(cbind(x,y)) #################################### # Heart size example hartsize <- read.table("D:/Courses/regression/data sets/hartsize.txt",header=F) dimnames(hartsize)[[2]] <- c("age","log.diameter") rm(age) rm(log.diameter) attach(hartsize) plot(age,log.diameter) abline(lm(log.diameter ~ age)) pred <- predict(lm(log.diameter ~ age)) segments(age,pred,age,log.diameter,lty=2) # Computations learned in class S.XX <- sum( (age - mean(age))^2 ) S.YY <- sum( (log.diameter - mean(log.diameter))^2 ) S.XY <- sum( (age - mean(age)) * (log.diameter - mean(log.diameter))) # Least square estimators B1.hat <- S.XY / S.XX B0.hat <- mean(log.diameter) - B1.hat*mean(age) Y.hat <- B0.hat + B1.hat*age e.vec <- log.diameter - Y.hat sum(e.vec) sum(e.vec*age) sum(e.vec*Y.hat) SSE <- sum(e.vec^2) SST <- S.YY SSR <- sum( (Y.hat - mean(log.diameter))^2 ) SST SSE + SSR (R.squared <- SSR / SST) # Linear model in R summary(lm(log.diameter ~ age)) anova(lm(log.diameter ~ age)) # Standard errors for intercept and slope n <- length(age) MSE <- SSE / (n-2) (sqrt(MSE)) (se.B0.hat <- sqrt(MSE) / sqrt(S.XX)) (se.B1.hat <- sqrt(MSE) * sqrt(1/n + mean(age)^2/S.XX)) # Confidence intervals for regression line and prediction interval for new observations plot(age,log.diameter) lines(age,Y.hat,col=2,lty = 2) abline(B0.hat,B1.hat,col=2) points(mean(age),mean(log.diameter),pch = 3,col=2,cex =2) ci.ul <- Y.hat + qt(0.975,n-2)*sqrt(MSE)*sqrt(1/n + (age - mean(age))^2/S.XX) ci.ll <- Y.hat - qt(0.975,n-2)*sqrt(MSE)*sqrt(1/n + (age - mean(age))^2/S.XX) #predict(lm(log.diameter ~ age),interval = "confidence") lines(age,ci.ul,col=3) lines(age,ci.ll,col=3) pi.ul <- Y.hat + qt(0.975,n-2)*sqrt(MSE)*sqrt(1 + 1/n + (age - mean(age))^2/S.XX) pi.ll <- Y.hat - qt(0.975,n-2)*sqrt(MSE)*sqrt(1 + 1/n + (age - mean(age))^2/S.XX) #predict(lm(log.diameter ~ age),interval = "prediction") lines(age,pi.ul,col=4) lines(age,pi.ll,col=4)