############################### # # # Question 1 # # # ############################### # air.freight <- read.table("D:/Courses/regression/data sets/AIRFREIG.txt",header=F) x <- air.freight[,2] y <- air.freight[,3] n <- length(x) plot(x,y,xlab=" num. of times the carton was transferred",ylab=" num. broken ampules") abline(lm(y~x)) lm(y~x) S.XX <- sum((x-mean(x))^2) S.YY <- sum((y-mean(y))^2) S.XY <- sum((x-mean(x))*(y-mean(y))) b.1 <- S.XY / S.XX b.0 <- mean(y) - b.1 * mean(x) y.p <- b.0 + b.1*x # Aleph SST <- S.YY SSR <- sum((y.p - mean(y))^2) SSE <- sum((y - y.p)^2) SST SSE + SSR # Bet SSR / SST # R-squared b.1^2 * S.XX / S.YY # = R-squared sqrt( (SSR / SST) * S.YY / S.XX ) # = estimator for beta_1 # Gimmel MSE <- SSE / (n-2) sd.b.1 <- sqrt(MSE / S.XX) sqrt(MSE) b.1 sd.b.1 summary(lm(y~x)) b.1 + sd.b.1 * qt(0.975,n-2) b.1 - sd.b.1 * qt(0.975,n-2) # Dalet (b.1 - 3) / sd.b.1 # Statistic value for testing slope is 3 qt(0.95,n-2) # 0.05 significance level critical value 1 - pt((b.1 - 3) / sd.b.1,n-2) ############################### # # # Question 2 # # # ############################### # gpa <- read.table("D:/Courses/regression/data sets/GPA.txt",header=F) x <- gpa[,2] y <- gpa[,3] n <- length(x) S.XX <- sum((x-mean(x))^2) S.YY <- sum((y-mean(y))^2) S.XY <- sum((x-mean(x))*(y-mean(y))) SSR <- b.1^2 * S.XX SST <- S.YY SSE <- SST - SSR MSE <- SSE / (n-2) b.1 <- S.XY / S.XX b.0 <- mean(y) - b.1 * mean(x) sd.b.1 <- sqrt(MSE / S.XX) sd.b.0 <- sqrt(MSE * ( 1/n + mean(x)^2 / S.XX)) sqrt(MSE) b.0 b.1 sd.b.0 sd.b.1 summary(lm(y~x)) b.1 + sd.b.1 * qt(0.975,n-2) ; b.1 - sd.b.1 * qt(0.975,n-2) b.0 + sd.b.0 * qt(0.975,n-2) ; b.0 - sd.b.0 * qt(0.975,n-2) plot(x,y,xlab="Midterm grade",ylab="Final grade") abline(b.0,b.1) y.5 <- b.0 + b.1 * 5 sd.y.5 <- sqrt(MSE * ( 1/n + (5-mean(x))^2 / S.XX)) sd.y.5 <- sqrt(MSE * (1 + 1/n + (5-mean(x))^2 / S.XX)) y.5 + sd.y.5 * qt(0.975,n-2) ; y.5 - sd.y.5 * qt(0.975,n-2) ############################### # # # Question 3 # # # ############################### x <- 1:10 y <- 2 + 1.5*x + rnorm(10,sd = 3) plot(x,y,ylim = c(0,25)) abline(2,1.5,col=2) abline(lm(y~x),col=3) ci.5 <-predict(lm(y~x),data.frame(x=5),interval = "confidence",level = 0.90) arrows(5,ci.5[2],5,ci.5[3],col=4,code = 3, angle = 90,length = 0.1) # 1st simulation cov.ind <- rep(0,10000) for(i in 1:10000) { y <- 2 + 1.5*x + rnorm(10,sd = 3) ci.5 <-predict(lm(y~x),data.frame(x=5),interval = "confidence",level = 0.90) if(ci.5[2] <= 9.5 & 9.5 <= ci.5[3]) cov.ind[i] <- 1 } table(cov.ind) t.test(cov.ind) # 2nd simulation cov.ind <- rep(0,10000) for(i in 1:10000) { y <- 2 + 1.5*x + rnorm(10,sd = 3) pi.5 <-predict(lm(y~x),data.frame(x=5),interval = "prediction",level = 0.90) if(pi.5[2] <= y[5] & y[5] <= pi.5[3]) cov.ind[i] <- 1 } table(cov.ind) t.test(cov.ind) # Another simulation? cov.ind <- rep(0,10000) for(i in 1:10000) { y <- 2 + 1.5*x + rnorm(10,sd = 3) y.0 <- 2 + 1.5*5 + rnorm(1,sd = 3) pi.5 <-predict(lm(y~x),data.frame(x=5),interval = "prediction",level = 0.90) if(pi.5[2] <= y.0 & y.0 <= pi.5[3]) cov.ind[i] <- 1 } table(cov.ind) t.test(cov.ind) ############################### # # # Question 4 # # # ############################### a <- cbind(c(1,2,3),c(4,5,6)) b <- cbind(c(1,2,3),c(2,3,4)) # 4.a a ; b ; a+b ; a-b ; 3*b ; t(a)%*%b ; t(a)%*%a solve(t(a)%*%b) ( t(a)%*%b ) %*% ( solve(t(a)%*%b) ) # 4.b a <- cbind(c(1,2,3),c(2,2,4),c(5,10,15),c(1,6,1)) a det(a[,1:3]) det(a[,c(1,2,4)]) solve(a[,1:3]) solve(a[,c(1,2,4)])