############################### # # # Question 1 # # # ############################### y <- c(1.73,1,1.73,2,3,4.36,2.65,2.65,3.32) x <- c(0.25,0.22,0.36,0.23,0.37,0.46,0.27,0.44,0.31) plot(x,y) lm.1 <- lm(y~x) # full model = intercept + slope lm.2 <- lm(y~x+0) # no intercept model summary(lm.1) abline(lm.1,col=2) abline(lm.2,col=3) sum(x*y) / sum(x^2) # estimate of beta summary(lm.2) sum(lm.1$res) # sum of full model residuals sum(lm.2$res) # sum of no-intercept model residuals MSE <- sum(lm.2$res^2) / (9-1) # MSE MSE sqrt(MSE) # check if result same as R output MSE / sum(x^2) # beta variance estimator vcov(lm.2) # R estimated coeff. covariance matrix sqrt(vcov(lm.2)) # beta std - matching R output ############################### # # # Question 2 # # # ############################### y.hat.nrm <- rep(NA,1000) epsilon.nrm <- rep(NA,1000) SSE <- rep(NA,1000) SSR <- rep(NA,1000) x <- 1:10 # Simulation 1: beta_0 = 2 and beta_1 = 1.5 for(i in 1:1000) { error.vec <- rnorm(10,sd = 3) y.1 <- 2 + 1.5*x + error.vec lm.1 <- lm(y.1 ~ x) y.hat.nrm[i] <- sum(lm.1$fit^2) epsilon.nrm[i] <- sum(error.vec^2) SSE[i] <- sum(lm.1$res^2) SSR[i] <- sum((lm.1$fit-mean(y.1))^2) } boxplot(list(y.hat.nrm,epsilon.nrm,SSE,SSR)) x.vec <- seq(0,300,length=5000) #x.vec <- seq(0,80,length=5000) plot(x.vec,pchisq(x.vec,df=10),col=3,lty=2,type="l",ylim=c(0,1)) lines(x.vec,pchisq(x.vec,df=8),col=4,lty=2,type="l") lines(ecdf(y.hat.nrm/9),col=2) lines(ecdf(epsilon.nrm/9),col=3) lines(ecdf(SSE/9),col=4) lines(ecdf(SSR/9),col=5) # y.hat.nrm and SSR have non-central chi-squared distribution lines(x.vec,pchisq(x.vec,df=2,ncp = sum((2+1.5*x)^2)/9),col=2,lty=2) lines(x.vec,pchisq(x.vec,df=1,ncp = sum( ((2+1.5*x) - mean(2+1.5*x))^2)/9),col=5,lty=2) # K-S tests showing that epsilon.nrm/9 has 10-df chi-squared distribution ks.test(epsilon.nrm/9,"pchisq",df=9) ks.test(epsilon.nrm/9,"pchisq",df=10) ks.test(epsilon.nrm/9,"pchisq",df=11) # Simulation 2: beta_0 = 2 and beta_1 = 0 -- an example in which SSR is central chi-squared for(i in 1:1000) { error.vec <- rnorm(10,sd = 3) y.2 <- 2 + 0 *x + error.vec lm.2 <- lm(y.2 ~ x) y.hat.nrm[i] <- sum(lm.2$fit^2) epsilon.nrm[i] <- sum(error.vec^2) SSE[i] <- sum(lm.2$res^2) SSR[i] <- sum((lm.2$fit-mean(y.2))^2) } x.vec <- seq(0,40,length=5000) plot(x.vec,pchisq(x.vec,df=10),col=3,lty=2,type="l",ylim=c(0,1)) lines(x.vec,pchisq(x.vec,df=8),col=4,lty=2,type="l") lines(x.vec,pchisq(x.vec,df=1),col=5,lty=2,type="l") lines(ecdf(y.hat.nrm/9),col=2) lines(ecdf(epsilon.nrm/9),col=3) lines(ecdf(SSE/9),col=4) lines(ecdf(SSR/9),col=5) # Simulation 3: beta_0 = 0 and beta_1 = 0 -- an example in which y.hat.nrn. is central chi-squared for(i in 1:1000) { error.vec <- rnorm(10,sd = 3) y.3 <- 0 + 0 *x + error.vec lm.3 <- lm(y.3 ~ x) y.hat.nrm[i] <- sum(lm.3$fit^2) epsilon.nrm[i] <- sum(error.vec^2) SSE[i] <- sum(lm.3$res^2) SSR[i] <- sum((lm.3$fit-mean(y.3))^2) } plot(x.vec,pchisq(x.vec,df=10),col=3,lty=2,type="l",ylim=c(0,1)) lines(x.vec,pchisq(x.vec,df=8),col=4,lty=2,type="l") lines(x.vec,pchisq(x.vec,df=2),col=2,lty=2,type="l") lines(x.vec,pchisq(x.vec,df=1),col=5,lty=2,type="l") lines(ecdf(y.hat.nrm/9),col=2) lines(ecdf(epsilon.nrm/9),col=3) lines(ecdf(SSE/9),col=4) lines(ecdf(SSR/9),col=5) # Simulation 4: linear model in which SSE is non-central chi-squared for(i in 1:1000) { error.vec <- rnorm(10,sd = 3) y.4 <- 1 + 0.2 * x^2 + error.vec lm.4 <- lm(y.4 ~ x) SSE[i] <- sum(lm.4$res^2) } plot(x.vec,pchisq(x.vec,df=8),col=4,lty=2,type="l",ylim=c(0,1)) lines(ecdf(SSE/9),col=4) # Question: what is the non-centrality parameter? ############################### # # # Question 3 # # # ############################### A.mat <- cbind(c(1,2),c(0,3),c(-2,-4)) mu.vec <- c(3,1) A.mat %*% t(A.mat) #Z.smp <- array(rnorm(3*10^5),dim=c(10^5,3)) Z.smp <- array(rnorm(3*10^2),dim=c(10^2,3)) W.smp <- A.mat %*% t(Z.smp) + mu.vec cov(t(W.smp)) apply(W.smp,1,mean) V.1 <- -Z.smp[,1] V.2 <- ifelse(abs(Z.smp[,1])<= 1, Z.smp[,1], - Z.smp[,1]) hist(V.2) plot(V.1,V.2) abline(0,1,col=2) plot(V.1,V.1 + V.2) abline(0,1,col=2) abline(h = 0,col=2) hist(V.1 + V.2)