# Read crab data # Horseshoe crab data set (Agresti Table 4.3): # Color (1 = light .... 4 = dark) # Spine (1 = good condition .. . 3 = bad condition) # Satelites (number of ... ) # Width (cm.) # Weight (gm.) crab.data <- read.table("E:\\Courses\\CDA\\Crab data.txt",header=T) attach(crab.data) par(mfcol=c(1,2)) plot(width,satell) plot(width,jitter(satell)) # Fit logistic GLM model for ocuurence of satellites length(satell) satell.ind <- ifelse(satell==0,rep(0,173),rep(1,173)) par(mfcol=c(1,1)) plot(width,satell.ind) lines(lowess(width,satell.ind),col=2) lm.logit <- glm(satell.ind ~ width,family="binomial") summary(lm.logit) lines(sort(width),sort(lm.logit$fit),col=3) cbind(model.matrix(lm.logit),lm.logit$fit) (a <- predict(lm.logit,data.frame(width=28))) predict(lm.logit,data.frame(width=28),type="link") sum(summary(lm.logit)$coeff[,1] * c(1,28)) predict(lm.logit,data.frame(width=28),type="response") exp(a) / (1+exp(a)) vcov(lm.logit) (pred.se <- predict(lm.logit,data.frame(width=28),type="link",se.fit=T)) sqrt(cbind(1,28) %*% vcov(lm.logit) %*% t(cbind(1,28))) inv.logit <- function(x) exp(x) / (1 + exp(x)) ci.ul <- inv.logit(pred.se$fit + qnorm(.975)*pred.se$se) ci.ll <- inv.logit(pred.se$fit - qnorm(.975)*pred.se$se) arrows(28,ci.ll,28,ci.ul,code = 3,length = 0.05,angle = 90,col=4,lwd = 2) # Fit poisson GLM model for satellite counts par(mfcol=c(1,1)) plot(width,satell) lines(lowess(width,satell),col=2) lm.pois <- glm(satell ~ width,family="poisson") summary(lm.pois) model.matrix(lm.pois) lines(sort(width),sort(lm.pois$fit),col=3) predict(lm.pois,data.frame(width=28)) summary(lm.pois)$coeff sum(summary(lm.pois)$coeff[,1] * c(1,28)) exp(sum(summary(lm.pois)$coeff[,1] * c(1,28))) predict(lm.pois,data.frame(width=28),type="response") points(28, exp(sum(summary(lm.pois)$coeff[,1] * c(1,28))),pch = 4,col=4,cex =1) (pred.se <- predict(lm.pois,data.frame(width=28),type="link",se.fit=T)) vcov(lm.pois) summary(lm.pois)$coeff sqrt(diag(vcov(lm.pois))) sqrt(cbind(1,28) %*% vcov(lm.pois) %*% t(cbind(1,28))) ci.ul <- exp(pred.se$fit + qnorm(.975)*pred.se$se) ci.ll <- exp(pred.se$fit - qnorm(.975)*pred.se$se) arrows(28,ci.ll,28,ci.ul,code = 3,length = 0.05,angle = 90,col=4,lwd = 2) # Example 1: 3*4 table -- independent row & columns variables X <-paste("X",1:3,sep="") Y <-paste("Y",1:4,sep="") data<-as.vector(outer(c(10,20,10),c(10,20,20,10))) data.mat <- cbind(expand.grid(Row=X,Col=Y),count=data) (tbl <-tapply(data.mat$count,data.mat[,1:2], sum) ) fit.1 <- glm(count~ Row + Col, family="poisson", data=data.mat) fit.2 <- glm(count~ Row + Col + Row*Col, family="poisson", data=data.mat) round(summary(fit.1)$coef,3) round(summary(fit.2)$coef,3) (est.vec <- summary(fit.1)$coef[,1]) exp(est.vec[1]) exp(est.vec[1] + est.vec[2] + est.vec[4]) exp(est.vec[1] + est.vec[3] + est.vec[4]) # Example 2: 2*2 table -- dependent row & columns variables X <-paste("X",1:2,sep="") Y <-paste("Y",1:2,sep="") data<-c(60,90,40,110) data.mat <- cbind(expand.grid(Row=X,Col=Y),count=data) (tbl <-tapply(data.mat$count,data.mat[,1:2], sum) ) fit.1 <- glm(count~ Row + Col, family="poisson", data=data.mat) fit.2 <- glm(count~ Row + Col + Row*Col, family="poisson", data=data.mat) round(summary(fit.1)$coef,3) round(summary(fit.2)$coef,3) (est.1 <- summary(fit.1)$coef[,1]) (est.2 <- summary(fit.2)$coef[,1]) exp(est.1[1]) exp(est.1[1] + est.1[2]) exp(est.1[1] + est.1[2] + est.1[3]) array(fit.1$fit,dim=c(2,2)) exp(est.2[1]) # observed [1,1] cell exp(est.2[1] + est.2[2]) # observed [2,1] cell exp(est.2[1] + est.2[3]) # observed [1,2] cell exp(est.2[1] + est.2[2] + est.2[3] + est.2[4]) # observed [2,2] cell array(fit.2$fit,dim=c(2,2)) tbl[1,1] * tbl[2,2] / (tbl[1,2] * tbl[2,1]) # Odds ratio exp(est.2[4]) Example 3: 3 way table X <-paste("l",1:3,sep="") Y <-paste("l",1:3,sep="") Z <-paste("l",1:3,sep="") data<- rpois(27,30) data.mat <- cbind(expand.grid(X=X,Y=Y,Z=Z),count=data) summary(glm(count~ X*Y*Z, family="poisson", data=data.mat)) # Exampe 4: return to Excercise 1, Question 2 Wth <- rep(NA,365) Umb <- rep(NA,365) Acc <- rep(NA,365) p.weath <- c(.2,.6,.2) p.umb.weath <- c(.20,.10,.70) p.acc.weath <- c(.30,.20,.50) for(i in 1:365) { w <- sample(1:3,1,prob=p.weath) Umb[i] <- sample(0:1,1,prob=c(1-p.umb.weath[w],p.umb.weath[w])) Acc[i] <- sample(0:1,1,prob=c(1-p.acc.weath[w],p.acc.weath[w])) Wth[i] <- c("Sharav","Pleasant","Rain")[w] } #(counts <- table(Umb,Acc,Wth)) ( tbl.00 <- cbind(expand.grid(Umb=0:1,Acc=0:1,Wth=c("Sharav","Pleasant","Rain")), Freq = c(168, 17, 36, 4, 11, 32, 13, 17, 41, 6, 16, 4)) ) tbl <- tbl.00 attach(tbl) fitU.A.W <- glm(Freq ~ Umb + Acc + Wth, family="poisson",data=tbl) summary(fitU.A.W) ( fitNULL <- glm(Freq ~ 1, family="poisson",data=tbl) ) 2 * sum(Freq * log( Freq / exp(fitNULL$coeff) )) 2 * sum(Freq * log( Freq / fitNULL$fit )) 2 * sum(Freq * log( Freq / fitU.A.W$fit )) (fitUAW <- glm(Freq~Umb*Acc*Wth, family="poisson",data=tbl)) anova(fitU.A.W) anova(fitUAW) anova(fitU.A.W,fitUAW) fitUA.W <- glm(Freq ~ Umb*Acc + Wth, family="poisson",data=tbl) fitUW.A <- glm(Freq ~ Umb*Wth + Acc, family="poisson",data=tbl) fitU.AW <- glm(Freq ~ Umb + Acc*Wth, family="poisson",data=tbl) fitUA.UW <- glm(Freq ~ Umb*Acc + Umb*Wth, family="poisson",data=tbl) fitUA.AW <- glm(Freq ~ Umb*Acc + Acc*Wth, family="poisson",data=tbl) fitUW.AW <- glm(Freq ~ Umb*Wth + Acc*Wth, family="poisson",data=tbl) fitUA.UW.AW <- glm(Freq ~ Umb*Acc + Umb*Wth + Acc*Wth, family="poisson",data=tbl) anova(fitU.A.W,fitUA.W,fitUA.UW,fitUA.UW.AW,fitUAW) anova(fitU.A.W,fitUA.W,fitUA.AW,fitUA.UW.AW) anova(fitU.A.W,fitUW.A,fitUA.UW,fitUA.UW.AW) anova(fitU.A.W,fitUW.A,fitUW.AW,fitUA.UW.AW) anova(fitU.A.W,fitU.AW,fitUA.AW,fitUA.UW.AW) anova(fitU.A.W,fitU.AW,fitUW.AW,fitUA.UW.AW) anova( glm(Freq ~ Wth*Acc*Umb, family="poisson",data=tbl)) # automatic R model selection library(MASS) stepAIC(fitUAW) # for excercise 5 summary(fitUW.AW) fitUW.AW$y round(fitUW.AW$fit,2) fitUW.AW$dev