# 1: Automobile Accident Example: library(MASS) table.8.8<-data.frame(expand.grid(S=c("No","Yes"), L=c("Urban","Rural"), G=c("Female","Male"),I=c("No","Yes")), count=c(7287,11587,3246,6134,10381,10969,6123, 6693,996, 759, 973, 757, 812, 380, 1084, 513)) fitNULL <-glm(count~ 1, family="poisson",data=table.8.8) fitG.I.L.S <-glm(count~ G + I + L + S, family="poisson",data=table.8.8) fitGI.GL.GS.IL.IS.LS <-glm(count~ (G + I + L + S)^2, family="poisson",data=table.8.8) fitGIL.GIS.GLS.ILS <-glm(count~ (G + I + L + S)^3, family="poisson",data=table.8.8) fitGILS <-glm(count~ G*I*L*S, family="poisson",data=table.8.8) anova(fitNULL,fitG.I.L.S,fitGI.GL.GS.IL.IS.LS,fitGIL.GIS.GLS.ILS,fitGILS) summary(fitGILS) extractAIC(fitGILS) loglik <- log(prod(dpois(table.8.8[,5],table.8.8[,5]))) -2*loglik + 2*16 stepAIC(fitGILS) # stepAIC is in MASS library anova(fitGI.GL.GS.IL.IS.LS,fitGIL.GIS.GLS.ILS) # Gini index fit.2 <- fitGI.GL.GS.IL.IS.LS$fit fit.3 <- fitGIL.GIS.GLS.ILS$fit n <- sum(fit.3) delta.2 <- (fitGILS$fit - fit.2) / (2*n) delta.3 <- (fitGILS$fit - fit.3) / (2*n) cbind(table.8.8,fit.2,fit.3,delta.2,delta.3) sum(abs(delta.2)) sum(abs(delta.3)) # AIC computation summary(fitG.I.L.S) fitG.I.L.S$aic - 2*sum(log(dpois(fitGILS$y,fitG.I.L.S$fit))) + 2*5 fitGILS$aic - 2*sum(log(dpois(fitGILS$y,fitGILS$fit))) + 2*16 fitG.I.L.S$aic - fitGILS$aic fitG.I.L.S$dev + 2 * (16 - 5) # 2: Connection between loglinear and logit models fit.loglinear <- glm(count ~ G*L*S+G*I+L*I+I*S, data = table.8.8, family = poisson) summary(fit.loglinear) fit.loglinear$fit fit.loglinear$coefficients fit.loglinear$coefficients[5] + fit.loglinear$coefficients[11] fit.loglinear$coefficients[5] exp(-fit.loglinear$coefficients[11]) # Construction of logit model for Injury fit.logit <- glm(I ~ G + L + S, data = table.8.8, family = binomial, weight = count) summary(fit.logit) exp(fit.logit$coefficients[2:4]) fit.logit$coefficients model.matrix(fit.logit) p.logit <- model.matrix(fit.logit) %*% fit.logit$coefficients predict.glm(fit.logit) p.hat <- exp(p.logit) / (1 + exp(p.logit)) round(t(p.hat),2) fit.logit$fit fit.logit.0 <- glm(I ~ 1, data = table.8.8, family = binomial, weight = count) summary(fit.logit.0) fit.logit.1 <- glm(I ~ (G + L + S)^3, data = table.8.8, family = binomial, weight = count) summary(fit.logit.1) cbind(table.8.8,p.NULL = fit.logit.0$fit,p.model = fit.logit$fit, p.SAT = fit.logit.1$fit) sum(table.8.8$count[9:16]) / n table.8.8$count[9:16] / (table.8.8$count[1:8] + table.8.8$count[9:16]) # Deviance computations (log.lik.0 <- sum(table.8.8$count * log(ifelse(fit.logit.0$y,fit.logit.0$fit,1-fit.logit.0$fit)))) (log.lik.1 <- sum(table.8.8$count * log(ifelse(fit.logit.1$y,fit.logit.1$fit,1-fit.logit.1$fit)))) (log.lik <- sum(table.8.8$count * log(ifelse(fit.logit$y,fit.logit$fit,1-fit.logit$fit)))) -2 * log.lik.0 -2 * log.lik -2 * log.lik.1 anova(fit.logit.0,fit.logit,fit.logit.1) # 3: ML estimation for contingency tables ( tbl <- cbind(expand.grid(Umb=0:1,Acc=0:1,Wth=c("Sh","Pl","Rn")), counts = c(168, 17, 36, 4, 11, 32, 13, 17, 41, 6, 16, 4)) ) fitAW.UW <- glm(counts~Acc*Wth+Umb*Wth, family="poisson",data=tbl) cbind(tbl,fit = fitAW.UW$fit,se = predict(fitAW.UW,type="response",se=T)$se) (tbl.U.W <- apply(tbl.3w,c(1,3),sum)) (tbl.A.W <- apply(tbl.3w,c(2,3),sum)) (tbl.W <- apply(tbl.3w,3,sum)) tbl.U.W[1,1] * tbl.A.W[1,1] / tbl.W[1] # MLE mu pred of cell (U=1,A=1,W=1) tbl.U.W[2,1] * tbl.A.W[1,1] / tbl.W[1] # MLE mu pred of cell (U=2,A=1,W=1) tbl.U.W[1,2] * tbl.A.W[1,2] / tbl.W[2] # MLE mu pred of cell (U=1,A=1,W=2) fitAW.UW$coef # Relation between MLE mu and gamma predictions x.mat <- model.matrix(fitAW.UW) exp(x.mat %*% fitAW.UW$coef) summary(fitAW.UW) fitAW.UW$y %*% x.mat # "Normal" equations fitAW.UW$fit %*% x.mat summary(fitAW.UW)$cov.unscaled vcov(fitAW.UW) summary(fitAW.UW)$coef round(sqrt(diag(summary(fitAW.UW)$cov.unscaled)),3) solve(t(x.mat) %*% diag(fitAW.UW$fit) %*% x.mat) # Computation of gamma estimators cov matrix # 4: Zeros -- random ir structural? (tmp <- rpois(4,outer(c(10,.8),c(.4,10)))) (tbl <-data.frame(expand.grid(X=c(0,1), Y=c(0,1)),counts=tmp) ) summary(glm(counts~ X + Y, family="poisson",data=tbl)) summary(glm(counts~ X * Y, family="poisson",data=tbl)) predict(glm(counts~ X + Y, family="poisson",data=tbl),type="link") predict(glm(counts~ X * Y, family="poisson",data=tbl),type="link") predict(glm(counts~ X + Y, family="poisson",data=tbl),type="response") predict(glm(counts~ X * Y, family="poisson",data=tbl),type="response") # 5: Structual zero example n.11 <- 30 n.12 <- 63 n.22 <- 63 (theta.hat <- (2*n.11 + n.12) / (2*n.11 + 2*n.12 + n.22)) (exp.11 <- 156 * theta.hat^2) (exp.12 <- 156 * theta.hat * (1-theta.hat)) (exp.22 <- 156 * (1-theta.hat)) (X2 <-(n.11 - exp.11)^2/exp.11 + (n.12 - exp.12)^2/exp.12 + (n.22 - exp.22)^2/exp.22) 1 - pchisq(X2,1) (n.11 + n.12) / (n.11 + n.12 + n.22) n.11 / (n.11 + n.12)