# Input data table
col.1 <- c(62, 11,  2, 11,  1,  1,  3,  1,  1)
col.2 <- c(17,  7,  3,  3,  4,  0,  0,  0,  0)
col.3 <- c( 5,  0,  1,  0,  0,  1,  0,  0,  0)
col.4 <- c(90, 22,  2, 21,  6,  2,  2,  2,  0)
col.5 <- c(42, 18,  0, 13,  9,  1,  1,  1,  0)
col.6 <- c( 3,  1,  1,  2,  0,  1,  0,  0,  0)
col.7 <- c(74, 19,  1, 20,  6,  4,  9,  4,  1)
col.8 <- c(31, 14,  3,  8,  5,  3,  2,  2,  2)
col.9 <- c(11,  3,  1,  3,  2,  1,  1,  0,  3)
( cbind(col.1, col.2, col.3, col.4, col.5, col.6, col.7, col.8, col.9) )
counts <- c(col.1, col.2, col.3, col.4, col.5, col.6, col.7, col.8, col.9)
E <- factor(rep(c(1, 1, 1, 2, 2, 2, 3, 3, 3), 9))
H <- factor(rep(1:3, 3 * 9))
L <- factor(rep(rep(1:3, each = 9), 3))
C <- factor(rep(1:3, each = 3 * 9))
( data <- data.frame(E, H, L, C, counts) )

# Part (a):
fit.0 <- glm(counts ~ 1,                 family = "poisson", data = data)
fit.1 <- glm(counts ~ E + H + L + C,     family = "poisson", data = data)
fit.2 <- glm(counts ~ (E + H + L + C)^2, family = "poisson", data = data)
fit.3 <- glm(counts ~ (E + H + L + C)^3, family = "poisson", data = data)
anova(fit.0, fit.1, fit.2, fit.3, test = "Chisq")
# The analysis of deviance shows that the model with all two-way interactions (fit.2)
# fits the data:
# 1. its residual deviance is small,
# 2. the residual deviance of the main-effects model (fit.1) is too large, and
# 3. the model with all three-way interactions (fit.3) is not significantly better
#    than fit.2.

# Parts (b) & (c):
library(MASS)
stepAIC(fit.3)
fit.12 <- glm(counts ~ E + H + L + C + E*H + H*L + E*C + L*C,
              family = "poisson", data = data)   # model selected by stepAIC
fit.11 <- glm(counts ~ E + H + L + C + H*E + H*L,
              family = "poisson", data = data)   # model suggested in part (c)
anova(fit.0, fit.1, fit.11, fit.12, fit.2, fit.3, test = "Chisq")
anova(fit.12, test = "Chisq")
# fit.12 is the model chosen by stepAIC; fit.11 is the model suggested in part (c).
# The analysis of deviance shows that fit.12 is significantly better than fit.11.
# However, the stepAIC output also shows that the two terms dropped in fit.11 --
# E:C and L:C -- are the least significant terms in fit.12.

# Part (d): dissimilarity index, sum(|fitted - observed|) / (2n), for each model
delta.0  <- (fit.0$fit  - fit.0$y) / (2 * sum(fit.0$y))
delta.1  <- (fit.1$fit  - fit.0$y) / (2 * sum(fit.0$y))
delta.11 <- (fit.11$fit - fit.0$y) / (2 * sum(fit.0$y))
delta.12 <- (fit.12$fit - fit.0$y) / (2 * sum(fit.0$y))
delta.2  <- (fit.2$fit  - fit.0$y) / (2 * sum(fit.0$y))
apply(abs(cbind(delta.0, delta.1, delta.11, delta.12, delta.2)), 2, sum)
# The model picked by stepAIC still has a fairly high dissimilarity index: about 5.5%.
# This is caused by the table being relatively sparse -- 607 observations spread over
# 81 cells.

# Part (e):
summary(fit.11)
# The relation between the variables is expressed in the interaction terms.
# The relevant lines of the output are:
# Coefficients:
#             Estimate Std. Error z value Pr(>|z|)
# (Intercept)  3.90715    0.09960  39.227  < 2e-16 ***
#   ...
# E2:H2        0.36231    0.23698   1.529 0.126298
# E3:H2        0.67247    0.41118   1.635 0.101951
# E2:H3        1.41968    0.39773   3.569 0.000358 ***
# E3:H3        2.23061    0.52230   4.271 1.95e-05 ***
# H2:L2        0.73226    0.20622   3.551 0.000384 ***
# H3:L2        0.76043    0.40832   1.862 0.062557 .
# H2:L3       -0.02703    0.47326  -0.057 0.954457
# H3:L3        2.01605    0.47535   4.241 2.22e-05 ***
# Note that in a four-way table the two-way interaction terms express the odds ratios
# between two variables when the two remaining variables are held constant.
# 1. Relation between H and L (we study only the four odds ratios for which a standard
#    error appears in the output, i.e. the ORs relative to the first category,
#    "too little"):
#    a. OR of H1 -> H2 with L1 -> L2: very significant, exp(0.73) = 2.07
#    b. OR of H1 -> H2 with L1 -> L3: insignificant, exp(-0.03)
#    c. OR of H1 -> H3 with L1 -> L2: marginally significant, exp(0.76) = 2.14
#    d. OR of H1 -> H3 with L1 -> L3: very significant, exp(2.02) = 7.53
#    Computing the other ORs takes a bit more work, and for their standard errors we
#    need the covariance matrix of the coefficients (see the sketch after this list):
#    OR of H2 -> H3 with L2 -> L3: exp(0.73 + 2.02 - 0.76 - (-0.03)) = 7.53
#    ==> strong agreement between the views on spending on health and on law
#        enforcement.
# 2. We also see a similar relation between E and H. The interesting exception is the
#    very strong interaction E2:H3, which expresses the view that spending on health
#    is too high while spending on the environment is about right.
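# A minimal sketch (not part of the original solution) of how such a composite odds
# ratio and its standard error can be obtained from the estimated covariance matrix of
# fit.11. The coefficient names are taken from the summary output quoted above, and the
# interval is an approximate 95% Wald interval.
b <- coef(fit.11)
V <- vcov(fit.11)
# contrast giving the log OR of H2 -> H3 together with L2 -> L3
k <- c("H2:L2" = 1, "H3:L3" = 1, "H3:L2" = -1, "H2:L3" = -1)
log.or    <- sum(k * b[names(k)])
se.log.or <- sqrt(drop(t(k) %*% V[names(k), names(k)] %*% k))
exp(log.or)                                  # estimated conditional odds ratio
exp(log.or + c(-1, 1) * 1.96 * se.log.or)    # approximate 95% confidence interval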
# Part (f):
H.2 <- ifelse(H == 1, 0, 1)   # dichotomize H: 0 = "too little" (level 1), 1 = otherwise
( data.l <- cbind(data, H.2) )
fitl.1 <- glm(H.2 ~ E + L + C, data = data.l, family = binomial, weights = counts)
summary(fitl.1)
# The logistic regression shows the same dependence between the view on spending on
# health and the views on spending on the environment and on law enforcement.

# Mosaic plots for the Florida death penalty example
library(vcd)
vic.race <- c("white", "black")
def.race <- vic.race
death.penalty <- c("yes", "no")
datalabel <- list(defendant = def.race, death = death.penalty, victim = vic.race)
table.2.6 <- expand.grid(defendant = def.race, death = death.penalty, victim = vic.race)
data <- c(53, 11, 414, 37, 0, 4, 16, 139)
table.2.6 <- cbind(table.2.6, count = data)
Tbl <- xtabs(count ~ defendant + death + victim, data = table.2.6)
tbl.1 <- Tbl[, , 1]                 # stratum: victim = white
tbl.2 <- Tbl[, , 2]                 # stratum: victim = black
tbl.m <- apply(Tbl, c(1, 2), sum)   # collapsed over victim
(tbl.1[1, 1] * tbl.1[2, 2]) / (tbl.1[1, 2] * tbl.1[2, 1])   # conditional odds ratio, victim = white
(tbl.2[1, 1] * tbl.2[2, 2]) / (tbl.2[1, 2] * tbl.2[2, 1])   # conditional odds ratio, victim = black
(tbl.m[1, 1] * tbl.m[2, 2]) / (tbl.m[1, 2] * tbl.m[2, 1])   # marginal odds ratio
mosaic( ~ defendant + death,          data = Tbl, shade = TRUE, legend = TRUE)
mosaic( ~ victim + defendant + death, data = Tbl, shade = TRUE, legend = TRUE)
mosaic( ~ defendant + death | victim, data = Tbl, shade = TRUE, legend = TRUE)
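# A possible follow-up, not part of the original example: the Cochran-Mantel-Haenszel
# test from base R combines the two victim-race strata, testing conditional independence
# of the defendant's race and the death-penalty verdict given the victim's race, and
# reports a common conditional odds ratio that can be compared with the marginal odds
# ratio computed above.
mantelhaen.test(Tbl)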