# Input data table
col.1 <- c(62, 11,  2, 11,  1,  1,  3,  1,  1)
col.2 <- c(17,  7,  3,  3,  4,  0,  0,  0,  0)
col.3 <- c( 5,  0,  1,  0,  0,  1,  0,  0,  0)
col.4 <- c(90, 22,  2, 21,  6,  2,  2,  2,  0)
col.5 <- c(42, 18,  0, 13,  9,  1,  1,  1,  0)
col.6 <- c( 3,  1,  1,  2,  0,  1,  0,  0,  0)
col.7 <- c(74, 19,  1, 20,  6,  4,  9,  4,  1)
col.8 <- c(31, 14,  3,  8,  5,  3,  2,  2,  2)
col.9 <- c(11,  3,  1,  3,  2,  1,  1,  0,  3)
( cbind(col.1, col.2, col.3, col.4, col.5, col.6, col.7, col.8, col.9) )
counts <- c(col.1, col.2, col.3, col.4, col.5, col.6, col.7, col.8, col.9)
E <- factor(rep(c(1, 1, 1, 2, 2, 2, 3, 3, 3), 9))
H <- factor(rep(1:3, 3 * 9))
L <- factor(rep(rep(1:3, each = 9), 3))
C <- factor(rep(1:3, each = 3 * 9))
( data <- data.frame(E, H, L, C, counts) )

# Part (a):
fit.0 <- glm(counts ~ 1,                 family = "poisson", data = data)
fit.1 <- glm(counts ~ E + H + L + C,     family = "poisson", data = data)
fit.2 <- glm(counts ~ (E + H + L + C)^2, family = "poisson", data = data)
fit.3 <- glm(counts ~ (E + H + L + C)^3, family = "poisson", data = data)
anova(fit.0, fit.1, fit.2, fit.3, test = "Chisq")
# The analysis of deviance shows that the model with all two-way interactions (fit.2)
# fits the data:
# 1. its residual deviance is small,
# 2. the residual deviance of the main-effects model (fit.1) is too large, and
# 3. the model with all three-way interactions (fit.3) is not significantly better
#    than fit.2.

# Parts (b) & (c):
library(MASS)
stepAIC(fit.3)
fit.12 <- glm(counts ~ E + H + L + C + E*H + H*L + E*C + L*C,
              family = "poisson", data = data)   # model selected by stepAIC
fit.11 <- glm(counts ~ E + H + L + C + H*E + H*L,
              family = "poisson", data = data)   # model suggested in part (c)
anova(fit.0, fit.1, fit.11, fit.12, fit.2, fit.3, test = "Chisq")
anova(fit.12, test = "Chisq")
# fit.12 is the model chosen by stepAIC; fit.11 is the model suggested in part (c).
# The analysis of deviance shows that fit.12 is significantly better than fit.11.
# However, the stepAIC output also shows that the two terms dropped in fit.11 --
# E:C and L:C -- are the least significant terms in fit.12.

# Part (d): dissimilarity index, sum(|fitted - observed|) / (2n), for each model
delta.0  <- (fit.0$fit  - fit.0$y) / (2 * sum(fit.0$y))
delta.1  <- (fit.1$fit  - fit.0$y) / (2 * sum(fit.0$y))
delta.11 <- (fit.11$fit - fit.0$y) / (2 * sum(fit.0$y))
delta.12 <- (fit.12$fit - fit.0$y) / (2 * sum(fit.0$y))
delta.2  <- (fit.2$fit  - fit.0$y) / (2 * sum(fit.0$y))
apply(abs(cbind(delta.0, delta.1, delta.11, delta.12, delta.2)), 2, sum)
# The model picked by stepAIC still has a fairly high dissimilarity index: about 5.5%.
# This is caused by the table being relatively sparse -- 607 observations spread over
# 81 cells.

# Part (e):
summary(fit.11)
# The relation between the variables is expressed in the interaction terms.
# The relevant lines of the output are:
# Coefficients:
#             Estimate Std. Error z value Pr(>|z|)
# (Intercept)  3.90715    0.09960  39.227  < 2e-16 ***
#   ...
# E2:H2        0.36231    0.23698   1.529 0.126298
# E3:H2        0.67247    0.41118   1.635 0.101951
# E2:H3        1.41968    0.39773   3.569 0.000358 ***
# E3:H3        2.23061    0.52230   4.271 1.95e-05 ***
# H2:L2        0.73226    0.20622   3.551 0.000384 ***
# H3:L2        0.76043    0.40832   1.862 0.062557 .
# H2:L3       -0.02703    0.47326  -0.057 0.954457
# H3:L3        2.01605    0.47535   4.241 2.22e-05 ***
# Note that in a four-way table the two-way interaction terms express the odds ratios
# between two variables when the two remaining variables are held constant.
# 1. Relation between H and L (we study only the four odds ratios for which a standard
#    error appears in the output, i.e. the ORs relative to the first category,
#    "too little"):
#    a. OR of H1 -> H2 with L1 -> L2: very significant, exp(0.73) = 2.07
#    b. OR of H1 -> H2 with L1 -> L3: insignificant, exp(-0.03)
#    c. OR of H1 -> H3 with L1 -> L2: marginally significant, exp(0.76) = 2.14
#    d. OR of H1 -> H3 with L1 -> L3: very significant, exp(2.02) = 7.53
#    Computing the other ORs takes a bit more work, and for their standard errors we
#    need the covariance matrix of the coefficients (see the sketch after this list):
#    OR of H2 -> H3 with L2 -> L3: exp(0.73 + 2.02 - 0.76 - (-0.03)) = 7.53
#    ==> strong agreement between the views on spending on health and on law
#        enforcement.
# 2. We also see a similar relation between E and H. The interesting exception is the
#    very strong interaction E2:H3, which expresses the view that spending on health
#    is too high while spending on the environment is about right.
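# A minimal sketch (not part of the original solution) of how such a composite odds
# ratio and its standard error can be obtained from the estimated covariance matrix of
# fit.11. The coefficient names are taken from the summary output quoted above, and the
# interval is an approximate 95% Wald interval.
b <- coef(fit.11)
V <- vcov(fit.11)
# contrast giving the log OR of H2 -> H3 together with L2 -> L3
k <- c("H2:L2" = 1, "H3:L3" = 1, "H3:L2" = -1, "H2:L3" = -1)
log.or    <- sum(k * b[names(k)])
se.log.or <- sqrt(drop(t(k) %*% V[names(k), names(k)] %*% k))
exp(log.or)                                  # estimated conditional odds ratio
exp(log.or + c(-1, 1) * 1.96 * se.log.or)    # approximate 95% confidence interval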
# Part (f):
H.2 <- ifelse(H == 1, 0, 1)   # dichotomize H: 0 = "too little" (level 1), 1 = otherwise
( data.l <- cbind(data, H.2) )
fitl.1 <- glm(H.2 ~ E + L + C, data = data.l, family = binomial, weights = counts)
summary(fitl.1)
# The logistic regression shows the same dependence between the view on spending on
# health and the views on spending on the environment and on law enforcement.

# Mosaic plots for the Florida death penalty example
library(vcd)
vic.race <- c("white", "black")
def.race <- vic.race
death.penalty <- c("yes", "no")
datalabel <- list(defendant = def.race, death = death.penalty, victim = vic.race)
table.2.6 <- expand.grid(defendant = def.race, death = death.penalty, victim = vic.race)
data <- c(53, 11, 414, 37, 0, 4, 16, 139)
table.2.6 <- cbind(table.2.6, count = data)
Tbl <- xtabs(count ~ defendant + death + victim, data = table.2.6)
tbl.1 <- Tbl[, , 1]                 # stratum: victim = white
tbl.2 <- Tbl[, , 2]                 # stratum: victim = black
tbl.m <- apply(Tbl, c(1, 2), sum)   # collapsed over victim
(tbl.1[1, 1] * tbl.1[2, 2]) / (tbl.1[1, 2] * tbl.1[2, 1])   # conditional odds ratio, victim = white
(tbl.2[1, 1] * tbl.2[2, 2]) / (tbl.2[1, 2] * tbl.2[2, 1])   # conditional odds ratio, victim = black
(tbl.m[1, 1] * tbl.m[2, 2]) / (tbl.m[1, 2] * tbl.m[2, 1])   # marginal odds ratio
mosaic( ~ defendant + death,          data = Tbl, shade = TRUE, legend = TRUE)
mosaic( ~ victim + defendant + death, data = Tbl, shade = TRUE, legend = TRUE)
mosaic( ~ defendant + death | victim, data = Tbl, shade = TRUE, legend = TRUE)
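# A possible follow-up, not part of the original example: the Cochran-Mantel-Haenszel
# test from base R combines the two victim-race strata, testing conditional independence
# of the defendant's race and the death-penalty verdict given the victim's race, and
# reports a common conditional odds ratio that can be compared with the marginal odds
# ratio computed above.
mantelhaen.test(Tbl)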