# 0. Accident data of Simar (1976) -- Example 3.2.3, Carlin and Louis page 60
# y.vec: possible accident counts (0..7); count.vec: number of policies
# observed with each count.
y.vec <- 0:7
count.vec <- c(7840, 1317, 239, 42, 14, 4, 4, 1)
cbind(y.vec, count.vec, round(count.vec / sum(count.vec), 5))
# Sample mean and (population-style) variance of the count distribution.
(mn.y <- sum(y.vec * count.vec) / sum(count.vec))
(var.y <- sum((y.vec - mn.y)^2 * count.vec) / sum(count.vec))

# 1. eBayes analysis of Robbins (1955)
# Robbins' estimate of E[lambda | y] is (y + 1) * p(y + 1) / p(y),
# estimated empirically by (y + 1) * count(y + 1) / count(y).
count.vec / sum(count.vec)
y.Robbins <- rep(NA, 8)
# The last category (y = 7) has no successor count, so y.Robbins[8] stays NA.
# (Looping only to 7 avoids the out-of-bounds read count.vec[9].)
for (i in 1:7) y.Robbins[i] <- y.vec[i + 1] * count.vec[i + 1] / count.vec[i]
cbind(y.vec, count.vec, round(count.vec / sum(count.vec), 5), round(y.Robbins, 3))

# 2. The Gamma-Poisson model
# y | lambda ~ Poisson(lambda) and lambda ~ Gamma(shape = r, scale = theta)
#   ==>  marginally y ~ NB(size = r, prob = 1/(1 + theta)),
# where the Gamma mean is r*theta and the Gamma variance is r*theta^2,
# and the NB mean is r*theta and the NB variance is r*theta*(1 + theta).
r <- 3
theta <- 2
# Monte Carlo sanity check that the three means (and variances) agree.
mean(rgamma(10^5, shape = r, scale = theta))
mean(rpois(10^5, rgamma(10^5, shape = r, scale = theta)))
mean(rnbinom(10^5, size = r, prob = 1 / (1 + theta)))
var(rgamma(10^5, shape = r, scale = theta))
var(rpois(10^5, rgamma(10^5, shape = r, scale = theta)))
var(rnbinom(10^5, size = r, prob = 1 / (1 + theta)))

# Method-of-moments estimates for r and theta:
# mean = r*theta and var = r*theta*(1 + theta)
#   ==>  theta = var/mean - 1  and  r = mean/theta.
(theta.MM <- var.y / mn.y - 1)
(r.MM <- mn.y / theta.MM)

# Posterior distribution of lambda | y is
# Gamma(shape = r + y, scale = theta/(theta + 1))
# with posterior mean (r + y) * theta / (1 + theta).
gamma.smp <- rgamma(10^6, shape = r.MM, scale = theta.MM)
y.smp <- rpois(10^6, gamma.smp)
# Empirical E[lambda | y] from the joint simulation, for comparison.
round(sapply(split(gamma.smp, y.smp), mean), 3)
y.GP <- (y.vec + r.MM) * theta.MM / (1 + theta.MM)
cbind(y.vec, count.vec, round(count.vec / sum(count.vec), 5),
      round(y.Robbins, 3), round(y.GP, 3))
# 3.
# 3. NPML estimate of the mixing distribution using the EM algorithm
########################################################
# Solution in Carlin and Louis page 62, Table 3.2
m <- 4
mu.CL <- c(0.089, 0.580, 3.176, 3.669)
pi.CL <- c(0.7600, 0.2362, 0.0036, 0.0002)
# Actual probability vector has pi_3 = 0.0037 but then the probabilities sum to 1.0001.
# 8 x m likelihood matrix: lik.mat.y[j, k] = dpois(y.vec[j], mu.CL[k]).
lik.mat.y <- array(dpois(rep(y.vec, m), rep(mu.CL, each = 8)), dim = c(8, m))
# Log-likelihood of the Carlin-Louis solution.
sum(count.vec * log((lik.mat.y * rep(pi.CL, each = 8)) %*% cbind(rep(1, m))))
# Posterior mean E[lambda | y] under the discrete mixing distribution.
y.CL <- apply(lik.mat.y * rep(pi.CL, each = 8) * rep(mu.CL, each = 8), 1, sum) /
  apply(lik.mat.y * rep(pi.CL, each = 8), 1, sum)
cbind(y.vec, count.vec, round(count.vec / sum(count.vec), 5),
      round(y.Robbins, 3), round(y.GP, 3), round(y.CL, 3))
#########################################################
# EM over a fixed support grid mu.vec; only the mixing probabilities are updated.
# Alternatives tried: m in {4, 40}; mu.vec <- seq(0.01, 8, length = m) or
# seq(0.01, 4, length = m) or just the four C&L support points; uniform or
# C&L-weighted starting values for pi.
n.EM <- 10000
m <- 400
# Grid on (0.01, 4] augmented with the four Carlin-Louis support points.
mu.vec <- c(seq(0.01, 4, length = m - 4), c(0.089, 0.580, 3.176, 3.669))
loglik.EM <- rep(NA, n.EM)
pi.mat <- array(dim = c(n.EM, m))
# Start with half the mass on the first grid point, the rest spread uniformly.
pi.mat[1, ] <- c(0.50, rep(0.5 / (m - 1), m - 1))
# The likelihood matrix depends only on the fixed mu.vec, so compute it once
# here instead of re-evaluating dpois on every EM iteration.
lik.mat.y <- array(dpois(rep(y.vec, m), rep(mu.vec, each = 8)), dim = c(8, m))
loglik.EM[1] <- sum(count.vec * log((lik.mat.y * rep(pi.mat[1, ], each = 8)) %*% cbind(rep(1, m))))
for (i in 2:n.EM) {
  if (i %% 1000 == 1) print(i)   # progress indicator
  # E-step: w.mat[j, k] = posterior probability of component k given y = y.vec[j].
  tmp.mat <- lik.mat.y * rep(pi.mat[i - 1, ], each = 8)
  w.mat <- tmp.mat / c(tmp.mat %*% cbind(rep(1, m)))
  # M-step: new mixing probabilities = count-weighted average of the weights.
  pi.prop <- rbind(count.vec) %*% w.mat
  pi.mat[i, ] <- pi.prop / sum(pi.prop)
  loglik.EM[i] <- sum(count.vec * log((lik.mat.y * rep(pi.mat[i, ], each = 8)) %*% cbind(rep(1, m))))
}
par(mfcol = c(1, 2))
plot(1:n.EM, loglik.EM, type = "l", xlab = "Iteration")
plot(mu.vec, pi.mat[n.EM, ], xlab = "mu vector", ylab = "pi vector")
loglik.EM[n.EM]