library(VGAM) library(LearnBayes) data(cancermortality) ? cancermortality y.vec <- cancermortality$y n.vec <- cancermortality$n rbind(y.vec,n.vec) par(mfcol=c(1,1)) plot(n.vec, y.vec / n.vec,log="x") (p.hat <- sum(y.vec) / sum(n.vec)) summary(y.vec / n.vec) abline(h=p.hat) abline(h = qbeta(.025,.5 + sum(y.vec),.5 + sum(n.vec) - sum(y.vec)),col=2,lty=2) abline(h = qbeta(.975,.5 + sum(y.vec),.5 + sum(n.vec) - sum(y.vec)),col=2,lty=2) #-------------------------------- # 1. Explain why fit a negative binomial # By comparing y_1 ... y_20 to their predicitve distribution # Assumption: p.i ~ beta(.5 + sum(y.vec),.5 + sum(n.vec) - sum(y.vec) ) & y.i | p.i ~ binom(n.i,p.i) # Thus: y.i ~ beta.binom(.5 + sum(y.vec),.5 + sum(n.vec) - sum(y.vec) ) y.pred.quant<- rep(NA,20) for(i in 1:20) y.pred.quant[i] <- pbetabinom.ab(y.vec[i], n.vec[i], .5 + sum(y.vec),.5 + sum(n.vec) - sum(y.vec)) cbind(y.vec,n.vec,y.pred.quant) summary(rbetabinom.ab(1000, n.vec[15], .5 + sum(y.vec),.5 + sum(n.vec) - sum(y.vec))) dbetabinom.ab(0:5, n.vec[19], .5 + sum(y.vec),.5 + sum(n.vec) - sum(y.vec)) pbetabinom.ab(0:5, n.vec[19], .5 + sum(y.vec),.5 + sum(n.vec) - sum(y.vec)) # 2. Use learnbayes function to plot posterior # Beta-binomial model # y ~ binom(n,p) # p ~ beta(alpha,beta) # Parameterization # mean -- eta = alpha / (alpha + beta) # precision -- K = alpha + beta # Prior # g(eta,K) \propto (eta^-1 *(1-eta)^-1 *(1+K)^-2 betabinexch0 mycontour mycontour(betabinexch0,c(.0001,.003,1,20000),cancermortality) title(xlab="eta",ylab="K") mycontour(betabinexch,c(-8,-4.5,3,16.5),cancermortality) title(xlab="logit eta") 3. Laplace method approximation (fit <- laplace(betabinexch,c(-7,6),cancermortality)) points(fit$mode[1],fit$mode[2],pch=3) npar <- list(m=fit$mode,v=fit$var) mycontour(lbinorm,c(-8,-4.5,3,16.5),npar) title(xlab="logit eta", ylab="log K") (a <- fit$mode[1] + 1.96 * sqrt(fit$var[1,1])) (b <- fit$mode[1] - 1.96 * sqrt(fit$var[1,1])) exp(a) / (1 + exp(a)) exp(b) / (1 + exp(b)) (eta.mode <- exp(fit$mode[1]) / (1+exp(fit$mode[1]))) (K.mode <- exp(fit$mode[2])) eta.mode * K.mode # a - parameter estimate (1-eta.mode) * K.mode # b - parameter estimate # 4. Rejection Sampling betabinT=function(theta,datapar) { data <- datapar$data tpar <- datapar$par d <- betabinexch(theta,data)-dmt(theta,mean=c(tpar$m), S=tpar$var,df=tpar$df,log=TRUE) return(d) } tpar <- list(m=fit$mode,var=2*fit$var,df=4) datapar <- list(data=cancermortality,par=tpar) datapar <- list(par=tpar,data=cancermortality) start <- c(-7,12) fit1 <- laplace(betabinT,start,datapar) fit1$mode betabinT(fit1$mode,datapar) theta <- rejectsampling(betabinexch,tpar,-569.28,10000,cancermortality) #theta <- rejectsampling(betabinexch,tpar,-568,10000,cancermortality) #theta <- rejectsampling(betabinexch,tpar,-590,10000,cancermortality) dim(theta) mycontour(betabinexch,c(-8,-4.5,3,16.5),cancermortality) title(xlab="logit eta",ylab="log K") points(theta[,1],theta[,2]) quantile(theta[,1],c(0.02,0.5,0.975)) mean(theta[,1]) # 5. Importance Sampling tpar <- list(m=fit$mode,var=2*fit$var,df=4) impsampling.DY <- function (logf, tpar, h, n, data) { theta <- rmt(n, mean = c(tpar$m), S = tpar$var, df = tpar$df) lf <- logf(theta, data) lp <- dmt(theta, mean = c(tpar$m), S = tpar$var, df = tpar$df,log = TRUE) md <- max(lf - lp) wt <- exp(lf - lp - md) h.vec <- apply(theta,1,h) est <- sum(wt * h.vec)/sum(wt) SEest <- sqrt(sum((h.vec - est)^2 * wt^2))/sum(wt) return(list(est = est, se = SEest, theta = theta, wt = wt, h.vec =h.vec )) } myfunc <- function(theta) return(theta[2]) (s <- impsampling.DY(betabinexch,tpar,myfunc,10,cancermortality)) # 6. Sampling Importance Resampling sir bayes.influence.DY <- function (theta, data) { y = data[, 1] n = data[, 2] N = length(y) summary.obs = array(0, c(N, 3)) K = exp(theta[, 2]) eta = exp(theta[, 1])/(1 + exp(theta[, 1])) m = length(K) summary = quantile(eta, c(0.05, 0.5, 0.95)) for (i in 1:N) { weight = exp(lbeta(K * eta, K * (1 - eta)) - lbeta(K * eta + y[i], K * (1 - eta) + n[i] - y[i])) probs = weight/sum(weight) indices = sample(1:m, size = m, prob = probs, replace = TRUE) theta.s = theta[indices, ] summary.obs[i, ] = quantile( exp(theta.s[, 1]) / (1 +exp(theta.s[, 1])), c(0.05, 0.5, 0.95)) } return(list(summary = summary, summary.obs = summary.obs)) } theta.s <- sir(betabinexch,tpar,10000,cancermortality) S <- bayes.influence.DY(theta.s,cancermortality) par(mfcol=c(1,1)) plot(c(0,0,0),S$summary,type="b",lwd=3,xlim=c(-1,21),ylim=c(0.0005,0.0022), xlab="Observation removed",ylab="eta") for (i in 1:20) lines(c(i,i,i),S$summary.obs[i,],type="b") cbind(y.vec,n.vec,y.vec / n.vec)