# 1.1 MCMC for analysis of Normal Grouped Data

library(LearnBayes)

# Heights binned into intervals, with observed frequencies f
d <- list(int.lo = c(-Inf, seq(66, 74, by = 2)),
          int.hi = c(seq(66, 74, by = 2), Inf),
          f = c(14, 30, 49, 70, 33, 15))
cbind(int.lo = c(-Inf, seq(66, 74, by = 2)),
      int.hi = c(seq(66, 74, by = 2), Inf),
      f = c(14, 30, 49, 70, 33, 15))

# Crude estimates of (mu, log sigma), placing each observation at an
# interval midpoint
y <- c(rep(65, 14), rep(67, 30), rep(69, 49), rep(71, 70),
       rep(73, 33), rep(75, 15))
mean(y)
log(sd(y))

# Posterior mode and curvature via the Laplace approximation
start <- c(70, 1)
fit <- laplace(groupeddatapost, start, d)
fit
modal.sds <- sqrt(diag(fit$var))

# Random-walk Metropolis, using the Laplace fit to tune the proposal
proposal <- list(var = fit$var, scale = 2)
fit2 <- rwmetrop(groupeddatapost, proposal, start, 10000, d)
fit2$accept

plot(1:100, fit2$par[1:100, 1], type = "b",
     xlab = "Metropolis iteration", ylab = "Mu")

post.means <- apply(fit2$par, 2, mean)
post.sds <- apply(fit2$par, 2, sd)
cbind(c(fit$mode), modal.sds)
cbind(post.means, post.sds)

mycontour(groupeddatapost, c(69, 71, .6, 1.3), d)
title(xlab = "mu", ylab = "log sigma")
points(fit2$par[5001:10000, 1], fit2$par[5001:10000, 2])

quantile(fit2$par[, 1], c(0.025, 0.50, 0.975))
fit$mode[1]
fit$mode[1] + 1.96 * post.sds[1]
fit$mode[1] - 1.96 * post.sds[1]

# Compare with a classical t interval based on the midpoint data
y <- c(rep(65, 14), rep(67, 30), rep(69, 49), rep(71, 70),
       rep(73, 33), rep(75, 15))
t.test(y)

# 1.2 MCMC Output Analysis

library(coda)
library(lattice)  # xyplot method for mcmc objects
??coda

start <- c(70, 1)
fit <- laplace(groupeddatapost, start, d)

# A poor starting value and a small proposal scale give a slowly mixing chain
start <- c(65, 1)
proposal <- list(var = fit$var, scale = 0.2)
bayesfit <- rwmetrop(groupeddatapost, proposal, start, 10000, d)

par(mfrow = c(1, 1))
mycontour(groupeddatapost, c(65, 71, .6, 1.6), d)
title(xlab = "mu", ylab = "log sigma")
lines(bayesfit$par[, 1], bayesfit$par[, 2], col = 4)

dimnames(bayesfit$par)[[2]] <- c("mu", "log sigma")
# xyplot(mcmc(bayesfit$par[-c(1:2000), ]), col = "black")
xyplot(mcmc(bayesfit$par), col = "black")

par(mfrow = c(2, 1))
autocorr.plot(mcmc(bayesfit$par[-c(1:2000), ]), auto.layout = FALSE)
summary(mcmc(bayesfit$par[-c(1:2000), ]))
sqrt(var(bayesfit$par[-c(1:2000), 1]))         # posterior sd of mu
sqrt(var(bayesfit$par[-c(1:2000), 1]) / 8000)  # naive SE, ignoring autocorrelation
# batchSE(mcmc(bayesfit$par[-c(1:2000), ]), batchSize = 1)
batchSE(mcmc(bayesfit$par[-c(1:2000), ]), batchSize = 50)
# batchSE(mcmc(bayesfit$par[-c(1:2000), ]), batchSize = 500)

# Rerun with a good starting value and a larger proposal scale
start <- c(70, 1)
proposal <- list(var = fit$var, scale = 2.0)
bayesfit <- rwmetrop(groupeddatapost, proposal, start, 10000, d)

par(mfrow = c(1, 1))
mycontour(groupeddatapost, c(65, 71, .6, 2), d)
title(xlab = "mu", ylab = "log sigma")
lines(bayesfit$par[, 1], bayesfit$par[, 2], col = 4)

dimnames(bayesfit$par)[[2]] <- c("mu", "log sigma")
sim.parameters <- mcmc(bayesfit$par[-c(1:2000), ])
# xyplot(mcmc(bayesfit$par[-c(1:2000), ]), col = "black")
xyplot(mcmc(bayesfit$par), col = "black")
par(mfrow = c(2, 1))
autocorr.plot(sim.parameters, auto.layout = FALSE)
summary(sim.parameters)
batchSE(sim.parameters, batchSize = 50)
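# Not in the original script: a minimal convergence check, sketched with
# coda's effectiveSize() and gelman.diag() on two chains started from
# overdispersed points. The starting values and the 2000-draw burn-in are
# illustrative choices, not part of the original analysis.
proposal <- list(var = fit$var, scale = 2.0)
chain1 <- rwmetrop(groupeddatapost, proposal, c(65, 0.6), 10000, d)
chain2 <- rwmetrop(groupeddatapost, proposal, c(75, 1.6), 10000, d)
mc <- mcmc.list(mcmc(chain1$par[-c(1:2000), ]),
                mcmc(chain2$par[-c(1:2000), ]))
effectiveSize(mc)  # effective sample size after accounting for autocorrelation
gelman.diag(mc)    # potential scale reduction; values near 1 suggest convergence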
# 2. Analysis of heart transplant data
#
# The data consist of the number of deaths within 30 days of heart
# transplant surgery for 94 U.S. hospitals that each performed at least 10
# heart transplant surgeries. The exposure, the expected number of deaths,
# is also recorded for each hospital.
#
# hearttransplants is a data frame with 94 observations on 2 variables:
#   e = expected number of deaths (the exposure)
#   y = observed number of deaths within 30 days of heart transplant surgery
#
# Source: Christiansen, C. and Morris, C. (1995), "Fitting and checking a
# two-level Poisson model: modeling patient mortality rates in heart
# transplant patients", in Berry, D. and Stangl, D. (eds), Bayesian
# Biostatistics, Marcel Dekker.

library(LearnBayes)
data(hearttransplants)
attach(hearttransplants)
rm(y)  # remove the y from Section 1 so the attached data frame's y is visible

par(mfcol = c(1, 1))
plot(e, y, log = "x")
plot(e, y/e, log = "x", ylab = "y/e")
plot(e, y/e, log = "x", ylab = "y/e", pch = as.character(y))

# Pooled estimate of a common mortality rate
(lambda.hat <- sum(y) / sum(e))
abline(h = lambda.hat, lty = 2, col = 4)
e.ser <- seq(500, 13000, length = 100)
lines(e.ser, lambda.hat + 1.96 * sqrt(lambda.hat / e.ser), lty = 2, col = 4)
lines(e.ser, lambda.hat - 1.96 * sqrt(lambda.hat / e.ser), lty = 2, col = 4)
lines(e.ser, qpois(0.95, e.ser * lambda.hat) / e.ser, lty = 1, col = 4)
lines(e.ser, qpois(0.50, e.ser * lambda.hat) / e.ser, lty = 1, col = 4)
lines(e.ser, qpois(0.05, e.ser * lambda.hat) / e.ser, lty = 1, col = 4)

# 2.1 Equal Mortality Rates?

sum(y)   # 277
sum(e)   # 294681
lambda <- rgamma(1000, shape = 277, rate = 294681)
ys94 <- rpois(1000, e[94] * lambda)
hist(ys94, breaks = seq(0.5, max(ys94) + 0.5))
lines(c(y[94], y[94]), c(0, 120), lwd = 3)  # observed count for hospital 94

lambda <- rgamma(1000, shape = 277, rate = 294681)
prob.out <- function(i) sum(rpois(1000, e[i] * lambda) <= y[i]) / 1001
pout <- sapply(1:94, prob.out)
summary(pout)
plot(sort(pout))
abline(0, 1/94)

# 2.2 Modeling a Prior Belief of Exchangeability

par(mfcol = c(2, 2))
m <- 500
alpha <- c(5, 20, 80, 400)
for (j in 1:4) {
  mu <- rgamma(m, shape = 10, rate = 10)
  lambda1 <- rgamma(m, shape = alpha[j], rate = alpha[j] / mu)
  lambda2 <- rgamma(m, shape = alpha[j], rate = alpha[j] / mu)
  plot(lambda1, lambda2, main = paste("alpha=", as.character(alpha[j])))
}

# 2.3 Simulating from the Posterior

poissgamexch  # print the log posterior of (log alpha, log mu)
datapar <- list(data = hearttransplants, z0 = 0.53)
start <- c(2, -7)
(fit <- laplace(poissgamexch, start, datapar))

par(mfrow = c(1, 1))
mycontour(poissgamexch, c(0, 8, -7.3, -6.6), datapar)
title(xlab = "log alpha", ylab = "log mu")

start <- c(4, -7)
fitgibbs <- gibbs(poissgamexch, start, 1000, c(1, .15), datapar)
fitgibbs$accept
mycontour(poissgamexch, c(0, 8, -7.3, -6.6), datapar)
title(xlab = "log alpha", ylab = "log mu")
points(fitgibbs$par[, 1], fitgibbs$par[, 2])

# Posterior means of the individual rates, shrunk toward the overall rate
alpha <- exp(fitgibbs$par[, 1])
mu <- exp(fitgibbs$par[, 2])
post.mn.lambda <- rep(NA, 94)
plot(e, y/e, log = "x")
abline(h = 277/294681)
for (i in 1:94) {
  post.mn.lambda[i] <- mean(rgamma(1000, y[i] + alpha, e[i] + alpha/mu))
  arrows(e[i], y[i]/e[i], e[i], post.mn.lambda[i], col = 4, length = 0.05)
}
points(e[85], post.mn.lambda[85], col = 2, cex = 2)  # highlight hospital 85
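# Not in the original script: a small sketch making the shrinkage in the
# posterior means explicit. The gamma posterior mean can be written as
#   (y[i] + alpha) / (e[i] + alpha/mu)
#     = (1 - B[i]) * (y[i]/e[i]) + B[i] * mu,
# with B[i] = (alpha/mu) / (e[i] + alpha/mu) the fraction of shrinkage
# toward mu. The names B, alpha.mn, and mu.mn are illustrative; the weights
# are evaluated at the posterior means of alpha and mu.
alpha.mn <- mean(alpha)
mu.mn <- mean(mu)
B <- (alpha.mn / mu.mn) / (e + alpha.mn / mu.mn)
summary(B)  # high-exposure hospitals are shrunk the least
plot(e, B, log = "x", ylab = "shrinkage toward mu")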
# 2.4 Which is the best hospital?

# 95% interval estimates for each hospital's rate
plot(y/e)
abline(h = 277/294681)
for (i in 1:94) {
  lambda.ser <- rgamma(1000, y[i] + alpha, e[i] + alpha/mu)
  q.lambda <- quantile(lambda.ser, prob = c(0.025, 0.975))
  segments(i, q.lambda[1], i, q.lambda[2], col = 4)
  points(i, mean(lambda.ser), pch = 4, col = 3)
}

# Compare each hospital with hospital 85
p.ls.than.85 <- rep(NA, 94)
lambda.ser.85 <- rgamma(1000, y[85] + alpha, e[85] + alpha/mu)
plot(e, post.mn.lambda - post.mn.lambda[85], ylim = c(-0.0005, 0.002),
     pch = 4, col = 4)
abline(h = 0)
for (i in 1:94) {
  lambda.ser <- rgamma(1000, y[i] + alpha, e[i] + alpha/mu) - lambda.ser.85
  q.lambda <- quantile(lambda.ser, prob = c(0.05, 0.975))
  p.ls.than.85[i] <- mean(0 < lambda.ser)  # P(lambda[85] < lambda[i])
  if (0.95 < p.ls.than.85[i])
    segments(e[i], q.lambda[1], e[i], q.lambda[2], col = 2)
  if (0.95 >= p.ls.than.85[i])
    segments(e[i], q.lambda[1], e[i], q.lambda[2], col = 3)
}
p.ls.than.85
sum(0.95 < p.ls.than.85)

# 2.5 Posterior predictive model checking

sum(y)
sum(e)
# Predictive distribution for hospital 85 under the equal-rates model
# versus the hierarchical model
lambda <- rgamma(1000, shape = 277, rate = 294681)
eq.lambda.pred.85 <- rpois(1000, e[85] * lambda)
hier.model.pred.85 <- rpois(1000, e[85] * lambda.ser.85)
par(mfcol = c(1, 2))
hist(eq.lambda.pred.85)
hist(hier.model.pred.85)
lines(c(y[85], y[85]), c(0, 120), lwd = 3)  # observed count for hospital 85

# Predictive (mid-)p-values for all hospitals under both models
pout.eq.mn <- rep(NA, 94)
pout.hier <- rep(NA, 94)
for (i in 1:94) {
  lambda.ser <- rgamma(1000, y[i] + alpha, e[i] + alpha/mu)
  y.samp <- rpois(1000, e[i] * lambda.ser)
  pout.hier[i] <- (sum(y.samp < y[i]) + 1/2 * sum(y.samp == y[i])) / 1001
  lambda.ser <- rgamma(1000, shape = 277, rate = 294681)
  y.samp <- rpois(1000, e[i] * lambda.ser)
  pout.eq.mn[i] <- (sum(y.samp < y[i]) + 1/2 * sum(y.samp == y[i])) / 1001
}
par(mfcol = c(1, 1))
plot(pout.eq.mn, pout.hier, ylim = c(0, 1))
abline(0, 1, col = 3)

# 3. Bird extinction example

library(LearnBayes)
data(birdextinct)
attach(birdextinct)
logtime <- log(time)
plot(nesting, logtime)
out <- (logtime > 3)
text(nesting[out], logtime[out], label = species[out], pos = 2)
par(mfcol = c(1, 2))
plot(jitter(size), logtime, xaxp = c(0, 1, 1))
plot(jitter(status), logtime, xaxp = c(0, 1, 1))

##### Least-squares fit
fit <- lm(logtime ~ nesting + size + status, data = birdextinct,
          x = TRUE, y = TRUE)
summary(fit)

##### Sampling from posterior
theta.sample <- blinreg(fit$y, fit$x, 5000)
par(mfrow = c(2, 2))
hist(theta.sample$beta[, 2], main = "NESTING", xlab = expression(beta[1]))
hist(theta.sample$beta[, 3], main = "SIZE", xlab = expression(beta[2]))
hist(theta.sample$beta[, 4], main = "STATUS", xlab = expression(beta[3]))
hist(theta.sample$sigma, main = "ERROR SD", xlab = expression(sigma))
apply(theta.sample$beta, 2, quantile, c(.05, .5, .95))
quantile(theta.sample$sigma, c(.05, .5, .95))

###### Mean & predicted extinction log-times
cov1 <- c(1, 4, 0, 0)
X1 <- rbind(cov1)
mean.draws <- blinregexpected(X1, theta.sample)
pred.draws <- blinregpred(X1, theta.sample)
par(mfrow = c(1, 1))
boxplot(list(mean.draws, pred.draws),
        names = c("Mean", "Prediction"), ylab = "log TIME")

# Classical intervals for comparison
newdata <- data.frame(nesting = 4, size = 0, status = 0)
predict(fit, newdata, interval = "confidence", level = 0.95, type = "response")
quantile(mean.draws, prob = c(0.50, 0.025, 0.975))
predict(fit, newdata, interval = "prediction", level = 0.95, type = "response")
quantile(pred.draws, prob = c(0.50, 0.025, 0.975))

###### Estimating mean extinction times
cov1 <- c(1, 4, 0, 0)
cov2 <- c(1, 4, 1, 0)
cov3 <- c(1, 4, 0, 1)
cov4 <- c(1, 4, 1, 1)
X1 <- rbind(cov1, cov2, cov3, cov4)
mean.draws <- blinregexpected(X1, theta.sample)
c.labels <- c("A", "B", "C", "D")
par(mfrow = c(2, 2))
for (j in 1:4)
  hist(mean.draws[, j],
       main = paste("Covariate set", c.labels[j]), xlab = "log TIME")
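# Not in the original script: the columns of mean.draws are posterior draws
# for the four covariate sets, so pairwise comparisons are direct Monte
# Carlo estimates. For example:
mean(mean.draws[, 1] > mean.draws[, 2])  # P(mean log-time for set A exceeds set B)
mean(mean.draws[, 1] > mean.draws[, 4])  # P(mean log-time for set A exceeds set D)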
######## Predicting extinction times
cov1 <- c(1, 4, 0, 0)
cov2 <- c(1, 4, 1, 0)
cov3 <- c(1, 4, 0, 1)
cov4 <- c(1, 4, 1, 1)
X1 <- rbind(cov1, cov2, cov3, cov4)
pred.draws <- blinregpred(X1, theta.sample)
c.labels <- c("A", "B", "C", "D")
par(mfrow = c(2, 2))
for (j in 1:4)
  hist(pred.draws[, j],
       main = paste("Covariate set", c.labels[j]), xlab = "log TIME")

######### Model checking via posterior predictive distribution
pred.draws <- blinregpred(fit$x, theta.sample)
pred.sum <- apply(pred.draws, 2, quantile, c(.05, .95))
par(mfrow = c(1, 1))
ind <- 1:length(logtime)
matplot(rbind(ind, ind), pred.sum, type = "l", lty = 1, col = 1,
        xlab = "INDEX", ylab = "log TIME")
points(ind, logtime, pch = 19)
out <- (logtime > pred.sum[2, ])  # observations above their 90% predictive band
text(ind[out], logtime[out], label = species[out], pos = 4)
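# Not in the original script: a sketch of an alternative outlier check,
# assuming LearnBayes' bayesresiduals(lmfit, post, k) interface, which
# estimates P(|epsilon_i| > k * sigma) for each observation from the
# posterior draws. k = 2 and the 0.35 labeling threshold are illustrative
# choices; prob.outlier is an illustrative name.
prob.outlier <- bayesresiduals(fit, theta.sample, 2)
par(mfrow = c(1, 1))
plot(nesting, prob.outlier, ylab = "Prob(outlier)")
out <- (prob.outlier > 0.35)
text(nesting[out], prob.outlier[out], label = species[out], pos = 4)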