# 1. Bayes classifier for the two group model with simple null hypothesis z.vec <- seq(8,-4,length = 1000) pi.0 <- 0.90 mu.1 <- 3 f.0 <- dnorm(z.vec,mean=0,sd=1) f.1 <- dnorm(z.vec,mean=mu.1,sd=1) f <- pi.0*f.0 + (1-pi.0)*f.1 fdr.vec <- pi.0*f.0 / f Fdr.vec <- cumsum(fdr.vec * f) / cumsum(f) plot(z.vec,f.0,type="l",col=2,ylim=c(0,1),xlab="Z",ylab="",lwd=2,main = "Bayes classifiers for the two group model") lines(z.vec,f.1,type="l",col=3,lwd=2) lines(z.vec,f,type="l",col=4,lwd=2) lines(z.vec,fdr.vec,type="l",col=5,lwd=2) legend(5,0.8,lwd=2,c("f.0","f.1","f","fdr"),col=c(2,3,4,5)) abline(v = 0,col=2,lty = 2) abline(v = 3,col=3,lty = 2) plot(z.vec,f,type="l",col=4,ylim=c(0,1),xlab="Z",ylab="Fdr and fdr",lwd=2,main = "") lines(z.vec,fdr.vec,type="l",col=5,lwd=2) lines(z.vec,Fdr.vec,type="l",col=6,lwd=2) legend(5,0.8,lwd=2,c("f","fdr","Fdr"),col=c(4,5,6)) (z.05 <- approx(Fdr.vec,z.vec,xout=0.05)$y) abline(h = 0.05,col=6,lty=2) arrows(z.05,0.05,z.05,-.02,length=.05,lwd=1,col=6,code=2,angle=45) # 1.1 Two group model simulation m <- 10^5 H.smp <- rbinom(m,1,1 - pi.0) Z.smp <- rnorm(m,mean = H.smp*3) P.smp <- 1 - pnorm(Z.smp,mean=0) f0.smp <- dnorm(Z.smp,mean = 0) f1.smp <- dnorm(Z.smp,mean = 3) fdr.smp <- pi.0*f0.smp / (pi.0*f0.smp + (1-pi.0)*f1.smp) # Bayes classifier results sum(z.05 <= Z.smp) mean(1 - H.smp[z.05 <= Z.smp]) # locfdr results #library(locfdr) a <- locfdr(Z.smp,nulltype=0) round(a$mat,3) fdr.ord <- order(a$fdr,decreasing = F) plot(z.vec,fdr.vec,type = "l",xlim = c(2.5,4),ylim = c(0,0.6)) # fdr estimates lines(Z.smp[fdr.ord],a$fdr[fdr.ord],col=4) Fdr.locfdr <- cumsum(a$fdr[fdr.ord]) / (1:m) sum(Fdr.locfdr <= 0.05) # BH procedure results P.adj <- p.adjust(P.smp,method="BH") sum(P.adj <= 0.05) mean(1 - H.smp[P.adj <= 0.05]) (pi.0.hat <- 2*mean(P.smp > 0.5)) mean(1 - H.smp[P.adj <= 0.05/pi.0.hat]) sum(P.adj <= 0.05/pi.0.hat) z.ord <- order(Z.smp,decreasing = F) # Comparison of eBayes methods plot(z.vec,Fdr.vec,type="l",col=2,ylim=c(0,1),xlab="Z",ylab="Fdr",main = "Comparison of Fdr estimators",lty = 2) lines(a$mat[,1],a$mat[,4],col=3,lwd = 2) lines(Z.smp[z.ord],P.adj[z.ord] * pi.0.hat ,col=4,lwd = 2) ################################################################################################ # 2. Bayes classifier for continuous parmaeter ################################################################################################ theta.vec <- seq(-6,6,length = 1000) pi.theta <- 0.5*0.9*dexp(abs(theta.vec),3) + 0.5*0.1*dexp(abs(theta.vec),1) sum(pi.theta)*(theta.vec[2]-theta.vec[1]) plot(theta.vec,pi.theta,type="l",main = "Theta distribution") z.vec <- seq(10,-10,length = 5000) f.0 <- rep(0,5000) f.1 <- rep(0,5000) for(i in 1:500) f.0 <- f.0 + dnorm(z.vec,mean = theta.vec[i])*pi.theta[i] for(i in 501:1000) f.1 <- f.1 + dnorm(z.vec,mean = theta.vec[i])*pi.theta[i] f.0 <- f.0 / sum(f.0) f.1 <- f.1 / sum(f.1) plot(z.vec,f.0/(z.vec[1] - z.vec[2]),type="l",main = "Theta and Z distributions",col=2,xlim = c(-4,4),ylim=c(0,1.5) ) lines(z.vec,f.1/(z.vec[1] - z.vec[2]),col=3) lines(theta.vec,pi.theta / (sum(pi.theta)*(theta.vec[2] - theta.vec[1])),col = 4) f <- 0.5*f.0 + 0.5*f.1 fdr.vec <- 0.5*f.0 / f Fdr.vec <- cumsum(fdr.vec * f) / cumsum(f) Fdr.qvalue <- 0.5 * (1 - pnorm(z.vec)) / cumsum(f) plot(z.vec,fdr.vec,type="l",col=5,lwd=2,main = "fdr and Fdr values") lines(z.vec,Fdr.vec,col=4,lwd = 2) lines(z.vec,Fdr.qvalue,col=4,lwd = 2,lty = 2) abline(v = 0,lty = 2) abline(h = 1,lty = 2) abline(h = 0.5,lty = 2) abline(h = 0.05,lty = 2) (z.05 <- approx(Fdr.vec,z.vec,xout = 0.05)$y) m <- 10^5 theta.smp <- c(rexp(0.9*m,3),rexp(0.1*m,1))*sample(c(-1,1),m,replace = T) z.smp <- rnorm(m,theta.smp) sum(z.05 < z.smp) sum(theta.smp[z.05 < z.smp] < 0) mean(theta.smp[z.05 < z.smp] < 0) # How will the BH procedure work? p.smp <- 1 - pnorm(z.smp) p.adj <- p.adjust(p.smp,method="BH") sum(p.adj <= 0.05) sum(p.adj/2 <= 0.05) mean(theta.smp[p.adj/2 <= 0.05] < 0) lines(z.smp[order(z.smp)],p.adj[order(z.smp)]*0.5,col=2) ################################################################################################ # 3. Bayes classifier for 2D discrete parmaeter ################################################################################################ # Motivating simulation m <- 10^5 theta.smp <- sample(1:4,m,replace = T,prob = c(0.85,0.05,0.05,0.05)) h.1 <- (theta.smp == 3 | theta.smp == 4) + 0 h.2 <- (theta.smp == 2 | theta.smp == 4) + 0 z.smp <- cbind(rnorm(m,mean = 3*h.1),rnorm(m,mean = 3*h.2)) p1.smp <- 1 - pnorm(z.smp[,1]) p2.smp <- 1 - pnorm(z.smp[,2]) p.no.assoc <- 1 - pchisq(-2*(log(p1.smp) + log(p2.smp)),2*2) p.no.rep <- pmax(p1.smp,p2.smp) h.no.assoc <- (h.1 == 1 | h.2 == 1) + 0 h.no.rep <- (h.1 == 1 & h.2 == 1) + 0 sum(p.adjust(p.no.assoc,method = "BH") <= 0.05) mean(1 - h.no.assoc[p.adjust(p.no.assoc,method = "BH") <= 0.05]) sum(p.adjust(p.no.rep,method = "BH") <= 0.05) mean(1 - h.no.rep[p.adjust(p.no.rep,method = "BH") <= 0.05]) f.00.smp <- dnorm(z.smp[,1],mean=0) * dnorm(z.smp[,2],mean=0) f.01.smp <- dnorm(z.smp[,1],mean=0) * dnorm(z.smp[,2],mean=3) f.10.smp <- dnorm(z.smp[,1],mean=3) * dnorm(z.smp[,2],mean=0) f.11.smp <- dnorm(z.smp[,1],mean=3) * dnorm(z.smp[,2],mean=3) fdr.no.assoc.smp <- 0.85*f.00.smp / (0.85*f.00.smp + 0.05*f.10.smp + 0.05*f.01.smp + 0.05*f.11.smp) Fdr.no.assoc.smp <- rep(NA,m) Fdr.no.assoc.smp[order(fdr.no.assoc.smp)] <- cumsum(fdr.no.assoc.smp[order(fdr.no.assoc.smp)]) / (1:m) sum(Fdr.no.assoc.smp <= 0.05) mean(1 - h.no.assoc[Fdr.no.assoc.smp <= 0.05]) sum(p.adjust(p.no.assoc,method = "BH")*0.85 <= 0.05) fdr.no.rep.smp <- (0.85*f.00.smp + 0.05*f.10.smp + 0.05*f.01.smp) / (0.85*f.00.smp + 0.05*f.10.smp + 0.05*f.01.smp + 0.05*f.11.smp) Fdr.no.rep.smp <- rep(NA,m) Fdr.no.rep.smp[order(fdr.no.rep.smp)] <- cumsum(fdr.no.rep.smp[order(fdr.no.rep.smp)]) / (1:m) sum(Fdr.no.rep.smp <= 0.05) mean(1 - h.no.rep[Fdr.no.rep.smp <= 0.05]) # Graphical explanation..... x.vec <- seq(-3,6,length=500) y.vec <- seq(-3,6,length=500) mu.00 <- c(0,0) mu.01 <- c(0,3) mu.10 <- c(3,0) mu.11 <- c(3,3) f.00 <- outer(dnorm(x.vec,mean=mu.00[1]),dnorm(y.vec,mean=mu.00[2])) f.01 <- outer(dnorm(x.vec,mean=mu.01[1]),dnorm(y.vec,mean=mu.01[2])) f.10 <- outer(dnorm(x.vec,mean=mu.10[1]),dnorm(y.vec,mean=mu.10[2])) f.11 <- outer(dnorm(x.vec,mean=mu.11[1]),dnorm(y.vec,mean=mu.11[2])) Fdr.fdr <- f.00 * 0 Fdr.p <- f.00 * 0 Fdr.BH <- f.00 * 0 px.outr <- 1 - pnorm(outer(x.vec,rep(1,500))) py.outr <- 1 - pnorm(outer(rep(1,500),y.vec)) p.no.assoc <- 1 - pchisq(-2*(log(px.outr) + log(py.outr)),2*2) p.no.rep <- pmax(px.outr,py.outr) # 2.1 Plots for testing no association pi.00 <- 0.85 pi.01 <- 0.05 pi.10 <- 0.05 pi.11 <- 0.05 f <- pi.00*f.00 + pi.01*f.01 + pi.10*f.10 + pi.11*f.11 f.0 <- pi.00*f.00 f.1 <- pi.01*f.01 + pi.10*f.10 + pi.11*f.11 fdr <- f.0 / f ord.fdr <- order(fdr) p.outr <- p.no.assoc ord.p <- order(p.outr) Fdr.fdr[ord.fdr] <- cumsum((fdr*f)[ord.fdr]) / cumsum((f)[ord.fdr]) Fdr.p[ord.p] <- cumsum((fdr*f)[ord.p]) / cumsum((f)[ord.p]) Fdr.BH[ord.p] <- p.outr[ord.p] / (cumsum((f)[ord.p]) / sum(f)) Fdr.fdr <- array(dim=c(500,500),Fdr.fdr) Fdr.p <- array(dim=c(500,500),Fdr.p) Fdr.BH <- array(dim=c(500,500),Fdr.BH) contour(x.vec,y.vec,f.0,xlim=c(-2,5),ylim=c(-2,5),col=2,xlab="Z1",ylab="Z2") contour(x.vec,y.vec,f.1,col=3,add=T) points(mu.00[1],mu.00[2],col=2,pch=3,lwd=2) points(mu.01[1],mu.01[2],col=3,pch=3,lwd=2) points(mu.10[1],mu.10[2],col=3,pch=3,lwd=2) points(mu.11[1],mu.11[2],col=3,pch=3,lwd=2) contour(x.vec,y.vec,Fdr.fdr,levels=0.05,col=6,add=T,lwd = 3) contour(x.vec,y.vec,Fdr.fdr,levels=0.05,col=6,xlim=c(-2,5),ylim=c(-2,5),lwd=2,xlab="Z1",ylab="Z2") contour(x.vec,y.vec,Fdr.p,levels=0.05,col=3,add=T,lwd = 2) contour(x.vec,y.vec,Fdr.BH,levels=0.05,col=4,add=T,lwd = 2) legend(-1,5,c("Fdr=0.05 Bayes classifier","Fdr=0.05 p-value based classifier", "BH estimated Fdr=0.05 p-value based classifier"),lty=1,col=c(6,3,4),lwd=2) # Discovery probability comparison sum(f[Fdr.fdr <= 0.05]) / sum(f) sum(f[Fdr.p <= 0.05]) / sum(f) sum(f[Fdr.BH <= 0.05]) / sum(f) # 2.2 Plots for testing no association DIFFERENT HYPERPARAMETER values pi.00 <- 0.85 pi.01 <- 0.00 pi.10 <- 0.15 pi.11 <- 0.00 f <- pi.00*f.00 + pi.01*f.01 + pi.10*f.10 + pi.11*f.11 f.0 <- pi.00*f.00 f.1 <- pi.01*f.01 + pi.10*f.10 + pi.11*f.11 fdr <- f.0 / f ord.fdr <- order(fdr) p.outr <- p.no.assoc ord.p <- order(p.outr) Fdr.fdr[ord.fdr] <- cumsum((fdr*f)[ord.fdr]) / cumsum((f)[ord.fdr]) Fdr.p[ord.p] <- cumsum((fdr*f)[ord.p]) / cumsum((f)[ord.p]) Fdr.BH[ord.p] <- p.outr[ord.p] / (cumsum((f)[ord.p]) / sum(f)) Fdr.fdr <- array(dim=c(500,500),Fdr.fdr) Fdr.p <- array(dim=c(500,500),Fdr.p) Fdr.BH <- array(dim=c(500,500),Fdr.BH) contour(x.vec,y.vec,f.0,xlim=c(-2,5),ylim=c(-2,5),col=2,xlab="Z1",ylab="Z2") contour(x.vec,y.vec,f.1,col=3,add=T) points(mu.00[1],mu.00[2],col=2,pch=3,lwd=2) points(mu.01[1],mu.01[2],col=3,pch=3,lwd=2) points(mu.10[1],mu.10[2],col=3,pch=3,lwd=2) points(mu.11[1],mu.11[2],col=3,pch=3,lwd=2) contour(x.vec,y.vec,Fdr.fdr,levels=0.05,col=6,add=T,lwd = 3) contour(x.vec,y.vec,Fdr.fdr,levels=0.05,col=6,xlim=c(-2,5),ylim=c(-2,5),lwd=2,xlab="Z1",ylab="Z2") contour(x.vec,y.vec,Fdr.p,levels=0.05,col=3,add=T,lwd = 2) contour(x.vec,y.vec,Fdr.BH,levels=0.05,col=4,add=T,lwd = 2) legend(-1,5,c("Fdr=0.05 Bayes classifier","Fdr=0.05 p-value based classifier", "BH estimated Fdr=0.05 p-value based classifier"),lty=1,col=c(6,3,4),lwd=2) sum(f[Fdr.fdr <= 0.05]) / sum(f) sum(f[Fdr.p <= 0.05]) / sum(f) sum(f[Fdr.BH <= 0.05]) / sum(f) # 2.3 Testing no replicability pi.00 <- 0.85 pi.01 <- 0.05 pi.10 <- 0.05 pi.11 <- 0.05 f <- pi.00*f.00 + pi.01*f.01 + pi.10*f.10 + pi.11*f.11 f.0 <- pi.00*f.00 + pi.01*f.01 + pi.10*f.10 f.1 <- pi.11*f.11 fdr <- f.0 / f ord.fdr <- order(fdr) p.outr <- p.no.rep ord.p <- order(p.outr) Fdr.fdr[ord.fdr] <- cumsum((fdr*f)[ord.fdr]) / cumsum((f)[ord.fdr]) Fdr.p[ord.p] <- cumsum((fdr*f)[ord.p]) / cumsum((f)[ord.p]) Fdr.BH[ord.p] <- p.outr[ord.p] / (cumsum((f)[ord.p]) / sum(f)) Fdr.fdr <- array(dim=c(500,500),Fdr.fdr) Fdr.p <- array(dim=c(500,500),Fdr.p) Fdr.BH <- array(dim=c(500,500),Fdr.BH) contour(x.vec,y.vec,f.0,levels=c(.001,.005,seq(.01,.15,length=15)),xlim=c(-2,5),ylim=c(-2,5),col=2,xlab="Z1",ylab="Z2") contour(x.vec,y.vec,f.1,col=3,add=T) points(mu.00[1],mu.00[2],col=2,pch=3,lwd=2) points(mu.01[1],mu.01[2],col=2,pch=3,lwd=2) points(mu.10[1],mu.10[2],col=2,pch=3,lwd=2) points(mu.11[1],mu.11[2],col=3,pch=3,lwd=2) contour(x.vec,y.vec,Fdr.fdr,levels=0.05,col=6,add=T,lwd = 3) contour(x.vec,y.vec,Fdr.fdr,levels=0.05,col=6,xlim=c(-2,5),ylim=c(-2,5),lwd=2,xlab="Z1",ylab="Z2") contour(x.vec,y.vec,Fdr.p,levels=0.05,col=3,add=T,lwd = 2) contour(x.vec,y.vec,Fdr.BH,levels=0.05,col=4,add=T,lwd = 2) legend(-1,0,c("Fdr=0.05 Bayes classifier","Fdr=0.05 p-value based classifier", "BH estimated Fdr=0.05 p-value based classifier"),lty=1,col=c(6,3,4),lwd=2) sum(f[Fdr.fdr <= 0.05]) / sum(f) sum(f[Fdr.p <= 0.05]) / sum(f) sum(f[Fdr.BH <= 0.05]) / sum(f) max(p.outr[Fdr.p <= 0.05]) / (sum((f)[Fdr.p <= 0.05]) / sum(f))