# ************ # the function # ************ MSFDR <- function( minimal.lm, maximal.lm , q = 0.05 , print.the.steps = T, print.running.time = print.the.steps) { # computes forward model selection using the multiple stage FDR controlling procedure (MSFDR) if( !( class(minimal.lm) == "lm" & class(maximal.lm) == "lm") ) { print("one of the models you entered aren't linear models (lm), please try fitting lm only") break } if(print.running.time) time <- proc.time() require(MASS) algorithm.direction="forward" # always forward the.scope = list(lower = minimal.lm, upper = maximal.lm) trace.stepAIC <- ifelse(print.the.steps , 1,0) iteration.number <- 1 m = extractAIC(maximal.lm)[1] - 1 # check if the full model should include the intercept or not !!!!!! i = max(extractAIC(minimal.lm)[1]-1, 1) # so if the model is with intercept only, the i size won't be 0. # q = .05 # default Lambda = qnorm ( (1- 0.5 * q* i/(m+1-i*(1-q)) ) ) ^ 2 if(print.the.steps ) {print(paste("Starting Lambda is: ", Lambda)) } # first step of the algorithm new.lm <- stepAIC(minimal.lm, direction=algorithm.direction, scope=the.scope , k = Lambda, trace = trace.stepAIC ) new.lm.model.size <- extractAIC(new.lm)[1] - 1 while(new.lm.model.size > i) { iteration.number <- iteration.number + 1 if(print.the.steps ) { print("=========================================") print("=========================================") print(paste("iteration number: ", iteration.number )) print(paste("current model size is:", new.lm.model.size , ">", i, " (which is bigger then the old model size)")) } i = new.lm.model.size Lambda = qnorm ( (1- 0.5 * q* i/(m+1-i*(1-q)) ) ) ^ 2 if(print.the.steps ) {print(paste("new Lambda is: ", Lambda)) } new.lm <- stepAIC(new.lm, direction=algorithm.direction, scope=the.scope , k = Lambda, , trace = trace.stepAIC ) new.lm.model.size <- extractAIC(new.lm)[1] - 1 } if(print.the.steps ) { print("=========================================") print("=========================================") print("=========================================") print("The final model is: ") print(new.lm$call) } if(print.running.time) { print("") print( "Algorithm running time was:") print( proc.time() - time ) } return(new.lm) } # ********** # An example # ********** # Illustrating the variable selection methods on some data: # Loading the MASS package (comes with "base") library(MASS) # getting the data data(state) #help(state) # information on the data statedata <- data.frame(state.x77,row.names=state.abb,check.names=T) # giving some summary statistics and plots about the data we use: head(statedata) summary(statedata) pairs(statedata[,c(1:3,5:8,4)],lower.panel=panel.smooth)# , col = c(1:3)) # defining the smallest and largest lm we wish to progress through smallest.linear.model <- lm(Life.Exp ~ 1, data=statedata) # summary(g.low ) largest.linear.model <- lm(Life.Exp ~ (.)^2, data=statedata) # using stepAIC for fitting a model with AIC example.1 <- stepAIC(smallest.linear.model , direction="forward", scope=list(lower = smallest.linear.model , upper = largest.linear.model ) ) # Implementing the MSFDR functions (with q = 0.05 and 0.1) example.2 <- MSFDR( minimal.lm = smallest.linear.model, maximal.lm = largest.linear.model, q = 0.05, F) example.3 <- MSFDR( minimal.lm = smallest.linear.model, maximal.lm = largest.linear.model, q = 0.1, T) summary(example.1) summary(example.2) summary(example.3)