#summarizeModels version 0.1 BETA #(c) 2005 by Maarten Buis #Generic function to show results from multiple (lm, or glm) models in one table #Input is a list of multiple model objects (models), all of which must be of the same class. #The class of the first model determines which function is used. summarizeModels <- function(models, ...) { UseMethod("summarizeModels", object=models[[1]]) } #summarizeModels.lm #function to show results from multiple lm-models in one table #input is a list of linear model objects (models) and optionally a vector of model names (modelNames) #options for long are: # "TRUE" for long output (stats under parameters) # "FALSE" for wide output (stats beside parameters) #options for stat are: # "t" for t-values, # "se" for standard errors, # "p" for p-values. #options for fit are: # "anova" for sum of squares + df's # "fstat" for F-statistic + df's + p-value # "r2" for R-Squared # "ar2" for adjusted R-Squared # "N" for N # "sigma" for standard error of the estimate # "ll" for the log likelihood # "bic" for the Bayes Information Criterion; an approximation of the bayes factor versus the constant only model # "aic" for the Aikake's Information Criterion summarizeModels.lm <- function(models, modelNames, long=FALSE, stat="t", fit=c("r2", "N"), digits=max(3, getOption("digits") - 2), round){ if(!all(sapply(models, function(models) class(models)=="lm"))) stop("models of different class") fitNames <- NULL pos <- NULL if(any(fit=="anova")) fitNames<-c(fitNames, "MSS", "df MSS", "RSS", "df RSS", "TSS", "df TSS") if(any(fit=="fstat")) fitNames<-c(fitNames, "F", "df1", "df2", "p-value") if(any(fit=="r2")) fitNames<-c(fitNames, "R-Squared") if(any(fit=="ar2")) fitNames<-c(fitNames, "adj. R-Squared") if(any(fit=="N")) fitNames<-c(fitNames, "N") if(any(fit=="sigma")) fitNames<-c(fitNames, "sigma") if(any(fit=="ll")) fitNames<-c(fitNames, "log likelihood") if(any(fit=="aic")) fitNames<-c(fitNames, "BIC'") if(any(fit=="bic")) fitNames<-c(fitNames, "AIC") if(any(stat == "se")) pos <- c(pos,2) if(any(stat == "t")) pos <- c(pos,3) if(any(stat == "p")) pos <- c(pos,4) stat <- c("se","t","p")[pos-1] #ensures labels are in the right order nModels <- length(models) nStat <- length(stat) if (missing(modelNames)) modelNames <- LETTERS[1:nModels] coefNames <- unique(unlist(lapply(models, function(model) names(coef(model))))) if(long==FALSE){ #wide table seqStat <- 1:((nStat+1)*nModels) seqStat <- seqStat[-(seq(1, (nStat+1)*nModels - nStat, by=nStat+1))] result <- matrix(NA, length(coefNames) + length(fitNames), (1+nStat)*nModels) rownames(result) <- c(coefNames, fitNames) colnames(result)[seqStat] <- stat colnames(result)[seq(1, (nStat+1)*nModels - nStat, by=nStat+1)] <- modelNames for (i in 1:nModels){ sumry <- summary(models[[i]]) coefs <- coefficients(sumry)[,c(1,pos)] result[rownames(coefs), (nStat+1)*(i - 1) + 1:(nStat+1)] <- coefs RSS <- sumry$sigma^2*sumry$df[2] TSS <- RSS/(1-sumry$r.squared) MSS <- TSS-RSS n <- sumry$df[1]+sumry$df[2] sigmall <- sqrt(sumry$sigma^2*sumry$df[2]/n) ll <- -n/2*log(2*pi)-n/2*log(sigmall^2)-.5*sum(models[[1]]$residuals^2/sigmall^2) BIC <- n*log(1-sumry$r.squared)+sumry$df[1]*log(n) AIC <- (-2*ll+2*sumry$df[1])/n FIT <- c(MSS, sumry$df[1]-1, RSS, sumry$df[2], TSS, sumry$df[1]+sumry$df[2]-1, sumry$fstatistic, pf(sumry$fstatistic[1],sumry$fstatistic[2],sumry$fstatistic[3], lower.tail=F), sumry$r.squared, sumry$adj.r.squared, n,sumry$sigma, ll, BIC, AIC) names(FIT) <- c("MSS", "df MSS", "RSS", "df RSS", "TSS", "df TSS", "F", "df1", "df2", "p-value", "R-Squared", "adj. R-Squared", "N", "sigma", "log likelihood", "BIC'", "AIC") result[fitNames, (nStat+1)*(i - 1) + 1] <- FIT[fitNames] } } else{ #long table result <- matrix(NA, length(coefNames)*(nStat+1) + length(fitNames), nModels) seqStat <- 1:((nStat+1)*length(coefNames)) seqStat <- seqStat[-(seq(1, (nStat+1)*length(coefNames) - nStat, by=nStat+1))] rowname <- NULL rowname[seq(1, (nStat+1)*length(coefNames)-1, by=(nStat+1))] <- coefNames rowname[seqStat] <- as.vector(t(sapply(c(stat),function(stat) paste(stat,coefNames)))) rowname[seq((nStat+1)*length(coefNames)+1, (nStat+1)*length(coefNames)+length(fitNames))] <- fitNames rownames(result) <- rowname colnames(result) <- modelNames for (i in 1:nModels){ sumry <- summary(models[[i]]) coefs <- as.vector(t(coefficients(sumry)[,c(1,pos)])) seqStat <- 1:((nStat+1)*sumry$df[1]) seqStat <- seqStat[-(seq(1, (nStat+1)*sumry$df[1] - nStat, by=nStat+1))] name<-NULL name[seq(1, (nStat+1)*sumry$df[1]-nStat, by=(nStat+1))] <- names(coefficients(sumry)[,1]) name[seqStat] <- as.vector(t(sapply(c(stat),function(stat) paste(stat,names(coefficients(sumry)[,1]))))) names(coefs)<-name result[names(coefs), i] <- coefs RSS <- sumry$sigma^2*sumry$df[2] TSS <- RSS/(1-sumry$r.squared) MSS <- TSS-RSS n <- sumry$df[1]+sumry$df[2] sigmall <- sqrt(sumry$sigma^2*sumry$df[2]/n) ll <- -n/2*log(2*pi)-n/2*log(sigmall^2)-.5*sum(models[[1]]$residuals^2/sigmall^2) BIC <- n*log(1-sumry$r.squared)+sumry$df[1]*log(n) AIC <- (-2*ll+2*sumry$df[1])/n FIT <- c(MSS, sumry$df[1]-1, RSS, sumry$df[2], TSS, sumry$df[1]+sumry$df[2]-1, sumry$fstatistic, pf(sumry$fstatistic[1],sumry$fstatistic[2],sumry$fstatistic[3], lower.tail=F), sumry$r.squared, sumry$adj.r.squared, n,sumry$sigma, ll, BIC, AIC) names(FIT) <- c("MSS", "df MSS", "RSS", "df RSS", "TSS", "df TSS", "F", "df1", "df2", "p-value", "R-Squared", "adj. R-Squared", "N", "sigma", "log likelihood", "BIC'", "AIC") result[fitNames, i] <- FIT[fitNames] } } if (!missing(round)) result <- round(result, round) printCoefmat(result, digits=digits, na.print="") } #summarizeModels.glm #function to show results from multiple glm-models in one table #input is a list of linear model objects (models) and optionally a vector of model names (modelNames) #options for long are: # "TRUE" for long output (stats under parameters) # "FALSE" for wide output (stats beside parameters) #options for stat are: # "z" for z-values, # "se" for standard errors, # "p" for p-values. #options for fit are: # "loglik" log likelihood # "AIC" AIC # "BIC" BIC # "0dev" deviance from null model + df + p-value # "mdev" deviance from saturated model + df + p-value # "N" N summarizeModels.glm <- function(models, modelNames, long=FALSE, stat="z", fit=c("loglik", "N"), digits=max(3, getOption("digits") - 2), round){ if(!all(sapply(models, function(models) class(models)[1]=="glm"))) stop("models of different class") fitNames <- NULL pos <- NULL if(any(fit=="loglik")) fitNames<-c(fitNames, "log likelihood") if(any(fit=="AIC")) fitNames<-c(fitNames, "AIC") if(any(fit=="BIC")) fitNames<-c(fitNames, "BIC") if(any(fit=="0dev")) fitNames<-c(fitNames, "null deviance", "df null", "p null") if(any(fit=="mdev")) fitNames<-c(fitNames, "residual deviance", "df residual", "p residual") if(any(fit=="N")) fitNames<-c(fitNames, "N") if(any(stat == "se")) pos <- c(pos,2) if(any(stat == "z")) pos <- c(pos,3) if(any(stat == "p")) pos <- c(pos,4) stat <- c("se","z","p")[pos-1] #ensures labels are in the right order nModels <- length(models) nStat <- length(stat) if (missing(modelNames)) modelNames <- LETTERS[1:nModels] coefNames <- unique(unlist(lapply(models, function(model) names(coef(model))))) if(long==FALSE){ #wide table seqStat <- 1:((nStat+1)*nModels) seqStat <- seqStat[-(seq(1, (nStat+1)*nModels - nStat, by=nStat+1))] result <- matrix(NA, length(coefNames) + length(fitNames), (1+nStat)*nModels) rownames(result) <- c(coefNames, fitNames) colnames(result)[seqStat] <- stat colnames(result)[seq(1, (nStat+1)*nModels - nStat, by=nStat+1)] <- modelNames for (i in 1:nModels){ sumry <- summary(models[[i]]) coefs <- coefficients(sumry)[,c(1,pos)] result[rownames(coefs), (nStat+1)*(i - 1) + 1:(nStat+1)] <- coefs FIT <- c((sumry$aic-2*sumry$df[1])/-2, sumry$aic, sumry$aic-2*sumry$df[1]+log(sumry$df[1]+sumry$df[2]*sumry$df[1]), sumry$null.deviance, sumry$df.nul, pchisq(sumry$null.deviance, sumry$df.null, lower.tail=FALSE), sumry$deviance, sumry$df.residual, pchisq(sumry$deviance, sumry$df.residual, lower.tail=FALSE), sumry$df[1]+sumry$df[2]) names(FIT) <- c("log likelihood", "AIC", "BIC", "null deviance", "df null", "p null", "residual deviance", "df residual", "p residual", "N") result[fitNames, (nStat+1)*(i - 1) + 1] <- FIT[fitNames] } } else{ #long table result <- matrix(NA, length(coefNames)*(nStat+1) + length(fitNames), nModels) seqStat <- 1:((nStat+1)*length(coefNames)) seqStat <- seqStat[-(seq(1, (nStat+1)*length(coefNames) - nStat, by=nStat+1))] rowname <- NULL rowname[seq(1, (nStat+1)*length(coefNames)-1, by=(nStat+1))] <- coefNames rowname[seqStat] <- as.vector(t(sapply(c(stat),function(stat) paste(stat,coefNames)))) rowname[seq((nStat+1)*length(coefNames)+1, (nStat+1)*length(coefNames)+length(fitNames))] <- fitNames rownames(result) <- rowname colnames(result) <- modelNames for (i in 1:nModels){ sumry <- summary(models[[i]]) coefs <- as.vector(t(coefficients(sumry)[,c(1,pos)])) seqStat <- 1:((nStat+1)*sumry$df[1]) seqStat <- seqStat[-(seq(1, (nStat+1)*sumry$df[1] - nStat, by=nStat+1))] name<-NULL name[seq(1, (nStat+1)*sumry$df[1]-nStat, by=(nStat+1))] <- names(coefficients(sumry)[,1]) name[seqStat] <- as.vector(t(sapply(c(stat),function(stat) paste(stat,names(coefficients(sumry)[,1]))))) names(coefs)<-name result[names(coefs), i] <- coefs FIT <- c((sumry$aic-2*sumry$df[1])/-2, sumry$aic, sumry$aic-2*sumry$df[1]+log(sumry$df[1]+sumry$df[2]*sumry$df[1]), sumry$null.deviance, sumry$df.nul, pchisq(sumry$null.deviance, sumry$df.null, lower.tail=FALSE), sumry$deviance, sumry$df.residual, pchisq(sumry$deviance, sumry$df.residual, lower.tail=FALSE), sumry$df[1]+sumry$df[2]) names(FIT) <- c("log likelihood", "AIC", "BIC", "null deviance", "df null", "p null", "residual deviance", "df residual", "p residual", "N") result[fitNames, i] <- FIT[fitNames] } } if (!missing(round)) result <- round(result, round) printCoefmat(result, digits=digits, na.print="") }