This R script evaluates GBM and CNN classification performance, estimating 95% confidence intervals by bootstrapping the independent test set across 500 iterations.
Load requisite packages and define directories.
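A minimal setup sketch, assuming the packages the calls below rely on (data.table, ROCR, and purrr) and that input files live under a data/ directory:
library(data.table) # fread(), setnames(), dcast(), rbindlist()
library(ROCR)       # prediction() and performance()
library(purrr)      # map_dfr() and imap_dfr()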
Read test set classification probabilities from the gradient boosting machine (GBM) and convolutional neural network (CNN) model output. To comply with the bootstrapping function, each dataset must have a column called labels (containing the true class labels) and a column called predictions (containing the predicted probability of Alzheimer's disease).
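For illustration, a conforming dataset looks like the following (the values are hypothetical, and the 0/1 coding is an assumption consistent with the recoding steps below):
# hypothetical example of the expected input format
example_input <- data.table(
  labels = c(1, 0, 1, 0),                 # assumed coding: 1 = Alzheimer's (positive class), 0 = control
  predictions = c(0.91, 0.12, 0.78, 0.35) # predicted probability of the Alzheimer's class
)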
# parse GBM data
gbm <- readRDS('data/AD vs. CTRL GBM Results.rds')
# parse astrocyte GBM
gbm_astrocyte <- gbm[['Astrocyte']]$Scores[,2:3]
setnames(gbm_astrocyte,c('Condition','Alzheimer'),c('labels','predictions'))
gbm_astrocyte[,labels := as.integer(labels) - 1] # convert factor labels to 0/1 integers
# parse microglia GBM
gbm_microglia <- gbm[['Microglia']]$Scores[,2:3]
setnames(gbm_microglia,c('Condition','Alzheimer'),c('labels','predictions'))
gbm_microglia[,labels := as.integer(labels) - 1] # convert factor labels to 0/1 integers
# read astrocyte CNN
cnn_astrocyte <- fread('data/Astrocyte CNN TestSetResults.csv')[,c('TrueLabel','ProbabilityAD')]
setnames(cnn_astrocyte,c('TrueLabel','ProbabilityAD'),c('labels','predictions'))
cnn_astrocyte[,labels := ifelse(labels == 1,0,1)] # flip the label coding (1 -> 0, otherwise 1)
# read microglia CNN
cnn_microglia <- fread('data/Microglia CNN TestSetResults.csv')[,c('TrueLabel','ProbabilityAD')]
setnames(cnn_microglia,c('TrueLabel','ProbabilityAD'),c('labels','predictions'))
cnn_microglia[,labels := ifelse(labels == 1,0,1)] # flip the label coding (1 -> 0, otherwise 1)
# create named list
all_experiments <- list(gbm_astrocyte = gbm_astrocyte, gbm_microglia = gbm_microglia,
                        cnn_astrocyte = cnn_astrocyte, cnn_microglia = cnn_microglia)
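As a quick sanity check (not part of the original analysis), one can confirm every dataset carries the two required columns:
# optional check: every experiment has both required columns
stopifnot(all(vapply(all_experiments,
                     function(d) all(c('labels','predictions') %in% names(d)),
                     logical(1))))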
Function to compute model performance and calculate 95% confidence intervals by bootstrapping across 500 iterations of the independent test set.
compute_model_performance <- function(model_name, model_preds,
                                      metrics = c('acc','ppv','npv','sens','spec','f','auc','aucpr'),
                                      threshold_criteria = 'max_accuracy',
                                      marginal_thresholds = NULL){
# ROCR expects a prediction object; build one from model_preds if it is not one already
if(!inherits(model_preds,'prediction')){
  ROCR_prediction <- prediction(model_preds$predictions,model_preds$labels)
} else {
  ROCR_prediction <- model_preds
}
# first, check if there are any threshold dependent metrics
if(any(!metrics %in% c('auc','aucpr'))){
acc <- performance(ROCR_prediction,'acc')
acc <- data.table(cutoffs = acc@x.values[[1]],accuracy = acc@y.values[[1]])
if(threshold_criteria == 'max_accuracy'){
# pick cutoff that yields max accuracy
threshold <- acc[which.max(accuracy),cutoffs]
}
# marginal_thresholds (e.g. the all_experiments_performance_metrics_cast object) must be supplied when using this threshold_criteria
if(threshold_criteria == 'from_marginal'){
# merge back threshold from marginal model
colnames_strip <- names(stratifed_result_indices) # strata names, defined elsewhere in the analysis
from_marginal_model <- gsub(paste(colnames_strip,collapse = "|"),"",model_name)
from_marginal_model <- gsub('_\\b',"",from_marginal_model) # drop the trailing underscore
threshold <- marginal_thresholds[model_name == from_marginal_model,Threshold]
# need to get threshold closest to this ON the data in strata
threshold <- acc[cutoffs <= threshold,][order(-cutoffs)][1,cutoffs] # take the cutoff in acc at or immediately below the marginal threshold
}
}
# build DT for results from threshold independent metrics
ROCR_results_no_threshold <- map_dfr(metrics[metrics %in% c('auc','aucpr')],~data.table(Measure = .x,
Value = performance(ROCR_prediction,.x)@y.values[[1]],
model_name = model_name,
threshold_criteria = threshold_criteria,
threshold = 'Measure Independent of Threshold'))
# threshold dependent methods
# index y value where x value equal to threshold selected from threshold criteria
ROCR_results_threshold <- map_dfr(metrics[!metrics %in% c('auc','aucpr')],~data.table(Measure = .x,
Value = performance(ROCR_prediction,.x)@y.values[[1]][which(performance(ROCR_prediction,.x)@x.values[[1]] == threshold)],
model_name = model_name,
threshold_criteria = threshold_criteria,
threshold = threshold
))
# add CIs: resample 500 times with replacement and take the 2.5% and 97.5% quantiles
set.seed(5281995)
seeds <- sample.int(10000000,500)
# for 1:500, compute CI for each measure
# takes a replicate index and data to resample, resamples w replacement, computes all specified measures for each replicate
boot_CIs <- function(i,data_to_resample){
# new seed for each replicate
set.seed(seed = seeds[[i]])
# extract just preds and labels from ROCR object
data <- data.table(predictions = data_to_resample@predictions[[1]],labels = data_to_resample@labels[[1]])
# resample w replacement
data <- data[sample(.N,replace = TRUE),]
# make a new prediction object
ROCR_prediction <- prediction(data$predictions,data$labels)
# recompute the threshold on each resample; for 'from_marginal' the threshold
# from the enclosing function is reused via R's lexical scoping
if(any(!metrics %in% c('auc','aucpr'))){
acc <- performance(ROCR_prediction,'acc')
acc <- data.table(cutoffs = acc@x.values[[1]],accuracy = acc@y.values[[1]])
if(threshold_criteria == 'max_accuracy'){
# pick cutoff that yields max accuracy
threshold <- acc[which.max(accuracy),cutoffs]
}
}
ROCR_results_no_threshold <- map_dfr(metrics[metrics %in% c('auc','aucpr')],~data.table(Measure = .x,
Value = performance(ROCR_prediction,.x)@y.values[[1]],
model_name = model_name,
threshold_criteria = threshold_criteria,
threshold = 'Measure Independent of Threshold',
replicate = i))
# threshold dependent methods, index y vals where x val equal to threshold selected from threshold criteria
ROCR_results_threshold <- map_dfr(metrics[!metrics %in% c('auc','aucpr')],~data.table(Measure = .x,
Value = performance(ROCR_prediction,.x)@y.values[[1]][which(performance(ROCR_prediction,.x)@x.values[[1]] == threshold)],
model_name = model_name,
threshold_criteria = threshold_criteria,
threshold = threshold,
replicate = i))
rbindlist(list(ROCR_results_threshold,ROCR_results_no_threshold),fill = TRUE)
}
# get results of each replicate
bootstrap_runs <- map_dfr(1:500,~boot_CIs(.x,data_to_resample = ROCR_prediction))
# cast so each row is a replicate, each col is a measure
bootstrap_runs_cast <- dcast(bootstrap_runs,model_name+threshold_criteria+replicate~Measure,value.var = 'Value')
# get quantiles across measure columns
bootstrap_aggregation <- imap_dfr(bootstrap_runs_cast[,4:ncol(bootstrap_runs_cast)],~data.table(Measure = .y,CI_l = quantile(.x,.025,na.rm = T),CI_u = quantile(.x,.975,na.rm = T)))
# bind metrics together
final_metrics <- rbindlist(list(ROCR_results_threshold,ROCR_results_no_threshold),fill = TRUE)
final_metrics[bootstrap_aggregation,on=.(Measure),`:=` (CI_l = i.CI_l,CI_u = i.CI_u)]
return(final_metrics)
}
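When threshold_criteria = 'from_marginal', the lookup inside the function implies that marginal_thresholds is a table with model_name and Threshold columns; a hypothetical sketch of its shape (values invented):
# hypothetical shape of the marginal_thresholds input
marginal_thresholds_example <- data.table(
  model_name = c('gbm_astrocyte','gbm_microglia'),
  Threshold = c(0.52, 0.48)
)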
Map the previously defined function over each experiment in the named list.
all_experiments_performance_metrics <- imap_dfr(all_experiments, ~compute_model_performance(model_name = .y, model_preds = .x))
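For example, the AUC point estimates and their bootstrap intervals can then be pulled from the combined results table:
# inspect AUC and its bootstrap 95% CI for each model
all_experiments_performance_metrics[Measure == 'auc', .(model_name, Value, CI_l, CI_u)]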