This R script performs unsupervised spectral clustering to investigate the existence of diverse phenotypes of astrocytes and microglia in control and AD brains.
Load requisite packages and define directories. Note that this script uses my personal utilities package brainstorm
, which can be downloaded via devtools::install_github("ayushnoori/brainstorm")
.
Note that directories are relative to the R project path.
Load ROI measurement data from the 3 - ROI Measurements
directory and split by Group
.
Define functions to compute spectral clustering using the SNFtool
package. Here, affinityCustom
is a modified version of affinityMatrix
from SNFtool
without sparsifying the affinity matrix by a K-nearest neighbors approach (i.e., removing the assumption that local pairwise similarities with high values are more reliable than remote ones).
Define spectral clustering function.
spectral_clustering = function(dat, lab, mx, k = 3) {
# print log
cat(paste("\n", toupper(lab), "ANALYSIS\n"))
# calculate distance matrix
distM = as.matrix(dat[, ..mx]) %>% dist2(., .) %>% .^(1/2)
# calculate similarity matrix
simM = affinityCustom(distM)
# perform spectral clustering
cat(paste0("- Performing Spectral Clustering, ", word(Sys.time(), 2), "\n"))
clust = spectralClustering(simM, K = k)
# add spectral clustering labels to data
dat[, State := clust]
return(dat)
}
Define function to bin distance, where distlab
is a character vector specifying the name of the distance column of interest.
bin_distance = function(dat, distlab,
distbins = c(0, 25, 50),
distlevels = c("< 25 um", "25-50 um", "> 50 um"),
distna = "None") {
# bin distance labels
dat %>%
.[, TemporaryBin := .SD, .SDcols = distlab] %>%
.[, TemporaryBin := cut(TemporaryBin,
breaks = c(distbins, max(TemporaryBin, na.rm = T)),
include.lowest = T)] %>%
.[, TemporaryBin := addNA(TemporaryBin)] %>%
.[, TemporaryBin := plyr::mapvalues(TemporaryBin, levels(TemporaryBin),
c(distlevels, "None"))]
# group None with > 50 um
dat[TemporaryBin == "None", TemporaryBin := distna]
# rename column
setnames(dat, "TemporaryBin", paste0(distlab, "Bin"))
return(invisible(dat))
}
Define function to (a) prepare data for heatmap by refactoring label, reordering, and binning distance, then (b) plot the heatmap using the pheatmap
package.
scale_data = function(b) { return(100*(b - min(b))/(max(b) - min(b))) }
plot_heatmap = function(dat, lab, mx, hmcols, hmsel) {
# subset marker/metadata columns
dat = dat[, .SD, .SDcols = c(mx, hmsel)]
# calculate proportion of control ROIs by state
prop = dat[, sum(Condition == "Control")/.N, by = .(State)] %>%
.[order(-V1), State]
# convert to factor then reorder ROIs
dat = dat %>%
.[, State := factor(State, levels = prop, labels = c("Homeostatic", "Intermediate", "Reactive"))] %>%
# .[, State := factor(State)] %>%
.[, Sample := factor(Sample)] %>%
.[, Condition := factor(Condition, levels = c("Control", "Alzheimer"))] %>%
.[order(State, Condition, Layer, runif(nrow(.))), ]
# calculate distances
dat = dat %>% bin_distance("Distance")
# write file
setcolorder(dat, c("ID", "State"))
fwrite(dat, file.path(dir4, paste(lab, "Spectral Clustering.csv")))
# calculate column gaps
gaps = cumsum(summary(dat[, State]))
# prepare for heatmap
hmdat = dat[, ..mx] %>% map_dfc(~scale_data(.x)) %>% t()
# select row names and column names
hmannos = dat[, .SD, .SDcols = c("Layer", "Condition", "State")]
# group None with > 50
group_none = function(x) hmannos[get(x) == "None", c(x) := "> 50 um"]
walk(hmdists, group_none)
# remove "Bin" label
colnames(hmannos) = gsub("Bin", "", colnames(hmannos), fixed = T)
# set rownames and colnames
colnames(hmdat) = dat$ID; rownames(hmannos) = dat$ID
# plot heatmap
hm = pheatmap(hmdat,
cluster_cols = FALSE, cluster_rows = FALSE,
annotation_colors = hmcols, annotation_col = hmannos,
border_color = NA, # main = paste(lab, "Heatmap"),
show_colnames = FALSE, gaps_col = gaps, silent = TRUE)
ggsave(file.path(dir4, paste(lab, "Heatmap.pdf")), hm, width = 8, height = 6)
ggsave(file.path(dir4, paste(lab, "Heatmap.png")), hm, width = 8, height = 6, dpi = 1200)
return(dat)
}
Apply spectral_clustering
function over ROI measurement data by Type
.
# define markers of interest
markers = list(Astrocyte = c("GFAP", "YKL40", "VIM", "TSPO",
"EAAT1", "EAAT2", "GS"),
Microglia = c("MHC2", "CD68", "TMEM119", "TSPO", "FTL"),
Vessel = c("GFAP", "YKL40", "VIM", "TSPO", "EAAT1", "EAAT2", "GS"))
# comment out the below lines if you do NOT want to re-run spectral clustering
# perform spectral clustering with k = 3 clusters
all = imap(all, ~spectral_clustering(.x, .y, markers[[.y]], 4))
# save spectral clustering result
saveRDS(all, file.path(dir4, "Spectral Clustering.rds"))
# read spectral clustering result
# all = readRDS(file.path(dir4, "Spectral Clustering.rds"))
Plot data as heatmaps.
# define metadata of interest for output .csv file
sel = colnames(all$Astrocyte) %>% .[!(. %in% c(unlist(markers), c("ALDH1L1", "Abeta", "DAPI", "HuC.D", "IBA1", "PHF1.tau")))]
# define distance colors
distcols = c('< 25 um' = "#FFAF85", '25-50 um' = "#FFED85", '> 50 um' = "#96BDD9")
# define heatmap color palette
cols = list(
Distance = distcols,
Layer = c(II = "#DDF2B2", III = "#8DD2B9", IV = "#39AEC3", V = "#2072B1", VI = "#0C2C84"),
Condition = c(Control = "#377EB8", Alzheimer = "#CE6D8B"),
State = c('Homeostatic' = "#39B200", 'Intermediate' = "#F0C808", 'Reactive' = "#960200"))
# plot heatmap data
hm = imap(all, ~plot_heatmap(.x, .y, markers[[.y]], cols, sel))
# save processed data
saveRDS(hm, file.path(dir4, "Z-Score Data.rds"))
If you see mistakes or want to suggest changes, please create an issue on the source repository.