Dependencies

Load requisite packages and define directories. Note that this script uses my personal utilities package brainstorm, which can be downloaded via devtools::install_github("ayushnoori/brainstorm").

# data manipulation
library(data.table)
library(purrr)
library(magrittr)

# spectral clustering
library(stringr)
library(SNFtool)

# heatmap
library(pheatmap)
library(ggplot2)
library(RColorBrewer)

# utility functions
library(brainstorm)

Note that directories are relative to the R project path.

# set directories
dir3 = file.path("Results", "3 - ROI Measurements")
dir4 = file.path("Results", "4 - Spectral Clustering")

Load Data

Load ROI measurement data from the 3 - ROI Measurements directory and split by Group.

all = fread(file.path(dir3, "ROI Measurements.csv")) %>% split(.$Group)

Define Clustering Functions

Define functions to compute spectral clustering using the SNFtool package. Here, affinityCustom is a modified version of affinityMatrix from SNFtool without sparsifying the affinity matrix by a K-nearest neighbors approach (i.e., removing the assumption that local pairwise similarities with high values are more reliable than remote ones).

Define spectral clustering function.

spectral_clustering = function(dat, lab, mx, k = 3) {
  
  # print log
  cat(paste("\n", toupper(lab), "ANALYSIS\n"))
  
  # calculate distance matrix
  distM = as.matrix(dat[, ..mx]) %>% dist2(., .) %>% .^(1/2)
  
  # calculate similarity matrix
  simM = affinityCustom(distM)
  
  # perform spectral clustering
  cat(paste0("- Performing Spectral Clustering, ", word(Sys.time(), 2), "\n"))
  clust = spectralClustering(simM, K = k)
  
  # add spectral clustering labels to data
  dat[, State := clust]
  
  return(dat)
  
}

Define function to bin distance, where distlab is a character vector specifying the name of the distance column of interest.

bin_distance = function(dat, distlab,
                        distbins = c(0, 25, 50),
                        distlevels = c("< 25 um", "25-50 um", "> 50 um"),
                        distna = "None") {
  
  # bin distance labels
  dat %>% 
    .[, TemporaryBin := .SD, .SDcols = distlab] %>%
    .[, TemporaryBin := cut(TemporaryBin,
                        breaks = c(distbins, max(TemporaryBin, na.rm = T)),
                        include.lowest = T)] %>%
    .[, TemporaryBin := addNA(TemporaryBin)] %>%
    .[, TemporaryBin := plyr::mapvalues(TemporaryBin, levels(TemporaryBin),
                                    c(distlevels, "None"))]
  
  # group None with > 50 um
  dat[TemporaryBin == "None", TemporaryBin := distna]
  
  # rename column
  setnames(dat, "TemporaryBin", paste0(distlab, "Bin"))
  
  return(invisible(dat))
  
}

Define function to (a) prepare data for heatmap by refactoring label, reordering, and binning distance, then (b) plot the heatmap using the pheatmap package.

scale_data = function(b) { return(100*(b - min(b))/(max(b) - min(b))) }

plot_heatmap = function(dat, lab, mx, hmcols, hmsel) {

  # subset marker/metadata columns
  dat = dat[, .SD, .SDcols = c(mx, hmsel)]
  
  # calculate proportion of control ROIs by state
  prop = dat[, sum(Condition == "Control")/.N, by = .(State)] %>%
    .[order(-V1), State]
  
  # convert to factor then reorder ROIs
  dat = dat %>%
    .[, State := factor(State, levels = prop, labels = c("Homeostatic", "Intermediate", "Reactive"))] %>%
    # .[, State := factor(State)] %>%
    .[, Sample := factor(Sample)] %>%
    .[, Condition := factor(Condition, levels = c("Control", "Alzheimer"))] %>%
    .[order(State, Condition, Layer, runif(nrow(.))), ]
  
  # calculate distances
  dat = dat %>% bin_distance("Distance")
  
  # write file
  setcolorder(dat, c("ID", "State"))
  fwrite(dat, file.path(dir4, paste(lab, "Spectral Clustering.csv")))
  
  # calculate column gaps
  gaps = cumsum(summary(dat[, State]))
  
  # prepare for heatmap
  hmdat = dat[, ..mx] %>% map_dfc(~scale_data(.x)) %>% t()
  
  # select row names and column names
  hmannos = dat[, .SD, .SDcols = c("Layer", "Condition", "State")]
  
  # group None with > 50
  group_none = function(x) hmannos[get(x) == "None", c(x) := "> 50 um"]
  walk(hmdists, group_none)
  
  # remove "Bin" label
  colnames(hmannos) = gsub("Bin", "", colnames(hmannos), fixed = T)
  
  # set rownames and colnames
  colnames(hmdat) = dat$ID; rownames(hmannos) = dat$ID
                    
  # plot heatmap
  hm = pheatmap(hmdat,
                cluster_cols = FALSE, cluster_rows = FALSE,
                annotation_colors = hmcols, annotation_col = hmannos,
                border_color = NA, # main = paste(lab, "Heatmap"),
                show_colnames = FALSE, gaps_col = gaps, silent = TRUE)
  
  ggsave(file.path(dir4, paste(lab, "Heatmap.pdf")), hm, width = 8, height = 6)
  ggsave(file.path(dir4, paste(lab, "Heatmap.png")), hm, width = 8, height = 6, dpi = 1200)
  
  return(dat)

}

Perform Spectral Clustering

Apply spectral_clustering function over ROI measurement data by Type.

# define markers of interest
markers = list(Astrocyte = c("GFAP", "YKL40", "VIM", "TSPO",
                             "EAAT1", "EAAT2", "GS"),
               Microglia = c("MHC2", "CD68", "TMEM119", "TSPO", "FTL"),
               Vessel = c("GFAP", "YKL40", "VIM", "TSPO", "EAAT1", "EAAT2", "GS"))

# comment out the below lines if you do NOT want to re-run spectral clustering

# perform spectral clustering with k = 3 clusters
all = imap(all, ~spectral_clustering(.x, .y, markers[[.y]], 4))

# save spectral clustering result
saveRDS(all, file.path(dir4, "Spectral Clustering.rds"))

# read spectral clustering result
# all = readRDS(file.path(dir4, "Spectral Clustering.rds"))

Plot Heatmap

Plot data as heatmaps.

# define metadata of interest for output .csv file
sel = colnames(all$Astrocyte) %>% .[!(. %in% c(unlist(markers), c("ALDH1L1", "Abeta", "DAPI", "HuC.D", "IBA1", "PHF1.tau")))]

# define distance colors
distcols = c('< 25 um' = "#FFAF85", '25-50 um' = "#FFED85", '> 50 um' = "#96BDD9")

# define heatmap color palette
cols = list(
  Distance = distcols, 
  Layer = c(II = "#DDF2B2", III = "#8DD2B9", IV = "#39AEC3", V = "#2072B1", VI = "#0C2C84"),
  Condition = c(Control = "#377EB8", Alzheimer = "#CE6D8B"),
  State = c('Homeostatic' = "#39B200", 'Intermediate' = "#F0C808", 'Reactive' = "#960200"))

# plot heatmap data
hm = imap(all, ~plot_heatmap(.x, .y, markers[[.y]], cols, sel))

# save processed data
saveRDS(hm, file.path(dir4, "Z-Score Data.rds"))

Spectral Clustering

Dependencies

Load Data

Define Clustering Functions

Perform Spectral Clustering

Plot Heatmap

Corrections