This module focuses on extracting spatial summary metrics from multiplex imaging data.
Load data and libraries.
library(tidyverse)
library(patchwork)
# load processed ovarian cancer data
load(url("https://github.com/julia-wrobel/MI_tutorial/raw/main/Data/ovarian.RDA"))
Below we visualize a point pattern for a single subject from the ovarian cancer dataset.
# subset to a single subject
subj_df = filter(ovarian_df, sample_id == 7)
# 7, 110, 34
# plot macrophage and B-cells
all = subj_df %>%
ggplot(aes(x, y)) +
geom_point(aes(color = tissue_category),
size = 0.1, alpha = 0.7) +
scale_color_manual(values = c("pink", "lightgreen")) +
geom_point(data = filter(subj_df, phenotype_cd68 == "CD68+"),
color = "darkgreen", size = .5, shape = 4) +
geom_point(data = filter(subj_df, phenotype_cd19 == "CD19+"),
color = "purple", size = .5, shape = 6) +
theme(legend.position ="bottom")
cell_subset = subj_df %>%
ggplot(aes(x, y)) +
geom_point(data = filter(subj_df, phenotype_cd68 == "CD68+"),
color = "darkgreen", size = .5, shape = 4) +
geom_point(data = filter(subj_df, phenotype_cd19 == "CD19+"),
color = "purple", size = .5, shape = 6) +
theme(legend.position ="bottom")
all + cell_subset
The spatstat
package in R
has great
resources for analyzing spatial point patterns. Let’s use the image
above to do some exploratory analysis with spatstat.
We will do spatial analysis on the distribution of B-cells and
macrophages, so we subset to only these cell types, and create a
phenotype
variable that designates whether a cell is a B
cell or a macrophage. Then we create a ppp
object, which is
what the spatstat
package uses for storage and analysis of
point pattern data.
library(spatstat)
subj_df = subj_df %>%
# create a phenotype variable that is B-cell, macrophage, or other
mutate(phenotype = case_when(
phenotype_cd68 == "CD68+" ~ "macrophage",
phenotype_cd19 == "CD19+" ~ "B-cell",
TRUE ~ "other"
)) %>%
filter(phenotype != "other") %>%
select(x,y, immune, tissue_category, phenotype)
# first define window of observation for your image
w = convexhull.xy(subj_df$x,subj_df$y)
# create ppp object as multitype point pattern
# need to factor the marks variable for ppp object to be treated as a multitype point pattern
ovarian_pp = ppp(subj_df$x,subj_df$y, window = w,
marks = factor(subj_df$phenotype))
First we calculate Ripley’s K for B cells.
k_bcell = Kest(subset(ovarian_pp, marks == "B-cell"), correction = "isotropic")
We can plot this k_bcell
object using
spatstat
to visualize the estimated K (\(\hat{K}^{iso}\)) compared to the K under
CSR (\(K^{pois}\)).
plot(k_bcell)
Now visualize the K-function for macrophages in this image:
k_mac = Kest(subset(ovarian_pp, marks == "macrophage"),
correction = "isotropic")
plot(k_mac)
It appears that there is some degree of clustering for both B-cells and macrophages in this image. Are these statistically significant?
We can test this using the envelope
function, which
performs Monte Carlo simulations of the K function under CSR. Plot below
shows pointwise envelope for B-cell K-function. Pointwise means that
these are performed separately for each value of r. Can only be
interpreted if a specific value of r is chosen in advance.
e_test = envelope(subset(ovarian_pp, marks == "B-cell"),
Kest, correction = "isotropic")
## Generating 99 simulations of CSR ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
## 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
## 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99.
##
## Done.
plot(e_test)
Points outside the envelope indicate significant departure from CSR.
Note that this could be due to inhomogeneity in the distribution of
cells, or due to clustering. See spatstat::Kinhom
function
for a test that takes into account inhomogeneity. See
spatstat::Lest
function to compute the L-function.
The bivariate K function measures whether the spatial distribution of 2 cell types is independent. Deviation from the \(K^{pois}\) line suggests a spatial relationship between the B-cells and macrophages in the image.
k_biv = Kcross(ovarian_pp, "B-cell", "macrophage", correction = "isotropic")
plot(k_biv)
Can interpret as probability of a neighboring cell occurring within radius r.
G_bcell = Gest(subset(ovarian_pp, marks == "B-cell"), correction = "rs")
plot(G_bcell)
Its bivariate counterpart
G_biv = Gcross(ovarian_pp, correction = "rs")
plot(G_biv)
Organize data for use in the spatialTIME
package.
library(spatialTIME)
# subsetting to 2 subjects for faster computation and ease of visualization
ids = c( 7, 128)
spatial_ls = filter(ovarian_df, sample_id %in% ids) %>%
mutate(XMax = x, XMin = x,
YMax = y, YMin = y,
sample_id = factor(sample_id),
id = sample_id,
cd19 = ifelse(phenotype_cd19 == "CD19+", 1, 0),
cd3 = ifelse(phenotype_cd3 == "CD3+", 1, 0),
cd8 = ifelse(phenotype_cd8 == "CD8+", 1, 0),
cd68 = ifelse(phenotype_cd68 == "CD68+", 1, 0)) %>%
dplyr::select(sample_id, id, XMax, XMin, YMax, YMin,
cd19, cd3, cd8, cd68, tissue_category) %>%
nest_by(id)
# define clinical data
clinical_data = ovarian_df %>%
dplyr::select(sample_id, survival_time, death, stage_bin, BRCA_mutation) %>%
distinct() %>%
mutate(id = sample_id)
sample_data = ovarian_df %>%
group_by(sample_id) %>%
summarize(total_cells = n()) %>%
ungroup() %>%
mutate(id = sample_id)
df_spatialTIME = spatialTIME::create_mif(
clinical_data = clinical_data,
sample_data = sample_data,
spatial_list = spatial_ls$data,
patient_id = "id",
sample_id = "sample_id"
)
rm(spatial_ls)
Plotting samples
cell_types = c("cd68", "cd19")
values = RColorBrewer::brewer.pal(length(cell_types), "Accent")
#add an element in the `derived` object position
df_spatialTIME <- plot_immunoflo(df_spatialTIME,
plot_title = "sample_id", mnames = cell_types,
cell_type = "tissue_category")
Calculate spatial summary metrics (calculating univariate K function across samples and phenotypes).
df_spatialTIME <- ripleys_k(mif = df_spatialTIME,
mnames = cell_types,
method = "K",
r_range = seq(0, 300, 10),
edge_correction = 'translation'
#permute = TRUE,
#num_permutations = 100
)
Plot images(top row) and K functions (bottom row).
k_funs = df_spatialTIME$derived$univariate_Count %>%
janitor::clean_names() %>%
ggplot(aes(x = r, y = observed_k)) +
geom_line(aes(group = marker,
color = marker)) +
geom_line(aes(r, theoretical_csr), linetype = 2) +
facet_wrap(~sample_id, scales = 'free', nrow = 1) + theme_bw() +
theme(legend.position = "bottom") +
scale_color_manual(values = values)
imgs = patchwork::wrap_plots(df_spatialTIME$derived$spatial_plots, nrow = 1)
imgs / k_funs
Also allows for calculation of bivariate K, univariate and bivariate L, G, with and without permutations.