library(conos)
library(qs)
library(dplyr)
library(magrittr)
library(pagoda2)
library(scHelper) # github.com/rrydbirk/scHelper
library(cacoa)
## Warning: replacing previous import 'ape::where' by 'dplyr::where' when loading
## 'cacoa'
library(ggplot2)
library(rstatix)
library(ggpubr)
library(ComplexHeatmap)
library(destiny)
library(Seurat)
library(harmony)
library(slingshot)
library(circlize)
library(cowplot)
library(sccore)
library(corrplot)
library(RColorBrewer)
library(glmnet)
meta <- qread("meta.qs")
# Palettes
pal.major <- RColorBrewer::brewer.pal(9, "Set1") %>%
setNames(c("Adipocytes", "Myeloid immune cells", "ASPCs", "ECs", "SMCs", "Lymphoid immune cells", "Mast cells", "Lymphatic ECs", "Pericytes")) %>%
{setNames(c(., ., sapply(unname(.), \(color) {
hsv_vals <- rgb2hsv(col2rgb(color))
new_hues <- hsv(pmin(1, hsv_vals[1] * 0.9), hsv_vals[2], hsv_vals[3])
})), c(names(.), paste0(names(.), ", female"), paste0(names(.), ", male"))
)} %>%
{setNames(c(., .[c("Lymphoid immune cells", "Myeloid immune cells")]), c(names(.), c("Lymphoid ICs", "Myeloid ICs")))}
tt <- Sys.time()
con.major <- qread("con_major.qs", nthreads = 10)
anno.major <- qread("anno_major.qs")
These figures were made in GraphPad Prism.
con.major$plotGraph(groups = anno.major,
plot.na = FALSE,
size = 0.1,
alpha = 0.1,
embedding = "UMAP_refined7",
show.labels = TRUE,
font.size = 5) +
labs(x = "UMAP1", y = "UMAP2") +
theme(line = element_blank()) +
scale_color_manual(values = RColorBrewer::brewer.pal(9, "Set1"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
cm.merged <- con.major$getJointCountMatrix() %>%
.[rownames(.) %in% names(anno.major), ]
c("ADIPOQ", "PLIN1", # Adipocytes
"PDGFRA", "DCN", # ASPCs
"MECOM", "ADGRL4", # ECs
"PKHD1L1", "PROX1", # LECs
"MYOCD", "MYH11", # SMCs
"STEAP4", "GIPR", # Pericytes
"IL7R", "CD3E", # Lymphoid
"KIT", "CPA3", # Mast
"MRC1", "F13A1") %>% # Myeloid
sccore::dotPlot(.,
cm.merged,
factor(anno.major[rownames(cm.merged)],
levels = c("Adipocytes",
"ASPCs",
"ECs",
"Lymphatic ECs",
"SMCs",
"Pericytes",
"Lymphoid immune cells",
"Mast cells",
"Myeloid immune cells")),
gene.order = .,
cols = c("white","grey20"))
# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major,
cell.groups = anno.major,
ref.level = "vis2",
target.level = "vis3",
sample.groups = con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
gsub(1, 2, .) %>%
`names<-`(con.major$samples %>%
names()),
sample.groups.palette = ggsci::pal_jama()(7))
cao$sample.groups <- con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
`names<-`(con.major$samples %>%
names()) %>%
gsub("vis", "Visit ", .)
p <- cao$plotCellGroupSizes(show.significance = TRUE,
filter.empty.cell.types = FALSE)
plot.dat <- p$data %>%
mutate(sex = rep(meta$sex, 9)) %>%
mutate(sample = stringi::stri_dup(LETTERS[seq(14)], 3) %>%
paste(collapse = "") %>%
strsplit("") %>%
unlist() %>%
rep(9)) %>%
mutate(sex_visit = paste(sex, group, sep = " ") %>%
gsub("Vis", "vis", .),
facet = ifelse(variable %in% c("SMCs", "Mast cells", "Lymphatic ECs", "Pericytes", "Lymphoid immune cells"), "low", "high"),
variable = factor(variable, levels = c("Adipocytes", "ASPCs", "ECs", "Myeloid immune cells", "Lymphatic ECs", "SMCs", "Pericytes", "Lymphoid immune cells", "Mast cells")) %>%
renameAnnotation("Myeloid immune cells", " Myeloid immune cells")) %>%
mutate(sex_visit_anno = paste0(sex_visit, "_", variable))
# Kruskal-Wallis
sex.stat <- plot.dat %$%
split(., sex) %>%
lapply(\(x) split(x, x$variable)) %>%
lapply(lapply, \(x) kruskal.test(x$value, g = x$group)$p.value) %>%
lapply(sapply, gtools::stars.pval) %>%
{lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>%
`names<-`(c("Female","Male")) %>%
lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>%
{lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>%
bind_rows() %>%
mutate(ct = factor(ct, levels = unique(plot.dat$variable))) %>%
arrange(ct) %>%
mutate(sig = gsub(".", "", sig, fixed = T),
facet = ifelse(ct %in% c("SMCs", "Mast cells", "Lymphatic ECs", "Pericytes", "Lymphoid immune cells"), "low", "high"))
# Wilcoxon
stat.test <- plot.dat %>%
dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
group_by(var, sex) %>%
wilcox_test(value~sex_visit, paired = T) %>%
mutate(., p.adj.signif = apply(., 1, \(x) if(is.na(x[10])) "" else if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>%
filter(p.adj.signif != "ns", !is.na(p), !var %in% c("Adipocytes", "Lymphoid immune cells")) %>% # Remove those not significant for KW
add_xy_position(x = "var", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
arrange(desc(y.position)) %>%
mutate(facet = ifelse(var %in% c("SMCs","Mast cells","Lymphatic ECs","Pericytes","Lymphoid immune cells"), "low", "high"))
# Plot
p.high <- plot.dat %>%
filter(facet == "high") %>%
ggplot(aes(variable, value)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "% cells per sample") +
theme(line = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1)) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat %>% filter(facet == "high"), aes(ct, y = 65, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test %>% filter(facet == "high"), label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
guides(fill = "none")
x.cor <- 4
p.low <- plot.dat %>%
filter(facet == "low") %>%
ggplot(aes(variable, value)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "", fill = "") +
theme(line = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1)) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = FALSE, data = sex.stat %>% filter(facet == "low"), aes(ct, y = 15, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test %>% filter(facet == "low") %>% mutate(x = x-x.cor, xmin = xmin-x.cor, xmax = xmax-x.cor), label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 2, hide.ns = FALSE) # Wilcoxon
pp <- plot_grid(plotlist = list(p.high, p.low), rel_widths = c(1,1.5))
pp
size_data <- read.csv("Adipocyte_size.csv", sep = ",", header = TRUE)
sample_colours <- c("#F8B3BB", "#BD6563", "#7D2A25", "#ABD1D4", "#62649E", "#3E4282")
patient_averages <- size_data %>%
group_by(ID, Visit, Sex) %>%
summarise(Average_Diameter = mean(Diameter, na.rm = TRUE)) %>%
ungroup() %>%
mutate(sex_visit = interaction(Visit, Sex) %>%
factor(labels = c("Female Visit 1", "Female Visit 2", "Female Visit 3", "Male Visit 1", "Male Visit 2", "Male Visit 3")))
## `summarise()` has grouped output by 'ID', 'Visit'. You can override using the
## `.groups` argument.
plot <- ggplot(patient_averages, aes(x = sex_visit,
y = Average_Diameter,
fill = sex_visit)) +
geom_boxplot(position = position_dodge(width = 0.75), outlier.shape = NA) +
geom_jitter(size = 1, width = 0.2, alpha = 0.3, show.legend = F) + # Add individual points
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
text = element_text(family = "serif", size = 12),
line = element_blank(),
axis.text.x = element_blank()
) +
scale_fill_manual(values = sample_colours) +
theme(rect = element_rect(fill = "transparent")) +
labs(fill = "", x = "", y = "Diameter (µm)")
plot
# Load the CSV file
CLS <- read.delim("Human_WL_CLS_density_for_plotting.csv", sep = ",")
names(CLS)[names(CLS) == "CLS_density_.number_per_mm3."] <- "CLS_density"
# Define a helper function to map p-values to stars
pval_to_stars <- function(p) {
if (p < 0.0001) {
return("****")
} else if (p < 0.001) {
return("***")
} else if (p < 0.01) {
return("**")
} else if (p < 0.05) {
return("*")
} else {
return("ns") # Not significant
}
}
# Perform a Kruskal-Wallis test separately for each sex
kw_res_F <- kruskal.test(CLS_density ~ Visit, data = CLS %>% filter(Sex == "F"))
kw_res_M <- kruskal.test(CLS_density ~ Visit, data = CLS %>% filter(Sex == "M"))
# Extract Kruskal-Wallis p-values
kw_pval_F <- kw_res_F$p.value
kw_pval_M <- kw_res_M$p.value
# Perform paired Wilcoxon tests for each sex with Holm's correction
paired_wilcox <- function(CLS, sex_group) {
data_sex <- CLS %>% filter(Sex == sex_group)
pairwise_tests <- list(
visit1_vs_visit2 = wilcox.test(
x = data_sex %>% filter(Visit == "1") %>% pull(CLS_density),
y = data_sex %>% filter(Visit == "2") %>% pull(CLS_density),
paired = F
)$p.value,
visit1_vs_visit3 = wilcox.test(
x = data_sex %>% filter(Visit == "1") %>% pull(CLS_density),
y = data_sex %>% filter(Visit == "3") %>% pull(CLS_density),
paired = F
)$p.value,
visit2_vs_visit3 = wilcox.test(
x = data_sex %>% filter(Visit == "2") %>% pull(CLS_density),
y = data_sex %>% filter(Visit == "3") %>% pull(CLS_density),
paired = F
)$p.value
)
corrected_pvals <- p.adjust(unlist(pairwise_tests), method = "holm")
return(corrected_pvals)
}
wilcox_pvals_F <- paired_wilcox(CLS, "F")
## Warning in wilcox.test.default(x = data_sex %>% filter(Visit == "1") %>% :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = data_sex %>% filter(Visit == "2") %>% :
## cannot compute exact p-value with ties
wilcox_pvals_M <- paired_wilcox(CLS, "M")
## Warning in wilcox.test.default(x = data_sex %>% filter(Visit == "1") %>% :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = data_sex %>% filter(Visit == "1") %>% :
## cannot compute exact p-value with ties
# Map the Wilcoxon p-values to stars
wilcox_stars_F <- sapply(wilcox_pvals_F, pval_to_stars)
wilcox_stars_M <- sapply(wilcox_pvals_M, pval_to_stars)
plot <- ggplot(CLS, aes(x = interaction(Visit, Sex), y = CLS_density, fill = interaction(Visit, Sex))) +
geom_boxplot(outlier.shape = NA) + # Remove outliers
geom_jitter(color = "grey", size = 1, width = 0.2, alpha = 0.5) + # Add individual points
labs(title = "CLS Density Across Visits and Genders", x = "Visit and Sex", y = "# CLS per mm3") +
theme_bw() +
theme(plot.title = element_text(size = 10)) +
geom_signif(
comparisons = list(
c("1.F", "2.F"),
c("1.F", "3.F"),
c("2.F", "3.F"),
c("1.M", "2.M"),
c("1.M", "3.M"),
c("2.M", "3.M")
),
annotations = c(
wilcox_stars_F["visit1_vs_visit2"],
wilcox_stars_F["visit1_vs_visit3"],
wilcox_stars_F["visit2_vs_visit3"],
wilcox_stars_M["visit1_vs_visit2"],
wilcox_stars_M["visit1_vs_visit3"],
wilcox_stars_M["visit2_vs_visit3"]
),
textsize = 4,
map_signif_level = FALSE
)
# Add Kruskal-Wallis p-values as text
plot <- plot +
annotate("text", x = 1, y = max(CLS$CLS_density) * 1.1,
label = paste("KW Female p-value:", format(kw_pval_F, digits = 3)),
hjust = 0, size = 3) +
annotate("text", x = 4, y = max(CLS$CLS_density) * 1.1,
label = paste("KW Male p-value:", format(kw_pval_M, digits = 3)),
hjust = 0, size = 3) +
scale_fill_manual(values = sample_colours)
plot
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
size_data <- read.csv("2D_histology_averaged_data_output.csv",sep = ",", header = TRUE)
condition_levels <- c("Female Visit 1", "Female Visit 2", "Female Visit 3", "Male Visit 1", "Male Visit 2", "Male Visit 3")
# 1. Calculate per-patient averages for each visit
patient_averages <- size_data %>%
group_by(Donor, Visit, Sex) %>%
summarise(Vascular_Count = mean(Vascular_count_per_area, na.rm = TRUE)) %>%
ungroup() %>%
mutate(sex_visit = interaction(Visit, Sex) %>%
factor(labels = c("Female Visit 1", "Female Visit 2", "Female Visit 3", "Male Visit 1", "Male Visit 2", "Male Visit 3")))
## `summarise()` has grouped output by 'Donor', 'Visit'. You can override using
## the `.groups` argument.
# 3. Create the boxplot
plot <- ggplot(patient_averages, aes(x = sex_visit, y = Vascular_Count, fill = sex_visit)) +
geom_boxplot(position = position_dodge(width = 0.75), outlier.shape = NA) +
geom_jitter(size = 1, width = 0.2, alpha = 0.3) + # Add individual points
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
text = element_text(family = "serif", size = 12),
axis.text.x = element_blank(),
line = element_blank()
) +
scale_fill_manual(values = sample_colours) +
labs(x = "", fill = "", y = "Blood vessels per m2")
plot
con <- qread("con_vascular.qs", nthreads = 10)
anno.vascular <- qread("anno_vascular.qs")
# For 2d,e
cm.bulk <- read.delim("Bulk_norm_counts.txt", header = T) %>%
.[match(unique(.$hgnc_symbol), .$hgnc_symbol), ] %>%
`rownames<-`(.$hgnc_symbol) %>%
select(-hgnc_symbol, -ensembl_gene_id, -entrezgene_id) %>%
scale()
colnames(cm.bulk) %<>%
gsub("D", "Donor_", .) %>%
gsub("V", "vis", .)
# Metadata
metadata.all <- read.delim("Meta_Data_WL_Select.txt", h=T, dec = ",") %>%
mutate(sex = factor(sex, labels = c("Female", "Male")),
donor = gsub("D", "Donor_", recordid) %>%
gsub("V", "vis", .))
con$plotGraph(groups = anno.vascular,
plot.na = FALSE,
size = 0.2,
alpha = 1,
embedding = "UMAP",
shuffle.colors = TRUE,
show.labels = TRUE,
font.size = 5) +
labs(x = "UMAP1", y = "UMAP2") +
theme(line = element_blank()) +
scale_colour_manual(values = RColorBrewer::brewer.pal(9, "Purples")[-c(1,2)][c(1,4,7,2,6,3,5)])
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
cm.merged <- con$getJointCountMatrix() %>%
.[rownames(.) %in% names(anno.vascular), ]
c("PCSK5", "NEBL", "FN1", "CADM2", "BTNL9", "CEACAM1", "IL1R1", "VCAN", "ACKR1", "PKHD1L1", "MMRN1", "RELN", "MYH11", "ACTA2", "MYOCD", "COL25A1", "STEAP4", "NCKAP5") %>%
sccore::dotPlot(.,
cm.merged,
anno.vascular %>% factor(levels = c("arEC", "cEC", "cvEC", "vEC", "lEC", "SMCs", "Pericytes")),
gene.order = .,
cols = c("white","purple3"))
Here, we’re plotting proportions of total cells, so we load the Conos object and the annotation for all cells first.
con.major <- qread("con_major.qs", nthreads = 10)
anno.major <- qread("anno_major.qs")
# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major,
cell.groups = anno.major,
ref.level = "vis2",
target.level = "vis3",
sample.groups = con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
gsub(1, 2, .) %>%
`names<-`(con.major$samples %>%
names()),
sample.groups.palette = ggsci::pal_jama()(7))
cao$sample.groups <- con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
`names<-`(con.major$samples %>%
names()) %>%
gsub("vis", "Visit ", .)
anno.comb <- anno.major[!names(anno.major) %in% names(anno.vascular)] %>%
{factor(c(., anno.vascular))}
p <- cao$plotCellGroupSizes(cell.groups = anno.comb,
show.significance = FALSE,
filter.empty.cell.types = FALSE)
plot.dat <- p$data %>%
filter(variable %in% levels(anno.vascular)) %>%
mutate(sex = rep(meta$sex, anno.vascular %>% levels() %>% length())) %>%
mutate(sex_visit = paste(sex, group, sep = " ") %>% gsub("Vis", "vis", .),
facet = ifelse(variable %in% c("cvEC", "lEC"), "low", "high"),
variable = factor(variable, levels = c("arEC", "cEC", "vEC", "SMCs", "Pericytes", "cvEC", "lEC")) %>%
renameAnnotation("lEC", " lEC"))
# Kruskal-Wallis
sex.stat <- plot.dat %$%
split(., sex) %>%
lapply(\(x) split(x, x$variable)) %>%
lapply(lapply, \(x) kruskal.test(x$value, g = x$group)$p.value) %>%
lapply(sapply, gtools::stars.pval) %>%
{lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>%
`names<-`(c("Female","Male")) %>%
lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>%
{lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>%
bind_rows() %>%
mutate(ct = factor(ct, levels = unique(plot.dat$variable))) %>%
arrange(ct) %>%
mutate(sig = gsub(".", "", sig, fixed = T),
facet = ifelse(ct %in% c("cvEC", " lEC"), "low", "high"))
# Wilcoxon
stat.test <- plot.dat %>%
dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
group_by(var, sex) %>%
wilcox_test(value~sex_visit, paired = T) %>%
filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
add_xy_position(x = "var", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
arrange(desc(y.position)) %>%
mutate(y.position = c(y.position[1]+1, y.position[2], y.position[3]+1, y.position[4], y.position[5]+1, y.position[6:7]), # Manually adjust overlapping lines
facet = ifelse(var %in% c("cvEC", " lEC"), "low", "high"))
# Plot
p.high <- ggplot(plot.dat %>% filter(facet == "high"), aes(variable, value)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "% cells per sample") +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat %>% filter(facet == "high"), aes(ct, y = 22.5, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test %>% filter(facet == "high"), label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
guides(fill = "none")
p.low <- ggplot(plot.dat %>% filter(facet == "low"), aes(variable, value)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "", fill = "") +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat %>% filter(facet == "low"), aes(ct, y = 4.5, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test %>% filter(facet == "low"), label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) # Wilcoxon
plot_grid(plotlist = list(p.high, p.low), rel_widths = c(1,0.8))
Preparations
# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major,
cell.groups = anno.major,
ref.level = "vis2",
target.level = "vis3",
sample.groups = con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
gsub(1, 2, .) %>%
`names<-`(con.major$samples %>%
names()),
sample.groups.palette = ggsci::pal_jama()(7))
cao$sample.groups <- con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
`names<-`(con.major$samples %>%
names()) %>%
gsub("vis", "Visit ", .)
anno.comb <- anno.major[!names(anno.major) %in% names(anno.vascular)] %>%
{factor(c(., anno.vascular))}
Here, we’re showing the preparations to create models for prediction. This takes some time and is not run again.
# Get markers
markers.vascular.all <- con.major$getDifferentialGenes(groups = anno.comb, z.threshold = 1, upregulated.only = T, verbose = T)
# vEC
# Get proportions
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>%
{100 * ./rowSums(.)} %>%
as.data.frame() %$%
setNames(vEC, rownames(.))
# Extract markers. Please note, the marker metrics may change a little in each iteration
markers.all <- markers.vascular.all$vEC %>%
filter(Specificity > 0.9,
AUC > 0.65) %>%
arrange(desc(AUC)) %>%
pull(Gene)
# Subset bulk data
cm.bulk.mat <- cm.bulk %>%
as.data.frame() %>%
filter(rownames(.) %in% markers.all) %>%
as.matrix() %>%
t()
# Now, train a final model on all data for visualization and gene selection
cm.test <- cm.bulk.mat %>%
.[rownames(.) %in% names(proportions), ]
lasso_cv <- cv.glmnet(cm.test, proportions, alpha = 1, nfolds = 40)
best_lambda <- lasso_cv$lambda.min
lasso_model <- glmnet(cm.test, proportions, alpha = 1, lambda = best_lambda)
# Predict on all samples using the final model
lasso.predict.all <- predict(lasso_model, newx = cm.test, s = best_lambda)
# Save model
list(lasso_cv = lasso_cv,
lasso_model = lasso_model) %>%
qsave("vEC_model.qs")
# cEC
## Get proportions
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>%
{100 * ./rowSums(.)} %>%
as.data.frame() %$%
setNames(cEC, rownames(.))
## Markers
markers.all <- markers.vacsular.all$cEC %>%
filter(Specificity > 0.9,
AUC > 0.6) %>%
arrange(desc(AUC)) %>%
pull(Gene)
## Subset bulk data
cm.bulk.mat <- cm.bulk %>%
as.data.frame() %>%
filter(rownames(.) %in% markers.all) %>%
as.matrix() %>%
t()
# Now, train a final model on all data for visualization and gene selection
cm.test <- cm.bulk.mat %>%
.[rownames(.) %in% names(proportions), ]
lasso_cv <- cv.glmnet(cm.test, proportions, alpha = 1, nfolds = 40)
best_lambda <- lasso_cv$lambda.min
lasso_model <- glmnet(cm.test, proportions, alpha = 1, lambda = best_lambda)
# Predict on all samples using the final model
lasso.predict.all <- predict(lasso_model, newx = cm.test, s = best_lambda)
# Save model
list(lasso_cv = lasso_cv,
lasso_model = lasso_model) %>%
qsave("cEC_model.qs")
# Load and prepare
tmp <- qread("cEC_model.qs")
lasso_model <- tmp$lasso_model
best_lambda <- tmp$lasso_cv$lambda.min
cm.bulk.mat <- cm.bulk %>%
as.data.frame() %>%
filter(rownames(.) %in% rownames(coef(lasso_model))) %>%
as.matrix() %>%
t()
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>%
{100 * ./rowSums(.)} %>%
as.data.frame() %$%
setNames(cEC, rownames(.))
## Predict proportions
proportions.bulk <- predict(lasso_model, newx = cm.bulk.mat, s = best_lambda)
## Prepare plot
plot.df <- proportions.bulk %>%
as.data.frame() %>%
mutate(donor = rownames(.)) %>%
mutate(visit = strsplit(donor, "_") %>% sget(3),
sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>%
mutate(sex_visit = paste(sex, visit, sep = "_"))
trend.trained <- proportions.bulk %>%
as.data.frame() %>%
mutate(donor = rownames(.)) %>%
filter(donor %in% names(proportions)) %>%
mutate(visit = strsplit(donor, "_") %>% sget(3),
sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>%
group_by(visit, sex) %>%
summarize(s1 = median(s1)) %>%
mutate(sex_visit = paste(sex, visit, sep = "_"))
## `summarise()` has grouped output by 'visit'. You can override using the
## `.groups` argument.
trend.untrained <- proportions.bulk %>%
as.data.frame() %>%
mutate(donor = rownames(.)) %>%
filter(!donor %in% names(proportions)) %>%
mutate(visit = strsplit(donor, "_") %>% sget(3),
sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>%
group_by(visit, sex) %>%
summarize(s1 = median(s1)) %>%
mutate(sex_visit = paste(sex, visit, sep = "_"))
## `summarise()` has grouped output by 'visit'. You can override using the
## `.groups` argument.
plot.df %>%
ggplot() +
geom_boxplot(mapping = aes(sex_visit, s1, fill = sex_visit), outliers = F) +
geom_jitter(mapping = aes(sex_visit, s1, col = sex_visit), alpha = 0.3, col = "black", width = 0.3) +
theme_bw() +
labs(y = "Predicted proportions (%)", title = "Predicted vECs proportions", x = "", col = "Median trends", fill = "") +
geom_line(mapping = aes(x = sex_visit, y = s1, group = sex, col = "Trained data"), data = trend.trained, linewidth = 1) +
geom_line(mapping = aes(x = sex_visit, y = s1, group = sex, col = "Untrained data"), data = trend.untrained, linewidth = 1) +
scale_color_manual(values = c("Trained data" = "black", "Untrained data" = "grey40")) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
theme(line = element_blank(),
axis.text = element_text(color = "black"),
axis.text.x = element_blank())
Statistics
plot.df %>%
filter(sex == "Female") %>%
rstatix::kruskal_test(s1 ~ visit)
## # A tibble: 1 × 6
## .y. n statistic df p method
## * <chr> <int> <dbl> <int> <dbl> <chr>
## 1 s1 63 26.1 2 0.00000212 Kruskal-Wallis
plot.df %>%
filter(sex == "Female") %>%
rstatix::wilcox_test(s1 ~ visit)
## # A tibble: 3 × 9
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 s1 vis1 vis2 21 21 240 0.636 0.636 ns
## 2 s1 vis1 vis3 21 21 43 0.00000129 0.00000387 ****
## 3 s1 vis2 vis3 21 21 48 0.00000302 0.00000604 ****
plot.df %>%
filter(sex == "Male") %>%
rstatix::kruskal_test(s1 ~ visit)
## # A tibble: 1 × 6
## .y. n statistic df p method
## * <chr> <int> <dbl> <int> <dbl> <chr>
## 1 s1 30 11.9 2 0.00263 Kruskal-Wallis
plot.df %>%
filter(sex == "Male") %>%
rstatix::wilcox_test(s1 ~ visit)
## # A tibble: 3 × 9
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 s1 vis1 vis2 10 10 59 0.529 0.529 ns
## 2 s1 vis1 vis3 10 10 15 0.007 0.014 *
## 3 s1 vis2 vis3 10 10 8 0.000725 0.002 **
# Load and prepare
tmp <- qread("vEC_model.qs")
lasso_model <- tmp$lasso_model
best_lambda <- tmp$lasso_cv$lambda.min
cm.bulk.mat <- cm.bulk %>%
as.data.frame() %>%
filter(rownames(.) %in% rownames(coef(lasso_model))) %>%
as.matrix() %>%
t()
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>%
{100 * ./rowSums(.)} %>%
as.data.frame() %$%
setNames(vEC, rownames(.))
## Predict proportions
proportions.bulk <- predict(lasso_model, newx = cm.bulk.mat, s = best_lambda)
plot.df <- proportions.bulk %>%
as.data.frame() %>%
mutate(donor = rownames(.)) %>%
mutate(visit = strsplit(donor, "_") %>% sget(3),
sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>%
mutate(sex_visit = paste(sex, visit, sep = "_"))
trend.trained <- proportions.bulk %>%
as.data.frame() %>%
mutate(donor = rownames(.)) %>%
filter(donor %in% names(proportions)) %>%
mutate(visit = strsplit(donor, "_") %>% sget(3),
sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>%
group_by(visit, sex) %>%
summarize(s1 = median(s1)) %>%
mutate(sex_visit = paste(sex, visit, sep = "_"))
## `summarise()` has grouped output by 'visit'. You can override using the
## `.groups` argument.
trend.untrained <- proportions.bulk %>%
as.data.frame() %>%
mutate(donor = rownames(.)) %>%
filter(!donor %in% names(proportions)) %>%
mutate(visit = strsplit(donor, "_") %>% sget(3),
sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>%
group_by(visit, sex) %>%
summarize(s1 = median(s1)) %>%
mutate(sex_visit = paste(sex, visit, sep = "_"))
## `summarise()` has grouped output by 'visit'. You can override using the
## `.groups` argument.
plot.df %>%
ggplot() +
geom_boxplot(mapping = aes(sex_visit, s1, fill = sex_visit), outliers = F) +
geom_jitter(mapping = aes(sex_visit, s1, col = sex_visit), alpha = 0.3, col = "black", width = 0.3) +
theme_bw() +
labs(y = "Predicted proportions (%)", title = "Predicted cECs proportions", x = "", col = "Median trends", fill = "") +
geom_line(mapping = aes(x = sex_visit, y = s1, group = sex, col = "Trained data"), data = trend.trained, linewidth = 1) +
geom_line(mapping = aes(x = sex_visit, y = s1, group = sex, col = "Untrained data"), data = trend.untrained, linewidth = 1) +
scale_color_manual(values = c("Trained data" = "black", "Untrained data" = "grey40")) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
theme(line = element_blank(),
axis.text = element_text(color = "black"),
axis.text.x = element_blank())
Statistics
plot.df %>%
filter(sex == "Female") %>%
rstatix::kruskal_test(s1 ~ visit)
## # A tibble: 1 × 6
## .y. n statistic df p method
## * <chr> <int> <dbl> <int> <dbl> <chr>
## 1 s1 63 28.6 2 0.000000605 Kruskal-Wallis
plot.df %>%
filter(sex == "Female") %>%
rstatix::wilcox_test(s1 ~ visit)
## # A tibble: 3 × 9
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 s1 vis1 vis2 21 21 221 1 1 ns
## 2 s1 vis1 vis3 21 21 40 0.000000754 0.00000151 ****
## 3 s1 vis2 vis3 21 21 34 0.00000024 0.00000072 ****
plot.df %>%
filter(sex == "Male") %>%
rstatix::kruskal_test(s1 ~ visit)
## # A tibble: 1 × 6
## .y. n statistic df p method
## * <chr> <int> <dbl> <int> <dbl> <chr>
## 1 s1 30 16.8 2 0.00023 Kruskal-Wallis
plot.df %>%
filter(sex == "Male") %>%
rstatix::wilcox_test(s1 ~ visit)
## # A tibble: 3 × 9
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 s1 vis1 vis2 10 10 50 1 1 ns
## 2 s1 vis1 vis3 10 10 6 0.000325 0.00065 ***
## 3 s1 vis2 vis3 10 10 1 0.0000217 0.0000651 ****
con <- qread("con_myeloid.qs", nthreads = 10)
anno.immune <- qread("anno_immune.qs")
anno.myeloid <- anno.immune %>%
.[!. %in% c("NKT","NK","Tem","Th1","Th","Treg","B-cells")] %>%
factor(levels = c("ATM", "mono/mac", "Early LAM", "LAM", "cDC1", "cDC2", "mDC"))
con$plotGraph(groups = anno.immune[!anno.immune == "B-cells"],
plot.na = FALSE,
size = 0.2,
alpha = 1,
embedding = "largeVis",
show.labels = TRUE,
font.size = 5) +
labs(x = "largeVis1", y = "largeVis2") +
theme(line = element_blank()) +
scale_colour_manual(values = c("#174D86", "#9EC3B5", "#356790", "#ADD0BA", "#628EA0", "#80A9AA", "#719CA5"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
cm.merged <- con$getJointCountMatrix(raw = FALSE) %>%
.[rownames(.) %in% names(anno.myeloid), ]
c("LYVE1", "SELENOP", # ATM
"FCN1","VCAN", # mono/mac
"CYP27A1", "APOE", # Early LAM
"LPL", "TREM2", # LAM
"CLEC9A", "CADM1", # cDC1
"CD1C", "CLEC10A", # cDC2
"CCR7", "LAMP3") %>% # mDC
sccore::dotPlot(.,
cm.merged,
anno.myeloid,
gene.order = .,
cols = c("white","#377EB8"))
Here, we’re plotting proportions of total cells, so we use the Conos object and the annotation for all cells first.
# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major,
cell.groups = anno.major,
ref.level = "vis2",
target.level = "vis3",
sample.groups = con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
gsub(1, 2, .) %>%
`names<-`(con.major$samples %>%
names()),
sample.groups.palette = ggsci::pal_jama()(7))
cao$sample.groups <- con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
`names<-`(con.major$samples %>%
names()) %>%
gsub("vis", "Visit ", .)
anno.comb <- anno.major[!names(anno.major) %in% names(anno.myeloid)] %>%
{factor(c(., anno.myeloid))}
p <- cao$plotCellGroupSizes(cell.groups = anno.comb,
show.significance = FALSE,
filter.empty.cell.types = FALSE)
plot.dat <- p$data %>%
filter(variable %in% levels(anno.myeloid)) %>%
mutate(sex = rep(meta$sex, anno.myeloid %>% levels() %>% length()),
variable = factor(variable)) %>%
mutate(sex_visit = paste(sex, group, sep = " ") %>% gsub("Vis", "vis", .),
facet = ifelse(variable != "ATM", "low", "high"),
variable = factor(variable, levels = c("ATM", "mono/mac", "Early LAM", "LAM", "cDC1", "cDC2", "mDC")) %>%
renameAnnotation("ATM", " ATM"))
# Kruskal-Wallis
sex.stat <- plot.dat %$%
split(., sex) %>%
lapply(\(x) split(x, x$variable)) %>%
lapply(lapply, \(x) kruskal.test(x$value, g = x$group)$p.value) %>%
lapply(sapply, gtools::stars.pval) %>%
{lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>%
`names<-`(c("Female","Male")) %>%
lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>%
{lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>%
bind_rows() %>%
mutate(ct = factor(ct, levels = unique(plot.dat$variable))) %>%
arrange(ct) %>%
mutate(sig = gsub(".", "", sig, fixed = TRUE),
facet = ifelse(ct != " ATM", "low", "high"))
# Wilcoxon
stat.test <- plot.dat %>%
dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
group_by(var, sex) %>%
wilcox_test(value~sex_visit, paired = TRUE) %>%
mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>%
filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
add_xy_position(x = "var", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
arrange(desc(y.position)) %>%
mutate(facet = ifelse(var != " ATM", "low", "high"))
# Plot
p.high <- ggplot(plot.dat %>% filter(facet == "high"), aes(variable, value)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "% cells per sample") +
theme(line = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1)) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat %>% filter(facet == "high"), aes(ct, y = 50, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test %>% filter(facet == "high"), label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
guides(fill = "none")
x.cor = 1
p.low <- ggplot(plot.dat %>% filter(facet == "low"), aes(variable, value)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "", fill = "") +
theme(line = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1)) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = FALSE, data = sex.stat %>% filter(facet == "low"), aes(ct, y = 10, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test %>% filter(facet == "low") %>% mutate(x = x-x.cor, xmin = xmin-x.cor, xmax = xmax-x.cor), label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 1, hide.ns = FALSE) # Wilcoxon
plot_grid(plotlist = list(p.high, p.low), rel_widths = c(1,4.5))
Preparations
# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major,
cell.groups = anno.major,
ref.level = "vis2",
target.level = "vis3",
sample.groups = con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
gsub(1, 2, .) %>%
`names<-`(con.major$samples %>%
names()),
sample.groups.palette = ggsci::pal_jama()(7))
cao$sample.groups <- con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
`names<-`(con.major$samples %>%
names()) %>%
gsub("vis", "Visit ", .)
anno.comb <- anno.major[!names(anno.major) %in% names(anno.immune)] %>%
{factor(c(., anno.immune))}
Here, we’re showing the preparations to create models for prediction. This takes some time and is not run again.
# Get markers
markers.immune.all <- con.major$getDifferentialGenes(groups = anno.comb, z.threshold = 1, upregulated.only = T, verbose = T)
# vEC
# Get proportions
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>%
{100 * ./rowSums(.)} %>%
as.data.frame() %$%
setNames(`Early LAM`, rownames(.))
# Extract markers. Please note, the marker metrics may change a little in each iteration
markers.all <- markers.immune.all$`Early LAM` %>%
filter(Specificity > 0.9,
AUC > 0.65) %>%
arrange(desc(AUC)) %>%
pull(Gene)
# Subset bulk data
cm.bulk.mat <- cm.bulk %>%
as.data.frame() %>%
filter(rownames(.) %in% markers.all) %>%
as.matrix() %>%
t()
# Now, train a final model on all data for visualization and gene selection
cm.test <- cm.bulk.mat %>%
.[rownames(.) %in% names(proportions), ]
lasso_cv <- cv.glmnet(cm.test, proportions, alpha = 1, nfolds = 5)
best_lambda <- lasso_cv$lambda.min
lasso_model <- glmnet(cm.test, proportions, alpha = 1, lambda = best_lambda)
# Predict on all samples using the final model
lasso.predict.all <- predict(lasso_model, newx = cm.test, s = best_lambda)
# Save model
list(lasso_cv = lasso_cv,
lasso_model = lasso_model) %>%
qsave("eLAM_model.qs")
# cEC
## Get proportions
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>%
{100 * ./rowSums(.)} %>%
as.data.frame() %$%
setNames(LAM, rownames(.))
## Markers
markers.all <- markers.immune.all$LAM %>%
filter(Specificity > 0.9,
AUC > 0.7) %>%
arrange(desc(AUC)) %>%
pull(Gene)
## Subset bulk data
cm.bulk.mat <- cm.bulk %>%
as.data.frame() %>%
filter(rownames(.) %in% markers.all) %>%
as.matrix() %>%
t()
# Now, train a final model on all data for visualization and gene selection
cm.test <- cm.bulk.mat %>%
.[rownames(.) %in% names(proportions), ]
lasso_cv <- cv.glmnet(cm.test, proportions, alpha = 1, nfolds = 15)
best_lambda <- lasso_cv$lambda.min
lasso_model <- glmnet(cm.test, proportions, alpha = 1, lambda = best_lambda)
# Predict on all samples using the final model
lasso.predict.all <- predict(lasso_model, newx = cm.test, s = best_lambda)
# Save model
list(lasso_cv = lasso_cv,
lasso_model = lasso_model) %>%
qsave("LAM_model.qs")
# Load and prepare
tmp <- qread("eLAM_model.qs")
lasso_model <- tmp$lasso_model
best_lambda <- tmp$lasso_cv$lambda.min
cm.bulk.mat <- cm.bulk %>%
as.data.frame() %>%
filter(rownames(.) %in% rownames(coef(lasso_model))) %>%
as.matrix() %>%
t()
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>%
{100 * ./rowSums(.)} %>%
as.data.frame() %$%
setNames(`Early LAM`, rownames(.))
## Predict proportions
proportions.bulk <- predict(lasso_model, newx = cm.bulk.mat, s = best_lambda)
## Prepare plot
plot.df <- proportions.bulk %>%
as.data.frame() %>%
mutate(donor = rownames(.)) %>%
mutate(visit = strsplit(donor, "_") %>% sget(3),
sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>%
mutate(sex_visit = paste(sex, visit, sep = "_"))
trend.trained <- proportions.bulk %>%
as.data.frame() %>%
mutate(donor = rownames(.)) %>%
filter(donor %in% names(proportions)) %>%
mutate(visit = strsplit(donor, "_") %>% sget(3),
sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>%
group_by(visit, sex) %>%
summarize(s1 = median(s1)) %>%
mutate(sex_visit = paste(sex, visit, sep = "_"))
## `summarise()` has grouped output by 'visit'. You can override using the
## `.groups` argument.
trend.untrained <- proportions.bulk %>%
as.data.frame() %>%
mutate(donor = rownames(.)) %>%
filter(!donor %in% names(proportions)) %>%
mutate(visit = strsplit(donor, "_") %>% sget(3),
sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>%
group_by(visit, sex) %>%
summarize(s1 = median(s1)) %>%
mutate(sex_visit = paste(sex, visit, sep = "_"))
## `summarise()` has grouped output by 'visit'. You can override using the
## `.groups` argument.
plot.df %>%
ggplot() +
geom_boxplot(mapping = aes(sex_visit, s1, fill = sex_visit), outliers = F) +
geom_jitter(mapping = aes(sex_visit, s1, col = sex_visit), alpha = 0.3, col = "black", width = 0.3) +
theme_bw() +
labs(y = "Predicted proportions (%)", title = "Predicted eLAM proportions", x = "", col = "Median trends", fill = "") +
geom_line(mapping = aes(x = sex_visit, y = s1, group = sex, col = "Trained data"), data = trend.trained, linewidth = 1) +
geom_line(mapping = aes(x = sex_visit, y = s1, group = sex, col = "Untrained data"), data = trend.untrained, linewidth = 1) +
scale_color_manual(values = c("Trained data" = "black", "Untrained data" = "grey40")) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
theme(line = element_blank(),
axis.text = element_text(color = "black"),
axis.text.x = element_blank())
Statistics
plot.df %>%
filter(sex == "Female") %>%
rstatix::kruskal_test(s1 ~ visit)
## # A tibble: 1 × 6
## .y. n statistic df p method
## * <chr> <int> <dbl> <int> <dbl> <chr>
## 1 s1 63 21.0 2 0.0000276 Kruskal-Wallis
plot.df %>%
filter(sex == "Female") %>%
rstatix::wilcox_test(s1 ~ visit)
## # A tibble: 3 × 9
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 s1 vis1 vis2 21 21 230 0.823 0.823 ns
## 2 s1 vis1 vis3 21 21 379 0.0000251 0.0000753 ****
## 3 s1 vis2 vis3 21 21 376 0.0000379 0.0000758 ****
plot.df %>%
filter(sex == "Male") %>%
rstatix::kruskal_test(s1 ~ visit)
## # A tibble: 1 × 6
## .y. n statistic df p method
## * <chr> <int> <dbl> <int> <dbl> <chr>
## 1 s1 30 18.4 2 0.0000988 Kruskal-Wallis
plot.df %>%
filter(sex == "Male") %>%
rstatix::wilcox_test(s1 ~ visit)
## # A tibble: 3 × 9
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 s1 vis1 vis2 10 10 59 0.529 0.529 ns
## 2 s1 vis1 vis3 10 10 99 0.0000217 0.0000651 ****
## 3 s1 vis2 vis3 10 10 98 0.0000433 0.0000866 ****
# Load and prepare
tmp <- qread("LAM_model.qs")
lasso_model <- tmp$lasso_model
best_lambda <- tmp$lasso_cv$lambda.min
cm.bulk.mat <- cm.bulk %>%
as.data.frame() %>%
filter(rownames(.) %in% rownames(coef(lasso_model))) %>%
as.matrix() %>%
t()
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>%
{100 * ./rowSums(.)} %>%
as.data.frame() %$%
setNames(LAM, rownames(.))
## Predict proportions
proportions.bulk <- predict(lasso_model, newx = cm.bulk.mat, s = best_lambda)
plot.df <- proportions.bulk %>%
as.data.frame() %>%
mutate(donor = rownames(.)) %>%
mutate(visit = strsplit(donor, "_") %>% sget(3),
sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>%
mutate(sex_visit = paste(sex, visit, sep = "_"))
trend.trained <- proportions.bulk %>%
as.data.frame() %>%
mutate(donor = rownames(.)) %>%
filter(donor %in% names(proportions)) %>%
mutate(visit = strsplit(donor, "_") %>% sget(3),
sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>%
group_by(visit, sex) %>%
summarize(s1 = median(s1)) %>%
mutate(sex_visit = paste(sex, visit, sep = "_"))
## `summarise()` has grouped output by 'visit'. You can override using the
## `.groups` argument.
trend.untrained <- proportions.bulk %>%
as.data.frame() %>%
mutate(donor = rownames(.)) %>%
filter(!donor %in% names(proportions)) %>%
mutate(visit = strsplit(donor, "_") %>% sget(3),
sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>%
group_by(visit, sex) %>%
summarize(s1 = median(s1)) %>%
mutate(sex_visit = paste(sex, visit, sep = "_"))
## `summarise()` has grouped output by 'visit'. You can override using the
## `.groups` argument.
plot.df %>%
ggplot() +
geom_boxplot(mapping = aes(sex_visit, s1, fill = sex_visit), outliers = F) +
geom_jitter(mapping = aes(sex_visit, s1, col = sex_visit), alpha = 0.3, col = "black", width = 0.3) +
theme_bw() +
labs(y = "Predicted proportions (%)", title = "Predicted LAM proportions", x = "", col = "Median trends", fill = "") +
geom_line(mapping = aes(x = sex_visit, y = s1, group = sex, col = "Trained data"), data = trend.trained, linewidth = 1) +
geom_line(mapping = aes(x = sex_visit, y = s1, group = sex, col = "Untrained data"), data = trend.untrained, linewidth = 1) +
scale_color_manual(values = c("Trained data" = "black", "Untrained data" = "grey40")) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
theme(line = element_blank(),
axis.text = element_text(color = "black"),
axis.text.x = element_blank())
Statistics
plot.df %>%
filter(sex == "Female") %>%
rstatix::kruskal_test(s1 ~ visit)
## # A tibble: 1 × 6
## .y. n statistic df p method
## * <chr> <int> <dbl> <int> <dbl> <chr>
## 1 s1 63 20.0 2 0.0000445 Kruskal-Wallis
plot.df %>%
filter(sex == "Female") %>%
rstatix::wilcox_test(s1 ~ visit)
## # A tibble: 3 × 9
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 s1 vis1 vis2 21 21 196 0.55 0.55 ns
## 2 s1 vis1 vis3 21 21 367 0.00012 0.00024 ***
## 3 s1 vis2 vis3 21 21 379 0.0000251 0.0000753 ****
plot.df %>%
filter(sex == "Male") %>%
rstatix::kruskal_test(s1 ~ visit)
## # A tibble: 1 × 6
## .y. n statistic df p method
## * <chr> <int> <dbl> <int> <dbl> <chr>
## 1 s1 30 17.5 2 0.00016 Kruskal-Wallis
plot.df %>%
filter(sex == "Male") %>%
rstatix::wilcox_test(s1 ~ visit)
## # A tibble: 3 × 9
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 s1 vis1 vis2 10 10 54 0.796 0.796 ns
## 2 s1 vis1 vis3 10 10 96 0.00013 0.00026 ***
## 3 s1 vis2 vis3 10 10 99 0.0000217 0.0000651 ****
Preparations
cm.merged <- con$getJointCountMatrix(raw = TRUE) %>%
Matrix::t() %>%
.[, colnames(.) %in% names(anno.myeloid)]
# Calculate DEGs using Cacoa
con.vis13 <- con$samples %>%
.[!grepl("vis2", names(.))] %>%
Conos$new()
sample.groups <- con.vis13$samples %>%
names() %>%
`names<-`(ifelse(grepl("vis1", .), "visit1", "visit3"), .)
cao <- Cacoa$new(con.vis13,
sample.groups,
anno.myeloid,
ref.level = "visit1",
target.level = "visit3",
n.cores = 10)
cao$estimateDEPerCellType(min.cell.frac = 0.1,
min.cell.count = 5)
## Preparing matrices for DE
## Estimating DE per cell type
## DEs not calculated for 2 cell group(s): cDC1, mDC
de <- cao$test.results$de %>%
lapply('[[', "res")
# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[colnames(cm.merged)]
anno.subtype <- anno.myeloid %>%
.[!is.na(.)]
idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())
anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>%
{`names<-`(as.character(.), names(.))}
anno.final <- paste0(anno.donor,"!!",anno.subtype) %>%
`names<-`(anno.donor %>% names()) %>%
.[grepl("ATM|Early LAM|LAM|mono/mac", .)]
# Create pseudo CM
cm.pseudo <- sccore::collapseCellsByType(cm.merged %>% Matrix::t(),
groups = anno.final, min.cell.count = 1) %>%
t() %>%
apply(2, as, "integer")
cm.pseudo %<>%
DESeq2::DESeqDataSetFromMatrix(.,
colnames(.) %>%
strsplit("_|!!") %>%
sget(3) %>%
data.frame() %>%
`dimnames<-`(list(colnames(cm.pseudo), "group")),
design = ~ group) %>%
DESeq2::estimateSizeFactors() %>%
DESeq2::counts(normalized = T)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# Initially, we used this approach to select genes. Since the clusters are not exactly reproducible, here we load the gene lists
# genes <- de[c("ATM","Early LAM","LAM","mono/mac")] %>%
# {`names<-`(lapply(names(.), \(nn) mutate(.[[nn]], ct = nn)), names(.))} %>%
# lapply(filter, padj <= 0.05, abs(log2FoldChange) > 1) %>%
# lapply(pull, Gene) %>%
# {sapply(names(.), \(nn) .[[nn]] %>% `names<-`(., rep(nn, length(.))))} %>%
# Reduce(c, .) %>%
# .[match(unique(.), .)]
genes <- read.delim("Macrophages_DEG_heatmap_genes.tsv") %$%
setNames(gene, cluster) %>%
.[order(names(.))]
idx <- cm.pseudo %>%
colnames() %>%
data.frame(id = .) %>%
mutate(vis = strsplit(id, "_|!!") %>% sget(3),
ct = strsplit(id, "!!") %>% sget(2)) %>%
mutate(ord = order(vis, ct))
x <- cm.pseudo %>%
.[match(genes, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
.[, idx$ord] %>%
Matrix::t() %>%
scale() %>%
Matrix::t()
# Ordering
spc <- con$getDatasetPerCell()
sepc <- meta$sex[match(spc, meta$sample)] %>%
setNames(names(spc))
sex.df <- sepc %>%
{data.frame(nn = names(.), sex = unname(.))} %>%
mutate(nn = strsplit(nn, "!!") %>% sget(1)) %>%
.[match(unique(.$nn), .$nn), ] %>%
filter(!grepl("pool", nn))
ord <- data.frame(Visit = colnames(x) %>%
setNames(colnames(x)) %>%
strsplit("_|!!|vis") %>%
sget(4)) %>%
mutate(.,
donor = rownames(.) %>% strsplit("_vis") %>% sget(1)) %>%
mutate(.,
Sex = sex.df$sex[match(.$donor, sex.df$nn)],
ct = rownames(.) %>% strsplit("!!") %>% sget(2)) %>%
arrange(ct, Visit, Sex) %>%
rownames()
x <- x[, ord]
groups <- colnames(x) %>%
`names<-`(colnames(x))
# Limit scale
x[x > 2] <- 2
x[x < -1.5] <- -1.5
Please note, the order of the clusters is not always reproducible.
# Create top annotation
tannot <- data.frame(Visit = groups[colnames(x)] %>%
strsplit("_|!!|vis") %>%
sget(4)) %>%
mutate(., donor = rownames(.) %>% strsplit("_vis") %>% sget(1)) %>%
mutate(., Sex = sex.df$sex[match(.$donor, sex.df$nn)]) %>%
select(-donor) %>%
{HeatmapAnnotation(df=.,
border=T,
col=list(Visit = ggsci::pal_jama()(7)[c(1:3)] %>% `names<-`(c(1:3)),
Sex = c("darkred","blue3") %>% setNames(c("Female","Male"))),
show_legend=T)}
# Create color palette
pal <- colorRampPalette(c('navy','grey95','firebrick'))(1024)
labeled.gene.subset <- NULL
# Plot
set.seed(1337)
ha <- ComplexHeatmap::Heatmap(x,
name='Expression',
col=pal,
cluster_columns=FALSE,
show_row_names=F,
show_column_names=FALSE,
top_annotation=tannot,
left_annotation=NULL,
border=TRUE,
show_column_dend = FALSE,
show_row_dend = FALSE,
cluster_rows = F,
# row_km = 5, # Only used initially
# row_km_repeats = 500, # Only used initially
row_split = names(genes),
column_split = groups[colnames(x)] %>% strsplit("!!") %>% sapply('[[', 2) %>% unname() %>% factor(levels = c("ATM", "mono/mac", "Early LAM", "LAM")))
ht = draw(ha); row_order(ht) -> cls
Calculate GO enrichment
go <- cls %>%
lapply(\(y) clusterProfiler::enrichGO(rownames(x)[y],
OrgDb = "org.Hs.eg.db",
keyType = "SYMBOL",
ont = "BP",
universe = rownames(cm.merged))) %>%
setNames(names(cls))
##
##
Save order for later
cls %>%
lapply(\(y) rownames(x)[y]) %>%
lapply(data.frame) %>%
lapply(setNames, "gene") %>%
bind_rows(.id = "cluster") %>%
select(gene, cluster) %>%
write.table("Macrophages_DEG_heatmap_genes.tsv", sep = "\t")
Preparations
cts <- c("Early LAM", "LAM", "mono/mac", "ATM")
cm.merged <- con$getJointCountMatrix(raw = TRUE)
# Get markers
markers <- con$getDifferentialGenes(groups = anno.myeloid %>% .[. %in% cts],
z.threshold = 1,
upregulated.only = TRUE,
append.specificity.metrics = TRUE,
append.auc = TRUE)
## Estimating marker genes per sample
## Aggregating marker genes
## Estimating specificity metrics
## All done!
genes <- markers$LAM %>%
filter(AUC > 0.8) %>%
pull(Gene)
# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[rownames(cm.merged)]
anno.subtype <- anno.myeloid[anno.myeloid %in% cts] %>%
.[!is.na(.)] %>%
factor()
idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())
anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>%
{`names<-`(as.character(.), names(.))}
anno.final <- paste0(anno.donor,"!!",anno.subtype) %>%
`names<-`(anno.donor %>% names())
cm.pseudo.tmp <- sccore::collapseCellsByType(cm.merged,
groups = anno.final, min.cell.count = 1) %>%
t() %>%
apply(2, as, "integer")
cm.pseudo <- cm.pseudo.tmp %>%
DESeq2::DESeqDataSetFromMatrix(.,
colnames(.) %>%
strsplit("_|!!") %>%
sget(3) %>%
data.frame() %>%
`dimnames<-`(list(colnames(cm.pseudo.tmp), "group")),
design = ~ group) %>%
DESeq2::estimateSizeFactors() %>%
DESeq2::counts(normalized = TRUE) %>%
t()
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
plot.dat <- cm.pseudo %>%
.[, colnames(.) %in% c("LPL")] %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "sample") %>%
mutate(visit = strsplit(sample, "!!|_") %>%
sget(3) %>%
factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
anno = strsplit(sample, "!!") %>%
sget(2) %>%
factor(levels = c("ATM", "mono/mac", "Early LAM", "LAM")),
donor = strsplit(sample, "!!") %>%
sget(1)) %>%
reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
mutate(sex = meta$sex[match(donor, meta$sample)]) %>%
mutate(sex_visit = paste(sex, visit, sep = " "))
# Kruskal-Wallis
sex.stat <- plot.dat %$%
split(., sex) %>%
lapply(\(x) split(x, x$anno)) %>%
lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>%
lapply(sapply, gtools::stars.pval) %>%
{lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>%
`names<-`(c("Female","Male")) %>%
lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>%
{lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>%
bind_rows() %>%
mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>%
arrange(anno) %>%
mutate(sig = gsub(".", "", sig, fixed = T))
# Wilcoxon
stat.test <- plot.dat %>%
dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
group_by(anno, sex) %>%
wilcox_test(value~sex_visit, paired = F) %>%
mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>%
filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)
plot.dat %>%
ggplot(aes(anno, value)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "Normalized pseudobulk expression", title = "LPL", fill = "") +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.text = element_text(color = "black")) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 500, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) # Wilcoxon
anno <- anno.myeloid %>%
.[. %in% cts] %>%
factor()
cm.clean <- cm.merged %>%
.[rownames(.) %in% names(anno), ]
spc <- con$getDatasetPerCell()
lam.scores <- list(LAM.activity = genes)
seu_obj <- CreateSeuratObject(counts = cm.clean %>% Matrix::t() %>% drop0(),
min.cells = 0,
min.features = 0)
m.scores <- UCell::AddModuleScore_UCell(seu_obj, features = lam.scores, ncores = 32)
plot.df <- reshape2::melt(m.scores$LAM.activity_UCell) %>%
mutate(anno = anno[rownames(.)] %>% factor(levels = c("ATM", "mono/mac", "Early LAM", "LAM")),
visit = spc[rownames(.)] %>% as.character() %>% strsplit("_") %>% sget(3),
donor = spc[rownames(.)] %>% as.character() %>% strsplit("_") %>% sapply(\(x) paste(x[1], x[2], sep = "_")),
donor_vis = spc[rownames(.)]) %>%
mutate(sex = meta$sex[match(donor_vis, meta$sample)],
donor_anno = paste(donor, anno, sep = "-")) %>%
group_by(sex, visit, donor_anno, donor, anno) %>%
summarize(value = mean(value)) %>%
ungroup() %>%
filter(visit != "vis2") %$%
split(., visit) %>%
{lapply(., filter, donor_anno %in% .[[2]]$donor_anno)} %>%
{rbind(data.frame(.[[2]], diff = .[[2]]$value - .[[1]]$value))}
## `summarise()` has grouped output by 'sex', 'visit', 'donor_anno', 'donor'. You
## can override using the `.groups` argument.
plot.df %>%
ggplot(aes(anno, diff, fill = sex)) +
geom_boxplot() +
geom_point(aes(col = sex), alpha = 0.3, position = position_dodge(width = 0.8), col = "black") +
geom_hline(yintercept = 0, color = "black") +
theme_bw() +
labs(x = "", y = "Mean module score difference (V3 - V1)", title = "LAM signature post-surgery weight loss", fill = "", col = "") +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.text = element_text(color = "black")) +
scale_fill_manual(values = c("darkred", "blue3"))
con <- qread("con_aspc.qs", nthreads = 10)
anno.aspc <- qread("anno_aspc.qs")
con$plotGraph(groups = anno.aspc,
plot.na = FALSE,
size = 0.2,
alpha = 1,
embedding = "largeVis",
show.labels = TRUE,
font.size = 5) +
labs(x = "largeVis1", y = "largeVis2") +
theme(line = element_blank()) +
scale_colour_manual(values = RColorBrewer::brewer.pal(5, "Greens")[-1][c(2,1,3,4)])
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
cm.merged <- con$getJointCountMatrix()
c("DPP4", "SEMA3C", "FBN1", # DPP4
"CXCL14", "CFD", "C3", # CXCL14
"PPARG", "ACACB", "SEMA3A", # PPARG
"EPHA3", "MEOX2", "IGFBP7" # EPHA3
) %>%
sccore::dotPlot(.,
cm.merged,
anno.aspc %>% factor(levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")),
gene.order = .,
cols = c("white","green4"))
Here, we’re plotting proportions of total cells, so we use the Conos object and the annotation for all cells.
# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major,
cell.groups = anno.major,
ref.level = "vis2",
target.level = "vis3",
sample.groups = con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
gsub(1, 2, .) %>%
`names<-`(con.major$samples %>%
names()),
sample.groups.palette = ggsci::pal_jama()(7))
cao$sample.groups <- con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
`names<-`(con.major$samples %>%
names()) %>%
gsub("vis", "Visit ", .)
anno.comb <- anno.major[!names(anno.major) %in% names(anno.aspc)] %>%
{factor(c(., anno.aspc))}
p <- cao$plotCellGroupSizes(cell.groups = anno.comb,
show.significance = FALSE,
filter.empty.cell.types = FALSE)
plot.dat <- p$data %>%
filter(variable %in% levels(anno.aspc)) %>%
mutate(sex = rep(meta$sex, anno.aspc %>% levels() %>% length()),
variable = factor(variable)) %>%
mutate(sex_visit = paste(sex, group, sep = " ") %>% gsub("Vis", "vis", .),
variable = factor(variable, levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")))
# Kruskal-Wallis
sex.stat <- plot.dat %$%
split(., sex) %>%
lapply(\(x) split(x, x$variable)) %>%
lapply(lapply, \(x) kruskal.test(x$value, g = x$group)$p.value) %>%
lapply(sapply, gtools::stars.pval) %>%
{lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>%
`names<-`(c("Female","Male")) %>%
lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>%
{lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>%
bind_rows() %>%
mutate(ct = factor(ct, levels = unique(plot.dat$variable))) %>%
arrange(ct) %>%
mutate(sig = gsub(".", "", sig, fixed = T))
# Wilcoxon
stat.test <- plot.dat %>%
dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
group_by(var, sex) %>%
wilcox_test(value~sex_visit, paired = T)
stat.test[1, "p.adj.signif"] <- "."
stat.test[15, "p.adj.signif"] <- "."
stat.test %<>%
filter(p.adj.signif != "ns", !is.na(p), !var %in% c("ASPC_CXCL14")) %>% # Remove those not significant for KW
add_xy_position(x = "var", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
arrange(desc(y.position))
# Plot
ggplot(plot.dat, aes(variable, value)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "% cells per sample", fill = "") +
theme(line = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1)) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat, aes(ct, y = 20, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 2, hide.ns = F) # Wilcoxon
Preparations
# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major,
cell.groups = anno.major,
ref.level = "vis2",
target.level = "vis3",
sample.groups = con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
gsub(1, 2, .) %>%
`names<-`(con.major$samples %>%
names()),
sample.groups.palette = ggsci::pal_jama()(7))
cao$sample.groups <- con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
`names<-`(con.major$samples %>%
names()) %>%
gsub("vis", "Visit ", .)
anno.comb <- anno.major[!names(anno.major) %in% names(anno.aspc)] %>%
{factor(c(., anno.aspc))}
Here, we’re showing the preparations to create models for prediction. This takes some time and is not run again.
# Get markers
markers.aspc.all <- con.major$getDifferentialGenes(groups = anno.comb, z.threshold = 1, upregulated.only = T, verbose = T)
# Get proportions
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>%
{100 * ./rowSums(.)} %>%
as.data.frame() %$%
setNames(`Early LAM`, rownames(.))
# Extract markers. Please note, the marker metrics may change a little in each iteration
markers.all <- markers.aspc.all$ASPC_DPP4 %>%
filter(PAdj < 0.05,
Specificity > 0.7) %>%
arrange(desc(AUC)) %>%
pull(Gene)
# Subset bulk data
cm.bulk.mat <- cm.bulk %>%
as.data.frame() %>%
filter(rownames(.) %in% markers.all) %>%
as.matrix() %>%
t()
# Now, train a final model on all data for visualization and gene selection
cm.test <- cm.bulk.mat %>%
.[rownames(.) %in% names(proportions), ]
lasso_cv <- cv.glmnet(cm.test, proportions, alpha = 1, nfolds = 5)
best_lambda <- lasso_cv$lambda.min
lasso_model <- glmnet(cm.test, proportions, alpha = 1, lambda = best_lambda)
# Predict on all samples using the final model
lasso.predict.all <- predict(lasso_model, newx = cm.test, s = best_lambda)
# Save model
list(lasso_cv = lasso_cv,
lasso_model = lasso_model) %>%
qsave("DPP4_model.qs")
# Load and prepare
tmp <- qread("DPP4_model.qs")
lasso_model <- tmp$lasso_model
best_lambda <- tmp$lasso_cv$lambda.min
cm.bulk.mat <- cm.bulk %>%
as.data.frame() %>%
filter(rownames(.) %in% rownames(coef(lasso_model))) %>%
as.matrix() %>%
t()
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>%
{100 * ./rowSums(.)} %>%
as.data.frame() %$%
setNames(ASPC_DPP4, rownames(.))
## Predict proportions
proportions.bulk <- predict(lasso_model, newx = cm.bulk.mat, s = best_lambda)
## Prepare plot
plot.df <- proportions.bulk %>%
as.data.frame() %>%
mutate(donor = rownames(.)) %>%
mutate(visit = strsplit(donor, "_") %>% sget(3),
sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>%
mutate(sex_visit = paste(sex, visit, sep = "_"))
trend.trained <- proportions.bulk %>%
as.data.frame() %>%
mutate(donor = rownames(.)) %>%
filter(donor %in% names(proportions)) %>%
mutate(visit = strsplit(donor, "_") %>% sget(3),
sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>%
group_by(visit, sex) %>%
summarize(s1 = median(s1)) %>%
mutate(sex_visit = paste(sex, visit, sep = "_"))
## `summarise()` has grouped output by 'visit'. You can override using the
## `.groups` argument.
trend.untrained <- proportions.bulk %>%
as.data.frame() %>%
mutate(donor = rownames(.)) %>%
filter(!donor %in% names(proportions)) %>%
mutate(visit = strsplit(donor, "_") %>% sget(3),
sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>%
group_by(visit, sex) %>%
summarize(s1 = median(s1)) %>%
mutate(sex_visit = paste(sex, visit, sep = "_"))
## `summarise()` has grouped output by 'visit'. You can override using the
## `.groups` argument.
plot.df %>%
ggplot() +
geom_boxplot(mapping = aes(sex_visit, s1, fill = sex_visit), outliers = F) +
geom_jitter(mapping = aes(sex_visit, s1, col = sex_visit), alpha = 0.3, col = "black", width = 0.3) +
theme_bw() +
labs(y = "Predicted proportions (%)", title = "Predicted ASPC_DPP4 proportions", x = "", col = "Median trends", fill = "") +
geom_line(mapping = aes(x = sex_visit, y = s1, group = sex, col = "Trained data"), data = trend.trained, linewidth = 1) +
geom_line(mapping = aes(x = sex_visit, y = s1, group = sex, col = "Untrained data"), data = trend.untrained, linewidth = 1) +
scale_color_manual(values = c("Trained data" = "black", "Untrained data" = "grey40")) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
theme(line = element_blank(),
axis.text = element_text(color = "black"),
axis.text.x = element_blank())
Statistics
plot.df %>%
filter(sex == "Female") %>%
rstatix::kruskal_test(s1 ~ visit)
## # A tibble: 1 × 6
## .y. n statistic df p method
## * <chr> <int> <dbl> <int> <dbl> <chr>
## 1 s1 63 16.6 2 0.000245 Kruskal-Wallis
plot.df %>%
filter(sex == "Female") %>%
rstatix::wilcox_test(s1 ~ visit)
## # A tibble: 3 × 9
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 s1 vis1 vis2 21 21 70 0.000073 0.000219 ***
## 2 s1 vis1 vis3 21 21 106 0.003 0.007 **
## 3 s1 vis2 vis3 21 21 280 0.139 0.139 ns
plot.df %>%
filter(sex == "Male") %>%
rstatix::kruskal_test(s1 ~ visit)
## # A tibble: 1 × 6
## .y. n statistic df p method
## * <chr> <int> <dbl> <int> <dbl> <chr>
## 1 s1 30 8.29 2 0.0159 Kruskal-Wallis
plot.df %>%
filter(sex == "Male") %>%
rstatix::wilcox_test(s1 ~ visit)
## # A tibble: 3 × 9
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 s1 vis1 vis2 10 10 14 0.005 0.016 *
## 2 s1 vis1 vis3 10 10 35 0.28 0.28 ns
## 3 s1 vis2 vis3 10 10 75 0.063 0.126 ns
Preparations
cm.merged <- con$getJointCountMatrix(raw = TRUE) %>%
Matrix::t() %>%
.[, colnames(.) %in% names(anno.aspc)]
# Calculate DEGs using Cacoa
con.vis13 <- con$samples %>%
.[!grepl("vis2", names(.))] %>%
Conos$new()
sample.groups <- con.vis13$samples %>%
names() %>%
`names<-`(ifelse(grepl("vis1", .), "visit1", "visit3"), .)
cao <- Cacoa$new(con.vis13,
sample.groups,
anno.aspc,
ref.level = "visit1",
target.level = "visit3",
n.cores = 10)
cao$estimateDEPerCellType()
## Preparing matrices for DE
## Estimating DE per cell type
de <- cao$test.results$de %>%
lapply('[[', "res")
# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[colnames(cm.merged)]
anno.subtype <- anno.aspc %>%
.[!is.na(.)]
idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())
anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>%
{`names<-`(as.character(.), names(.))}
anno.final <- paste0(anno.donor,"!!",anno.subtype) %>%
`names<-`(anno.donor %>% names())
# Create pseudo CM
cm.pseudo <- sccore::collapseCellsByType(cm.merged %>% Matrix::t(),
groups = anno.final, min.cell.count = 1) %>%
t() %>%
apply(2, as, "integer")
cm.pseudo %<>%
DESeq2::DESeqDataSetFromMatrix(.,
colnames(.) %>%
strsplit("_|!!") %>%
sget(3) %>%
data.frame() %>%
`dimnames<-`(list(colnames(cm.pseudo), "group")),
design = ~ group) %>%
DESeq2::estimateSizeFactors() %>%
DESeq2::counts(normalized = T)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# See comment for Figure 3f
# genes <- de %>%
# {`names<-`(lapply(names(.), \(nn) mutate(.[[nn]], ct = nn)), names(.))} %>%
# lapply(filter, padj <= 0.05, abs(log2FoldChange) > 1) %>%
# lapply(pull, Gene) %>%
# {sapply(names(.), \(nn) .[[nn]] %>% `names<-`(., rep(nn, length(.))))} %>%
# Reduce(c, .) %>%
# .[match(unique(.), .)]
genes <- read.delim("ASPC_DEG_heatmap_genes.tsv") %$%
setNames(gene, cluster) %>%
.[order(names(.))]
idx <- cm.pseudo %>%
colnames() %>%
data.frame(id = .) %>%
mutate(vis = strsplit(id, "_|!!") %>% sget(3),
ct = strsplit(id, "!!") %>% sget(2)) %>%
mutate(ord = order(vis, ct))
x <- cm.pseudo %>%
.[match(genes, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
.[, idx$ord] %>%
Matrix::t() %>%
scale() %>%
Matrix::t()
# Ordering
spc <- con$getDatasetPerCell()
sepc <- meta$sex[match(spc, meta$sample)] %>%
setNames(names(spc))
sex.df <- sepc %>%
{data.frame(nn = names(.), sex = unname(.))} %>%
mutate(nn = strsplit(nn, "!!") %>% sget(1)) %>%
.[match(unique(.$nn), .$nn), ] %>%
filter(!grepl("pool", nn))
ord <- data.frame(Visit = colnames(x) %>%
setNames(colnames(x)) %>%
strsplit("_|!!|vis") %>%
sget(4)) %>%
mutate(.,
donor = rownames(.) %>% strsplit("_vis") %>% sget(1)) %>%
mutate(.,
Sex = sex.df$sex[match(.$donor, sex.df$nn)],
ct = rownames(.) %>% strsplit("!!") %>% sget(2)) %>%
arrange(ct, Visit, Sex) %>%
rownames()
x <- x[, ord]
groups <- colnames(x) %>%
`names<-`(colnames(x))
# Limit scale
x[x > 2] <- 2
x[x < -1] <- -1
Please note, the order of the clusters is not always reproducible.
# Create top annotation
tannot <- data.frame(Visit = groups[colnames(x)] %>%
strsplit("_|!!|vis") %>%
sget(4)) %>%
mutate(., donor = rownames(.) %>% strsplit("_vis") %>% sget(1)) %>%
mutate(., Sex = sex.df$sex[match(.$donor, sex.df$nn)]) %>%
select(-donor) %>%
{HeatmapAnnotation(df=.,
border=TRUE,
col=list(Visit = ggsci::pal_jama()(7)[c(1:3)] %>% `names<-`(c(1:3)),
Sex = c("darkred","blue3") %>% setNames(c("Female","Male"))),
show_legend=TRUE)}
# Create color palette
pal <- colorRampPalette(c('navy','grey95','firebrick'))(1024)
labeled.gene.subset <- NULL
# Plot
set.seed(1337)
ha <- ComplexHeatmap::Heatmap(x,
name='Expression',
col=pal,
cluster_columns=FALSE,
show_row_names=FALSE,
show_column_names=FALSE,
top_annotation=tannot,
left_annotation=NULL,
border=TRUE,
show_column_dend = FALSE,
show_row_dend = FALSE,
# row_km = 3, # Only used initially
# row_km_repeats = 100, # Only used initially
column_split = groups[colnames(x)] %>% strsplit("!!") %>% sapply('[[', 2) %>% unname() %>% factor(levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")),
row_split = names(genes))
ht = draw(ha); row_order(ht) -> cls
Calculate GO enrichment
go <- cls %>%
lapply(\(y) clusterProfiler::enrichGO(rownames(x)[y],
OrgDb = "org.Hs.eg.db",
keyType = "SYMBOL",
ont = "BP",
universe = rownames(cm.merged))) %>%
setNames(names(cls))
Save order for later
cls %>%
lapply(\(y) rownames(x)[y]) %>%
lapply(data.frame) %>%
lapply(setNames, "gene") %>%
bind_rows(.id = "cluster") %>%
select(gene, cluster) %>%
write.table("ASPC_DEG_heatmap_genes.tsv", sep = "\t")
Please note, cluster number may be different.
genes.go <- clusterProfiler::bitr("GO:0097193",
fromType="GOALL",
toType="SYMBOL",
OrgDb='org.Hs.eg.db')$SYMBOL %>%
unique()
## 'select()' returned 1:many mapping between keys and columns
genes.cls <- x %>%
rownames() %>%
.[cls[[1]]] # Change if needed
genes <- intersect(genes.go, genes.cls)
cm.merged <- con$getJointCountMatrix(raw = T)
# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[rownames(cm.merged)]
anno.subtype <- anno.aspc %>%
.[!is.na(.)] %>%
factor()
idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())
anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>%
{`names<-`(as.character(.), names(.))}
anno.final <- paste0(anno.donor,"!!",anno.subtype) %>%
`names<-`(anno.donor %>% names())
cm.pseudo.tmp <- sccore::collapseCellsByType(cm.merged,
groups = anno.final, min.cell.count = 1) %>%
t() %>%
apply(2, as, "integer")
cm.pseudo <- cm.pseudo.tmp %>%
DESeq2::DESeqDataSetFromMatrix(.,
colnames(.) %>%
strsplit("_|!!") %>%
sget(3) %>%
data.frame() %>%
`dimnames<-`(list(colnames(cm.pseudo.tmp), "group")),
design = ~ group) %>%
DESeq2::estimateSizeFactors() %>%
DESeq2::counts(normalized = T) %>%
t()
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
plot.dat <- cm.pseudo %>%
.[, colnames(.) %in% genes] %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "sample") %>%
mutate(visit = strsplit(sample, "!!|_") %>%
sget(3) %>%
factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
anno = strsplit(sample, "!!") %>%
sget(2) %>%
factor(levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")),
donor = strsplit(sample, "!!") %>%
sget(1)) %>%
reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
mutate(sex = meta$sex[match(donor, meta$sample)]) %>%
mutate(sex_visit = paste(sex, visit, sep = " ")) %>%
group_by(visit, sex, anno, sex_visit, sample) %>%
summarize(value = mean(value))
## `summarise()` has grouped output by 'visit', 'sex', 'anno', 'sex_visit'. You
## can override using the `.groups` argument.
# Kruskal-Wallis
sex.stat <- plot.dat %$%
split(., sex) %>%
lapply(\(x) split(x, x$anno)) %>%
lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>%
lapply(sapply, gtools::stars.pval) %>%
{lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>%
`names<-`(c("Female","Male")) %>%
lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>%
{lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>%
bind_rows() %>%
mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>%
arrange(anno) %>%
mutate(sig = gsub(".", "", sig, fixed = T))
# Wilcoxon
stat.test <- plot.dat %>%
group_by(anno, sex) %>%
wilcox_test(value~sex_visit, paired = T) %>%
mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>%
filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)
plot.dat %>%
ggplot(aes(anno, value)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "Mean normalized pseudobulk expression", title = "Apoptosis ontology gene expression") +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.text = element_text(color = "black")) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 250, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
guides(fill = "none")
See Scenic.ipynb
.
See Scenic.ipynb
for mat
object
calculation.
mat <- fastMatMR::fmm_to_mat("aspc_adi_auc_mat.mtx") %>%
`dimnames<-`(list(
read.table("aspc_adi_auc_mat.rownames", header = T)[, 1],
read.table("aspc_adi_auc_mat.colnames", header = T)[, 1]
))
spc <- con.major$getDatasetPerCell()
vpc <- spc %>%
as.character() %>%
strsplit("_|!!") %>%
sget(3) %>%
setNames(names(spc))
anno.comb <- factor(anno.aspc)
plot.dat <- c("JUN(+)", "ZEB1(+)") %>% # ZEB1 is just a random pick to make it easier to manipulate data, omitted later
{mat[, colnames(mat) %in% .]} %>%
as.data.frame() %>%
mutate(.,
visit = unname(vpc)[match(rownames(.),
names(vpc) %>%
strsplit("!!") %>%
sapply(\(x) paste0(x[2],"!!",x[3]))
)
],
anno = unname(anno.comb)[match(rownames(.),
names(anno.comb) %>%
strsplit("!!") %>%
sapply(\(x) paste0(x[2],"!!",x[3]))
)
],
sample = unname(spc)[match(rownames(.),
names(spc) %>%
strsplit("!!") %>%
sapply(\(x) paste0(x[2],"!!",x[3]))
)
]
) %>%
filter(!is.na(anno)) %>%
reshape2::melt(id.vars = c("visit", "anno", "sample")) %>%
mutate(anno_vis = paste0(anno,"_",visit),
anno_var = paste0(anno,"_",variable)) %>%
group_by(visit, anno, variable, sample) %>%
summarize(mm = median(value)) %>%
ungroup() %>%
filter(variable == "JUN(+)") %>%
mutate(anno = factor(anno, levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")),
visit = factor(visit, levels = c("vis1", "vis2", "vis3"), labels = c("Visit 1", "Visit 2", "Visit 3")),
sex = meta$sex[match(.$sample, meta$sample)],
variable = factor(variable)) %>%
mutate(sex_visit = paste(sex, visit, sep = " ") %>% gsub("Visit", "visit", .),
anno = factor(anno, levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")))
## `summarise()` has grouped output by 'visit', 'anno', 'variable'. You can
## override using the `.groups` argument.
# Kruskal-Wallis
sex.stat <- plot.dat %$%
split(., sex) %>%
lapply(\(x) split(x, x$anno)) %>%
lapply(lapply, \(x) kruskal.test(x$mm, g = x$visit)$p.value) %>%
lapply(sapply, gtools::stars.pval) %>%
{lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>%
`names<-`(c("Female","Male")) %>%
lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>%
{lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>%
bind_rows() %>%
mutate(ct = factor(ct, levels = unique(plot.dat$anno))) %>%
arrange(ct) %>%
mutate(sig = gsub(".", "", sig, fixed = T))
# Wilcoxon
stat.test <- plot.dat %>%
dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
group_by(anno, sex) %>%
wilcox_test(mm~sex_visit, paired = TRUE) %>%
filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
arrange(desc(y.position))
# Plot
ggplot(plot.dat, aes(anno, mm)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "Regulon activity (mean AUC)", fill = "") +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.text = element_text(color = "black")) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat, aes(ct, y = 6, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 2, hide.ns = F) # Wilcoxon
con <- qread("con_adipocytes.qs", nthreads = 10)
anno.adipocytes <- qread("anno_adipocytes_archetypes.qs")
missing.cells <- setdiff(rownames(con$embedding),
names(anno.adipocytes)) %>%
{setNames(rep("", length(.)), .)} %>%
factor()
anno.adipocytes.ext <- factor(c(anno.adipocytes, missing.cells))
con$plotGraph(groups = anno.adipocytes.ext,
show.labels = TRUE,
embedding = "largeVis") +
labs(x = "largeVis1", y = "largeVis2") +
theme(line = element_blank()) +
scale_colour_manual(values = c(RColorBrewer::brewer.pal(4, "Reds")[-1], "gray60"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
cm.merged <- con$getJointCountMatrix()
# c("PLXDC2","CBLB","TCF4",
# "GPAM","ACSL1","LPL","DGAT2",
# "PRSS23","THSD7A","AL139317.5") %>%
c("PRSS23","PDE5A","ADIPOQ", "DGAT2", "GPAM", "LPL", "RORA", "CLSTN2", "CRIM1", "ITGA1") %>%
sccore::dotPlot(.,
cm.merged,
anno.adipocytes,
gene.order = .,
cols = c("white","red3"))
Preparations
cm.merged <- con$getJointCountMatrix(raw = TRUE) %>%
Matrix::t() %>%
.[, colnames(.) %in% names(anno.adipocytes)]
# Calculate DEGs using Cacoa
con.vis13 <- con$samples %>%
.[!grepl("vis2", names(.))] %>%
Conos$new()
sample.groups <- con.vis13$samples %>%
names() %>%
`names<-`(ifelse(grepl("vis1", .), "visit1", "visit3"), .)
cao <- Cacoa$new(con.vis13,
sample.groups,
anno.adipocytes,
ref.level = "visit1",
target.level = "visit3",
n.cores = 10)
cao$estimateDEPerCellType()
## Preparing matrices for DE
## Warning in estimateDEPerCellTypeInner(raw.mats = raw.mats, cell.groups =
## cell.groups, : Excluded 1 sample(s) due to 'min.cell.count'.
## Estimating DE per cell type
de <- cao$test.results$de %>%
lapply('[[', "res")
# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[colnames(cm.merged)]
anno.subtype <- anno.adipocytes %>%
.[!is.na(.)]
idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())
anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>%
{`names<-`(as.character(.), names(.))}
anno.final <- paste0(anno.donor,"!!",anno.subtype) %>%
`names<-`(anno.donor %>% names())
# Create pseudo CM
cm.pseudo <- sccore::collapseCellsByType(cm.merged %>% Matrix::t(),
groups = anno.final, min.cell.count = 1) %>%
t() %>%
apply(2, as, "integer")
cm.pseudo %<>%
DESeq2::DESeqDataSetFromMatrix(.,
colnames(.) %>%
strsplit("_|!!") %>%
sget(3) %>%
data.frame() %>%
`dimnames<-`(list(colnames(cm.pseudo), "group")),
design = ~ group) %>%
DESeq2::estimateSizeFactors() %>%
DESeq2::counts(normalized = T)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# See comment for Figure 3f
# genes <- de %>%
# {`names<-`(lapply(names(.), \(nn) mutate(.[[nn]], ct = nn)), names(.))} %>%
# lapply(filter, padj <= 0.05, abs(log2FoldChange) > 1) %>%
# lapply(pull, Gene) %>%
# {sapply(names(.), \(nn) .[[nn]] %>% `names<-`(., rep(nn, length(.))))} %>%
# Reduce(c, .) %>%
# .[match(unique(.), .)]
genes <- read.delim("Archetypes_DEG_heatmap_genes.tsv") %$%
setNames(gene, cluster) %>%
.[order(names(.))]
idx <- cm.pseudo %>%
colnames() %>%
data.frame(id = .) %>%
mutate(vis = strsplit(id, "_|!!") %>% sget(3),
ct = strsplit(id, "!!") %>% sget(2)) %>%
mutate(ord = order(vis, ct))
x <- cm.pseudo %>%
.[match(genes, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
.[, idx$ord] %>%
Matrix::t() %>%
scale() %>%
Matrix::t()
# Ordering
spc <- con$getDatasetPerCell()
sepc <- meta$sex[match(spc, meta$sample)] %>%
setNames(names(spc))
sex.df <- sepc %>%
{data.frame(nn = names(.), sex = unname(.))} %>%
mutate(nn = strsplit(nn, "!!") %>% sget(1)) %>%
.[match(unique(.$nn), .$nn), ] %>%
filter(!grepl("pool", nn))
ord <- data.frame(Visit = colnames(x) %>%
setNames(colnames(x)) %>%
strsplit("_|!!|vis") %>%
sget(4)) %>%
mutate(.,
donor = rownames(.) %>% strsplit("_vis") %>% sget(1)) %>%
mutate(.,
Sex = sex.df$sex[match(.$donor, sex.df$nn)],
ct = rownames(.) %>% strsplit("!!") %>% sget(2)) %>%
arrange(ct, Visit, Sex) %>%
rownames()
x <- x[, ord]
groups <- colnames(x) %>%
`names<-`(colnames(x))
# Limit scale
x[x > 2] <- 2
x[x < -1] <- -1
Please note, the order of the clusters is not always reproducible, and the clusters them selves may also change slightly.
# Create top annotation
tannot <- data.frame(Visit = groups[colnames(x)] %>%
strsplit("_|!!|vis") %>%
sget(4)) %>%
mutate(., donor = rownames(.) %>% strsplit("_vis") %>% sget(1)) %>%
mutate(., Sex = sex.df$sex[match(.$donor, sex.df$nn)]) %>%
select(-donor) %>%
{HeatmapAnnotation(df=.,
border=TRUE,
col=list(Visit = ggsci::pal_jama()(7)[c(1:3)] %>% `names<-`(c(1:3)),
Sex = c("darkred","blue3") %>% setNames(c("Female","Male"))),
show_legend=TRUE)}
# Create color palette
pal <- colorRampPalette(c('navy','grey95','firebrick'))(1024)
labeled.gene.subset <- NULL
# Plot
set.seed(1337)
ha <- ComplexHeatmap::Heatmap(x,
name='Expression',
col=pal,
cluster_columns=FALSE,
show_row_names=FALSE,
show_column_names=FALSE,
top_annotation=tannot,
left_annotation=NULL,
border=TRUE,
show_column_dend = FALSE,
show_row_dend = FALSE,
# row_km = 4, # Only used initially
# row_km_repeats = 200, # Only used initially
column_split = groups[colnames(x)] %>% strsplit("!!") %>% sapply('[[', 2) %>% unname() %>% factor(levels = c("CLSTN2_archetype", "DGAT2_archetype", "PRSS23_archetype")),
row_split = names(genes))
ht = draw(ha); row_order(ht) -> cls
Calculate GO enrichment
go <- cls %>%
lapply(\(y) clusterProfiler::enrichGO(rownames(x)[y],
OrgDb = "org.Hs.eg.db",
keyType = "SYMBOL",
ont = "BP",
universe = rownames(cm.merged))) %>%
setNames(names(cls))
Save order for later
cls %>%
lapply(\(y) rownames(x)[y]) %>%
lapply(data.frame) %>%
lapply(setNames, "gene") %>%
bind_rows(.id = "cluster") %>%
select(gene, cluster) %>%
write.table("Archetypes_DEG_heatmap_genes.tsv", sep = "\t")
cm.merged <- con$getJointCountMatrix(raw = T)
# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[rownames(cm.merged)]
anno.subtype <- anno.adipocytes %>%
.[!is.na(.)] %>%
factor()
idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())
anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>%
{`names<-`(as.character(.), names(.))}
anno.final <- paste0(anno.donor,"!!",anno.subtype) %>%
`names<-`(anno.donor %>% names())
cm.pseudo.tmp <- sccore::collapseCellsByType(cm.merged,
groups = anno.final, min.cell.count = 1) %>%
t() %>%
apply(2, as, "integer")
cm.pseudo <- cm.pseudo.tmp %>%
DESeq2::DESeqDataSetFromMatrix(.,
colnames(.) %>%
strsplit("_|!!") %>%
sget(3) %>%
data.frame() %>%
`dimnames<-`(list(colnames(cm.pseudo.tmp), "group")),
design = ~ group) %>%
DESeq2::estimateSizeFactors() %>%
DESeq2::counts(normalized = T) %>%
t()
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
gene <- "DECR1"
plot.dat <- cm.pseudo %>%
.[, colnames(.) %in% gene] %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "sample") %>%
mutate(visit = strsplit(sample, "!!|_") %>%
sget(3) %>%
factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
anno = strsplit(sample, "!!") %>%
sget(2) %>%
factor(levels = c("DGAT2_archetype", "CLSTN2_archetype", "PRSS23_archetype")),
donor = strsplit(sample, "!!") %>%
sget(1)) %>%
reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
mutate(sex = meta$sex[match(donor, meta$sample)]) %>%
mutate(sex_visit = paste(sex, visit, sep = " "))
# Kruskal-Wallis
sex.stat <- plot.dat %$%
split(., sex) %>%
lapply(\(x) split(x, x$anno)) %>%
lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>%
lapply(sapply, gtools::stars.pval) %>%
{lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>%
`names<-`(c("Female","Male")) %>%
lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>%
{lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>%
bind_rows() %>%
mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>%
arrange(anno) %>%
mutate(sig = gsub(".", "", sig, fixed = T))
# Wilcoxon
stat.test <- plot.dat %>%
dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
group_by(anno, sex) %>%
wilcox_test(value~sex_visit, paired = F) %>%
mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>%
filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)
plot.dat %>%
ggplot(aes(anno, value)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "Normalized pseudobulk expression", title = gene, fill = "") +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.text = element_text(color = "black")) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 200, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F)# Wilcoxon
gene <- "LIPA"
plot.dat <- cm.pseudo %>%
.[, colnames(.) %in% gene] %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "sample") %>%
mutate(visit = strsplit(sample, "!!|_") %>%
sget(3) %>%
factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
anno = strsplit(sample, "!!") %>%
sget(2) %>%
factor(levels = c("DGAT2_archetype", "CLSTN2_archetype", "PRSS23_archetype")),
donor = strsplit(sample, "!!") %>%
sget(1)) %>%
reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
mutate(sex = meta$sex[match(donor, meta$sample)]) %>%
mutate(sex_visit = paste(sex, visit, sep = " "))
# Kruskal-Wallis
sex.stat <- plot.dat %$%
split(., sex) %>%
lapply(\(x) split(x, x$anno)) %>%
lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>%
lapply(sapply, gtools::stars.pval) %>%
{lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>%
`names<-`(c("Female","Male")) %>%
lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>%
{lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>%
bind_rows() %>%
mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>%
arrange(anno) %>%
mutate(sig = gsub(".", "", sig, fixed = T))
# Wilcoxon
stat.test <- plot.dat %>%
dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
group_by(anno, sex) %>%
wilcox_test(value~sex_visit, paired = F) %>%
mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>%
filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)
plot.dat %>%
ggplot(aes(anno, value)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "Normalized pseudobulk expression", title = gene, fill = "") +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.text = element_text(color = "black")) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 30, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F)# Wilcoxon
gene <- "PLIN5"
plot.dat <- cm.pseudo %>%
.[, colnames(.) %in% gene] %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "sample") %>%
mutate(visit = strsplit(sample, "!!|_") %>%
sget(3) %>%
factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
anno = strsplit(sample, "!!") %>%
sget(2) %>%
factor(levels = c("DGAT2_archetype", "CLSTN2_archetype", "PRSS23_archetype")),
donor = strsplit(sample, "!!") %>%
sget(1)) %>%
reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
mutate(sex = meta$sex[match(donor, meta$sample)]) %>%
mutate(sex_visit = paste(sex, visit, sep = " "))
# Kruskal-Wallis
sex.stat <- plot.dat %$%
split(., sex) %>%
lapply(\(x) split(x, x$anno)) %>%
lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>%
lapply(sapply, gtools::stars.pval) %>%
{lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>%
`names<-`(c("Female","Male")) %>%
lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>%
{lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>%
bind_rows() %>%
mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>%
arrange(anno) %>%
mutate(sig = gsub(".", "", sig, fixed = T))
# Wilcoxon
stat.test <- plot.dat %>%
dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
group_by(anno, sex) %>%
wilcox_test(value~sex_visit, paired = F) %>%
mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>%
filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)
plot.dat %>%
ggplot(aes(anno, value)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "Normalized pseudobulk expression", title = gene, fill = "") +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.text = element_text(color = "black")) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 200, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F)# Wilcoxon
Calculate diffusion embeddings. Please note, this takes some time. We
saved the embeddings in adipocytes_aspc_embeddings.qs
.
## Load Conos object and annotation
data = qread("con_adipocytes_faps.qs", nthreads = 10)
anno.adipocytes <- qread("anno_major.qs") %>%
.[. == "Adipocytes"] %>%
factor()
anno.aspc <- qread("anno_aspc.qs")
anno <- factor(c(anno.adipocytes, anno.aspc))
## Convert raw counts to Seurat
for (name in names(data$samples)) {
seu <- CreateSeuratObject(CreateAssayObject(counts = t(data$samples[[name]]$misc$rawCounts)))
seu$orig.ident <- substr(name, 0, regexpr("vis", name)-2)
seu$visit <- substr(name, regexpr("vis", name), nchar(name))
if (name == names(data$samples)[1]) {
final <- seu
} else {
final <- merge(final, seu)
}
}
## Merge annotation
final <- subset(final, cells = names(anno))
final <- NormalizeData(final)
anno.final <- anno[ match(colnames(final), names(anno))]
final$fine <- anno.final
final$coarse <- substr(anno.final, 0, regexpr("_", anno.final)-1)
final$dataset <- paste(final$orig.ident, final$visit, sep="_")
final <- subset(final, cells = names(which(!is.na(final$fine))))
full <- final
embeddings <- list()
for (nfeats in c(25,50,100,250)) {
# Recreate original object
final <- full
# ASPC
ASPC <- subset(final, coarse == "ASPC")
VariableFeatures(ASPC) <- NULL
obj.list <- SplitObject(ASPC, split.by = "dataset")
feats_ASPC <- suppressWarnings(SelectIntegrationFeatures(obj.list, nfeatures = nfeats, verbose = FALSE))
# Adipocytes
Ad <- subset(final, coarse == "Adipocytes")
VariableFeatures(Ad) <- NULL
obj.list <- SplitObject(Ad, split.by = "dataset")
feats_Ad <- suppressWarnings(SelectIntegrationFeatures(obj.list, nfeatures = nfeats, verbose = FALSE))
# Combine
feats <- unique(c(feats_ASPC, feats_Ad))
## Embed
# PCA
VariableFeatures(final) <- feats
final <- ScaleData(final)
final <- RunPCA(final)
for (useHarmony in c(TRUE, FALSE)) {
if (useHarmony) {
final <- suppressWarnings(RunHarmony(final, group.by.vars = "dataset", verbose = FALSE))
pcs <- final@reductions$harmony@cell.embeddings
} else {
pcs <- final@reductions$pca@cell.embeddings
}
for (dims in c(5, 10, 15, 20)) {
dm <- destiny::DiffusionMap(pcs[,1:dims], verbose = FALSE, n_pcs = NA)
dm_ev <- dm@eigenvectors
colnames(dm_ev) <- paste("DC", c(1:20), sep="_")
embeddings[[(length(embeddings) + 1)]] <- dm_ev
names(embeddings)[length(embeddings)] <- paste(nfeats, useHarmony, dims, sep="_")
}
}
}
Prepare data. We chose to continue with embedding no. 2. The
sds_obj
object is saved for Figure 6E.
# Load and prepare data
embeddings <- qread("adipocytes_aspc_embeddings.qs")
anno.adipocytes <- qread("anno_major.qs") %>%
.[. == "Adipocytes"] %>%
factor()
anno.aspc <- qread("anno_aspc.qs")
anno <- factor(c(anno.adipocytes, anno.aspc))
# Calculate slingshot trajectory
anno.sort <- anno[rownames(embeddings[[2]])] %>%
as.character() %>%
unname()
sds_obj <- slingshot(embeddings[[2]][, 1:2] %>%
`colnames<-`(c("UMAP1","UMAP2")),
anno.sort,
end.clus = "Adipocytes",
start.clus = "ASPC_DPP4")
qsave(sds_obj, "sds_obj.qs")
sds <- as.SlingshotDataSet(sds_obj)
Final plot
plot.df <- embeddings[[2]][,1:2] %>%
as.data.frame() %>%
`colnames<-`(c("UMAP1","UMAP2")) %>%
mutate(., annotation = anno[rownames(.)] %>% as.factor()) %>%
.[complete.cases(.),]
line.df <- lapply(1:length(sds@curves), \(lineage) {
sds@curves[[lineage]]$s[,1:2] %>%
data.frame() %>%
mutate(lineage = lineage %>% as.factor())
}) %>%
setNames(sds@curves %>% names()) %>%
bind_rows() %>%
mutate(lineage = factor(lineage, labels = c("Lineage 1", "Lineage 2")))
ggplot(plot.df, aes(UMAP1, UMAP2, col = annotation)) +
geom_point(size = 0.3) +
geom_path(data = line.df, aes(UMAP1, UMAP2, group = lineage, col = lineage), inherit.aes = F, linewidth = 1) +
theme_bw() +
theme(legend.position = "right",
line = element_blank()) +
labs(col = "", x = "DC1", y = "DC2") +
scale_colour_manual(values = c("#E41A1C", RColorBrewer::brewer.pal(5, "Greens")[-1][c(2,1,3,4)], "black", "grey40")) +
dotSize(3) +
ylim(c(-0.03, 0.013)) +
xlim(c(-0.006, 0.0052))
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_path()`).
# We need the `sds_obj` object from Figure 6a
sds_obj <- qread("sds_obj.qs")
pseudotime_all <- sds_obj@assays@data@listData$pseudotime %>%
as.data.frame() %>%
filter(!is.na(Lineage1)) %>%
{setNames(pull(., Lineage1), rownames(.))}
plot.df %>%
.[names(pseudotime_all), ] %>%
mutate(pseudotime = pseudotime_all) %>%
ggplot(aes(UMAP1, UMAP2, col = pseudotime)) +
geom_point(size = 0.3) +
geom_path(data = line.df %>% filter(lineage == "Lineage 1"), aes(UMAP1, UMAP2), inherit.aes = F, color = "black", linewidth = 1, alpha = 0.4) +
theme_bw() +
theme(line = element_blank()) +
labs(col = "Pseudotime", x = "DC1", y = "DC2") +
scale_color_continuous(low = "blue3", high = "orange") +
ylim(c(-0.005, 0.013)) +
xlim(c(-0.006, 0.0052))
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_path()`).
# We need the `sds_obj` object from Figure 6a
sds_obj <- qread("sds_obj.qs")
pseudotime_all <- sds_obj@assays@data@listData$pseudotime %>%
as.data.frame() %>%
filter(!is.na(Lineage1)) %>%
{setNames(pull(., Lineage1), rownames(.))}
plot.df %>%
.[names(pseudotime_all), ] %>%
mutate(pseudo = pseudotime_all) %>%
mutate(bin = cut(pseudo, breaks = c(0, 0.005, 0.01, 0.0165, 0.023))) %>%
na.omit() %>%
mutate(bin = as.character(bin) %>%
strsplit(",") %>%
sget(2) %>%
gsub("]", "", ., fixed = TRUE)) %>%
arrange(bin) %>%
mutate(bin = factor(bin, labels = c("Early preadipocytes", "Mid preadipocytes", "Late preadipocytes", "Adipocyte"))) %>%
ggplot(aes(UMAP1, UMAP2, col = bin)) +
geom_point(size = 0.3) +
geom_path(data = line.df %>% filter(lineage == "Lineage 1"), aes(UMAP1, UMAP2), inherit.aes = FALSE, color = "black", linewidth = 1, alpha = 0.4) +
theme_bw() +
theme(line = element_blank()) +
labs(col = "Binned pseudotime", x = "DC1", y = "DC2") +
scale_color_manual(values = colorRampPalette(c("pink","darkred"))(4)) +
ylim(c(-0.005, 0.013)) +
xlim(c(-0.006, 0.0052)) +
dotSize(3)
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_path()`).
See Scenic.ipynb
for mat
object
calculation.
mat <- fastMatMR::fmm_to_mat("aspc_adi_auc_mat.mtx") %>%
`dimnames<-`(list(
read.table("aspc_adi_auc_mat.rownames", header = T)[, 1],
read.table("aspc_adi_auc_mat.colnames", header = T)[, 1]
))
con.major <- qread("con_major.qs", nthreads = 10)
spc <- con.major$getDatasetPerCell()
vpc <- spc %>%
as.character() %>%
strsplit("_|!!") %>%
sget(3) %>%
setNames(names(spc))
anno.adipocytes <- qread("anno_major.qs") %>%
.[. == "Adipocytes"] %>%
factor()
anno.aspc <- qread("anno_aspc.qs")
anno <- factor(c(anno.adipocytes, anno.aspc))
# We need the `sds_obj` object from Figure 6a
sds_obj <- qread("sds_obj.qs")
mat.df <- c("ZEB1", "EBF3", "PPARG") %>%
paste0("(+)") %>%
{mat[, colnames(mat) %in% .]} %>%
as.data.frame() %>%
mutate(.,
visit = unname(vpc)[match(rownames(.),
names(vpc) %>%
strsplit("!!") %>%
sapply(\(x) paste0(x[2],"!!",x[3]))
)
] %>%
factor(labels = c("Visit 1", "Visit 2", "Visit 3")),
anno = unname(anno)[match(rownames(.),
names(anno) %>%
strsplit("!!") %>%
sapply(\(x) paste0(x[2],"!!",x[3]))
)
],
sample = unname(spc)[match(rownames(.),
names(spc) %>%
strsplit("!!") %>%
sapply(\(x) paste0(x[2],"!!",x[3]))
)
]
) %>%
filter(!is.na(anno))
pseudotime_all <- sds_obj@assays@data@listData$pseudotime %>%
as.data.frame() %>%
filter(!is.na(Lineage1)) %>%
mutate(.,
cid = rownames(.) %>%
strsplit("!!") %>%
sapply(\(x) paste0(x[2],"!!",x[3])))
mat.df %<>% .[match(pseudotime_all$cid, rownames(.), nomatch = F), ]
pseudotime_all %<>% .[match(rownames(mat.df), .$cid, nomatch = F), ]
mat.df2 <- mat.df %>%
mutate(pseudotime = pseudotime_all$Lineage1) %>%
mutate(bin = cut(pseudotime, breaks = c(0, 0.005, 0.01, 0.0165, 0.023))) %>%
na.omit() %>%
mutate(bin = as.character(bin) %>%
strsplit(",") %>%
sget(2) %>%
gsub("]", "", ., fixed = T)) %>%
select(-anno, -pseudotime) %>%
reshape2::melt(id.vars = c("visit", "bin", "sample"), variable.name = "regulon") %>%
group_by(visit, regulon, bin, sample) %>%
mutate(value = as.numeric(value)) %>%
summarize(mm = median(value))
## `summarise()` has grouped output by 'visit', 'regulon', 'bin'. You can override
## using the `.groups` argument.
mat.df2 %>%
pull(regulon) %>%
unique() %>%
as.character() %>%
lapply(\(vv) {
plot.dat <- mat.df2 %>%
filter(regulon == vv) %>%
ungroup() %>%
mutate(sex = meta$sex[match(.$sample, meta$sample)]) %>%
mutate(sex_visit = paste(sex, visit, sep = " ") %>% gsub("Visit", "visit", .))
# Kruskal-Wallis
sex.stat <- plot.dat %$%
split(., sex) %>%
lapply(\(x) split(x, x$bin)) %>%
lapply(lapply, \(x) kruskal.test(x$mm, g = x$visit)$p.value) %>%
lapply(sapply, gtools::stars.pval) %>%
{lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>%
`names<-`(c("Female","Male")) %>%
lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>%
{lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>%
bind_rows() %>%
mutate(ct = factor(ct, levels = unique(plot.dat$bin))) %>%
arrange(ct) %>%
mutate(sig = gsub(".", "", sig, fixed = T))
# Wilcoxon
stat.test <- plot.dat %>%
group_by(bin, sex) %>%
wilcox_test(mm~sex_visit, paired = F)
stat.test %<>%
filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
add_xy_position(x = "bin", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
arrange(desc(y.position))
# Plot
pp <- ggplot(plot.dat, aes(bin, mm)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "Mean AUC", fill = "", title = vv) +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.text = element_text(color = "black")) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat, aes(ct, y = 2.2, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 0.2, hide.ns = F) # Wilcoxon
pp
}) %>%
cowplot::plot_grid(plotlist = ., ncol = 3)
We downsampled to 15k nuclei due to calculation times. We provide
sds_obj_reduced.qs
for slingshot pseudotime estimations for
the downsampled dataset.
Here, we calculate for visit 1 and visit 3.
sds_obj <- qread("sds_obj_reduced.qs")
pseudotime <- sds_obj@assays@data@listData$pseudotime[, 1] %>%
.[!is.na(.)]
con <- qread("con_adipocytes_aspc.qs", nthreads = 10)
mat <- con$getJointCountMatrix() %>%
.[match(names(pseudotime), rownames(.)), colSums(.) > 2e2]
mt <- mat %>%
as.matrix() %>%
as.data.frame() %>%
tibble::rownames_to_column() %>%
{message("Mutating"); mutate(.,
pseudotime = pseudotime[rowname],
visit = con$getDatasetPerCell()[rowname] %>%
as.character() %>%
strsplit("_") %>%
sget(3),
donor = strsplit(rowname, "!!") %>%
sget(1))} %>%
.[complete.cases(.), ] %>%
{message("Melting"); reshape2::melt(., id.vars = c("rowname", "pseudotime", "visit", "donor"))} %$%
{message("Splitting"); split(., variable)}
The calculations take app. 11 h with 14k genes and 10k cells. We’ve
added pre-calculated results as
pseudotime_res_novis2.qs
.
res <- mt %>%
sccore::plapply(\(y) {
x = y %>%
filter(!visit == "vis2")
fit_full <- gamm4(data = x, formula = value ~ s(pseudotime, by = factor(visit)), random = ~(1 | donor), REML = F)
fit_reduced <- gamm4(data = x, formula = value ~ s(pseudotime), random = ~(1 | donor), REML = F)
fit_none <- gamm4(data = x, formula = value ~ 1, random = ~(1 | donor), REML = F)
ann <- anova(fit_full$mer, fit_reduced$mer, fit_none$mer)
if (any(ann$`Pr(>Chisq)` <= 0.05, na.rm = T)) {
residuals <- predict(fit_full$gam, se.fit = T)$fit %>%
unname()
r.sq <- c(summary(fit_full$gam)$r.sq, summary(fit_reduced$gam)$r.sq) %>%
setNames(c("full", "reduced"))
out <- list(annova = ann,
residuals = residuals,
r.sq = r.sq)
return(out)
}
}, n.cores = 5, mc.preschedule = T, mc.cleanup = T, progress = F) # 10 cores fails, 5 is max tested that finishes
Load data. We need the sds_obj
from Figure 6a. Also, for
ease of use we provide slingshot pseudotime W/O visit 2 as
pseudotime_novis2.qs
.
res <- qread("pseudotime_res_novis2.qs", nthreads = 10) %>%
.[!sapply(., is.null)]
sds_obj <- qread("sds_obj.qs")
pseudotime <- qread("pseudotime_novis2.qs")
con <- qread("con_adipocytes_aspc.qs", nthreads = 10)
both <- res %>%
sapply(\(gene) {
ann <- gene$annova
(ann[2, "Pr(>Chisq)"] <= 0.05 & ann[3, "Pr(>Chisq)"] <= 0.05)
})
res.both <- res[both]
p.both <- res.both %>%
sapply(\(gene) gene$annova[3, "Pr(>Chisq)"])
tmp <- res.both %>%
lget("annova") %>%
lapply(dplyr::slice, 2:3) %>%
lapply(pull, AIC) %>%
.[sapply(., \(x) abs(x[2]) > abs(x[1]))] %>%
bind_rows() %>%
t() %>%
as.data.frame() %>%
setNames(c("reduced", "full")) %>%
mutate(diff = abs(full) - abs(reduced)) %>%
mutate(diff.frac = diff / abs(reduced))
res.filter <- res[
tmp %>%
filter(diff.frac >= 0.05) %>%
rownames()
]
cm.merged <- con$getJointCountMatrix() %>%
.[match(names(pseudotime), rownames(.)), colnames(.) %in% names(res.filter)]
visit = con$getDatasetPerCell()[rownames(cm.merged)] %>%
as.character() %>%
strsplit("_") %>%
sget(3)
# Vis3
cm.merged.vis <- cm.merged[visit == "vis3", ]
pseudotime.vis <- pseudotime[rownames(cm.merged.vis)]
weights.vis <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.vis), rownames(.)), 1] + 1E-7
scFit <- cm.merged.vis %>%
Matrix::t() %>%
tradeSeq::fitGAM(pseudotime = pseudotime.vis, cellWeights = weights.vis, verbose = TRUE)
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
Smooth3 <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged.vis), tidy = FALSE, n=100)
# Vis1
cm.merged.vis <- cm.merged[visit == "vis1", ]
pseudotime.vis <- pseudotime[rownames(cm.merged.vis)]
weights.vis <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.vis), rownames(.)), 1] + 1E-7
scFit <- cm.merged.vis %>%
Matrix::t() %>%
tradeSeq::fitGAM(pseudotime = pseudotime.vis, cellWeights = weights.vis, verbose = TRUE)
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
Smooth1 <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged.vis), tidy = FALSE, n=100)
# Combine smooth
Smooth <- cbind(Smooth3, Smooth1)
# Average across replicates and scale
Smooth <- t(scale(t(Smooth)))
# Split smooth
Smooth33 <- Smooth[, 1:100]
Smooth11 <- Smooth[, 101:200]
# Vis3
# Seriate the results
Smooth33 <- Smooth33[ seriation::get_order(seriation::seriate(Smooth33, method="PCA_angle")), ]
## Registered S3 methods overwritten by 'registry':
## method from
## print.registry_field proxy
## print.registry_entry proxy
col_fun = circlize::colorRamp2(c(-4, 0, 4), c("navy", "white", "firebrick"))
p3 <- Heatmap(Smooth33,
col = col_fun,
cluster_columns=F,
cluster_rows=F,
show_column_names = F,
row_names_gp = grid::gpar(fontsize = 5)
)
# Vis1
Smooth11 %<>% .[p3@matrix %>% rownames(), ]
p1 <- Heatmap(Smooth11,
col = col_fun,
cluster_columns=F,
cluster_rows=F,
show_column_names = F,
row_names_gp = grid::gpar(fontsize = 5)
)
p3
p1
See Liana.ipynb
.
For calculation of data, see Liana.ipynb
.
dat.raw <- read.delim("liana_res.csv",
sep = ",",
header = T) %>%
mutate(visit = strsplit(sample, "_") %>%
sget(3))
Function
lianaCircos <- function(df,
top.interactions = 30,
text.size = 1,
pal = RColorBrewer::brewer.pal(9, "Set1"),
cell.types = c("Adipocytes",
"Myeloid immune cells",
"ASPCs",
"EC",
"SMCs",
"Lymphoid immune cells",
"Mast cells",
"Lymphatic ECs",
"Pericytes"),
big.gap = 5,
small.gap = 2,
arrow.width = 3,
link.ramp.rel = T,
link.sort = F,
scale = F,
arrow.head.width = 0.3,
arrow.head.length = 0.3,
link.ramp.col = c("navy", "grey", "firebrick")) {
input_df <- df %>%
slice_max(order_by = score, n = top.interactions) %>%
mutate(source = paste0(source, " ")) %>%
mutate(source_lig = paste0(source, "|", ligand),
target_rec = paste0(target, "|", receptor))
if (link.ramp.rel) {
arr_wd <- rep(arrow.width, nrow(input_df))
} else {
arr_wd <- (((input_df$score - min(input_df$score))/(max(input_df$score) - min(input_df$score))) * (arrow.width)) + 1
}
# Colors and segments
anno.col <- setNames(pal,
cell.types) %>%
c(., "ASPCs " = unname(.["ASPCs"]))
cell_cols <- anno.col[unique(c(unique(input_df$source), unique(input_df$target), "ASPCs "))]
link_cols <- c()
if (!link.ramp.rel) {
for (i in input_df$source_lig) {
link_cols <- c(link_cols, cell_cols[stringr::str_extract(i,
"[^|]+")])
}
} else {
input_df %<>%
arrange(score)
df.down <- input_df %>% filter(score <= 0)
link_down <- colorRampPalette(c(link.ramp.col[1], link.ramp.col[2]))(nrow(df.down))
df.up <- input_df %>% filter(score > 0)
link_up <- colorRampPalette(c(link.ramp.col[2], link.ramp.col[3]))(nrow(df.up))
link_cols <- c(link_down, link_up)
}
segments <- unique(c(paste0(input_df$source, "|", input_df$ligand),
paste0(input_df$target, "|", input_df$receptor)))
grp <- stringr::str_extract(segments, "[^|]+") %>%
setNames(segments)
# Redo colors
cell_cols2 <- grp
for (i in unique(grp)) {
cell_cols2[cell_cols2 == i] <- cell_cols[i]
}
# Plot
input_df %>%
select(source_lig, target_rec, score) %>%
chordDiagram(directional = 1,
group = grp,
scale = scale,
diffHeight = 0.005,
direction.type = c("arrows"),
link.arr.type = "triangle",
annotationTrack = c(),
preAllocateTracks = list(
list(track.height = 0.05),
list(track.height = 0.175),
list(track.height = 0.05)),
big.gap = big.gap,
transparency = 1,
link.arr.lwd = arr_wd,
link.arr.col = link_cols,
link.arr.length = arrow.head.length,
link.arr.width = arrow.head.width,
small.gap = small.gap
)
circos.track(track.index = 2, panel.fun = function(x, y) {
circos.text(CELL_META$xcenter,
CELL_META$ylim[1],
stringr::str_extract(CELL_META$sector.index, "[^|]+$"),
facing = "clockwise",
niceFacing = TRUE,
adj = c(0, 0.55),
cex = 1)
}, bg.border = NA)
# Split segments
for (l in segments) {
highlight.sector(l, track.index = 3, col = cell_cols2[l])
}
# Add ligand/receptor track
## Ligand
highlight.sector(input_df$source_lig,
track.index = 1,
col = "black",
text = "Ligands",
cex = 1,
text.col = "white",
niceFacing = TRUE)
## Receptor
highlight.sector(input_df$target_rec,
track.index = 1,
col = "white",
text = "Receptors",
cex = 1,
text.col = "black",
border = "black",
niceFacing = TRUE)
# Legends
minmax <- input_df %>%
pull(score) %>%
{pmax(abs(min(.)), max(.))} %>%
formatC(digits = 1) %>%
as.numeric()
col.range = c(-minmax, 0, minmax)
lgd_links = Legend(at = col.range,
col_fun = colorRamp2(col.range, link.ramp.col),
title_position = "topleft",
title = "Links")
lgd_ct <- Legend(labels = unique(c(input_df$source, input_df$target)),
title = "Cell type",
type = "points",
legend_gp = gpar(col = "transparent"),
background = cell_cols[unique(c(input_df$source, input_df$target))])
lgd_list_vertical = packLegend(lgd_ct, lgd_links)
draw(lgd_list_vertical,
just = c("left", "bottom"),
x = unit(5, "mm"),
y = unit(5, "mm"))
circos.clear()
}
Plot
dat.plot <- dat.raw %>%
dplyr::rename(ligand = ligand_complex,
receptor = receptor_complex,
score = lrscore) %>%
filter(visit %in% c("visit1", "visit3"),
source == "ASPCs",
ligand %in% c("COL18A1", "NID1", "JAG1", "FGF2", "VEGFA", "BMP1", "ANGPT1", "MFGE8", "C3", "IGF1", "ANXA1", "ALCAM"),
receptor %in% c("ITGB5", "ITGAV", "CD46", "NRP1", "BMPR2", "ITGB1", "IGF2R", "INSR", "EGFR")) %>%
group_by(visit, ligand, receptor, source, target) %>%
summarize(score = mean(score)) %>%
ungroup() %>%
arrange(visit, source, target, ligand, receptor)
## `summarise()` has grouped output by 'visit', 'ligand', 'receptor', 'source'.
## You can override using the `.groups` argument.
dat.vis1 <- dat.plot %>%
filter(visit == "visit1",
score > 0.8648) %>%
mutate(lrst = paste0(ligand, receptor, source, target))
dat.vis3 <- dat.plot %>%
mutate(lrst = paste0(ligand, receptor, source, target)) %>%
filter(visit == "visit3",
lrst %in% dat.vis1$lrst)
dat.rel <- dat.vis3 %>%
mutate(score = score - dat.vis1$score)
dat.rel %>%
lianaCircos()
Preparation
# Load Conos object
con <- qread("con_adipocytes_aspc.qs", nthreads = 10)
anno.faps <- qread("anno_aspc.qs")
anno <- factor(c(anno.adipocytes, anno.faps))
cm.merged <- con$getJointCountMatrix(raw = T) %>%
Matrix::t() %>%
.[, colnames(.) %in% names(anno)]
# Use bins instead of anno
spc <- con$getDatasetPerCell()
anno.bin <- qread("sds_obj.qs")@assays@data@listData$pseudotime %>%
as.data.frame() %>%
filter(!is.na(Lineage1)) %>%
{setNames(.$Lineage1, rownames(.))} %>%
{data.frame(pseudotime = unname(.), anno = anno[names(.)], cid = names(.))} %>%
mutate(bin = cut(pseudotime, breaks = c(0, 0.005, 0.01, 0.0165, 0.023))) %>%
na.omit() %>%
mutate(bin = as.character(bin) %>%
strsplit(",") %>%
sget(2) %>%
gsub("]", "", ., fixed = T),
sample = spc[cid]) %>%
mutate(sex = meta$sex[match(sample, meta$sample)],
visit = strsplit(as.character(sample), "_") %>% sget(3)) %>%
mutate(anno.final = paste(sample, bin, sex, sep = "!!"))
# Create pseudo CM
anno.final <- anno.bin %>%
pull(anno.final) %>%
setNames(anno.bin$cid)
cm.pseudo <- sccore::collapseCellsByType(cm.merged %>% Matrix::t(),
groups = anno.final, min.cell.count = 1) %>%
t() %>%
apply(2, as, "integer")
cm.pseudo %<>%
DESeq2::DESeqDataSetFromMatrix(.,
colnames(.) %>%
strsplit("_|!!") %>%
sget(3) %>%
data.frame() %>%
`dimnames<-`(list(colnames(cm.pseudo), "group")),
design = ~ group) %>%
DESeq2::estimateSizeFactors() %>%
DESeq2::counts(normalized = T)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
genes <- "C3"
idx <- cm.pseudo %>%
colnames() %>%
data.frame(id = .) %>%
mutate(vis = strsplit(id, "_|!!") %>% sget(3),
bin = strsplit(id, "!!") %>% sget(2),
sex = strsplit(id, "!!") %>% sget(3)) %>%
mutate(ord = order(vis, bin, sex))
x <- cm.pseudo %>%
.[match(genes, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
.[idx$ord]
plot.dat <- x %>%
{data.frame(sample = names(.),
value = unname(.))} %>%
mutate(bin = strsplit(sample, "!!") %>%
sget(2),
visit = strsplit(sample, "!!|_") %>%
sget(3) %>%
factor(labels = c("Visit 1", "Visit 2", "Visit 3")),
sex = strsplit(sample, "!!") %>% sget(3)) %>%
mutate(sex_visit = paste(sex, gsub("Visit", "visit", visit), sep = " "))
# Kruskal-Wallis
sex.stat <- plot.dat %$%
split(., sex) %>%
lapply(\(x) split(x, x$bin)) %>%
lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>%
lapply(sapply, gtools::stars.pval) %>%
{lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>%
`names<-`(c("Female","Male")) %>%
lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>%
{lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>%
bind_rows() %>%
mutate(ct = factor(ct, levels = unique(plot.dat$bin))) %>%
arrange(ct) %>%
mutate(sig = gsub(".", "", sig, fixed = T))
# Wilcoxon
stat.test <- plot.dat %>%
group_by(bin, sex) %>%
wilcox_test(value~sex_visit, paired = T)
stat.test %<>%
filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
add_xy_position(x = "bin", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
arrange(desc(y.position))
# Plot
ggplot(plot.dat, aes(bin, value)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "Normalized pseudobulk expression", fill = "", title = "C3") +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.text = element_text(color = "black")) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat, aes(ct, y = 1.5e3, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 0.2, hide.ns = F)# Wilcoxon
cms <- con.major$samples %>%
lget("misc") %>%
lget("rawCounts") %>%
lapply(Matrix::t)
spc <- con.major$getDatasetPerCell()
cms %>%
lapply(\(x) diff(x@p)) %>%
sapply(median) %>%
{data.frame(sample = names(.),
value = unname(.))} %>%
mutate(visit = strsplit(sample, "_") %>%
sget(3)) %>%
mutate(sample = factor(sample, levels = sample)) %>%
ggplot(aes(sample, value, fill = visit)) +
geom_col() +
theme_bw() +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1, size = 6)) +
labs(x = "", y = "Median genes", fill = "") +
scale_fill_manual(values = ggsci::pal_jama()(3))
cms %>%
lapply(sparseMatrixStats::colSums2) %>%
sapply(median) %>%
{data.frame(sample = names(.),
value = unname(.))} %>%
mutate(visit = strsplit(sample, "_") %>%
sget(3)) %>%
mutate(sample = factor(sample, levels = sample)) %>%
ggplot(aes(sample, value, fill = visit)) +
geom_col() +
theme_bw() +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1, size = 6)) +
labs(x = "", y = "Median UMIs", fill = "") +
scale_fill_manual(values = ggsci::pal_jama()(3))
WATLAS <- read.delim("watlas_labels_l1_adipose_anno_super.tsv")
rownames(WATLAS)=WATLAS$X
WATLAS=WATLAS[2:21]
# Define the order of columns and rows
column_order <- c("Adipocyte", "FAP", "VEC", "LEC", "SMC","Pericyte", "B.cell", "CD4..T.cell", "CD8..T.cell", "ILC", "NK.cell", "Mast",
"Macrophage", "Monocyte", "DC", "pDC",
"Endometrium", "Mesothelial", "Plasmablast",
"Schwann" )
row_order <- c("Adipocytes", "ASPC", "EC", "lEC", "SMCs", "Pericytes", "B-cells", "T-cells", "Mast cells", "Myeloid")
WATLAS_ordered <- WATLAS[, rev(column_order)] # Reorder columns
WATLAS_ordered <- WATLAS_ordered[row_order, ] # Reorder rows
corrplot(WATLAS_ordered %>% t() %>% t(), method="color", col.lim=c(0, 1),
col = rev(COL2("RdBu",200)),
is.corr = F,
tl.col = "black",
tl.pos = "lower",
outline = "#C0C0C0",
addgrid.col= NA)
varToPlot <- spc %>%
as.character() %>%
strsplit("_vis") %>%
sget(1) %>%
setNames(names(spc)) %>%
factor()
con.major$plotGraph(groups = varToPlot,
plot.na = F,
size = 0.1,
alpha = 0.1,
embedding = "UMAP_refined7",
mark.groups = F,
show.labels = T,
show.legend = T,
title = "Donor") +
labs(x = "UMAP1", y = "UMAP2", col = "") +
theme(line = element_blank()) +
dotSize(3)
var <- meta$sex
varToPlot <- grepl.replace(spc %>%
as.character(),
meta$sample,
var) %>%
setNames(names(spc)) %>%
factor(labels = unique(var))
con.major$plotGraph(groups = varToPlot,
plot.na = F,
size = 0.1,
alpha = 0.1,
embedding = "UMAP_refined7",
mark.groups = F,
show.labels = T,
show.legend = T,
title = "Sex") +
labs(x = "UMAP1", y = "UMAP2", col = "") +
theme(line = element_blank()) +
dotSize(3) +
scale_color_manual(values = c("firebrick", "navy"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
varToPlot <- spc %>%
as.character() %>%
strsplit("_|!!") %>%
sget(3) %>%
setNames(names(spc)) %>%
factor()
con.major$plotGraph(groups = varToPlot,
plot.na = F,
size = 0.1,
alpha = 0.1,
embedding = "UMAP_refined7",
mark.groups = F,
show.labels = T,
show.legend = T,
title = "Visit") +
labs(x = "UMAP1", y = "UMAP2", col = "") +
theme(line = element_blank()) +
dotSize(3) +
scale_color_manual(values = ggsci::pal_jama()(3))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
Calculate
spc <- con.major$getDatasetPerCell()
anno.sex <- data.frame(anno = unname(anno.major),
cid = names(anno.major)) %>%
mutate(donor = spc[match(cid, names(spc))]) %>%
mutate(sex = meta$sex[match(donor, meta$sample)]) %>%
mutate(anno_sex = paste(anno, tolower(sex), sep = ", ")) %>%
pull(anno_sex, cid)
res.major <- c("vis1", "vis2", "vis3") %>%
sapply(\(vis) {
con <- con.major$samples %>%
.[!grepl(vis, names(.))] %>%
Conos$new()
if (vis == "vis1") {
vis.ref <- "vis2"
vis.target <- "vis3"
} else {
vis.ref <- "vis1"
if (vis == "vis2") {
vis.target <- "vis3"
} else {
vis.target <- "vis2"
}
}
sample.groups <- con$samples %>%
names() %>%
`names<-`(ifelse(grepl(vis.ref, .), vis.ref, vis.target), .)
cao <- Cacoa$new(con,
sample.groups,
anno.sex,
ref.level = vis.ref,
target.level = vis.target,
n.cores = 32)
cao$estimateCellLoadings()
return(cao)
}, simplify = F, USE.NAMES = T)
Plot
visits <- c("vis1", "vis2", "vis3")
p <- visits %>%
lapply(\(vis) {
comp <- visits[!visits == vis]
dat.p <- res.major[[vis]]$test.results$coda$padj
dat.plot <- res.major[[vis]]$test.results$coda$loadings %>%
tibble::rownames_to_column(var = "anno") %>%
reshape2::melt(id.var = "anno") %>%
mutate(anno = renameAnnotation(anno, "Myeloid immune cells", "Myeloid ICs") %>%
renameAnnotation("Lymphoid immune cells", "Lymphoid ICs"))
yvar <- dat.plot %>%
group_by(anno) %>%
summarize(var = mean(value)) %>%
mutate(padj = dat.p[match(anno, names(dat.p))]) %>%
arrange(padj, desc(abs(var)))
dat.plot %<>%
mutate(anno = factor(anno, levels = rev(unique(yvar$anno))))
ymax <- dat.plot %>%
pull(value) %>%
abs() %>%
max()
out <- dat.plot %>%
ggplot(aes(anno, value, fill = anno)) +
geom_hline(yintercept = 0, col = "black") +
geom_jitter(alpha = 0.2, size = 0.3, col = "grey") +
geom_boxplot(outliers = F) +
coord_flip() +
ylim(c(-ymax, ymax)) +
theme_bw() +
theme(line = element_blank(),
axis.text = element_text(color = "black")) +
guides(fill = "none") +
labs(title = paste0(comp[1], " vs ", comp[2]), x = "", y = "Loadings [AU]") +
scale_fill_manual(values = pal.major)
return(out)
})
p
## [[1]]
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[2]]
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[3]]
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
Statistics
for (vis in visits) {
comp <- visits[!visits == vis]
message("Comparison: ", comp)
print(res.major[[vis]]$test.results$coda$padj)
}
## Comparison: vis2vis3
## Myeloid immune cells, male Myeloid immune cells, female
## 0.01598402 0.02131202
## Lymphoid immune cells, female Mast cells, male
## 0.02397602 0.01598402
## ASPCs, female ECs, male
## 0.12787213 0.12787213
## Mast cells, female Adipocytes, female
## 0.18724133 0.38716839
## ASPCs, male ECs, female
## 0.33566434 0.51788212
## Lymphoid immune cells, male SMCs, male
## 0.78428238 0.65389156
## Pericytes, female Pericytes, male
## 0.69100131 0.69100131
## SMCs, female Adipocytes, male
## 0.78428238 0.86913087
## Comparison: vis1vis3
## Myeloid immune cells, male Myeloid immune cells, female
## 0.06393606 0.06393606
## Lymphoid immune cells, female Lymphoid immune cells, male
## 0.06393606 0.10229770
## SMCs, male ECs, male
## 0.07992008 0.24508825
## Adipocytes, male ASPCs, female
## 0.59540460 0.61968800
## Adipocytes, female Pericytes, male
## 0.61449661 0.59540460
## Pericytes, female Mast cells, male
## 0.61968800 0.61968800
## ASPCs, male Mast cells, female
## 0.61968800 0.86313686
## ECs, female SMCs, female
## 0.96103896 0.96103896
## Comparison: vis1vis2
## Mast cells, female ASPCs, female
## 0.3942724 0.3942724
## Pericytes, male Myeloid immune cells, male
## 0.3942724 0.5178821
## Mast cells, male Pericytes, female
## 0.4395604 0.5461205
## Lymphoid immune cells, male ECs, male
## 0.5461205 0.5461205
## Myeloid immune cells, female Adipocytes, female
## 0.5461205 0.5461205
## Adipocytes, male Lymphoid immune cells, female
## 0.5461205 0.5778837
## SMCs, male ASPCs, male
## 0.5461205 0.7331335
## SMCs, female ECs, female
## 0.7392607 0.7331335
We integrated our data with the data from Hinte et al. that were graciously provided by the authors. As these data are not public, here we will show how we integrated the data and performed label transferring, but we will not run those steps here.
## Load data
dat.nefa <- qread("scAT_NEFA.qs", nthreads = 10)
con <- qread("con_major.qs", nthreads = 10)
## Integrate
cm <- GetAssayData(dat.nefa, assay = "RNA", layer = "counts")
p2.list <- split(colnames(dat.nefa), dat.nefa$condition) %>%
lapply(\(cid) cm[, cid]) %>%
lapply(basicP2proc, n.cores = 32, get.largevis = F, get.tsne = F, make.geneknn = F)
con$addSamples(p2.list)
con$n.cores <- 64
con$buildGraph()
con$findCommunities(method = leiden.community, resolution = 1)
con$embedGraph(method = "UMAP")
nefa.ids <- cm %>%
colnames() %>%
`names<-`(rep("NEFA", length(.)), .)
hinte.cls <- dat.nefa$cellType %>%
setNames(colnames(dat.nefa))
hinte.cond <- dat.nefa$condition %>%
setNames(colnames(dat.nefa))
# Label transfer, minor
anno.minor <- c("adipocytes", "fap", "immune", "vascular") %>%
sapply(\(x) paste0("anno_",x,".qs")) %>%
lapply(qread) %>%
setNames(rep("", 4)) %>%
unlist() %>%
scHelper::collapseAnnotation("Adipocytes") %>%
scHelper::renameAnnotation(c("Th","Th1"), "CD4 T cells") %>%
scHelper::renameAnnotation("Tem", "CD8 T cells") %>%
scHelper::renameAnnotation("NKT", "NK-like T cells") %>%
scHelper::renameAnnotation("cEC2", "cvEC") %>%
.[unique(names(.))]
anno.minor %<>%
c(qread("anno_major.qs") %>% .[. == "Mast cells"]) %>%
factor()
# Major
anno <- qread("anno_major") %>%
scHelper::renameAnnotation("FAPs", "ASPCs") %>%
scHelper::renameAnnotation("Tissue-resident macrophages", "Myeloid immune cells") %>%
scHelper::renameAnnotation("Endothelial cells", "ECs") %>%
scHelper::renameAnnotation("Lymphatic endothelial cells", "Lymphatic ECs") %>%
.[!. == "SMCs"]
anno.vasc <- qread("anno_vascular.qs") %>%
.[. %in% c("SMCs","Pericytes")]
anno <- factor(c(anno, anno.vasc)) %>%
.[match(unique(names(.)), names(.))]
anno.major <- anno[names(anno.minor)]
# Fix annotation issues
anno.minor <- anno.minor[!is.na(anno.minor)]
anno.major <- anno.major[!is.na(anno.major)]
matched <- names(anno.minor)[ (names(anno.minor) %in% names(anno.major))]
anno.major <- anno.major[ names(anno.major) %in% matched]
anno.minor <- anno.minor[ names(anno.minor) %in% matched]
anno.major <- anno.major[ duplicated(names(anno.major)) == FALSE]
anno.minor <- anno.minor[ duplicated(names(anno.minor)) == FALSE]
qsave(anno.minor, "anno_minor.qs")
## Label transfer
loft.transfer <- con$propagateLabels(labels = anno.major)
loft.transfer.minor <- con$propagateLabels(labels = anno.minor)
# Loft annotations
con$clusters$loft_transfer <- list(groups = loft.transfer$labels)
con$clusters$loft_transfer_uncertainty <- list(groups = formatC(loft.transfer$uncertainty, 1) %>% {setNames(as.numeric(.), names(.))} %>% .[!grepl("!!", names(.))])
con$clusters$loft_transfer_minor <- list(groups = loft.transfer.minor$labels)
con$clusters$loft_transfer_minor_uncertainty <- list(groups = formatC(loft.transfer.minor$uncertainty, 1) %>% {setNames(as.numeric(.), names(.))} %>% .[!grepl("!!", names(.))])
con$clusters$loft_clusters <- list(groups = anno.major)
con$clusters$loft_clusters_minor <- list(groups = anno.minor)
# Hinte annotations
con$clusters$hinte_transfer <- list(groups = hinte.transfer$labels)
con$clusters$hinte_clusters <- list(groups = hinte.cls)
con$clusters$hinte_condition <- list(groups = hinte.cond)
con$clusters$hinte_clusters_major <- list(groups = con$clusters$hinte_clusters$groups %>%
collapseAnnotation(c("APCs", "Endo", "Macro")) %>%
renameAnnotation("Endo", "EndoCs") %>%
renameAnnotation("FAPs", "APCs"))
qsave(con, "hinte_integration.qs", nthreads = 10)
con.integration <- qread("hinte_integration.qs")
loft.transfer <- con.integration$propagateLabels(anno.major)
## Stop after 21 iterations. Norm: 0.024024
## Min weight: 1.67017e-05, max weight: 0.367879, fading: (10, 0.1)
cm.merged <- con.integration$getJointCountMatrix()
spc <- con.integration$getDatasetPerCell()
con.integration$plotGraph(groups = loft.transfer$labels,
font.size = 5) +
scale_color_manual(values = pal.major) +
theme(line = element_blank())
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
con.integration$plotGraph(groups = loft.transfer$labels %>% .[!grepl("!!", names(.))],
plot.na = F,
font.size = 5) +
scale_color_manual(values = pal.major) +
theme(line = element_blank())
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
# Calculate
cm.pseudo <- cm.merged %>%
sccore::collapseCellsByType(spc) %>%
t() %>%
apply(2, as, "integer")
cm.pseudo %<>%
DESeq2::DESeqDataSetFromMatrix(.,
colnames(.) %>%
strsplit("_") %>%
sget(1) %>%
data.frame() %>%
`dimnames<-`(list(colnames(cm.pseudo), "group")),
design = ~ group) %>%
DESeq2::estimateSizeFactors() %>%
DESeq2::counts(normalized = T)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# We rotate Hinte into Loft sample PCA space
cm.loft <- cm.pseudo[, grepl("vis", colnames(cm.pseudo))] %>%
as.data.frame() %>%
filter(rowSums(.) > 0)
cm.hinte <- cm.pseudo[, !grepl("vis", colnames(cm.pseudo))] %>%
as.data.frame() %>%
filter(rowSums(.) > 0)
genesToKeep <- intersect(rownames(cm.loft), rownames(cm.hinte))
pc.res <-
cm.loft[genesToKeep, ] %>%
t() %>%
prcomp(center = T,
scale = T)
pc.tmp <- cm.hinte[genesToKeep, ] %>%
as.data.frame() %>%
filter(rowSums(.) > 0) %>%
t() %>%
scale(pc.res$center, pc.res$scale) %*% pc.res$rotation
# Plot
dat.plot <- rbind(pc.res$x, pc.tmp) %>%
data.frame() %>%
mutate(id = rownames(.)) %>%
mutate(type = grepl.replace(id, c("lean", "t0", "vis1", "vis2", "t1", "vis3"), c("lean", "obese", "obese", "short-term WL", "regressed", "regressed")) %>% factor(levels = c("lean", "obese", "short-term WL", "regressed")),
sex = meta$sex[match(id, meta$sample)] %>%
as.character())
dat.plot$sex[is.na(dat.plot$sex)] <- "Hinte"
dat.plot %>%
mutate(sex = factor(sex, levels = c("Female", "Male", "Hinte"), labels = c("Loft, female", "Loft, male", "Hinte"))) %>%
ggplot(aes(PC1, PC2, col = type, shape = sex)) +
geom_point(
data = filter(dat.plot, sex == "Hinte"),
aes(PC1, PC2), color = "yellow3", shape = 15, size = 5, alpha = 0.7) +
geom_point(size = 3) +
theme_bw() +
scale_color_manual(values = c("purple", brewer.pal(9, "Greens")[c(3, 6, 9)])) +
theme(line = element_blank()) +
labs(shape = "Study and sex", col = "Sample type")
Variation
pc.res$x %>%
data.frame() %>%
mutate(var = pc.res$sdev^2 / sum(pc.res$sdev^2)) %>%
pull(var)
## [1] 2.133072e-01 1.147448e-01 8.491969e-02 6.837396e-02 4.755415e-02
## [6] 4.037000e-02 3.178713e-02 3.010272e-02 2.742972e-02 2.362458e-02
## [11] 2.092853e-02 1.954507e-02 1.843777e-02 1.685591e-02 1.585535e-02
## [16] 1.566574e-02 1.452216e-02 1.406167e-02 1.369267e-02 1.255830e-02
## [21] 1.197001e-02 1.154230e-02 1.093551e-02 1.070401e-02 9.689988e-03
## [26] 9.518977e-03 8.595509e-03 8.244777e-03 7.987228e-03 7.534686e-03
## [31] 6.949427e-03 6.798175e-03 6.464972e-03 5.915445e-03 5.336252e-03
## [36] 5.097727e-03 4.995801e-03 4.669091e-03 4.397231e-03 4.217893e-03
## [41] 4.097873e-03 4.575723e-32
pc.res <-
cm.hinte[genesToKeep, ] %>%
t() %>%
prcomp(center = T,
scale = T)
pc.tmp <- cm.loft[genesToKeep, ] %>%
as.data.frame() %>%
filter(rowSums(.) > 0) %>%
t() %>%
scale(pc.res$center, pc.res$scale) %*% pc.res$rotation
# Plot
dat.plot <- rbind(pc.res$x, pc.tmp) %>%
data.frame() %>%
mutate(id = rownames(.)) %>%
mutate(type = grepl.replace(id, c("lean", "t0", "vis1", "vis2", "t1", "vis3"), c("lean", "obese", "obese", "short-term WL", "regressed", "regressed")) %>% factor(levels = c("lean", "obese", "short-term WL", "regressed")),
sex = meta$sex[match(id, meta$sample)] %>%
as.character())
dat.plot$sex[is.na(dat.plot$sex)] <- "Hinte"
dat.plot %>%
mutate(sex = factor(sex, levels = c("Female", "Male", "Hinte"), labels = c("Loft, female", "Loft, male", "Hinte"))) %>%
ggplot(aes(PC1, PC2, col = type, shape = sex)) +
geom_point(
data = filter(dat.plot, sex == "Hinte"),
aes(PC1, PC2), color = "yellow3", shape = 15, size = 5, alpha = 0.7) +
geom_point(size = 3) +
theme_bw() +
scale_color_manual(values = c("purple", brewer.pal(9, "Greens")[c(3, 6, 9)])) +
theme(line = element_blank()) +
labs(shape = "Study and sex", col = "Sample type")
Variation
pc.res$x %>%
data.frame() %>%
mutate(var = pc.res$sdev^2 / sum(pc.res$sdev^2)) %>%
pull(var)
## [1] 3.975304e-01 2.327407e-01 2.067138e-01 1.630152e-01 2.705167e-31
tmp <- loft.transfer$labels %>%
renameAnnotation("Lymphoid immune cells", "Lymphoid ICs") %>%
renameAnnotation(c("Myeloid immune cells"), "Myeloid ICs") %>%
{data.frame(cid = names(.), anno = unname(.))} %>%
filter(!grepl("!!", cid)) %>%
mutate(sample = con.integration$clusters$hinte_condition$groups[match(cid, names(con.integration$clusters$hinte_condition$groups))])
tmp.sum <- tmp %>%
group_by(sample) %>%
summarize(total = n()) %>%
split(., .$sample)
tmp.sum.ct <- tmp %>%
group_by(sample, anno) %>%
summarize(total = n()) %>%
split(., .$sample)
## `summarise()` has grouped output by 'sample'. You can override using the
## `.groups` argument.
tmp.prop <- Map(\(ct, tot, an) setNames(ct$total / tot$total, an), ct = tmp.sum.ct, tot = tmp.sum, an = lapply(tmp.sum.ct, pull, anno))
df.plot <- tmp.prop %>%
{Map(\(nn, x) data.frame(sample = nn, anno = names(x), value = unname(x)), nn = names(.), x = .)} %>%
bind_rows() %>%
mutate(anno = factor(anno) %>%
factor(levels = levels(.)[c(1, 2, 5, 8, 3, 7, 6, 4, 9)]) %>%
renameAnnotation("Lymphatic ECs", "LECs"))
df.plot %>%
rbind(c("st_t0", "LECs", 0)) %>% # Adding an artificial 0 as LECs aren't detected in all conditions
mutate(sample = strsplit(sample, "_") %>% sapply(\(x) if (length(x) == 1) x[1] else x[2]),
anno = factor(anno, levels = c("Adipocytes", "ASPCs", "ECs", "Myeloid ICs", "LECs", "SMCs", "Pericytes", "Lymphoid ICs", "Mast cells")),
sample = factor(sample, levels = c("lean", "t0", "t1"), labels = c("Lean", "Obese", "Regressed")),
value = as.numeric(value)) %>%
ggplot(aes(anno, value)) +
stat_summary(aes(fill = sample), fun = "mean", geom = "bar", position = position_dodge()) +
geom_point(aes(col = sample), position = position_dodge(width = 0.9)) +
theme_bw() +
labs(x = "", y = "% proportion per sample", title = "Loft to Hinte transfer, major annotation", fill = "", col = "") +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
axis.text = element_text(colour = "black")) +
scale_fill_manual(values = c(brewer.pal(9, "Greens")[c(3,6,9)])) +
scale_color_manual(values = c(brewer.pal(9, "Purples")[c(9,7,5)]))
size_data <- read.csv("Adipocyte_size.csv",sep = ",", header = TRUE)
condition_levels <- c("F1", "F2", "F3", "M1", "M2", "M3")
ggplot(size_data, aes(x = Diameter, y = factor(Condition, levels = rev(condition_levels)), fill = Condition)) +
ggridges::geom_density_ridges(scale = 0.9) +
xlab(expression(paste("Diameter (µm)"))) +
ylab("Condition") +
ggtitle("Adipocyte diameter distribution") +
theme(legend.position="none", plot.title = element_text(hjust = 0.5, size = 14, face = "bold"), text = element_text(family = "serif", size = 12)) +
scale_fill_manual(values = c("#F8B3BB", "#BD6563", "#7D2A25", "#ABD1D4", "#62649E", "#3E4282")) +
theme(rect = element_rect(fill = "transparent"))
## Picking joint bandwidth of 3.16
con <- qread("con_vascular.qs", nthreads = 10)
anno.vascular <- qread("anno_vascular.qs")
spc <- con$getDatasetPerCell()
varToPlot <- spc %>%
as.character() %>%
strsplit("_vis") %>%
sget(1) %>%
setNames(names(spc)) %>%
factor()
con$plotGraph(groups = varToPlot,
plot.na = F,
size = 0.3,
alpha = 0.1,
embedding = "UMAP",
mark.groups = F,
show.labels = T,
show.legend = T,
title = "Donor") +
labs(x = "UMAP2", y = "UMAP2", col = "") +
theme(line = element_blank()) +
dotSize(3)
var <- meta$sex
varToPlot <- grepl.replace(spc %>%
as.character(),
meta$sample,
var) %>%
setNames(names(spc)) %>%
factor(labels = unique(var))
con$plotGraph(groups = varToPlot,
plot.na = F,
size = 0.1,
alpha = 0.1,
embedding = "UMAP",
mark.groups = F,
show.labels = T,
show.legend = T,
title = "Sex") +
labs(x = "UMAP1", y = "UMAP2", col = "") +
theme(line = element_blank()) +
dotSize(3) +
scale_color_manual(values = c("firebrick", "navy"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
varToPlot <- spc %>%
as.character() %>%
strsplit("_|!!") %>%
sget(3) %>%
setNames(names(spc)) %>%
factor()
con$plotGraph(groups = varToPlot,
plot.na = F,
size = 0.3,
alpha = 0.1,
embedding = "UMAP",
mark.groups = F,
show.labels = T,
show.legend = T,
title = "Visit") +
labs(x = "UMAP1", y = "UMAP2", col = "") +
theme(line = element_blank()) +
dotSize(3) +
scale_color_manual(values = ggsci::pal_jama()(3))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
WATLAS <- read.delim("watlas_labels_l2_adipose_anno.tsv")
rownames(WATLAS)=WATLAS$X
# Define Vascular cells
column_order <- c("SOX5..VEC", "BTNL9..VEC", "ACKR1..VEC", "LEC", "SMC", "Pericyte")
row_order <- c("arEC", "cEC", "cEC2","vEC", "lEC", "SMCs", "Pericytes")
WATLAS_ordered <- WATLAS[, rev(column_order)] # Reorder columns
WATLAS_ordered <- WATLAS_ordered[row_order, ] # Reorder rows
corrplot(WATLAS_ordered %>% t() %>% t(), method="color", col.lim=c(0, 1),
col = rev(COL2("RdBu",200)),
is.corr = F,
tl.col = "black",
tl.pos = "lower",
outline = "#C0C0C0",
addgrid.col= NA)
Calculate
anno.minor <- qread("anno_minor.qs")
spc <- con.major$getDatasetPerCell() %>%
.[match(names(anno.minor), names(.))]
anno.minor.sex <- anno.minor %>%
data.frame(anno = ., cid = names(.), donor = unname(spc)) %>%
mutate(sex = meta$sex[match(donor, meta$sample)]) %>%
mutate(anno_sex = paste(anno, sex, sep = "_")) %$%
setNames(anno_sex, cid) %>%
factor()
# Calculate
res.minor <- c("vis1", "vis2", "vis3") %>%
sapply(\(vis) {
con <- con.major$samples %>%
.[!grepl(vis, names(.))] %>%
Conos$new()
if (vis == "vis1") {
vis.ref <- "vis2"
vis.target <- "vis3"
} else {
vis.ref <- "vis1"
if (vis == "vis2") {
vis.target <- "vis3"
} else {
vis.target <- "vis2"
}
}
sample.groups <- con$samples %>%
names() %>%
`names<-`(ifelse(grepl(vis.ref, .), vis.ref, vis.target), .)
cao <- Cacoa$new(con,
sample.groups,
anno.minor.sex,
ref.level = vis.ref,
target.level = vis.target,
n.cores = 32)
cao$estimateCellLoadings()
return(cao)
}, simplify = F, USE.NAMES = T)
Plot
vasc.pal <- RColorBrewer::brewer.pal(9, "Purples")[-c(1,2)][c(1,4,7,2,6,3,5)] %>%
setNames(levels(qread("anno_vascular.qs"))) %>%
{setNames(c(rep(., 3)), c(names(.), paste(names(.), "Female", sep = "_"), paste(names(.), "Male", sep = "_")))}
visits <- c("vis1", "vis2", "vis3")
p <- visits %>%
lapply(\(vis) {
comp <- visits[!visits == vis]
dat.p <- res.minor[[vis]]$test.results$coda$padj
dat.plot <- res.minor[[vis]]$test.results$coda$loadings %>%
tibble::rownames_to_column(var = "anno") %>%
reshape2::melt(id.var = "anno") %>%
filter(anno %in% names(vasc.pal))
yvar <- dat.plot %>%
group_by(anno) %>%
summarize(var = mean(value)) %>%
mutate(padj = dat.p[match(anno, names(dat.p))]) %>%
arrange(padj, desc(abs(var)))
dat.plot %<>%
mutate(anno = factor(anno, levels = rev(unique(yvar$anno))))
ymax <- dat.plot %>%
pull(value) %>%
abs() %>%
max()
out <- dat.plot %>%
ggplot(aes(anno, value, fill = anno)) +
geom_hline(yintercept = 0, col = "black") +
geom_jitter(alpha = 0.2, size = 0.3, col = "grey") +
geom_boxplot(outliers = F) +
coord_flip() +
ylim(c(-ymax, ymax)) +
theme_bw() +
theme(line = element_blank(),
axis.text = element_text(color = "black")) +
guides(fill = "none") +
labs(title = paste0(comp[1], " vs ", comp[2]), x = "", y = "Loadings [AU]") +
scale_fill_manual(values = vasc.pal)
return(out)
})
p
## [[1]]
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[2]]
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[3]]
Statistics
for (vis in visits) {
comp <- visits[!visits == vis]
message("Comparison: ", comp)
print(res.minor[[vis]]$test.results$coda$padj %>% .[names(.) %in% names(vasc.pal)])
}
## Comparison: vis2vis3
## vEC_Female cvEC_Male lEC_Male vEC_Male
## 0.01731602 0.01731602 0.06611570 0.01731602
## SMCs_Female Pericytes_Female cEC_Male lEC_Female
## 0.10389610 0.04155844 0.01731602 0.41558442
## cEC_Female SMCs_Male arEC_Female Pericytes_Male
## 0.04155844 0.03896104 0.10389610 0.08658009
## cvEC_Female arEC_Male
## 0.22134387 0.15112161
## Comparison: vis1vis3
## vEC_Female vEC_Male SMCs_Male lEC_Male
## 0.00944510 0.00944510 0.00944510 0.03280930
## cEC_Male Pericytes_Male SMCs_Female cEC_Female
## 0.00944510 0.00944510 0.16783217 0.00944510
## arEC_Male Pericytes_Female cvEC_Male lEC_Female
## 0.00944510 0.03055768 0.09937888 0.66955267
## arEC_Female cvEC_Female
## 0.66493506 0.82886003
## Comparison: vis1vis2
## cvEC_Male cvEC_Female vEC_Male SMCs_Male
## 0.1558442 0.4917749 0.4917749 0.4917749
## Pericytes_Male arEC_Female lEC_Male lEC_Female
## 0.4917749 0.4917749 0.7713499 0.8255528
## Pericytes_Female vEC_Female arEC_Male cEC_Male
## 0.7713499 0.7713499 0.8410637 0.8410637
## SMCs_Female cEC_Female
## 0.9660127 0.9890110
watlas_data <- read.delim("watlas_mapped_percent.tsv", stringsAsFactors = FALSE)
watlas_data_count <- read.delim("watlas_mapped_counts.tsv", stringsAsFactors = FALSE)
watlas_data_count <- watlas_data_count %>%
rowwise() %>%
mutate(row_sum = sum(c_across(where(is.numeric)), na.rm = TRUE)) # Sum across numeric columns
valid_samples <- watlas_data_count %>%
filter(row_sum >= 1000) %>%
pull(sample) # Extracts the valid sample IDs
filtered_data <- watlas_data %>%
filter(type == "sn", tissue == "AT", depot == "SAT", grepl("Hs", sample))
filtered_data <- filtered_data %>%
filter(sample %in% valid_samples)
long_data_VEC <- filtered_data %>%
select(wtstatus, arEC, cEC, vEC, lEC, SMCs, Pericytes) %>% # Select relevant columns
tidyr::pivot_longer(cols = -wtstatus, names_to = "Cell_Type", values_to = "Percentage")
long_data_VEC %>%
mutate(Cell_Type = factor(Cell_Type, levels = c("arEC", "cEC", "vEC", "lEC", "Pericytes", "SMCs"))) %>%
ggplot(aes(x = Cell_Type, y = Percentage, fill = wtstatus)) +
geom_boxplot(position = position_dodge(width = 0.8)) +
geom_point(aes(group = wtstatus), position = position_jitterdodge(jitter.width = 0.3, dodge.width = 0.8), alpha = 0.3, ) +
theme_bw() +
labs(title = "Comparison of Cell Types in Lean vs Obese",
x = "Cell Type",
y = "Percentage",
fill = "") +
scale_fill_manual(values = c(lean = "#26547c", obese = "#ef476f")) +
stat_compare_means(aes(group = wtstatus), method = "wilcox.test", label = "p.signif", ) +
theme(axis.text = element_text(color = "black"),
line = element_blank())
We integrated our data with the data from Hinte et al. that were graciously provided by the authors. As these data are not public, here we will only show how we plotted the data.
tmp <- con.integration$clusters$loft_transfer_minor$groups %>%
{data.frame(cid = names(.), anno = unname(.))} %>%
filter(!grepl("!!", cid)) %>%
mutate(sample = con.integration$clusters$hinte_condition$groups[match(cid, names(con.integration$clusters$hinte_condition$groups))])
tmp.sum <- tmp %>%
group_by(sample) %>%
summarize(total = n()) %>%
split(., .$sample)
tmp.sum.ct <- tmp %>%
group_by(sample, anno) %>%
summarize(total = n()) %>%
split(., .$sample)
## `summarise()` has grouped output by 'sample'. You can override using the
## `.groups` argument.
tmp.prop <- Map(\(ct, tot, an) setNames(ct$total / tot$total, an), ct = tmp.sum.ct, tot = tmp.sum, an = lapply(tmp.sum.ct, pull, anno))
df.plot <- tmp.prop %>%
{Map(\(nn, x) data.frame(sample = nn, anno = names(x), value = unname(x)), nn = names(.), x = .)} %>%
bind_rows() %>%
mutate(anno = factor(anno, levels = unique(anno)),
type = sapply(sample, \(x) if (grepl("lean", x)) "Lean" else if (grepl("t0", x)) "Obese" else "Regressed"))
cts <- c("arEC", "cEC", "vEC", "cvEC", "lEC", "Pericytes", "SMCs")
p <- df.plot %>%
filter(anno %in% cts) %>%
mutate(anno = factor(anno, levels = cts)) %>%
rbind(data.frame(sample = c("rg_t0", "rg_t1"), anno = c("lEC", "lEC"), value = c(0, 0), type = c("Obese", "Regressed"))) %>%
ggplot(aes(anno, value)) +
stat_summary(mapping = aes(fill = type), geom = "bar", fun = "mean", position = "dodge") +
geom_point(aes(col = type), position = position_dodge(width = 0.9)) +
theme_bw() +
labs(x = "", y = "% proportion per sample", title = "Loft to Hinte transfer, minor annotation", col = "", fill = "") +
theme(axis.text = element_text(colour = "black"),
line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
scale_fill_manual(values = brewer.pal(9, "Greens")[c(3, 6, 9)]) +
scale_color_manual(values = brewer.pal(9, "Purples")[c(9, 7, 5)])
p
# Metadata
metadata.all <- read.delim("Meta_Data_WL_Select.txt", h=T, dec = ",") %>%
mutate(sex = factor(sex, labels = c("Female", "Male")),
donor = gsub("D", "Donor_", recordid) %>%
gsub("V", "vis", .))
# Bulk data
cm.bulk <- read.delim("Bulk_norm_counts.txt", header = T) %>%
.[match(unique(.$hgnc_symbol), .$hgnc_symbol), ] %>%
`rownames<-`(.$hgnc_symbol) %>%
select(-hgnc_symbol, -ensembl_gene_id, -entrezgene_id) %>%
scale()
colnames(cm.bulk) %<>%
gsub("D", "Donor_", .) %>%
gsub("V", "vis", .)
See Figure 2d for model setup
# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major,
cell.groups = anno.major,
ref.level = "vis2",
target.level = "vis3",
sample.groups = con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
gsub(1, 2, .) %>%
`names<-`(con.major$samples %>%
names()),
sample.groups.palette = ggsci::pal_jama()(7))
cao$sample.groups <- con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
`names<-`(con.major$samples %>%
names()) %>%
gsub("vis", "Visit ", .)
anno.comb <- anno.major[!names(anno.major) %in% names(anno.vascular)] %>%
{factor(c(., anno.vascular))}
# Get proportions
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>%
{100 * ./rowSums(.)} %>%
as.data.frame() %$%
setNames(cEC, rownames(.))
tmp <- qread("cEC_model.qs")
lasso_model <- tmp$lasso_model
best_lambda <- tmp$lasso_cv$lambda.min
cm.bulk.mat <- cm.bulk %>%
as.data.frame() %>%
filter(rownames(.) %in% rownames(coef(lasso_model, s = best_lambda))) %>%
as.matrix() %>%
t()
# Now, train a final model on all data for visualization and gene selection
cm.test <- cm.bulk.mat %>%
.[rownames(.) %in% names(proportions), ]
lasso_cv <- cv.glmnet(cm.test, proportions, alpha = 1, nfolds = 40)
## Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
## fold
best_lambda <- lasso_cv$lambda.min
lasso_model <- glmnet(cm.test, proportions, alpha = 1, lambda = best_lambda)
# Predict on all samples using the final model
lasso.predict.all <- predict(lasso_model, newx = cm.test, s = best_lambda)
r2.all <- cor(proportions, lasso.predict.all)^2
data.frame(Proportions = proportions,
Predictions = unname(lasso.predict.all)) %>%
ggplot(aes(Proportions, Predictions)) +
geom_point() +
theme_bw() +
theme(axis.text = element_text(colour = "black")) +
labs(title = paste0("r2 = ", formatC(r2.all, digits = 3)), y = "Predicted proportions (%)", x = "Proportions (%)") +
geom_abline(intercept = 0, slope = 1, colour = "firebrick")
See Figure 2e for model setup
# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major,
cell.groups = anno.major,
ref.level = "vis2",
target.level = "vis3",
sample.groups = con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
gsub(1, 2, .) %>%
`names<-`(con.major$samples %>%
names()),
sample.groups.palette = ggsci::pal_jama()(7))
cao$sample.groups <- con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
`names<-`(con.major$samples %>%
names()) %>%
gsub("vis", "Visit ", .)
anno.comb <- anno.major[!names(anno.major) %in% names(anno.vascular)] %>%
{factor(c(., anno.vascular))}
# Get proportions
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>%
{100 * ./rowSums(.)} %>%
as.data.frame() %$%
setNames(vEC, rownames(.))
tmp <- qread("vEC_model.qs")
lasso_model <- tmp$lasso_model
best_lambda <- tmp$lasso_cv$lambda.min
cm.bulk.mat <- cm.bulk %>%
as.data.frame() %>%
filter(rownames(.) %in% rownames(coef(lasso_model, s = best_lambda))) %>%
as.matrix() %>%
t()
# Now, train a final model on all data for visualization and gene selection
cm.test <- cm.bulk.mat %>%
.[rownames(.) %in% names(proportions), ]
lasso_cv <- cv.glmnet(cm.test, proportions, alpha = 1, nfolds = 40)
## Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
## fold
best_lambda <- lasso_cv$lambda.min
lasso_model <- glmnet(cm.test, proportions, alpha = 1, lambda = best_lambda)
# Predict on all samples using the final model
lasso.predict.all <- predict(lasso_model, newx = cm.test, s = best_lambda)
r2.all <- cor(proportions, lasso.predict.all)^2
data.frame(Proportions = proportions,
Predictions = unname(lasso.predict.all)) %>%
ggplot(aes(Proportions, Predictions)) +
geom_point() +
theme_bw() +
theme(axis.text = element_text(colour = "black")) +
labs(title = paste0("r2 = ", formatC(r2.all, digits = 3)), y = "Predicted proportions (%)", x = "Proportions (%)") +
geom_abline(intercept = 0, slope = 1, colour = "firebrick")
Load data
## Plot Individual genes from RNA-seq data
Bulk_norm_counts <- read.delim("Bulk_norm_counts.txt", h=T)
colData <- read.delim("Meta_Data_WL_Select.txt", h=T, dec = ",")
gene_of_interest <- "TNF"
# Subset norm_counts to get counts for the gene of interest
gene_counts <- Bulk_norm_counts[Bulk_norm_counts$hgnc_symbol == gene_of_interest, ]
rownames(gene_counts)=gene_counts$hgnc_symbol
gene_counts=gene_counts[4:96]
# Reshape gene_counts into long format and merge with colData
gene_counts_long <- gene_counts %>%
tibble::rownames_to_column(var = "gene") %>% # Create a 'gene' column from row names
tidyr::pivot_longer(cols = -gene, names_to = "recordid", values_to = "count")
gene_counts_long <- gene_counts_long %>%
left_join(colData, by = "recordid") # Assuming recordid matches between gene_counts and colData
# Define a helper function to map p-values to stars
pval_to_stars <- function(p) {
if (p < 0.0001) {
return("****")
} else if (p < 0.001) {
return("***")
} else if (p < 0.01) {
return("**")
} else if (p < 0.05) {
return("*")
} else {
return("ns") # Not significant
}
}
# Perform a Kruskal-Wallis test separately for each gender
kw_res_F <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "F"))
kw_res_M <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "M"))
# Extract Kruskal-Wallis p-values
kw_pval_F <- kw_res_F$p.value
kw_pval_M <- kw_res_M$p.value
# Perform paired Wilcoxon tests for each gender with Holm's correction
paired_wilcox <- function(data, sex_group) {
data_sex <- data %>% filter(sex == sex_group)
pairwise_tests <- list(
visit1_vs_visit2 = wilcox.test(
x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
y = data_sex %>% filter(visit == "visit_2") %>% pull(count),
paired = TRUE
)$p.value,
visit1_vs_visit3 = wilcox.test(
x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
paired = TRUE
)$p.value,
visit2_vs_visit3 = wilcox.test(
x = data_sex %>% filter(visit == "visit_2") %>% pull(count),
y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
paired = TRUE
)$p.value
)
corrected_pvals <- p.adjust(unlist(pairwise_tests), method = "holm")
return(corrected_pvals)
}
wilcox_pvals_F <- paired_wilcox(gene_counts_long, "F")
wilcox_pvals_M <- paired_wilcox(gene_counts_long, "M")
# Map the Wilcoxon p-values to stars
wilcox_stars_F <- sapply(wilcox_pvals_F, pval_to_stars)
wilcox_stars_M <- sapply(wilcox_pvals_M, pval_to_stars)
# Create the boxplot with significance annotations
plot <- ggplot(gene_counts_long, aes(x = interaction(visit, sex), y = count, fill = visit)) +
geom_boxplot(outlier.shape = NA) + # Remove outliers
geom_jitter(color = "grey", size = 1, width = 0.2, alpha = 0.5) + # Add individual points
labs(title = "Gene Expression Across Visits and Genders", x = "Visit and Sex", y = "Gene Count") +
theme_minimal() +
theme(plot.title = element_text(size = 10)) +
geom_signif(
comparisons = list(
c("visit_1.F", "visit_2.F"),
c("visit_1.F", "visit_3.F"),
c("visit_2.F", "visit_3.F"),
c("visit_1.M", "visit_2.M"),
c("visit_1.M", "visit_3.M"),
c("visit_2.M", "visit_3.M")
),
annotations = c(
wilcox_stars_F["visit1_vs_visit2"],
wilcox_stars_F["visit1_vs_visit3"],
wilcox_stars_F["visit2_vs_visit3"],
wilcox_stars_M["visit1_vs_visit2"],
wilcox_stars_M["visit1_vs_visit3"],
wilcox_stars_M["visit2_vs_visit3"]
),
textsize = 4,
map_signif_level = FALSE
)
# Add Kruskal-Wallis p-values as text
plot <- plot +
annotate("text", x = 1, y = max(gene_counts_long$count) * 1.1,
label = paste("KW Female p-value:", format(kw_pval_F, digits = 3)),
hjust = 0, size = 3) +
annotate("text", x = 4, y = max(gene_counts_long$count) * 1.1,
label = paste("KW Male p-value:", format(kw_pval_M, digits = 3)),
hjust = 0, size = 3)
# Display the plot
print(plot)
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
gene_of_interest <- "CCL3"
# Subset norm_counts to get counts for the gene of interest
gene_counts <- Bulk_norm_counts[Bulk_norm_counts$hgnc_symbol == gene_of_interest, ]
rownames(gene_counts)=gene_counts$hgnc_symbol
gene_counts=gene_counts[4:96]
# Reshape gene_counts into long format and merge with colData
gene_counts_long <- gene_counts %>%
tibble::rownames_to_column(var = "gene") %>% # Create a 'gene' column from row names
tidyr::pivot_longer(cols = -gene, names_to = "recordid", values_to = "count")
gene_counts_long <- gene_counts_long %>%
left_join(colData, by = "recordid") # Assuming recordid matches between gene_counts and colData
# Define a helper function to map p-values to stars
pval_to_stars <- function(p) {
if (p < 0.0001) {
return("****")
} else if (p < 0.001) {
return("***")
} else if (p < 0.01) {
return("**")
} else if (p < 0.05) {
return("*")
} else {
return("ns") # Not significant
}
}
# Perform a Kruskal-Wallis test separately for each gender
kw_res_F <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "F"))
kw_res_M <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "M"))
# Extract Kruskal-Wallis p-values
kw_pval_F <- kw_res_F$p.value
kw_pval_M <- kw_res_M$p.value
# Perform paired Wilcoxon tests for each gender with Holm's correction
paired_wilcox <- function(data, sex_group) {
data_sex <- data %>% filter(sex == sex_group)
pairwise_tests <- list(
visit1_vs_visit2 = wilcox.test(
x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
y = data_sex %>% filter(visit == "visit_2") %>% pull(count),
paired = TRUE
)$p.value,
visit1_vs_visit3 = wilcox.test(
x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
paired = TRUE
)$p.value,
visit2_vs_visit3 = wilcox.test(
x = data_sex %>% filter(visit == "visit_2") %>% pull(count),
y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
paired = TRUE
)$p.value
)
corrected_pvals <- p.adjust(unlist(pairwise_tests), method = "holm")
return(corrected_pvals)
}
wilcox_pvals_F <- paired_wilcox(gene_counts_long, "F")
wilcox_pvals_M <- paired_wilcox(gene_counts_long, "M")
# Map the Wilcoxon p-values to stars
wilcox_stars_F <- sapply(wilcox_pvals_F, pval_to_stars)
wilcox_stars_M <- sapply(wilcox_pvals_M, pval_to_stars)
# Create the boxplot with significance annotations
plot <- ggplot(gene_counts_long, aes(x = interaction(visit, sex), y = count, fill = visit)) +
geom_boxplot(outlier.shape = NA) + # Remove outliers
geom_jitter(color = "grey", size = 1, width = 0.2, alpha = 0.5) + # Add individual points
labs(title = "Gene Expression Across Visits and Genders", x = "Visit and Sex", y = "Gene Count") +
theme_minimal() +
theme(plot.title = element_text(size = 10)) +
geom_signif(
comparisons = list(
c("visit_1.F", "visit_2.F"),
c("visit_1.F", "visit_3.F"),
c("visit_2.F", "visit_3.F"),
c("visit_1.M", "visit_2.M"),
c("visit_1.M", "visit_3.M"),
c("visit_2.M", "visit_3.M")
),
annotations = c(
wilcox_stars_F["visit1_vs_visit2"],
wilcox_stars_F["visit1_vs_visit3"],
wilcox_stars_F["visit2_vs_visit3"],
wilcox_stars_M["visit1_vs_visit2"],
wilcox_stars_M["visit1_vs_visit3"],
wilcox_stars_M["visit2_vs_visit3"]
),
textsize = 4,
map_signif_level = FALSE
)
# Add Kruskal-Wallis p-values as text
plot <- plot +
annotate("text", x = 1, y = max(gene_counts_long$count) * 1.1,
label = paste("KW Female p-value:", format(kw_pval_F, digits = 3)),
hjust = 0, size = 3) +
annotate("text", x = 4, y = max(gene_counts_long$count) * 1.1,
label = paste("KW Male p-value:", format(kw_pval_M, digits = 3)),
hjust = 0, size = 3)
# Display the plot
print(plot)
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
gene_of_interest <- "CCL5"
# Subset norm_counts to get counts for the gene of interest
gene_counts <- Bulk_norm_counts[Bulk_norm_counts$hgnc_symbol == gene_of_interest, ]
rownames(gene_counts)=gene_counts$hgnc_symbol
gene_counts=gene_counts[4:96]
# Reshape gene_counts into long format and merge with colData
gene_counts_long <- gene_counts %>%
tibble::rownames_to_column(var = "gene") %>% # Create a 'gene' column from row names
tidyr::pivot_longer(cols = -gene, names_to = "recordid", values_to = "count")
gene_counts_long <- gene_counts_long %>%
left_join(colData, by = "recordid") # Assuming recordid matches between gene_counts and colData
# Define a helper function to map p-values to stars
pval_to_stars <- function(p) {
if (p < 0.0001) {
return("****")
} else if (p < 0.001) {
return("***")
} else if (p < 0.01) {
return("**")
} else if (p < 0.05) {
return("*")
} else {
return("ns") # Not significant
}
}
# Perform a Kruskal-Wallis test separately for each gender
kw_res_F <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "F"))
kw_res_M <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "M"))
# Extract Kruskal-Wallis p-values
kw_pval_F <- kw_res_F$p.value
kw_pval_M <- kw_res_M$p.value
# Perform paired Wilcoxon tests for each gender with Holm's correction
paired_wilcox <- function(data, sex_group) {
data_sex <- data %>% filter(sex == sex_group)
pairwise_tests <- list(
visit1_vs_visit2 = wilcox.test(
x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
y = data_sex %>% filter(visit == "visit_2") %>% pull(count),
paired = TRUE
)$p.value,
visit1_vs_visit3 = wilcox.test(
x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
paired = TRUE
)$p.value,
visit2_vs_visit3 = wilcox.test(
x = data_sex %>% filter(visit == "visit_2") %>% pull(count),
y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
paired = TRUE
)$p.value
)
corrected_pvals <- p.adjust(unlist(pairwise_tests), method = "holm")
return(corrected_pvals)
}
wilcox_pvals_F <- paired_wilcox(gene_counts_long, "F")
wilcox_pvals_M <- paired_wilcox(gene_counts_long, "M")
# Map the Wilcoxon p-values to stars
wilcox_stars_F <- sapply(wilcox_pvals_F, pval_to_stars)
wilcox_stars_M <- sapply(wilcox_pvals_M, pval_to_stars)
# Create the boxplot with significance annotations
plot <- ggplot(gene_counts_long, aes(x = interaction(visit, sex), y = count, fill = visit)) +
geom_boxplot(outlier.shape = NA) + # Remove outliers
geom_jitter(color = "grey", size = 1, width = 0.2, alpha = 0.5) + # Add individual points
labs(title = "Gene Expression Across Visits and Genders", x = "Visit and Sex", y = "Gene Count") +
theme_minimal() +
theme(plot.title = element_text(size = 10)) +
geom_signif(
comparisons = list(
c("visit_1.F", "visit_2.F"),
c("visit_1.F", "visit_3.F"),
c("visit_2.F", "visit_3.F"),
c("visit_1.M", "visit_2.M"),
c("visit_1.M", "visit_3.M"),
c("visit_2.M", "visit_3.M")
),
annotations = c(
wilcox_stars_F["visit1_vs_visit2"],
wilcox_stars_F["visit1_vs_visit3"],
wilcox_stars_F["visit2_vs_visit3"],
wilcox_stars_M["visit1_vs_visit2"],
wilcox_stars_M["visit1_vs_visit3"],
wilcox_stars_M["visit2_vs_visit3"]
),
textsize = 4,
map_signif_level = FALSE
)
# Add Kruskal-Wallis p-values as text
plot <- plot +
annotate("text", x = 1, y = max(gene_counts_long$count) * 1.1,
label = paste("KW Female p-value:", format(kw_pval_F, digits = 3)),
hjust = 0, size = 3) +
annotate("text", x = 4, y = max(gene_counts_long$count) * 1.1,
label = paste("KW Male p-value:", format(kw_pval_M, digits = 3)),
hjust = 0, size = 3)
# Display the plot
print(plot)
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
con <- qread("con_lymphoid.qs", nthreads = 10)
anno.lymphoid <- qread("anno_lymphoid.qs")
spc <- con$getDatasetPerCell()
con$plotGraph(groups = anno.lymphoid,
plot.na = F,
size = 0.8,
alpha = 1,
embedding = "largeVis",
shuffle.colors = T,
show.labels = T,
font.size = 5,
show.legend = T,
mark.groups = T) +
labs(x = "largeVis1", y = "largeVis2", col = "") +
theme(line = element_blank()) +
scale_colour_manual(values = RColorBrewer::brewer.pal(9, "YlOrBr")[-c(1,2)][c(1,4,5,2,6)]) +
dotSize(3)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
varToPlot <- spc %>%
as.character() %>%
strsplit("_vis") %>%
sget(1) %>%
setNames(names(spc)) %>%
.[names(.) %in% names(anno.lymphoid)]
con$plotGraph(groups = varToPlot,
plot.na = F,
size = 0.5,
alpha = 0.8,
embedding = "largeVis",
mark.groups = F,
show.labels = T,
show.legend = T,
title = "Donor") +
labs(x = "largeVis1", y = "largeVis2", col = "") +
theme(line = element_blank()) +
dotSize(3)
var <- meta$sex
varToPlot <- grepl.replace(spc %>%
as.character(),
meta$sample,
var) %>%
setNames(names(spc)) %>%
factor(labels = unique(var)) %>%
.[names(.) %in% names(anno.lymphoid)]
con$plotGraph(groups = varToPlot,
plot.na = F,
size = 0.5,
alpha = 0.8,
embedding = "largeVis",
mark.groups = F,
show.labels = T,
show.legend = T,
title = "Sex") +
labs(x = "largeVis1", y = "largeVis2", col = "") +
theme(line = element_blank()) +
dotSize(3) +
scale_color_manual(values = c("firebrick", "navy"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
varToPlot <- spc %>%
as.character() %>%
strsplit("_|!!") %>%
sget(3) %>%
setNames(names(spc)) %>%
factor() %>%
.[names(.) %in% names(anno.lymphoid)]
con$plotGraph(groups = varToPlot,
plot.na = F,
size = 0.5,
alpha = 0.8,
embedding = "largeVis",
mark.groups = F,
show.labels = T,
show.legend = T,
title = "Visit") +
labs(x = "largeVis1", y = "largeVis2", col = "") +
theme(line = element_blank()) +
dotSize(3) +
scale_color_manual(values = ggsci::pal_jama()(3))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
cm.merged <- con$getJointCountMatrix() %>%
.[rownames(.) %in% names(anno.lymphoid), ]
c("FOXP3", "CTLA4",
"IL7R", "CD4",
"CD40LG", "CD8A",
"GZMK", "GZMH",
"GNLY", "KLRD1",
"KLRF1", "NCAM1"
) %>%
sccore::dotPlot(.,
cm.merged,
anno.lymphoid %>%
factor(levels = c("Treg", "CD4 T cells", "CD8 T cells", "NK-like T cells", "NK")),
gene.order = .,
cols = c("white","yellow4"))
# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major,
cell.groups = anno.major,
ref.level = "vis2",
target.level = "vis3",
sample.groups = con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
gsub(1, 2, .) %>%
`names<-`(con.major$samples %>%
names()),
sample.groups.palette = ggsci::pal_jama()(7))
cao$sample.groups <- con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
`names<-`(con.major$samples %>%
names()) %>%
gsub("vis", "Visit ", .)
anno.comb <- anno.major[!names(anno.major) %in% names(anno.lymphoid)] %>%
{factor(c(., anno.lymphoid))}
p <- cao$plotCellGroupSizes(cell.groups = anno.comb,
show.significance = F,
filter.empty.cell.types = F)
plot.dat <- p$data %>%
filter(variable %in% levels(anno.lymphoid)) %>%
mutate(sex = rep(meta$sex, anno.lymphoid %>% levels() %>% length()),
variable = factor(variable)) %>%
mutate(sex_visit = paste(sex, group, sep = " ") %>% gsub("Vis", "vis", .),
variable = factor(variable, levels = c("Treg", "CD4 T cells", "CD8 T cells", "NK-like T cells", "NK")))
# Kruskal-Wallis
sex.stat <- plot.dat %$%
split(., sex) %>%
lapply(\(x) split(x, x$variable)) %>%
lapply(lapply, \(x) kruskal.test(x$value, g = x$group)$p.value) %>%
lapply(sapply, gtools::stars.pval) %>%
{lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>%
`names<-`(c("Female","Male")) %>%
lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>%
{lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>%
bind_rows() %>%
mutate(ct = factor(ct, levels = unique(plot.dat$variable))) %>%
arrange(ct) %>%
mutate(sig = gsub(".", "", sig, fixed = T))
# Wilcoxon
stat.test <- plot.dat %>%
dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
group_by(var, sex) %>%
wilcox_test(value~sex_visit, paired = T) %>%
mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>%
filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
add_xy_position(x = "var", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
arrange(desc(y.position))
# Plot
ggplot(plot.dat, aes(variable, value)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "% cells per sample", fill = "") +
theme(line = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1)) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat, aes(ct, y = 5, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 1, hide.ns = F) # Wilcoxon
con <- qread("con_myeloid.qs", nthreads = 10)
anno.immune <- qread("anno_immune.qs")
spc <- con$getDatasetPerCell()
varToPlot <- spc %>%
as.character() %>%
strsplit("_vis") %>%
sget(1) %>%
setNames(names(spc)) %>%
factor()
con$plotGraph(groups = varToPlot,
plot.na = F,
size = 0.3,
alpha = 0.1,
embedding = "largeVis",
mark.groups = F,
show.labels = T,
show.legend = T,
title = "Donor") +
labs(x = "largeVis1", y = "largeVis2", col = "") +
theme(line = element_blank()) +
dotSize(3)
var <- meta$sex
varToPlot <- grepl.replace(spc %>%
as.character(),
meta$sample,
var) %>%
setNames(names(spc)) %>%
factor(labels = unique(var))
con$plotGraph(groups = varToPlot,
plot.na = F,
size = 0.1,
alpha = 0.1,
embedding = "largeVis",
mark.groups = F,
show.labels = T,
show.legend = T,
title = "Sex") +
labs(x = "largeVis1", y = "largeVis2", col = "") +
theme(line = element_blank()) +
dotSize(3) +
scale_color_manual(values = c("firebrick", "navy"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
varToPlot <- spc %>%
as.character() %>%
strsplit("_|!!") %>%
sget(3) %>%
setNames(names(spc)) %>%
factor()
con$plotGraph(groups = varToPlot,
plot.na = F,
size = 0.3,
alpha = 0.1,
embedding = "largeVis",
mark.groups = F,
show.labels = T,
show.legend = T,
title = "Visit") +
labs(x = "largeVis1", y = "largeVis2", col = "") +
theme(line = element_blank()) +
dotSize(3) +
scale_color_manual(values = ggsci::pal_jama()(3))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
WATLAS <- read.delim("watlas_labels_l2_adipose_anno.tsv")
rownames(WATLAS)=WATLAS$X
# Define MACS
column_order <- c("LYVE1..Macrophage", "CYP27A1..Macrophage", "LPL..Macrophage")
row_order <- c("ATM", "mono/mac", "Early LAM", "LAM")
WATLAS_ordered <- WATLAS[, rev(column_order)] # Reorder columns
WATLAS_ordered <- WATLAS_ordered[row_order, ] # Reorder rows
corrplot(t(WATLAS_ordered), method="color", order = , col.lim=c(0.5, 1),
col = rev(COL2("RdBu",200)),
is.corr = F,
tl.col = "black",
tl.pos = "lower",
outline = "#C0C0C0",
addgrid.col= NA)
Calculate
anno.minor <- qread("anno_minor.qs")
spc <- con.major$getDatasetPerCell() %>%
.[match(names(anno.minor), names(.))]
anno.minor.sex <- anno.minor %>%
data.frame(anno = ., cid = names(.), donor = unname(spc)) %>%
mutate(sex = meta$sex[match(donor, meta$sample)]) %>%
mutate(anno_sex = paste(anno, sex, sep = "_")) %$%
setNames(anno_sex, cid) %>%
factor()
# Calculate
res.minor <- c("vis1", "vis2", "vis3") %>%
sapply(\(vis) {
con <- con.major$samples %>%
.[!grepl(vis, names(.))] %>%
Conos$new()
if (vis == "vis1") {
vis.ref <- "vis2"
vis.target <- "vis3"
} else {
vis.ref <- "vis1"
if (vis == "vis2") {
vis.target <- "vis3"
} else {
vis.target <- "vis2"
}
}
sample.groups <- con$samples %>%
names() %>%
`names<-`(ifelse(grepl(vis.ref, .), vis.ref, vis.target), .)
cao <- Cacoa$new(con,
sample.groups,
anno.minor.sex,
ref.level = vis.ref,
target.level = vis.target,
n.cores = 32)
cao$estimateCellLoadings()
return(cao)
}, simplify = F, USE.NAMES = T)
Plot
myeloid.pal <- c("#174D86", "#9EC3B5", "#356790", "#ADD0BA", "#628EA0", "#80A9AA", "#719CA5") %>%
setNames(levels(qread("/work/02_data/09_export/anno_immune.qs") %>%
.[!. %in% c("NKT","NK","Tem","Th1","Th","Treg","B-cells")] %>%
factor(levels = c("ATM", "mono/mac", "Early LAM", "LAM", "cDC1", "cDC2", "mDC")))) %>%
{setNames(c(rep(., 3)), c(names(.), paste(names(.), "Female", sep = "_"), paste(names(.), "Male", sep = "_")))} %>%
.[grepl("ATM|LAM|mono", names(.))]
visits <- c("vis1", "vis2", "vis3")
p <- visits %>%
lapply(\(vis) {
comp <- visits[!visits == vis]
dat.p <- res.minor[[vis]]$test.results$coda$padj
dat.plot <- res.minor[[vis]]$test.results$coda$loadings %>%
tibble::rownames_to_column(var = "anno") %>%
reshape2::melt(id.var = "anno") %>%
filter(anno %in% names(myeloid.pal))
yvar <- dat.plot %>%
group_by(anno) %>%
summarize(var = mean(value)) %>%
mutate(padj = dat.p[match(anno, names(dat.p))]) %>%
arrange(padj, desc(abs(var)))
dat.plot %<>%
mutate(anno = factor(anno, levels = rev(unique(yvar$anno))))
ymax <- dat.plot %>%
pull(value) %>%
abs() %>%
max()
out <- dat.plot %>%
ggplot(aes(anno, value, fill = anno)) +
geom_hline(yintercept = 0, col = "black") +
geom_jitter(alpha = 0.2, size = 0.3, col = "grey") +
geom_boxplot(outliers = F) +
coord_flip() +
ylim(c(-ymax, ymax)) +
theme_bw() +
theme(line = element_blank(),
axis.text = element_text(color = "black")) +
guides(fill = "none") +
labs(title = paste0(comp[1], " vs ", comp[2]), x = "", y = "Loadings [AU]") +
scale_fill_manual(values = myeloid.pal)
return(out)
})
p
## [[1]]
##
## [[2]]
##
## [[3]]
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
Statistics
for (vis in visits) {
comp <- visits[!visits == vis]
message("Comparison: ", comp)
print(res.minor[[vis]]$test.results$coda$padj %>% .[names(.) %in% names(myeloid.pal)])
}
## Comparison: vis2vis3
## Early LAM_Male LAM_Male LAM_Female Early LAM_Female
## 0.01731602 0.01731602 0.09647495 0.15112161
## mono/mac_Male mono/mac_Female ATM_Female ATM_Male
## 0.09590410 0.34909091 0.49350649 0.85248085
## Comparison: vis1vis3
## Early LAM_Male LAM_Male LAM_Female Early LAM_Female
## 0.00944510 0.01731602 0.03280930 0.16783217
## mono/mac_Male ATM_Female ATM_Male mono/mac_Female
## 0.44675325 0.82886003 0.82886003 0.98192004
## Comparison: vis1vis2
## Early LAM_Male ATM_Male Early LAM_Female LAM_Female
## 0.5085441 0.7316017 0.7713499 0.7713499
## LAM_Male ATM_Female mono/mac_Male mono/mac_Female
## 0.8410637 0.9660127 0.9890110 0.9890110
Load data
## Plot Individual genes from RNA-seq data
Bulk_norm_counts <- read.delim("Bulk_norm_counts.txt", h=T)
colData <- read.delim("Meta_Data_WL_Select.txt", h=T, dec = ",")
TREM2
gene_of_interest <- "TREM2"
# Subset norm_counts to get counts for the gene of interest
gene_counts <- Bulk_norm_counts[Bulk_norm_counts$hgnc_symbol == gene_of_interest, ]
rownames(gene_counts)=gene_counts$hgnc_symbol
gene_counts=gene_counts[4:96]
# Reshape gene_counts into long format and merge with colData
gene_counts_long <- gene_counts %>%
tibble::rownames_to_column(var = "gene") %>% # Create a 'gene' column from row names
tidyr::pivot_longer(cols = -gene, names_to = "recordid", values_to = "count")
gene_counts_long <- gene_counts_long %>%
left_join(colData, by = "recordid") # Assuming recordid matches between gene_counts and colData
# Define a helper function to map p-values to stars
pval_to_stars <- function(p) {
if (p < 0.0001) {
return("****")
} else if (p < 0.001) {
return("***")
} else if (p < 0.01) {
return("**")
} else if (p < 0.05) {
return("*")
} else {
return("ns") # Not significant
}
}
# Perform a Kruskal-Wallis test separately for each gender
kw_res_F <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "F"))
kw_res_M <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "M"))
# Extract Kruskal-Wallis p-values
kw_pval_F <- kw_res_F$p.value
kw_pval_M <- kw_res_M$p.value
# Perform paired Wilcoxon tests for each gender with Holm's correction
paired_wilcox <- function(data, sex_group) {
data_sex <- data %>% filter(sex == sex_group)
pairwise_tests <- list(
visit1_vs_visit2 = wilcox.test(
x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
y = data_sex %>% filter(visit == "visit_2") %>% pull(count),
paired = TRUE
)$p.value,
visit1_vs_visit3 = wilcox.test(
x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
paired = TRUE
)$p.value,
visit2_vs_visit3 = wilcox.test(
x = data_sex %>% filter(visit == "visit_2") %>% pull(count),
y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
paired = TRUE
)$p.value
)
corrected_pvals <- p.adjust(unlist(pairwise_tests), method = "holm")
return(corrected_pvals)
}
wilcox_pvals_F <- paired_wilcox(gene_counts_long, "F")
wilcox_pvals_M <- paired_wilcox(gene_counts_long, "M")
# Map the Wilcoxon p-values to stars
wilcox_stars_F <- sapply(wilcox_pvals_F, pval_to_stars)
wilcox_stars_M <- sapply(wilcox_pvals_M, pval_to_stars)
# Create the boxplot with significance annotations
plot <- ggplot(gene_counts_long, aes(x = interaction(visit, sex), y = count, fill = visit)) +
geom_boxplot(outlier.shape = NA) + # Remove outliers
geom_jitter(color = "grey", size = 1, width = 0.2, alpha = 0.5) + # Add individual points
labs(title = "Gene Expression Across Visits and Genders", x = "Visit and Sex", y = "Gene Count") +
theme_minimal() +
theme(plot.title = element_text(size = 10)) +
geom_signif(
comparisons = list(
c("visit_1.F", "visit_2.F"),
c("visit_1.F", "visit_3.F"),
c("visit_2.F", "visit_3.F"),
c("visit_1.M", "visit_2.M"),
c("visit_1.M", "visit_3.M"),
c("visit_2.M", "visit_3.M")
),
annotations = c(
wilcox_stars_F["visit1_vs_visit2"],
wilcox_stars_F["visit1_vs_visit3"],
wilcox_stars_F["visit2_vs_visit3"],
wilcox_stars_M["visit1_vs_visit2"],
wilcox_stars_M["visit1_vs_visit3"],
wilcox_stars_M["visit2_vs_visit3"]
),
textsize = 4,
map_signif_level = FALSE
)
# Add Kruskal-Wallis p-values as text
plot <- plot +
annotate("text", x = 1, y = max(gene_counts_long$count) * 1.1,
label = paste("KW Female p-value:", format(kw_pval_F, digits = 3)),
hjust = 0, size = 3) +
annotate("text", x = 4, y = max(gene_counts_long$count) * 1.1,
label = paste("KW Male p-value:", format(kw_pval_M, digits = 3)),
hjust = 0, size = 3)
# Display the plot
print(plot)
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
SPP1
gene_of_interest <- "SPP1"
# Subset norm_counts to get counts for the gene of interest
gene_counts <- Bulk_norm_counts[Bulk_norm_counts$hgnc_symbol == gene_of_interest, ]
rownames(gene_counts)=gene_counts$hgnc_symbol
gene_counts=gene_counts[4:96]
# Reshape gene_counts into long format and merge with colData
gene_counts_long <- gene_counts %>%
tibble::rownames_to_column(var = "gene") %>% # Create a 'gene' column from row names
tidyr::pivot_longer(cols = -gene, names_to = "recordid", values_to = "count")
gene_counts_long <- gene_counts_long %>%
left_join(colData, by = "recordid") # Assuming recordid matches between gene_counts and colData
# Define a helper function to map p-values to stars
pval_to_stars <- function(p) {
if (p < 0.0001) {
return("****")
} else if (p < 0.001) {
return("***")
} else if (p < 0.01) {
return("**")
} else if (p < 0.05) {
return("*")
} else {
return("ns") # Not significant
}
}
# Perform a Kruskal-Wallis test separately for each gender
kw_res_F <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "F"))
kw_res_M <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "M"))
# Extract Kruskal-Wallis p-values
kw_pval_F <- kw_res_F$p.value
kw_pval_M <- kw_res_M$p.value
# Perform paired Wilcoxon tests for each gender with Holm's correction
paired_wilcox <- function(data, sex_group) {
data_sex <- data %>% filter(sex == sex_group)
pairwise_tests <- list(
visit1_vs_visit2 = wilcox.test(
x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
y = data_sex %>% filter(visit == "visit_2") %>% pull(count),
paired = TRUE
)$p.value,
visit1_vs_visit3 = wilcox.test(
x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
paired = TRUE
)$p.value,
visit2_vs_visit3 = wilcox.test(
x = data_sex %>% filter(visit == "visit_2") %>% pull(count),
y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
paired = TRUE
)$p.value
)
corrected_pvals <- p.adjust(unlist(pairwise_tests), method = "holm")
return(corrected_pvals)
}
wilcox_pvals_F <- paired_wilcox(gene_counts_long, "F")
wilcox_pvals_M <- paired_wilcox(gene_counts_long, "M")
# Map the Wilcoxon p-values to stars
wilcox_stars_F <- sapply(wilcox_pvals_F, pval_to_stars)
wilcox_stars_M <- sapply(wilcox_pvals_M, pval_to_stars)
# Create the boxplot with significance annotations
plot <- ggplot(gene_counts_long, aes(x = interaction(visit, sex), y = count, fill = visit)) +
geom_boxplot(outlier.shape = NA) + # Remove outliers
geom_jitter(color = "grey", size = 1, width = 0.2, alpha = 0.5) + # Add individual points
labs(title = "Gene Expression Across Visits and Genders", x = "Visit and Sex", y = "Gene Count") +
theme_minimal() +
theme(plot.title = element_text(size = 10)) +
geom_signif(
comparisons = list(
c("visit_1.F", "visit_2.F"),
c("visit_1.F", "visit_3.F"),
c("visit_2.F", "visit_3.F"),
c("visit_1.M", "visit_2.M"),
c("visit_1.M", "visit_3.M"),
c("visit_2.M", "visit_3.M")
),
annotations = c(
wilcox_stars_F["visit1_vs_visit2"],
wilcox_stars_F["visit1_vs_visit3"],
wilcox_stars_F["visit2_vs_visit3"],
wilcox_stars_M["visit1_vs_visit2"],
wilcox_stars_M["visit1_vs_visit3"],
wilcox_stars_M["visit2_vs_visit3"]
),
textsize = 4,
map_signif_level = FALSE
)
# Add Kruskal-Wallis p-values as text
plot <- plot +
annotate("text", x = 1, y = max(gene_counts_long$count) * 1.1,
label = paste("KW Female p-value:", format(kw_pval_F, digits = 3)),
hjust = 0, size = 3) +
annotate("text", x = 4, y = max(gene_counts_long$count) * 1.1,
label = paste("KW Male p-value:", format(kw_pval_M, digits = 3)),
hjust = 0, size = 3)
# Display the plot
print(plot)
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
MMP9
gene_of_interest <- "MMP9"
# Subset norm_counts to get counts for the gene of interest
gene_counts <- Bulk_norm_counts[Bulk_norm_counts$hgnc_symbol == gene_of_interest, ]
rownames(gene_counts)=gene_counts$hgnc_symbol
gene_counts=gene_counts[4:96]
# Reshape gene_counts into long format and merge with colData
gene_counts_long <- gene_counts %>%
tibble::rownames_to_column(var = "gene") %>% # Create a 'gene' column from row names
tidyr::pivot_longer(cols = -gene, names_to = "recordid", values_to = "count")
gene_counts_long <- gene_counts_long %>%
left_join(colData, by = "recordid") # Assuming recordid matches between gene_counts and colData
# Define a helper function to map p-values to stars
pval_to_stars <- function(p) {
if (p < 0.0001) {
return("****")
} else if (p < 0.001) {
return("***")
} else if (p < 0.01) {
return("**")
} else if (p < 0.05) {
return("*")
} else {
return("ns") # Not significant
}
}
# Perform a Kruskal-Wallis test separately for each gender
kw_res_F <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "F"))
kw_res_M <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "M"))
# Extract Kruskal-Wallis p-values
kw_pval_F <- kw_res_F$p.value
kw_pval_M <- kw_res_M$p.value
# Perform paired Wilcoxon tests for each gender with Holm's correction
paired_wilcox <- function(data, sex_group) {
data_sex <- data %>% filter(sex == sex_group)
pairwise_tests <- list(
visit1_vs_visit2 = wilcox.test(
x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
y = data_sex %>% filter(visit == "visit_2") %>% pull(count),
paired = TRUE
)$p.value,
visit1_vs_visit3 = wilcox.test(
x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
paired = TRUE
)$p.value,
visit2_vs_visit3 = wilcox.test(
x = data_sex %>% filter(visit == "visit_2") %>% pull(count),
y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
paired = TRUE
)$p.value
)
corrected_pvals <- p.adjust(unlist(pairwise_tests), method = "holm")
return(corrected_pvals)
}
wilcox_pvals_F <- paired_wilcox(gene_counts_long, "F")
wilcox_pvals_M <- paired_wilcox(gene_counts_long, "M")
# Map the Wilcoxon p-values to stars
wilcox_stars_F <- sapply(wilcox_pvals_F, pval_to_stars)
wilcox_stars_M <- sapply(wilcox_pvals_M, pval_to_stars)
# Create the boxplot with significance annotations
plot <- ggplot(gene_counts_long, aes(x = interaction(visit, sex), y = count, fill = visit)) +
geom_boxplot(outlier.shape = NA) + # Remove outliers
geom_jitter(color = "grey", size = 1, width = 0.2, alpha = 0.5) + # Add individual points
labs(title = "Gene Expression Across Visits and Genders", x = "Visit and Sex", y = "Gene Count") +
theme_minimal() +
theme(plot.title = element_text(size = 10)) +
geom_signif(
comparisons = list(
c("visit_1.F", "visit_2.F"),
c("visit_1.F", "visit_3.F"),
c("visit_2.F", "visit_3.F"),
c("visit_1.M", "visit_2.M"),
c("visit_1.M", "visit_3.M"),
c("visit_2.M", "visit_3.M")
),
annotations = c(
wilcox_stars_F["visit1_vs_visit2"],
wilcox_stars_F["visit1_vs_visit3"],
wilcox_stars_F["visit2_vs_visit3"],
wilcox_stars_M["visit1_vs_visit2"],
wilcox_stars_M["visit1_vs_visit3"],
wilcox_stars_M["visit2_vs_visit3"]
),
textsize = 4,
map_signif_level = FALSE
)
# Add Kruskal-Wallis p-values as text
plot <- plot +
annotate("text", x = 1, y = max(gene_counts_long$count) * 1.1,
label = paste("KW Female p-value:", format(kw_pval_F, digits = 3)),
hjust = 0, size = 3) +
annotate("text", x = 4, y = max(gene_counts_long$count) * 1.1,
label = paste("KW Male p-value:", format(kw_pval_M, digits = 3)),
hjust = 0, size = 3)
# Display the plot
print(plot)
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
# Metadata
metadata.all <- read.delim("Meta_Data_WL_Select.txt", h=T, dec = ",") %>%
mutate(sex = factor(sex, labels = c("Female", "Male")),
donor = gsub("D", "Donor_", recordid) %>%
gsub("V", "vis", .))
# Bulk data
cm.bulk <- read.delim("Bulk_norm_counts.txt", header = T) %>%
.[match(unique(.$hgnc_symbol), .$hgnc_symbol), ] %>%
`rownames<-`(.$hgnc_symbol) %>%
select(-hgnc_symbol, -ensembl_gene_id, -entrezgene_id) %>%
scale()
colnames(cm.bulk) %<>%
gsub("D", "Donor_", .) %>%
gsub("V", "vis", .)
See Figure 3d for model setup
# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major,
cell.groups = anno.major,
ref.level = "vis2",
target.level = "vis3",
sample.groups = con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
gsub(1, 2, .) %>%
`names<-`(con.major$samples %>%
names()),
sample.groups.palette = ggsci::pal_jama()(7))
cao$sample.groups <- con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
`names<-`(con.major$samples %>%
names()) %>%
gsub("vis", "Visit ", .)
anno.comb <- anno.major[!names(anno.major) %in% names(anno.immune)] %>%
{factor(c(., anno.immune))}
# Get proportions
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>%
{100 * ./rowSums(.)} %>%
as.data.frame() %$%
setNames(`Early LAM`, rownames(.))
tmp <- qread("eLAM_model.qs")
lasso_model <- tmp$lasso_model
best_lambda <- tmp$lasso_cv$lambda.min
cm.bulk.mat <- cm.bulk %>%
as.data.frame() %>%
filter(rownames(.) %in% rownames(coef(lasso_model, s = best_lambda))) %>%
as.matrix() %>%
t()
# Now, train a final model on all data for visualization and gene selection
cm.test <- cm.bulk.mat %>%
.[rownames(.) %in% names(proportions), ]
lasso_cv <- cv.glmnet(cm.test, proportions, alpha = 1, nfolds = 40)
## Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
## fold
best_lambda <- lasso_cv$lambda.min
lasso_model <- glmnet(cm.test, proportions, alpha = 1, lambda = best_lambda)
# Predict on all samples using the final model
lasso.predict.all <- predict(lasso_model, newx = cm.test, s = best_lambda)
r2.all <- cor(proportions, lasso.predict.all)^2
p <- data.frame(Proportions = proportions,
Predictions = unname(lasso.predict.all)) %>%
ggplot(aes(Proportions, Predictions)) +
geom_point() +
theme_bw() +
theme(axis.text = element_text(colour = "black")) +
labs(title = paste0("r2 = ", formatC(r2.all, digits = 3)), y = "Predicted proportions (%)", x = "Proportions (%)") +
geom_abline(intercept = 0, slope = 1, colour = "firebrick")
p
See Figure 3e for model setup
# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major,
cell.groups = anno.major,
ref.level = "vis2",
target.level = "vis3",
sample.groups = con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
gsub(1, 2, .) %>%
`names<-`(con.major$samples %>%
names()),
sample.groups.palette = ggsci::pal_jama()(7))
cao$sample.groups <- con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
`names<-`(con.major$samples %>%
names()) %>%
gsub("vis", "Visit ", .)
anno.comb <- anno.major[!names(anno.major) %in% names(anno.immune)] %>%
{factor(c(., anno.immune))}
# Get proportions
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>%
{100 * ./rowSums(.)} %>%
as.data.frame() %$%
setNames(LAM, rownames(.))
tmp <- qread("LAM_model.qs")
lasso_model <- tmp$lasso_model
best_lambda <- tmp$lasso_cv$lambda.min
cm.bulk.mat <- cm.bulk %>%
as.data.frame() %>%
filter(rownames(.) %in% rownames(coef(lasso_model, s = best_lambda))) %>%
as.matrix() %>%
t()
# Now, train a final model on all data for visualization and gene selection
cm.test <- cm.bulk.mat %>%
.[rownames(.) %in% names(proportions), ]
lasso_cv <- cv.glmnet(cm.test, proportions, alpha = 1, nfolds = 40)
## Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
## fold
best_lambda <- lasso_cv$lambda.min
lasso_model <- glmnet(cm.test, proportions, alpha = 1, lambda = best_lambda)
# Predict on all samples using the final model
lasso.predict.all <- predict(lasso_model, newx = cm.test, s = best_lambda)
r2.all <- cor(proportions, lasso.predict.all)^2
data.frame(Proportions = proportions,
Predictions = unname(lasso.predict.all)) %>%
ggplot(aes(Proportions, Predictions)) +
geom_point() +
theme_bw() +
theme(axis.text = element_text(colour = "black")) +
labs(title = paste0("r2 = ", formatC(r2.all, digits = 3)), y = "Predicted proportions (%)", x = "Proportions (%)") +
geom_abline(intercept = 0, slope = 1, colour = "firebrick")
watlas_data <- read.delim("watlas_mapped_percent.tsv", stringsAsFactors = FALSE)
watlas_data_count <- read.delim("watlas_mapped_counts.tsv", stringsAsFactors = FALSE)
watlas_data_count <- watlas_data_count %>%
rowwise() %>%
mutate(row_sum = sum(c_across(where(is.numeric)), na.rm = TRUE)) # Sum across numeric columns
valid_samples <- watlas_data_count %>%
filter(row_sum >= 1000) %>%
pull(sample) # Extracts the valid sample IDs
filtered_data <- watlas_data %>%
filter(type == "sn", tissue == "AT", depot == "SAT", grepl("Hs", sample))
filtered_data <- filtered_data %>%
filter(sample %in% valid_samples)
long_data_mac <- filtered_data %>%
select(wtstatus, ATM, Early.LAM, LAM, mono.mac) %>% # Select relevant columns
tidyr::pivot_longer(cols = -wtstatus, names_to = "Cell_Type", values_to = "Percentage")
long_data_mac %>%
ggplot(aes(x = Cell_Type, y = Percentage, fill = wtstatus)) +
geom_boxplot(position = position_dodge(width = 0.8)) +
geom_point(aes(group = wtstatus), position = position_jitterdodge(jitter.width = 0.3, dodge.width = 0.8), alpha = 0.3, ) +
theme_bw() +
labs(title = "Comparison of Cell Types in Lean vs Obese",
x = "Cell Type",
y = "Percentage",
fill = "") +
scale_fill_manual(values = c(lean = "#26547c", obese = "#ef476f")) +
stat_compare_means(aes(group = wtstatus), method = "wilcox.test", label = "p.signif", ) +
theme(axis.text = element_text(color = "black"),
line = element_blank())
We integrated our data with the data from Hinte et al. that were graciously provided by the authors. As these data are not public, here we will only show how we plotted the data.
tmp <- con.integration$clusters$loft_transfer_minor$groups %>%
{data.frame(cid = names(.), anno = unname(.))} %>%
filter(!grepl("!!", cid)) %>%
mutate(sample = con.integration$clusters$hinte_condition$groups[match(cid, names(con.integration$clusters$hinte_condition$groups))])
tmp.sum <- tmp %>%
group_by(sample) %>%
summarize(total = n()) %>%
split(., .$sample)
tmp.sum.ct <- tmp %>%
group_by(sample, anno) %>%
summarize(total = n()) %>%
split(., .$sample)
## `summarise()` has grouped output by 'sample'. You can override using the
## `.groups` argument.
tmp.prop <- Map(\(ct, tot, an) setNames(ct$total / tot$total, an), ct = tmp.sum.ct, tot = tmp.sum, an = lapply(tmp.sum.ct, pull, anno))
df.plot <- tmp.prop %>%
{Map(\(nn, x) data.frame(sample = nn, anno = names(x), value = unname(x)), nn = names(.), x = .)} %>%
bind_rows() %>%
mutate(anno = factor(anno, levels = unique(anno)),
type = sapply(sample, \(x) if (grepl("lean", x)) "Lean" else if (grepl("t0", x)) "Obese" else "Regressed"))
cts <- c("ATM", "mono/mac", "Early LAM", "LAM")
df.plot %>%
filter(anno %in% cts) %>%
mutate(anno = factor(anno, levels = cts)) %>%
ggplot(aes(anno, value)) +
stat_summary(mapping = aes(fill = type), geom = "bar", fun = "mean", position = "dodge") +
geom_point(aes(col = type), position = position_dodge(width = 0.9)) +
theme_bw() +
labs(x = "", y = "% proportion per sample", title = "Loft to Hinte transfer, minor annotation", col = "", fill = "") +
theme(axis.text = element_text(colour = "black"),
line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
scale_fill_manual(values = brewer.pal(9, "Greens")[c(3, 6, 9)]) +
scale_color_manual(values = brewer.pal(9, "Purples")[c(9, 7, 5)])
Preparations
Load data for S3H-J first !!!
cts <- c("Early LAM", "LAM", "mono/mac", "ATM")
cm.merged <- con$getJointCountMatrix(raw = T)
# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[rownames(cm.merged)]
anno.subtype <- anno.immune[anno.immune %in% cts] %>%
.[!is.na(.)] %>%
factor()
idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())
anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>%
{`names<-`(as.character(.), names(.))}
anno.final <- paste0(anno.donor,"!!",anno.subtype) %>%
`names<-`(anno.donor %>% names()) %>%
gsub("Foam-like Mac", "Early LAM", .)
cm.pseudo.tmp <- sccore::collapseCellsByType(cm.merged,
groups = anno.final, min.cell.count = 1) %>%
t() %>%
apply(2, as, "integer")
cm.pseudo <- cm.pseudo.tmp %>%
DESeq2::DESeqDataSetFromMatrix(.,
colnames(.) %>%
strsplit("_|!!") %>%
sget(3) %>%
data.frame() %>%
`dimnames<-`(list(colnames(cm.pseudo.tmp), "group")),
design = ~ group) %>%
DESeq2::estimateSizeFactors() %>%
DESeq2::counts(normalized = T) %>%
t()
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
plot.dat <- cm.pseudo %>%
.[, colnames(.) %in% c("HLA-B")] %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "sample") %>%
mutate(visit = strsplit(sample, "!!|_") %>%
sget(3) %>%
factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
anno = strsplit(sample, "!!") %>%
sget(2) %>%
factor(levels = c("ATM", "mono/mac", "Early LAM", "LAM")),
donor = strsplit(sample, "!!") %>%
sget(1)) %>%
reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
mutate(sex = meta$sex[match(donor, meta$sample)]) %>%
mutate(sex_visit = paste(sex, visit, sep = " "))
# Kruskal-Wallis
sex.stat <- plot.dat %$%
split(., sex) %>%
lapply(\(x) split(x, x$anno)) %>%
lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>%
lapply(sapply, gtools::stars.pval) %>%
{lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>%
`names<-`(c("Female","Male")) %>%
lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>%
{lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>%
bind_rows() %>%
mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>%
arrange(anno) %>%
mutate(sig = gsub(".", "", sig, fixed = T))
# Wilcoxon
stat.test <- plot.dat %>%
dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
group_by(anno, sex) %>%
wilcox_test(value~sex_visit, paired = F) %>%
mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>%
filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)
plot.dat %>%
ggplot(aes(anno, value)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "Normalized pseudobulk expression", title = "HLA-B") +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.text = element_text(color = "black")) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 400, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) # Wilcoxon
Load and prepare
cm.merged.raw <- con.integration$getJointCountMatrix(raw = T)
cts <- c("ATM", "mono/mac", "Early LAM", "LAM")
anno <- getConosCluster(con.integration, "loft_transfer_minor")
anno.subtype <- anno[anno %in% cts]
# Create sample-wise annotation
anno.donor <- con.integration$getDatasetPerCell()[rownames(cm.merged.raw)] %>%
.[!grepl("!!", names(.))] %>%
factor()
idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())
anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>%
{`names<-`(as.character(.), names(.))}
anno.final <- paste0(anno.donor,"!!",anno.subtype) %>%
`names<-`(anno.donor %>% names())
cm.pseudo.tmp <- sccore::collapseCellsByType(cm.merged.raw,
groups = anno.final, min.cell.count = 1) %>%
t() %>%
apply(2, as, "integer")
cm.pseudo <- cm.pseudo.tmp %>%
DESeq2::DESeqDataSetFromMatrix(.,
colnames(.) %>%
strsplit("!!") %>%
sget(1) %>%
data.frame() %>%
`dimnames<-`(list(colnames(cm.pseudo.tmp), "group")),
design = ~ group) %>%
DESeq2::estimateSizeFactors() %>%
DESeq2::counts(normalized = TRUE)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
idx <- cm.pseudo %>%
colnames() %>%
data.frame(id = .) %>%
mutate(type = strsplit(id, "_|!!") %>%
sapply(\(x) if (length(x) == 3) x[2] else x[1]),
ct = strsplit(id, "!!") %>% sget(2) %>%
factor(levels = cts)) %>%
mutate(ord = order(ct, type))
genes.order <- read.delim("Macrophages_DEG_heatmap_genes.tsv") %$%
setNames(gene, cluster) %>%
.[order(names(.))]
x <- cm.pseudo %>%
.[genes.order, ] %>%
.[rowSums(.) > 0, idx$ord] %>%
Matrix::t() %>%
scale() %>%
Matrix::t()
row.order <- names(genes.order)[genes.order %in% rownames(x)]
groups <- colnames(x) %>%
`names<-`(colnames(x))
# Limit scale
x[x > 2] <- 2
x[x < -1.5] <- -1.5
# Create top annotation
tannot <- idx %$%
.[match(colnames(x), id), ] %>%
select(type) %>%
dplyr::rename(Type = type) %$%
{HeatmapAnnotation(df=.,
border=T,
col=list(Type = setNames(brewer.pal(9, "Greens")[c(3, 6, 9)], unique(Type))),
show_legend=T)}
# Create color palette
pal <- colorRampPalette(c('navy','grey95','firebrick'))(1024)
labeled.gene.subset <- NULL
# Plot
set.seed(1337)
ha <- ComplexHeatmap::Heatmap(x,
name='Expression',
col=pal,
cluster_columns=FALSE,
cluster_rows = F,
show_row_names=FALSE,
show_column_names=FALSE,
top_annotation=tannot,
left_annotation=NULL,
border=TRUE,
show_column_dend = FALSE,
show_row_dend = FALSE,
row_split = row.order,
# row_km = 5,
# row_km_repeats = 500,
column_split = idx %$%
.[match(colnames(x), id), ] %>%
pull(ct) %>%
factor(levels = cts) %>%
levels() %>%
{c("C1", rep(.[1], 4), "C2", rep(.[2], 4), "C3", rep(.[3], 4), "C4", rep(.[4], 4))} %>%
factor(., levels = unique(.)),
column_gap = unit(rep(0:1, 4), "mm") %>% .[-length(.)],
border_gp = gpar(col = "black", lwd = 1.5))
ht = draw(ha)
We integrated our data with the data from Hinte et al. that were graciously provided by the authors. As these data are not public, here we will only show how we plotted the data.
cts <- c("Early LAM", "LAM", "mono/mac", "ATM")
anno <- getConosCluster(con.integration, "loft_transfer_minor") %>%
.[. %in% cts] %>%
factor()
cm.clean <- con.integration$getJointCountMatrix(raw = T) %>%
.[rownames(.) %in% names(anno), ]
spc <- con.integration$getDatasetPerCell()
markers <- con$getDifferentialGenes(groups = anno,
z.threshold = 1,
upregulated.only = TRUE,
append.specificity.metrics = TRUE,
append.auc = TRUE)
## Estimating marker genes per sample
## Aggregating marker genes
## Estimating specificity metrics
## All done!
genes <- markers$LAM %>%
filter(AUC > 0.8) %>%
pull(Gene)
lam.scores <- list(LAM.activity = genes)
seu_obj <- CreateSeuratObject(counts = cm.clean %>% Matrix::t() %>% drop0(),
min.cells = 0,
min.features = 0)
m.scores <- UCell::AddModuleScore_UCell(seu_obj, features = lam.scores, ncores = 32)
tmp <- reshape2::melt(m.scores$LAM.activity_UCell) %>%
mutate(anno = anno[rownames(.)],
group = spc[rownames(.)] %>% as.character() %>% strsplit("_") %>% sapply(\(x) if (length(x) == 3) x[3] else if (length(x) == 2) paste(x[1], x[2], sep = "_") else x[1])) %>%
mutate(type = grepl.replace(group, unique(group), c("obese", "short-term WL", "regressed", "lean", "obese", "regressed", "obese", "regressed"))) %>%
mutate(anno = anno[rownames(.)] %>% factor(levels = c("ATM", "mono/mac", "Early LAM", "LAM"))) %>%
mutate(type_group = paste(type, group, sep = "_") %>% factor(levels = c("obese_rg_t0", "obese_st_t0", "obese_vis1", "short-term WL_vis2", "lean_lean", "regressed_rg_t1", "regressed_st_t1", "regressed_vis3"))) %>%
group_by(group, type, anno) %>%
summarize(value = mean(value)) %>%
mutate(group = factor(group)) %>%
arrange(group, anno)
## `summarise()` has grouped output by 'group', 'type'. You can override using the
## `.groups` argument.
dat.plot1 <- tmp %>%
filter(!group %in% c("lean")) %$%
split(., type) %>%
{rbind(data.frame(.[[2]], diff = .[[2]]$value - .[[1]]$value), data.frame(.[[3]], diff = .[[3]]$value - .[[1]]$value[.[[1]]$group == "vis1"]))} %>%
mutate(group = factor(group, labels = c("Hinte_rg", "Hinte_st", "Loft_short-term WL", "Loft_long-term WL"))) %>%
filter(group %in% c("Hinte_rg", "Hinte_st")) %>%
mutate(group = factor(group),
group2 = "t1-t0")
dat.plot2 <- tmp %>%
filter(group %in% c("lean", "rg_t0", "st_t0")) %>%
mutate(group = factor(group)) %$%
split(., group) %>%
{rbind(data.frame(.[[2]], diff = .[[1]]$value - .[[2]]$value), data.frame(.[[3]], diff = .[[1]]$value - .[[3]]$value))} %>%
mutate(group = factor(group, labels = c("lean_rg", "lean_st"))) %>%
mutate(group = factor(group),
group2 = "lean-t0")
dat.plot <- rbind(dat.plot1, dat.plot2) %>%
mutate(group = as.character(group) %>% strsplit("_") %>% sget(2) %>% factor())
dat.plot %>%
ggplot(aes(anno, diff, fill = group2)) +
stat_summary(geom = "bar", fun = "mean", position = "dodge") +
geom_point(aes(col = group, group = group2), position = position_dodge(width = 0.8)) +
geom_hline(yintercept = 0, color = "black") +
theme_bw() +
labs(x = "", y = "Difference in mean UCell module scores for\ntop gene marker expression", title = "LAM top markers module scores", fill = "Difference", col = "Dataset") +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.text = element_text(color = "black")) +
scale_fill_manual(values = c("grey70", "grey40")) +
scale_color_manual(values = brewer.pal(9, "Purples")[c(5,9)])
con <- qread("con_aspc.qs", nthreads = 10)
anno.aspc <- qread("anno_aspc.qs")
spc <- con$getDatasetPerCell()
varToPlot <- spc %>%
as.character() %>%
strsplit("_vis") %>%
sget(1) %>%
setNames(names(spc)) %>%
factor()
con$plotGraph(groups = varToPlot,
plot.na = F,
size = 0.3,
alpha = 0.1,
embedding = "largeVis",
mark.groups = F,
show.labels = T,
show.legend = T,
title = "Donor") +
labs(x = "largeVis1", y = "largeVis2", col = "") +
theme(line = element_blank()) +
dotSize(3)
var <- meta$sex
varToPlot <- grepl.replace(spc %>%
as.character(),
meta$sample,
var) %>%
setNames(names(spc)) %>%
factor(labels = unique(var))
con$plotGraph(groups = varToPlot,
plot.na = F,
size = 0.1,
alpha = 0.1,
embedding = "largeVis",
mark.groups = F,
show.labels = T,
show.legend = T,
title = "Sex") +
labs(x = "largeVis1", y = "largeVis2", col = "") +
theme(line = element_blank()) +
dotSize(3) +
scale_color_manual(values = c("firebrick", "navy"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
varToPlot <- spc %>%
as.character() %>%
strsplit("_|!!") %>%
sget(3) %>%
setNames(names(spc)) %>%
factor()
con$plotGraph(groups = varToPlot,
plot.na = F,
size = 0.3,
alpha = 0.1,
embedding = "largeVis",
mark.groups = F,
show.labels = T,
show.legend = T,
title = "Visit") +
labs(x = "largeVis1", y = "largeVis2", col = "") +
theme(line = element_blank()) +
dotSize(3) +
scale_color_manual(values = ggsci::pal_jama()(3))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
FAP_sub <- read.delim("fap.tsv")
rownames(FAP_sub)=FAP_sub$anno
FAP_sub=FAP_sub[2:5]
# Define ASPCs
column_order <- c("FAP2","FAP1", "FAP4","FAP3")
row_order <- c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")
FAP_ordered <- FAP_sub[, rev(column_order)] # Reorder columns
FAP_ordered <- FAP_ordered[row_order, ] # Reorder rows
corrplot(t(t(FAP_ordered)), method="color", order = , col.lim=c(-1, 1.09),
col = rev(COL2("RdBu",200)),
is.corr = F,
tl.col = "black",
tl.pos = "lower",
outline = "#C0C0C0",
addgrid.col= NA)
WATLAS <- read.delim("watlas_labels_l2_adipose_anno.tsv")
rownames(WATLAS)=WATLAS$X
# Define ASPCs
column_order <- c("DPP4..FAP", "CXCL14..FAP", "PPARG..FAP", "ICAM1..FAP")
row_order <- c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")
WATLAS_ordered <- WATLAS[, rev(column_order)] # Reorder columns
WATLAS_ordered <- WATLAS_ordered[row_order, ] # Reorder rows
corrplot(t(WATLAS_ordered), method="color", order = , col.lim=c(0.4, 1),
col = rev(COL2("RdBu",200)),
is.corr = F,
tl.col = "black",
tl.pos = "lower",
outline = "#C0C0C0",
addgrid.col= NA)
Calculate
anno.minor <- qread("anno_minor.qs")
spc <- con.major$getDatasetPerCell() %>%
.[match(names(anno.minor), names(.))]
anno.minor.sex <- anno.minor %>%
data.frame(anno = ., cid = names(.), donor = unname(spc)) %>%
mutate(sex = meta$sex[match(donor, meta$sample)]) %>%
mutate(anno_sex = paste(anno, sex, sep = "_")) %$%
setNames(anno_sex, cid) %>%
factor()
# Calculate
res.minor <- c("vis1", "vis2", "vis3") %>%
sapply(\(vis) {
con <- con.major$samples %>%
.[!grepl(vis, names(.))] %>%
Conos$new()
if (vis == "vis1") {
vis.ref <- "vis2"
vis.target <- "vis3"
} else {
vis.ref <- "vis1"
if (vis == "vis2") {
vis.target <- "vis3"
} else {
vis.target <- "vis2"
}
}
sample.groups <- con$samples %>%
names() %>%
`names<-`(ifelse(grepl(vis.ref, .), vis.ref, vis.target), .)
cao <- Cacoa$new(con,
sample.groups,
anno.minor.sex,
ref.level = vis.ref,
target.level = vis.target,
n.cores = 32)
cao$estimateCellLoadings()
return(cao)
}, simplify = F, USE.NAMES = T)
Plot
aspc.pal <- RColorBrewer::brewer.pal(5, "Greens")[-1][c(2,1,3,4)] %>%
setNames(levels(qread("/work/02_data/09_export/anno_aspc.qs"))) %>%
{setNames(c(rep(., 3)), c(names(.), paste(names(.), "Female", sep = "_"), paste(names(.), "Male", sep = "_")))}
visits <- c("vis1", "vis2", "vis3")
p <- visits %>%
lapply(\(vis) {
comp <- visits[!visits == vis]
dat.p <- res.minor[[vis]]$test.results$coda$padj
dat.plot <- res.minor[[vis]]$test.results$coda$loadings %>%
tibble::rownames_to_column(var = "anno") %>%
reshape2::melt(id.var = "anno") %>%
filter(anno %in% names(aspc.pal))
yvar <- dat.plot %>%
group_by(anno) %>%
summarize(var = mean(value)) %>%
mutate(padj = dat.p[match(anno, names(dat.p))]) %>%
arrange(padj, desc(abs(var)))
dat.plot %<>%
mutate(anno = factor(anno, levels = rev(unique(yvar$anno))))
ymax <- dat.plot %>%
pull(value) %>%
abs() %>%
max()
out <- dat.plot %>%
ggplot(aes(anno, value, fill = anno)) +
geom_hline(yintercept = 0, col = "black") +
geom_jitter(alpha = 0.2, size = 0.3, col = "grey") +
geom_boxplot(outliers = F) +
coord_flip() +
ylim(c(-ymax, ymax)) +
theme_bw() +
theme(line = element_blank(),
axis.text = element_text(color = "black")) +
guides(fill = "none") +
labs(title = paste0(comp[1], " vs ", comp[2]), x = "", y = "Loadings [AU]") +
scale_fill_manual(values = aspc.pal)
return(out)
})
p
## [[1]]
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[2]]
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[3]]
Statistics
for (vis in visits) {
comp <- visits[!visits == vis]
message("Comparison: ", comp)
print(res.minor[[vis]]$test.results$coda$padj %>% .[names(.) %in% names(aspc.pal)])
}
## Comparison: vis2vis3
## ASPC_PPARG_Male ASPC_EPHA3_Male ASPC_DPP4_Female ASPC_EPHA3_Female
## 0.1038961 0.1038961 0.8524809 0.9595111
## ASPC_PPARG_Female ASPC_CXCL14_Female ASPC_DPP4_Male ASPC_CXCL14_Male
## 0.9376623 0.9400124 0.9439072 0.9595111
## Comparison: vis1vis3
## ASPC_DPP4_Female ASPC_DPP4_Male ASPC_PPARG_Female ASPC_PPARG_Male
## 0.00944510 0.00944510 0.02770563 0.03055768
## ASPC_EPHA3_Female ASPC_EPHA3_Male ASPC_CXCL14_Male ASPC_CXCL14_Female
## 0.04155844 0.08500590 0.57448434 0.73008073
## Comparison: vis1vis2
## ASPC_DPP4_Female ASPC_DPP4_Male ASPC_PPARG_Female ASPC_EPHA3_Male
## 0.2077922 0.2077922 0.4363636 0.5085441
## ASPC_EPHA3_Female ASPC_CXCL14_Female ASPC_PPARG_Male ASPC_CXCL14_Male
## 0.7713499 0.8410637 0.8255528 0.9256198
See Figure 4d for model setup
# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major,
cell.groups = anno.major,
ref.level = "vis2",
target.level = "vis3",
sample.groups = con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
gsub(1, 2, .) %>%
`names<-`(con.major$samples %>%
names()),
sample.groups.palette = ggsci::pal_jama()(7))
cao$sample.groups <- con.major$samples %>%
names() %>%
strsplit("_") %>%
sapply('[[', 3) %>%
`names<-`(con.major$samples %>%
names()) %>%
gsub("vis", "Visit ", .)
anno.comb <- anno.major[!names(anno.major) %in% names(anno.aspc)] %>%
{factor(c(., anno.aspc))}
# Get proportions
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>%
{100 * ./rowSums(.)} %>%
as.data.frame() %$%
setNames(ASPC_DPP4, rownames(.))
tmp <- qread("DPP4_model.qs")
lasso_model <- tmp$lasso_model
best_lambda <- tmp$lasso_cv$lambda.min
cm.bulk.mat <- cm.bulk %>%
as.data.frame() %>%
filter(rownames(.) %in% rownames(coef(lasso_model, s = best_lambda))) %>%
as.matrix() %>%
t()
# Now, train a final model on all data for visualization and gene selection
cm.test <- cm.bulk.mat %>%
.[rownames(.) %in% names(proportions), ]
lasso_cv <- cv.glmnet(cm.test, proportions, alpha = 1, nfolds = 40)
## Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
## fold
best_lambda <- lasso_cv$lambda.min
lasso_model <- glmnet(cm.test, proportions, alpha = 1, lambda = best_lambda)
# Predict on all samples using the final model
lasso.predict.all <- predict(lasso_model, newx = cm.test, s = best_lambda)
r2.all <- cor(proportions, lasso.predict.all)^2
data.frame(Proportions = proportions,
Predictions = unname(lasso.predict.all)) %>%
ggplot(aes(Proportions, Predictions)) +
geom_point() +
theme_bw() +
theme(axis.text = element_text(colour = "black")) +
labs(title = paste0("r2 = ", formatC(r2.all, digits = 3)), y = "Predicted proportions (%)", x = "Proportions (%)") +
geom_abline(intercept = 0, slope = 1, colour = "firebrick")
cm.merged <- con$getJointCountMatrix(raw = T)
# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[rownames(cm.merged)]
anno.subtype <- anno.aspc %>%
.[!is.na(.)] %>%
factor()
idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())
anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>%
{`names<-`(as.character(.), names(.))}
anno.final <- paste0(anno.donor,"!!",anno.subtype) %>%
`names<-`(anno.donor %>% names())
cm.pseudo.tmp <- sccore::collapseCellsByType(cm.merged,
groups = anno.final, min.cell.count = 1) %>%
t() %>%
apply(2, as, "integer")
cm.pseudo <- cm.pseudo.tmp %>%
DESeq2::DESeqDataSetFromMatrix(.,
colnames(.) %>%
strsplit("_|!!") %>%
sget(3) %>%
data.frame() %>%
`dimnames<-`(list(colnames(cm.pseudo.tmp), "group")),
design = ~ group) %>%
DESeq2::estimateSizeFactors() %>%
DESeq2::counts(normalized = T) %>%
t()
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
gene = "HSP90B1"
plot.dat <- cm.pseudo %>%
.[, colnames(.) %in% gene] %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "sample") %>%
mutate(visit = strsplit(sample, "!!|_") %>%
sget(3) %>%
factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
anno = strsplit(sample, "!!") %>%
sget(2) %>%
factor(levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")),
donor = strsplit(sample, "!!") %>%
sget(1)) %>%
reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
mutate(sex = meta$sex[match(donor, meta$sample)]) %>%
mutate(sex_visit = paste(sex, visit, sep = " "))
# Kruskal-Wallis
sex.stat <- plot.dat %$%
split(., sex) %>%
lapply(\(x) split(x, x$anno)) %>%
lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>%
lapply(sapply, gtools::stars.pval) %>%
{lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>%
`names<-`(c("Female","Male")) %>%
lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>%
{lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>%
bind_rows() %>%
mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>%
arrange(anno) %>%
mutate(sig = gsub(".", "", sig, fixed = T))
# Wilcoxon
stat.test <- plot.dat %>%
group_by(anno, sex) %>%
wilcox_test(value~sex_visit, paired = T) %>%
mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>%
filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)
plot.dat %>%
ggplot(aes(anno, value)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "Normalized pseudobulk expression", title = gene) +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.text = element_text(color = "black")) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 300, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
guides(fill = "none")
gene = "JUN"
plot.dat <- cm.pseudo %>%
.[, colnames(.) %in% gene] %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "sample") %>%
mutate(visit = strsplit(sample, "!!|_") %>%
sget(3) %>%
factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
anno = strsplit(sample, "!!") %>%
sget(2) %>%
factor(levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")),
donor = strsplit(sample, "!!") %>%
sget(1)) %>%
reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
mutate(sex = meta$sex[match(donor, meta$sample)]) %>%
mutate(sex_visit = paste(sex, visit, sep = " "))
# Kruskal-Wallis
sex.stat <- plot.dat %$%
split(., sex) %>%
lapply(\(x) split(x, x$anno)) %>%
lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>%
lapply(sapply, gtools::stars.pval) %>%
{lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>%
`names<-`(c("Female","Male")) %>%
lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>%
{lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>%
bind_rows() %>%
mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>%
arrange(anno) %>%
mutate(sig = gsub(".", "", sig, fixed = T))
# Wilcoxon
stat.test <- plot.dat %>%
group_by(anno, sex) %>%
wilcox_test(value~sex_visit, paired = T) %>%
mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>%
filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)
plot.dat %>%
ggplot(aes(anno, value)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "Normalized pseudobulk expression", title = gene) +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.text = element_text(color = "black")) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 600, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
guides(fill = "none")
gene = "FOS"
plot.dat <- cm.pseudo %>%
.[, colnames(.) %in% gene] %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "sample") %>%
mutate(visit = strsplit(sample, "!!|_") %>%
sget(3) %>%
factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
anno = strsplit(sample, "!!") %>%
sget(2) %>%
factor(levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")),
donor = strsplit(sample, "!!") %>%
sget(1)) %>%
reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
mutate(sex = meta$sex[match(donor, meta$sample)]) %>%
mutate(sex_visit = paste(sex, visit, sep = " "))
# Kruskal-Wallis
sex.stat <- plot.dat %$%
split(., sex) %>%
lapply(\(x) split(x, x$anno)) %>%
lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>%
lapply(sapply, gtools::stars.pval) %>%
{lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>%
`names<-`(c("Female","Male")) %>%
lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>%
{lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>%
bind_rows() %>%
mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>%
arrange(anno) %>%
mutate(sig = gsub(".", "", sig, fixed = T))
# Wilcoxon
stat.test <- plot.dat %>%
group_by(anno, sex) %>%
wilcox_test(value~sex_visit, paired = F) %>%
mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>%
filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)
plot.dat %>%
ggplot(aes(anno, value)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "Normalized pseudobulk expression", title = gene) +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.text = element_text(color = "black")) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 2e3, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
guides(fill = "none")
gene = "FOSB"
plot.dat <- cm.pseudo %>%
.[, colnames(.) %in% gene] %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "sample") %>%
mutate(visit = strsplit(sample, "!!|_") %>%
sget(3) %>%
factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
anno = strsplit(sample, "!!") %>%
sget(2) %>%
factor(levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")),
donor = strsplit(sample, "!!") %>%
sget(1)) %>%
reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
mutate(sex = meta$sex[match(donor, meta$sample)]) %>%
mutate(sex_visit = paste(sex, visit, sep = " "))
# Kruskal-Wallis
sex.stat <- plot.dat %$%
split(., sex) %>%
lapply(\(x) split(x, x$anno)) %>%
lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>%
lapply(sapply, gtools::stars.pval) %>%
{lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>%
`names<-`(c("Female","Male")) %>%
lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>%
{lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>%
bind_rows() %>%
mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>%
arrange(anno) %>%
mutate(sig = gsub(".", "", sig, fixed = T))
# Wilcoxon
stat.test <- plot.dat %>%
group_by(anno, sex) %>%
wilcox_test(value~sex_visit, paired = T) %>%
mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>%
filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)
plot.dat %>%
ggplot(aes(anno, value)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "Normalized pseudobulk expression", title = gene) +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.text = element_text(color = "black")) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 1.6e3, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
guides(fill = "none")
cts <- c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")
anno <- getConosCluster(con.integration, "loft_transfer_minor")
anno.subtype <- anno[anno %in% cts]
# Create sample-wise annotation
anno.donor <- con.integration$getDatasetPerCell()[rownames(cm.merged.raw)] %>%
.[!grepl("!!", names(.))] %>%
factor()
idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())
anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>%
{`names<-`(as.character(.), names(.))}
anno.final <- paste0(anno.donor,"!!",anno.subtype) %>%
`names<-`(anno.donor %>% names())
cm.pseudo.tmp <- sccore::collapseCellsByType(cm.merged.raw,
groups = anno.final, min.cell.count = 1) %>%
t() %>%
apply(2, as, "integer")
cm.pseudo <- cm.pseudo.tmp %>%
DESeq2::DESeqDataSetFromMatrix(.,
colnames(.) %>%
strsplit("!!") %>%
sget(1) %>%
data.frame() %>%
`dimnames<-`(list(colnames(cm.pseudo.tmp), "group")),
design = ~ group) %>%
DESeq2::estimateSizeFactors() %>%
DESeq2::counts(normalized = TRUE)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
idx <- cm.pseudo %>%
colnames() %>%
data.frame(id = .) %>%
mutate(type = strsplit(id, "_|!!") %>%
sapply(\(x) if (length(x) == 4) x[2] else x[1]),
ct = strsplit(id, "!!") %>% sget(2) %>%
factor(levels = cts)) %>%
mutate(ord = order(ct, type))
genes.order <- read.delim("ASPC_DEG_heatmap_genes.tsv") %$%
setNames(gene, cluster) %>%
.[order(names(.))]
x <- cm.pseudo %>%
.[genes.order, ] %>%
.[rowSums(.) > 0, idx$ord] %>%
Matrix::t() %>%
scale() %>%
Matrix::t()
row.order <- names(genes.order)[genes.order %in% rownames(x)]
groups <- colnames(x) %>%
`names<-`(colnames(x))
# Limit scale
x[x > 2] <- 2
x[x < -1.5] <- -1.5
# Create top annotation
tannot <- idx %$%
.[match(colnames(x), id), ] %>%
select(type) %>%
dplyr::rename(Type = type) %$%
{HeatmapAnnotation(df=.,
border=T,
col=list(Type = setNames(brewer.pal(9, "Greens")[c(3, 6, 9)], unique(Type))),
show_legend=T)}
# Create color palette
pal <- colorRampPalette(c('navy','grey95','firebrick'))(1024)
labeled.gene.subset <- NULL
# Plot
set.seed(1337)
ha <- ComplexHeatmap::Heatmap(x,
name='Expression',
col=pal,
cluster_columns=FALSE,
cluster_rows = F,
show_row_names=FALSE,
show_column_names=FALSE,
top_annotation=tannot,
left_annotation=NULL,
border=TRUE,
show_column_dend = FALSE,
show_row_dend = FALSE,
row_split = row.order,
column_split = idx %$%
.[match(colnames(x), id), ] %>%
pull(ct) %>%
factor(levels = cts) %>%
levels() %>%
{c("C1", rep(.[1], 4), "C2", rep(.[2], 4), "C3", rep(.[3], 4), "C4", rep(.[4], 4))} %>%
factor(., levels = unique(.)),
column_gap = unit(rep(0:1, 4), "mm") %>% .[-length(.)],
border_gp = gpar(col = "black", lwd = 1.5))
ht = draw(ha)
# We extracted genes from the selected GO term using this snippet
# genes.go <- clusterProfiler::bitr("GO:0097193",
# fromType="GOALL",
# toType="SYMBOL",
# OrgDb='org.Hs.eg.db')$SYMBOL %>%
# unique()
# Then, we intersected with the DEG genes plotted in our heatmap. The resulting genes are the following:
genes <- c("CLU", "CRIP1", "GRINA", "HNRNPK", "MMP2", "P4HB", "CXCL12", "TMBIM6", "ARL6IP5")
cts <- c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")
anno <- getConosCluster(con.integration, "loft_transfer_minor")
anno.subtype <- anno[anno %in% cts] %>%
factor()
cm.merged.sub <- cm.merged.raw %>%
.[rownames(.) %in% names(anno.subtype), ]
cm.clean <- cm.merged.sub[rownames(cm.merged.sub) %in% names(anno.subtype), ]
lam.scores <- list(LAM.activity = genes)
seu_obj <- CreateSeuratObject(counts = cm.clean %>% Matrix::t() %>% drop0(),
min.cells = 0,
min.features = 0)
m.scores <- UCell::AddModuleScore_UCell(seu_obj, features = lam.scores, ncores = 32)
Prepare
cm.sub <- m.scores$LAM.activity_UCell
# Create sample-wise annotation
anno.donor <- con.integration$getDatasetPerCell()[names(cm.sub)]
anno.subtype <- anno.subtype %>%
.[!is.na(.)] %>%
factor()
idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())
anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>%
{`names<-`(as.character(.), names(.))}
anno.final <- paste0(anno.donor,"!!",anno.subtype) %>%
`names<-`(anno.donor %>% names())
# Prepare plot
plot.tmp <- cm.sub %>%
data.frame(cid = names(.), value = .) %>%
mutate(donor_anno = anno.final[cid]) %>%
mutate(donor = strsplit(donor_anno, "!!") %>%
sget(1),
anno = strsplit(donor_anno, "!!") %>%
sget(2),
sex = meta$sex[match(donor, meta$sample)]) %>%
mutate(visit = strsplit(donor, "_") %>%
sapply(\(x) if (length(x) == 3) x[3] else if (length(x) == 2) x[2] else x[1]),
type = strsplit(donor, "_") %>% sget(1)) %>%
mutate(sex_visit = paste(sex, visit, sep = "_") %>%
gsub("NA_", "", .) %>%
factor(levels = c("Female_vis1", "Female_vis2", "Female_vis3", "Male_vis1", "Male_vis2", "Male_vis3", "lean", "t0", "t1"))) %>%
group_by(donor_anno, donor, anno, sex, sex_visit, type, visit) %>%
summarize(value = mean(value))
## `summarise()` has grouped output by 'donor_anno', 'donor', 'anno', 'sex',
## 'sex_visit', 'type'. You can override using the `.groups` argument.
plot.tmp %>%
filter(sex_visit %in% c("lean", "t0", "t1")) %>%
mutate(anno = factor(anno, levels = cts)) %>%
ggplot(aes(anno, value, fill = sex_visit)) +
stat_summary(geom = "bar", fun = "mean", position = "dodge") +
geom_point(aes(col = type, group = sex_visit), position = position_dodge(width = 0.9)) +
theme_bw() +
theme(axis.text = element_text(color = "black"),
line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
labs(x = "", y = "Mean UCell module score per sample", fill = "Type", title = "'Intrinsic apoptotic signaling pathway' - cluster 1", col = "Dataset") +
scale_fill_manual(values = brewer.pal(9, "Greens")[c(3, 6, 9)]) +
scale_color_manual(values = brewer.pal(9, "Purples")[c(9, 7, 5)])
See Scenic.ipynb
for mat
object
calculation.
mat <- fastMatMR::fmm_to_mat("aspc_adi_auc_mat.mtx") %>%
`dimnames<-`(list(
read.table("aspc_adi_auc_mat.rownames", header = T)[, 1],
read.table("aspc_adi_auc_mat.colnames", header = T)[, 1]
))
con.major <- qread("con_major.qs", nthreads = 10)
spc <- con.major$getDatasetPerCell()
vpc <- spc %>%
as.character() %>%
strsplit("_|!!") %>%
sget(3) %>%
setNames(names(spc))
anno.adipocytes <- qread("anno_major.qs") %>%
.[. == "Adipocytes"] %>%
factor()
anno.comb <- factor(c(anno.adipocytes, anno.aspc))
plot.dat.tmp <- c("FOS(+)", "FOSB(+)") %>% # ZEB1 is just a random pick to make it easier to manipulate data, omitted later
{mat[, colnames(mat) %in% .]} %>%
as.data.frame() %>%
mutate(.,
visit = unname(vpc)[match(rownames(.),
names(vpc) %>%
strsplit("!!") %>%
sapply(\(x) paste0(x[2],"!!",x[3]))
)
],
anno = unname(anno.comb)[match(rownames(.),
names(anno.comb) %>%
strsplit("!!") %>%
sapply(\(x) paste0(x[2],"!!",x[3]))
)
],
sample = unname(spc)[match(rownames(.),
names(spc) %>%
strsplit("!!") %>%
sapply(\(x) paste0(x[2],"!!",x[3]))
)
]
) %>%
filter(!is.na(anno)) %>%
reshape2::melt(id.vars = c("visit", "anno", "sample")) %>%
mutate(anno_vis = paste0(anno,"_",visit),
anno_var = paste0(anno,"_",variable)) %>%
group_by(visit, anno, variable, sample) %>%
summarize(mm = median(value)) %>%
ungroup() %>%
filter(#variable == "JUN(+)",
!anno == "Adipocytes") %>%
mutate(anno = factor(anno, levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3", "Adipocytes")),
visit = factor(visit, levels = c("vis1", "vis2", "vis3"), labels = c("Visit 1", "Visit 2", "Visit 3")),
sex = meta$sex[match(.$sample, meta$sample)],
variable = factor(variable)) %>%
mutate(sex_visit = paste(sex, visit, sep = " ") %>% gsub("Visit", "visit", .),
anno = factor(anno, levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")))
## `summarise()` has grouped output by 'visit', 'anno', 'variable'. You can
## override using the `.groups` argument.
plot.dat <- plot.dat.tmp %>%
filter(variable == "FOS(+)")
# Kruskal-Wallis
sex.stat <- plot.dat %$%
split(., sex) %>%
lapply(\(x) split(x, x$anno)) %>%
lapply(lapply, \(x) kruskal.test(x$mm, g = x$visit)$p.value) %>%
lapply(sapply, gtools::stars.pval) %>%
{lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>%
`names<-`(c("Female","Male")) %>%
lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>%
{lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>%
bind_rows() %>%
mutate(ct = factor(ct, levels = unique(plot.dat$anno))) %>%
arrange(ct) %>%
mutate(sig = gsub(".", "", sig, fixed = T))
# Wilcoxon
stat.test <- plot.dat %>%
dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
group_by(anno, sex) %>%
wilcox_test(mm~sex_visit, paired = TRUE) %>%
filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
arrange(desc(y.position))
# Plot
ggplot(plot.dat, aes(anno, mm)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "Mean AUC", fill = "") +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.text = element_text(color = "black")) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat, aes(ct, y = 6, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 2, hide.ns = F) # Wilcoxon
plot.dat <- plot.dat.tmp %>%
filter(variable == "FOSB(+)")
# Kruskal-Wallis
sex.stat <- plot.dat %$%
split(., sex) %>%
lapply(\(x) split(x, x$anno)) %>%
lapply(lapply, \(x) kruskal.test(x$mm, g = x$visit)$p.value) %>%
lapply(sapply, gtools::stars.pval) %>%
{lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>%
`names<-`(c("Female","Male")) %>%
lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>%
{lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>%
bind_rows() %>%
mutate(ct = factor(ct, levels = unique(plot.dat$anno))) %>%
arrange(ct) %>%
mutate(sig = gsub(".", "", sig, fixed = T))
# Wilcoxon
stat.test <- plot.dat %>%
dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
group_by(anno, sex) %>%
wilcox_test(mm~sex_visit, paired = TRUE) %>%
filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
arrange(desc(y.position))
# Plot
ggplot(plot.dat, aes(anno, mm)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "Mean AUC", fill = "") +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.text = element_text(color = "black")) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat, aes(ct, y = 6, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 2, hide.ns = F) # Wilcoxon
Here, we’re showing how we calculated regulon activity from the
output files from Scenic (see Scenic.ipynb
). This is not
run here.
reg <- read.table("aspc_adi_macro_reg.csv", sep = ",", header = T, skip = 1) %>%
`colnames<-`(., c(.[1, 1:2], colnames(.)[-c(1,2)])) %>%
dplyr::slice(-1)
tfs <- c("FOS", "FOSB", "JUN", "JUND", "JUNB", "ZEB1", "EBF1", "PPARG", "EBF3", "EBF2")
geneSets <- reg %>%
filter(TF %in% tfs) %$%
split(., TF) %>%
lapply(filter, AUC == max(AUC)) %>%
lapply(pull, TargetGenes) %>%
lapply(gsub, pattern = "\\[|\\(|\\'|\b'|\\]|\\)", replacement = "") %>%
lapply(strsplit, ", ") %>%
lget(1) %>%
lapply(\(x) x[seq(1, length(x)-1, 2)])
anno <- getConosCluster(con.integration, "loft_transfer_minor")
anno.sub <- anno %>%
.[. %in% c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3", "Adipocytes")]
cm.sub <- cm.merged[rownames(cm.merged) %in% names(anno.sub), ] %>%
Matrix::t()
auc <- AUCell_run(cm.sub, geneSets)
tmp.regulons <- auc@assays@data@listData$AUC %>%
{lapply(split(., rownames(.)), `names<-`, colnames(.))}
qsave(tmp.regulons, "regulons.qs")
qsave(geneSets, "genesets.qs")
cm <- qread("regulons.qs")[c("FOS", "JUN", "JUND", "JUNB")] %>%
as.data.frame()
spc.hinte <- con.integration$getDatasetPerCell() %>%
.[!grepl("!!", names(.))]
anno.sub <- anno %>%
.[. %in% c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3", "Adipocytes")]
cg <- anno.sub[!anno.sub == "Adipocytes"] %>%
.[match(names(spc.hinte), names(.), nomatch = F)]
spc.final <- spc.hinte[names(cg)] %>%
{setNames(strsplit(as.character(.), "_") %>% sapply(\(x) if (length(x) == 1) x else x[2]), names(.))}
cg.visit <- data.frame(anno = unname(cg), sample = unname(spc.final), row.names = names(cg)) %>%
mutate(anno_sample = paste(anno, sample, sep = "_")) %$%
setNames(anno_sample, rownames(.)) %>%
factor(levels = rev(c("ASPC_DPP4_lean", "ASPC_DPP4_t0", "ASPC_DPP4_t1", "ASPC_CXCL14_lean", "ASPC_CXCL14_t0", "ASPC_CXCL14_t1", "ASPC_PPARG_lean", "ASPC_PPARG_t0", "ASPC_PPARG_t1", "ASPC_EPHA3_lean", "ASPC_EPHA3_t0", "ASPC_EPHA3_t1")))
dotPlot(c("FOS", "JUN", "JUND", "JUNB"), cm, cg.visit, gene.order = rev(c("FOS", "JUN", "JUND", "JUNB")), ) +
coord_flip() +
guides(size = "none", col = "none") +
guides(size = "legend", col = "colorbar") +
labs(size = "Fraction of cells in group (%)", col = "Regulon activity (mean AUC)", x = "", y = "") +
scale_color_gradient(low = "grey", high = "firebrick")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
con <- qread("con_adipocytes.qs", nthreads = 10)
anno.adipocytes <- qread("anno_adipocytes_archetypes.qs")
spc <- con$getDatasetPerCell()
varToPlot <- spc %>%
as.character() %>%
strsplit("_vis") %>%
sget(1) %>%
setNames(names(spc)) %>%
factor()
con$plotGraph(groups = varToPlot,
plot.na = F,
size = 0.3,
alpha = 0.1,
embedding = "largeVis",
mark.groups = F,
show.labels = T,
show.legend = T,
title = "Donor") +
labs(x = "largeVis1", y = "largeVis2", col = "") +
theme(line = element_blank()) +
dotSize(3)
var <- meta$sex
varToPlot <- grepl.replace(spc %>%
as.character(),
meta$sample,
var) %>%
setNames(names(spc)) %>%
factor(labels = unique(var))
con$plotGraph(groups = varToPlot,
plot.na = F,
size = 0.3,
alpha = 0.1,
embedding = "largeVis",
mark.groups = F,
show.labels = T,
show.legend = T,
title = "Sex") +
labs(x = "largeVis1", y = "largeVis2", col = "") +
theme(line = element_blank()) +
dotSize(3) +
scale_color_manual(values = c("firebrick", "navy"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
varToPlot <- spc %>%
as.character() %>%
strsplit("_|!!") %>%
sget(3) %>%
setNames(names(spc)) %>%
factor()
con$plotGraph(groups = varToPlot,
plot.na = F,
size = 0.3,
alpha = 0.1,
embedding = "largeVis",
mark.groups = F,
show.labels = T,
show.legend = T,
title = "Visit") +
labs(x = "largeVis1", y = "largeVis2", col = "") +
theme(line = element_blank()) +
dotSize(3) +
scale_color_manual(values = ggsci::pal_jama()(3))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
Please note, the clustering algorithm holds some randomness, clusters will not always be the same.
con$findCommunities(name = "leiden1", resolution = 0.5)
con$findCommunities(name = "leiden2", resolution = 0.5)
con$findCommunities(name = "leiden3", resolution = 0.5)
con$findCommunities(name = "leiden4", resolution = 0.5)
seq(4) %>%
lapply(\(x) {
con$plotGraph(embedding = "largeVis", clustering = paste0("leiden",x), font.size = 6) +
theme(line = element_blank()) +
scale_color_manual(values = RColorBrewer::brewer.pal(4, "Reds")[-1])
}) %>%
cowplot::plot_grid(plotlist = ., ncol = 2)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
Please note, the clustering algorithm holds some randomness, stabilities will not always be the same.
con$findCommunities(resolution = 0.5, test.stability = T, name = "l05stab")
## running 100 subsampling iterations ...
## done
## calculating flat stability stats ...
## adjusted Rand ...
## done
## calculating hierarchical stability stats ...
## upper clustering ...
## clusterTree Jaccard ...
## done
plot_grid(plotlist = list(
con$plotClusterStability("l05stab", "ari") +
theme_bw() +
theme(line = element_blank(),
axis.text.x = element_blank()) +
geom_boxplot(aes(col = "#E41A1C"), notch = T) +
labs(x = " \n "),
con$plotClusterStability("l05stab", "hjc") +
theme_bw() +
theme(line = element_blank()) +
scale_color_manual(values = RColorBrewer::brewer.pal(4, "Reds")[-1])
), ncol = 2, rel_widths = c(1.3,3))
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the conos package.
## Please report the issue at <https://github.com/kharchenkolab/conos/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Please note, the clustering algorithm holds some randomness, stabilities will not always be the same.
con$findCommunities(resolution = 1, test.stability = T, name = "l1stab")
## running 100 subsampling iterations ...
## done
## calculating flat stability stats ...
## adjusted Rand ...
## done
## calculating hierarchical stability stats ...
## upper clustering ...
## clusterTree Jaccard ...
## done
plot_grid(plotlist = list(
con$plotClusterStability("l05stab", "ari") +
theme_bw() +
theme(line = element_blank(),
axis.text.x = element_blank()) +
geom_boxplot(aes(col = "#E41A1C"), notch = T) +
labs(x = " \n "),
con$plotClusterStability("l1stab", "hjc") +
theme_bw() +
theme(line = element_blank()) +
scale_color_manual(values = RColorBrewer::brewer.pal(8, "Reds")[-1])
), ncol = 2, rel_widths = c(1.3,3))
cm.merged <- con$getJointCountMatrix()
c("PRSS23","PDE5A","ADIPOQ", "DGAT2", "GPAM", "LPL", "RORA", "CLSTN2", "CRIM1", "ITGA1") %>%
sccore::dotPlot(.,
cm.merged,
getConosCluster(con, "leiden0.5"),
gene.order = .,
cols = c("white","red3"))
cm.merged <- con$getJointCountMatrix(raw = T)
# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[rownames(cm.merged)]
anno.subtype <- anno.adipocytes %>%
.[!is.na(.)] %>%
factor()
idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())
anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>%
{`names<-`(as.character(.), names(.))}
anno.final <- paste0(anno.donor,"!!",anno.subtype) %>%
`names<-`(anno.donor %>% names())
cm.pseudo.tmp <- sccore::collapseCellsByType(cm.merged,
groups = anno.final, min.cell.count = 1) %>%
t() %>%
apply(2, as, "integer")
cm.pseudo <- cm.pseudo.tmp %>%
DESeq2::DESeqDataSetFromMatrix(.,
colnames(.) %>%
strsplit("_|!!") %>%
sget(3) %>%
data.frame() %>%
`dimnames<-`(list(colnames(cm.pseudo.tmp), "group")),
design = ~ group) %>%
DESeq2::estimateSizeFactors() %>%
DESeq2::counts(normalized = T) %>%
t()
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
gene = "MAP3K5"
plot.dat <- cm.pseudo %>%
.[, colnames(.) %in% gene] %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "sample") %>%
mutate(visit = strsplit(sample, "!!|_") %>%
sget(3) %>%
factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
anno = strsplit(sample, "!!") %>%
sget(2) %>%
factor(levels = c("DGAT2_archetype", "CLSTN2_archetype", "PRSS23_archetype")),
donor = strsplit(sample, "!!") %>%
sget(1)) %>%
reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
mutate(sex = meta$sex[match(donor, meta$sample)]) %>%
mutate(sex_visit = paste(sex, visit, sep = " "))
# Kruskal-Wallis
sex.stat <- plot.dat %$%
split(., sex) %>%
lapply(\(x) split(x, x$anno)) %>%
lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>%
lapply(sapply, gtools::stars.pval) %>%
{lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>%
`names<-`(c("Female","Male")) %>%
lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>%
{lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>%
bind_rows() %>%
mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>%
arrange(anno) %>%
mutate(sig = gsub(".", "", sig, fixed = T))
# Wilcoxon
stat.test <- plot.dat %>%
dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
group_by(anno, sex) %>%
wilcox_test(value~sex_visit, paired = F) %>%
mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>%
filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)
plot.dat %>%
ggplot(aes(anno, value)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "Normalized pseudobulk expression", title = gene) +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.text = element_text(color = "black")) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 200, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
guides(fill = "none")
gene = "AKT3"
plot.dat <- cm.pseudo %>%
.[, colnames(.) %in% gene] %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "sample") %>%
mutate(visit = strsplit(sample, "!!|_") %>%
sget(3) %>%
factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
anno = strsplit(sample, "!!") %>%
sget(2) %>%
factor(levels = c("DGAT2_archetype", "CLSTN2_archetype", "PRSS23_archetype")),
donor = strsplit(sample, "!!") %>%
sget(1)) %>%
reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
mutate(sex = meta$sex[match(donor, meta$sample)]) %>%
mutate(sex_visit = paste(sex, visit, sep = " "))
# Kruskal-Wallis
sex.stat <- plot.dat %$%
split(., sex) %>%
lapply(\(x) split(x, x$anno)) %>%
lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>%
lapply(sapply, gtools::stars.pval) %>%
{lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>%
`names<-`(c("Female","Male")) %>%
lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>%
{lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>%
bind_rows() %>%
mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>%
arrange(anno) %>%
mutate(sig = gsub(".", "", sig, fixed = T))
# Wilcoxon
stat.test <- plot.dat %>%
dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
group_by(anno, sex) %>%
wilcox_test(value~sex_visit, paired = F) %>%
mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>%
filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)
plot.dat %>%
ggplot(aes(anno, value)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "Normalized pseudobulk expression", title = gene) +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.text = element_text(color = "black")) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 100, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
guides(fill = "none")
gene = "ADAMTS10"
plot.dat <- cm.pseudo %>%
.[, colnames(.) %in% gene] %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "sample") %>%
mutate(visit = strsplit(sample, "!!|_") %>%
sget(3) %>%
factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
anno = strsplit(sample, "!!") %>%
sget(2) %>%
factor(levels = c("DGAT2_archetype", "CLSTN2_archetype", "PRSS23_archetype")),
donor = strsplit(sample, "!!") %>%
sget(1)) %>%
reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
mutate(sex = meta$sex[match(donor, meta$sample)]) %>%
mutate(sex_visit = paste(sex, visit, sep = " "))
# Kruskal-Wallis
sex.stat <- plot.dat %$%
split(., sex) %>%
lapply(\(x) split(x, x$anno)) %>%
lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>%
lapply(sapply, gtools::stars.pval) %>%
{lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>%
`names<-`(c("Female","Male")) %>%
lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>%
{lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>%
bind_rows() %>%
mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>%
arrange(anno) %>%
mutate(sig = gsub(".", "", sig, fixed = T))
# Wilcoxon
stat.test <- plot.dat %>%
dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
group_by(anno, sex) %>%
wilcox_test(value~sex_visit, paired = F) %>%
mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>%
filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)
plot.dat %>%
ggplot(aes(anno, value)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "Normalized pseudobulk expression", title = gene) +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.text = element_text(color = "black")) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 200, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
guides(fill = "none")
First, we integrate adipocytes from both studies. This is not run here, and the data are not shared.
con.adipocytes <- qread("con_adipocytes.qs", nthreads = 10)
anno.adi <- qread("anno_adipocytes_archetypes.qs")
anno <- getConosCluster(con.integration, "loft_transfer")
anno.hinte <- anno %>%
.[!grepl("!!", names(.))] %>%
.[. == "Adipocytes"]
samples.processed <- con.integration$samples %>%
.[!grepl("vis", names(.))] %>%
lapply(conos:::getRawCountMatrix) %>%
lapply(\(cm) cm[, colnames(cm) %in% names(anno.hinte)]) %>%
lapply(basicP2proc, n.cores = 32, get.largevis = F, get.tsne = F, make.geneknn = F)
con.adipocytes$addSamples(samples.processed)
con.adipocytes$buildGraph()
con.adipocytes$findCommunities(resolution = 0.5)
con.adipocytes$embedGraph()
anno.archetypes <- con.adipocytes$propagateLabels(anno.adi)
qsave(con.adipocytes, "con_adipocytes_integrated.qs", nthreads = 10)
qsave(anno.archetypes, "anno_archetypes_integrated.qs")
Load final data
anno.archetypes <- qread("anno_archetypes_integrated.qs")
con.adipocytes <- qread("con_adipocytes_integrated.qs", nthreads = 10)
# We pick top1k cells per subtype based on the uncertainty parameter from the label propagation
anno.df <- anno.archetypes %$%
data.frame(uncertainty = unname(uncertainty),
label = unname(labels),
cid = names(labels)) %>%
filter(!grepl("!!", cid)) %$%
split(., label) %>%
lapply(arrange, uncertainty) %>%
lapply(dplyr::slice, seq(1e3)) %>%
bind_rows()
anno.archetypes.hinte <- anno.df %$%
setNames(label, cid) %>%
factor()
anno.subtype <- anno.archetypes.hinte
# Create sample-wise annotation
anno.donor <- con.integration$getDatasetPerCell()[rownames(cm.merged.raw)] %>%
.[!grepl("!!", names(.))] %>%
factor()
idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())
anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>%
{`names<-`(as.character(.), names(.))}
anno.final <- paste0(anno.donor,"!!",anno.subtype) %>%
`names<-`(anno.donor %>% names())
cm.pseudo.tmp <- sccore::collapseCellsByType(cm.merged.raw,
groups = anno.final, min.cell.count = 1) %>%
t() %>%
apply(2, as, "integer")
cm.pseudo <- cm.pseudo.tmp %>%
DESeq2::DESeqDataSetFromMatrix(.,
colnames(.) %>%
strsplit("!!") %>%
sget(1) %>%
data.frame() %>%
`dimnames<-`(list(colnames(cm.pseudo.tmp), "group")),
design = ~ group) %>%
DESeq2::estimateSizeFactors() %>%
DESeq2::counts(normalized = TRUE)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
idx <- cm.pseudo %>%
colnames() %>%
data.frame(id = .) %>%
mutate(type = strsplit(id, "_|!!") %>%
sapply(\(x) if (length(x) == 3) x[1] else x[2]),
ct = strsplit(id, "!!") %>% sget(2)) %>%
mutate(ord = order(ct, type))
# Order genereted in Figure 5c
genes.order <- read.delim("Archetypes_DEG_heatmap_genes.tsv") %$%
setNames(gene, cluster %>% factor(labels = seq(4, 1, -1)) %>% as.character()) %>%
.[order(names(.))]
x <- cm.pseudo %>%
.[genes.order, ] %>%
.[rowSums(.) > 0, idx$ord] %>%
Matrix::t() %>%
scale() %>%
Matrix::t()
groups <- colnames(x) %>%
`names<-`(colnames(x))
# Limit scale
x[x > 2] <- 2
x[x < -1] <- -1
# Create top annotation
tannot <- idx %$%
.[match(colnames(x), id), ] %>%
select(type) %>%
dplyr::rename(Type = type) %$%
{HeatmapAnnotation(df=.,
border=T,
col=list(Type = setNames(brewer.pal(9, "Greens")[c(3, 6, 9)], unique(Type))),
show_legend=T)}
# Create color palette
pal <- colorRampPalette(c('navy','grey95','firebrick'))(1024)
labeled.gene.subset <- NULL
# Plot
set.seed(1337)
ha <- ComplexHeatmap::Heatmap(x,
name='Expression',
col=pal,
cluster_columns=FALSE,
cluster_rows = F,
show_row_names=FALSE,
show_column_names=FALSE,
top_annotation=tannot,
left_annotation=NULL,
border=TRUE,
show_column_dend = FALSE,
show_row_dend = FALSE,
row_split = names(genes.order)[genes.order %in% rownames(x)],
column_split = anno.archetypes.hinte %>%
levels() %>%
{c("C1", rep(.[1], 4), "C2", rep(.[2], 4), "C3", rep(.[3], 4))} %>%
factor(., levels = unique(.)),
column_gap = unit(rep(0:1, 3), "mm") %>% .[-length(.)],
border_gp = gpar(col = "black", lwd = 1.5))
ht = draw(ha)
We integrated our data with the data from Hinte et al. that were graciously provided by the authors. As these data are not public, here we will only show how we plotted the data.
genes.order <- read.delim("Archetypes_DEG_heatmap_genes.tsv") %$%
setNames(gene, cluster %>% factor(labels = seq(4, 1, -1)) %>% as.character()) %>%
.[order(names(.))] %>%
{split(unname(.), names(.))}
cts <- anno.archetypes.hinte %>%
levels()
anno <- getConosCluster(con.integration, "loft_transfer_minor")
anno.subtype <- anno[names(anno) %in% names(anno.archetypes.hinte)] %>%
factor()
cm.merged.sub <- cm.merged.raw %>%
.[rownames(.) %in% names(anno.subtype), ]
cm.clean <- cm.merged.sub[rownames(cm.merged.sub) %in% names(anno.subtype), ]
cluster.scores <- genes.order %>%
lapply(\(x) {
cls.scores <- list(cluster.scores = x)
seu_obj <- CreateSeuratObject(counts = cm.clean %>% Matrix::t() %>% drop0(),
min.cells = 0,
min.features = 0)
m.scores <- UCell::AddModuleScore_UCell(seu_obj, features = cls.scores, ncores = 32)
return(m.scores)
})
Prepare
cm.sub <- cluster.scores %>%
lget("cluster.scores_UCell") %>%
lapply(\(x) mutate(x, cid = rownames(x))) %>%
bind_rows(.id = "cluster")
# Create sample-wise annotation
anno.donor <- con.integration$getDatasetPerCell()[unique(cm.sub$cid)]
anno.subtype <- anno.archetypes.hinte %>%
.[!is.na(.)] %>%
factor()
idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())
anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>%
{`names<-`(as.character(.), names(.))}
anno.final <- paste0(anno.donor,"!!",anno.subtype) %>%
`names<-`(anno.donor %>% names())
# Prepare plot
plot.tmp <- cm.sub %>%
dplyr::rename(value = cluster.scores_UCell) %>%
mutate(donor_anno = anno.final[cid]) %>%
mutate(donor = strsplit(donor_anno, "!!") %>%
sget(1),
anno = strsplit(donor_anno, "!!") %>%
sget(2)) %>%
mutate(visit = strsplit(donor, "_") %>%
sapply(\(x) if (length(x) == 2) x[2] else x[1])) %>%
group_by(donor_anno, donor, anno, visit, cluster) %>%
summarize(value = median(value))
## `summarise()` has grouped output by 'donor_anno', 'donor', 'anno', 'visit'. You
## can override using the `.groups` argument.
plot.tmp %>%
filter(visit %in% c("lean", "t0", "t1")) %>%
ggplot(aes(anno, value, fill = visit)) +
# geom_boxplot() +
facet_grid(~cluster) +
stat_summary(geom = "bar", fun = mean, position = "dodge") +
geom_point(aes(col = visit), position = position_dodge(width = 0.9)) +
theme_bw() +
theme(axis.text = element_text(color = "black"),
line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
labs(x = "", y = "Mean UCell module score per sample", fill = "", title = "", col = "") +
scale_fill_manual(values = brewer.pal(9, "Greens")[c(3, 6, 9)]) +
scale_color_manual(values = brewer.pal(9, "Purples")[c(9, 7, 5)])
gene <- "PLIN5"
plot.dat <- cm.pseudo %>%
t() %>%
.[, colnames(.) %in% gene] %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "sample") %>%
mutate(visit = strsplit(sample, "!!") %>%
sget(1) %>%
strsplit("_") %>%
sapply(\(x) if (length(x) == 1) x[1] else x[2]) %>%
factor(levels = c("lean", "t0", "t1")),
anno = strsplit(sample, "!!") %>%
sget(2) %>%
factor(levels = c("DGAT2_archetype", "CLSTN2_archetype", "PRSS23_archetype"))) %>%
reshape2::melt(id.vars = c("sample", "visit", "anno"))
plot.dat %>%
ggplot(aes(anno, value, fill = visit)) +
stat_summary(fun = "mean", geom = "bar", position = position_dodge()) +
geom_point(aes(col = visit),
position = position_dodge(width = 0.9)) +
theme_bw() +
labs(x = "", y = "Normalized pseudobulk expression", title = gene, fill = "", col = "") +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.text = element_text(color = "black")) +
scale_fill_manual(values = c(brewer.pal(9, "Greens")[c(3,6,9)])) +
scale_color_manual(values = c(brewer.pal(9, "Purples")[c(9,7,5)]))
gene <- "ADAMTS10"
plot.dat <- cm.pseudo %>%
t() %>%
.[, colnames(.) %in% gene] %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "sample") %>%
mutate(visit = strsplit(sample, "!!") %>%
sget(1) %>%
strsplit("_") %>%
sapply(\(x) if (length(x) == 1) x[1] else x[2]) %>%
factor(levels = c("lean", "t0", "t1")),
anno = strsplit(sample, "!!") %>%
sget(2) %>%
factor(levels = c("DGAT2_archetype", "CLSTN2_archetype", "PRSS23_archetype"))) %>%
reshape2::melt(id.vars = c("sample", "visit", "anno"))
plot.dat %>%
ggplot(aes(anno, value, fill = visit)) +
stat_summary(fun = "mean", geom = "bar", position = position_dodge()) +
geom_point(aes(col = visit),
position = position_dodge(width = 0.9)) +
theme_bw() +
labs(x = "", y = "Normalized pseudobulk expression", title = gene, fill = "", col = "") +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.text = element_text(color = "black")) +
scale_fill_manual(values = c(brewer.pal(9, "Greens")[c(3,6,9)])) +
scale_color_manual(values = c(brewer.pal(9, "Purples")[c(9,7,5)]))
gene <- "MAP3K5"
plot.dat <- cm.pseudo %>%
t() %>%
.[, colnames(.) %in% gene] %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "sample") %>%
mutate(visit = strsplit(sample, "!!") %>%
sget(1) %>%
strsplit("_") %>%
sapply(\(x) if (length(x) == 1) x[1] else x[2]) %>%
factor(levels = c("lean", "t0", "t1")),
anno = strsplit(sample, "!!") %>%
sget(2) %>%
factor(levels = c("DGAT2_archetype", "CLSTN2_archetype", "PRSS23_archetype"))) %>%
reshape2::melt(id.vars = c("sample", "visit", "anno"))
plot.dat %>%
ggplot(aes(anno, value, fill = visit)) +
stat_summary(fun = "mean", geom = "bar", position = position_dodge()) +
geom_point(aes(col = visit),
position = position_dodge(width = 0.9)) +
theme_bw() +
labs(x = "", y = "Normalized pseudobulk expression", title = gene, fill = "", col = "") +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.text = element_text(color = "black")) +
scale_fill_manual(values = c(brewer.pal(9, "Greens")[c(3,6,9)])) +
scale_color_manual(values = c(brewer.pal(9, "Purples")[c(9,7,5)]))
We downsampled to 15k nuclei due to calculation times. We provide
sds_obj_reduced.qs
for slingshot pseudotime estimations for
the downsampled dataset.
Here, we calculate for visit 1 and visit 3.
sds_obj <- qread("sds_obj_reduced.qs")
pseudotime <- sds_obj@assays@data@listData$pseudotime[, 1] %>%
.[!is.na(.)]
con <- qread("con_adipocytes_aspc.qs", nthreads = 10)
mat <- con$getJointCountMatrix() %>%
.[match(names(pseudotime), rownames(.)), colSums(.) > 2e2]
mt <- mat %>%
as.matrix() %>%
as.data.frame() %>%
tibble::rownames_to_column() %>%
{message("Mutating"); mutate(.,
pseudotime = pseudotime[rowname],
visit = con$getDatasetPerCell()[rowname] %>%
as.character() %>%
strsplit("_") %>%
sget(3),
donor = strsplit(rowname, "!!") %>%
sget(1))} %>%
.[complete.cases(.), ] %>%
{message("Melting"); reshape2::melt(., id.vars = c("rowname", "pseudotime", "visit", "donor"))} %$%
{message("Splitting"); split(., variable)}
The calculations take app. 11 h with 14k genes and 10k cells. We’ve
added pre-calculated results as
pseudotime_res_novis2.qs
.
res <- mt %>%
sccore::plapply(\(y) {
x = y %>%
filter(!visit == "vis2")
fit_full <- gamm4(data = x, formula = value ~ s(pseudotime, by = factor(visit)), random = ~(1 | donor), REML = F)
fit_reduced <- gamm4(data = x, formula = value ~ s(pseudotime), random = ~(1 | donor), REML = F)
fit_none <- gamm4(data = x, formula = value ~ 1, random = ~(1 | donor), REML = F)
ann <- anova(fit_full$mer, fit_reduced$mer, fit_none$mer)
if (any(ann$`Pr(>Chisq)` <= 0.05, na.rm = T)) {
residuals <- predict(fit_full$gam, se.fit = T)$fit %>%
unname()
r.sq <- c(summary(fit_full$gam)$r.sq, summary(fit_reduced$gam)$r.sq) %>%
setNames(c("full", "reduced"))
out <- list(annova = ann,
residuals = residuals,
r.sq = r.sq)
return(out)
}
}, n.cores = 5, mc.preschedule = T, mc.cleanup = T, progress = F) # 10 cores fails, 5 is max tested that finishes
Load data. For ease, we also provide pseudotime W/O visit 2 nuclei.
We need the sds_obj
from Figure 6a. Also, for ease of use
we provide slingshot pseudotime W/O visit 2 as
pseudotime_novis2.qs
.
res <- qread("pseudotime_res_novis2.qs", nthreads = 10) %>%
.[!sapply(., is.null)]
sds_obj <- qread("sds_obj.qs")
pseudotime <- qread("pseudotime_novis2.qs")
con <- qread("con_adipocytes_aspc.qs", nthreads = 10)
both <- res %>%
sapply(\(gene) {
ann <- gene$annova
(ann[2, "Pr(>Chisq)"] <= 0.05 & ann[3, "Pr(>Chisq)"] <= 0.05)
})
res.both <- res[both]
rsq.both <- res.both %>%
sapply(\(gene) gene$r.sq[1]) %>% # Ranking by r2 for pseudotime
sort(decreasing = T)
res.filter <- res.both %>%
.[rsq.both %>% .[. > 0.2] %>% names() %>% gsub(".full", "", .)]
cm.merged <- con$getJointCountMatrix() %>%
.[match(names(pseudotime), rownames(.)), colnames(.) %in% names(res.filter)] %>%
.[rowSums(.) > 0, ]
## Predict smoothend expression
pseudotime.both <- pseudotime[rownames(cm.merged)]
weights.both <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.both), rownames(.)), 1] + 1E-7
scFit <- cm.merged %>%
Matrix::t() %>%
tradeSeq::fitGAM(pseudotime = pseudotime.both, cellWeights = weights.both, verbose = T)
Smooth <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged), tidy = F, n=100)
# Average across replicates and scale
Smooth <- t(scale(t(Smooth)))
# Seriate the results
Smooth <- Smooth[ seriation::get_order(seriation::seriate(Smooth, method="PCA_angle")), ]
# Omit MALAT1
Smooth %<>% .[!rownames(.) == "MALAT1", ]
# Create heatmap
gene.subset <- which(rownames(Smooth) %in% c("PDGFRB", "DCN", "FBN1", "LPL", "ADIPOQ"))
labels <- rownames(Smooth)[gene.subset]
lannot <- rowAnnotation(link = anno_mark(at = gene.subset,
labels = labels,
labels_gp = grid::gpar(fontsize = 7)))
col_fun = circlize::colorRamp2(c(-4, 0, 4), c("navy", "white", "firebrick"))
Heatmap(Smooth,
col = col_fun,
cluster_columns=F,
cluster_rows=F,
show_column_names = F,
show_row_names = F,
left_annotation = lannot)
repressed.start <- 1
tempinduced.start = which(rownames(Smooth) == "SMOC2")
tempinduced.end <- which(rownames(Smooth) == "APOD")
repressed.end <- tempinduced.start - 1
induced.start <- tempinduced.end + 1
induced.end <- nrow(Smooth)
go.list <- Smooth %>%
rownames() %>%
{
list(
repressed = .[repressed.start : repressed.end],
tempinduced = .[tempinduced.start : tempinduced.end],
induced = .[induced.start : induced.end]
)
} %>%
lapply(\(x) clusterProfiler::enrichGO(x,
OrgDb = org.Hs.eg.db::org.Hs.eg.db,
keyType = "SYMBOL",
ont = "BP",
universe = rownames(con$samples[[1]])))
go.list %>%
lapply(getElement, "result") %>%
lapply(head, 10)
## $repressed
## ID
## GO:0030198 GO:0030198
## GO:0043062 GO:0043062
## GO:0045229 GO:0045229
## GO:0031589 GO:0031589
## GO:0085029 GO:0085029
## GO:0048251 GO:0048251
## GO:0061448 GO:0061448
## GO:0071560 GO:0071560
## GO:0071230 GO:0071230
## GO:0071559 GO:0071559
## Description
## GO:0030198 extracellular matrix organization
## GO:0043062 extracellular structure organization
## GO:0045229 external encapsulating structure organization
## GO:0031589 cell-substrate adhesion
## GO:0085029 extracellular matrix assembly
## GO:0048251 elastic fiber assembly
## GO:0061448 connective tissue development
## GO:0071560 cellular response to transforming growth factor beta stimulus
## GO:0071230 cellular response to amino acid stimulus
## GO:0071559 response to transforming growth factor beta
## GeneRatio BgRatio RichFactor FoldEnrichment zScore pvalue
## GO:0030198 23/128 339/18805 0.06784661 9.967621 13.793269 9.082953e-17
## GO:0043062 23/128 340/18805 0.06764706 9.938304 13.768812 9.689092e-17
## GO:0045229 23/128 341/18805 0.06744868 9.909160 13.744457 1.033348e-16
## GO:0031589 19/128 361/18805 0.05263158 7.732319 10.692205 4.915608e-12
## GO:0085029 8/128 56/18805 0.14285714 20.987723 12.400661 3.991196e-09
## GO:0048251 5/128 11/18805 0.45454545 66.779119 18.065556 6.039213e-09
## GO:0061448 13/128 291/18805 0.04467354 6.563171 7.917632 9.654499e-08
## GO:0071560 13/128 308/18805 0.04220779 6.200918 7.618707 1.860436e-07
## GO:0071230 8/128 92/18805 0.08695652 12.775136 9.372699 2.131588e-07
## GO:0071559 13/128 315/18805 0.04126984 6.063120 7.502078 2.407654e-07
## p.adjust qvalue
## GO:0030198 8.108337e-14 6.635182e-14
## GO:0043062 8.108337e-14 6.635182e-14
## GO:0045229 8.108337e-14 6.635182e-14
## GO:0031589 2.892835e-09 2.367253e-09
## GO:0085029 1.879055e-06 1.537661e-06
## GO:0048251 2.369384e-06 1.938905e-06
## GO:0061448 3.246670e-05 2.656802e-05
## GO:0071560 5.474333e-05 4.479734e-05
## GO:0071230 5.575287e-05 4.562347e-05
## GO:0071559 5.667618e-05 4.637902e-05
## geneID
## GO:0030198 ADAMTS5/CST3/DPP4/MFAP4/LTBP4/MMP2/COL1A1/ADAMTSL1/COL3A1/SH3PXD2B/TNXB/COL1A2/RECK/ANTXR1/COL14A1/APP/CCDC80/HMCN1/LUM/ELN/FBLN1/PDGFRA/FBLN5
## GO:0043062 ADAMTS5/CST3/DPP4/MFAP4/LTBP4/MMP2/COL1A1/ADAMTSL1/COL3A1/SH3PXD2B/TNXB/COL1A2/RECK/ANTXR1/COL14A1/APP/CCDC80/HMCN1/LUM/ELN/FBLN1/PDGFRA/FBLN5
## GO:0045229 ADAMTS5/CST3/DPP4/MFAP4/LTBP4/MMP2/COL1A1/ADAMTSL1/COL3A1/SH3PXD2B/TNXB/COL1A2/RECK/ANTXR1/COL14A1/APP/CCDC80/HMCN1/LUM/ELN/FBLN1/PDGFRA/FBLN5
## GO:0031589 ACTG1/S100A10/CCN2/AXL/ITGA11/CD63/COL1A1/CD34/ITGBL1/COL3A1/PXN/TNXB/FBLN2/CARMIL1/ANTXR1/CD44/CCDC80/FBLN1/FBLN5
## GO:0085029 MFAP4/LTBP4/COL3A1/TNXB/COL1A2/ANTXR1/ELN/FBLN5
## GO:0048251 MFAP4/LTBP4/COL3A1/TNXB/FBLN5
## GO:0061448 TIMP1/CRIP1/EBF2/CCN2/ECM1/COL1A1/CD34/COL3A1/SH3PXD2B/ZEB1/CD44/PDGFD/GLI3
## GO:0071560 SMURF2/FBN1/HTRA3/LTBP4/CILP/COL1A1/COL3A1/PXN/COL1A2/ZEB1/PDGFD/IGF1R/FYN
## GO:0071230 MMP2/COL1A1/COL3A1/COL1A2/ZEB1/PDGFD/PDGFRA/FYN
## GO:0071559 SMURF2/FBN1/HTRA3/LTBP4/CILP/COL1A1/COL3A1/PXN/COL1A2/ZEB1/PDGFD/IGF1R/FYN
## Count
## GO:0030198 23
## GO:0043062 23
## GO:0045229 23
## GO:0031589 19
## GO:0085029 8
## GO:0048251 5
## GO:0061448 13
## GO:0071560 13
## GO:0071230 8
## GO:0071559 13
##
## $tempinduced
## ID
## GO:0006956 GO:0006956
## GO:0006959 GO:0006959
## GO:0051895 GO:0051895
## GO:0150118 GO:0150118
## GO:0006957 GO:0006957
## GO:1901889 GO:1901889
## GO:0001953 GO:0001953
## GO:0006958 GO:0006958
## GO:0032970 GO:0032970
## GO:0002455 GO:0002455
## Description
## GO:0006956 complement activation
## GO:0006959 humoral immune response
## GO:0051895 negative regulation of focal adhesion assembly
## GO:0150118 negative regulation of cell-substrate junction organization
## GO:0006957 complement activation, alternative pathway
## GO:1901889 negative regulation of cell junction assembly
## GO:0001953 negative regulation of cell-matrix adhesion
## GO:0006958 complement activation, classical pathway
## GO:0032970 regulation of actin filament-based process
## GO:0002455 humoral immune response mediated by circulating immunoglobulin
## GeneRatio BgRatio RichFactor FoldEnrichment zScore pvalue
## GO:0006956 5/34 65/18805 0.07692308 42.545249 14.279537 1.086271e-07
## GO:0006959 7/34 309/18805 0.02265372 12.529507 8.697049 1.110972e-06
## GO:0051895 3/34 16/18805 0.18750000 103.704044 17.491078 2.975747e-06
## GO:0150118 3/34 16/18805 0.18750000 103.704044 17.491078 2.975747e-06
## GO:0006957 3/34 18/18805 0.16666667 92.181373 16.471553 4.325377e-06
## GO:1901889 3/34 34/18805 0.08823529 48.801903 11.873037 3.109821e-05
## GO:0001953 3/34 40/18805 0.07500000 41.481618 10.907732 5.096604e-05
## GO:0006958 3/34 40/18805 0.07500000 41.481618 10.907732 5.096604e-05
## GO:0032970 6/34 392/18805 0.01530612 8.465636 6.357238 6.478737e-05
## GO:0002455 3/34 54/18805 0.05555556 30.727124 9.310188 1.257581e-04
## p.adjust qvalue geneID Count
## GO:0006956 0.0001175345 8.255658e-05 C1R/CFD/C1S/C3/CFH 5
## GO:0006959 0.0006010359 4.221694e-04 C1R/CFD/C1S/C3/CFH/CXCL12/CXCL14 7
## GO:0051895 0.0008049396 5.653920e-04 ARHGAP6/DLC1/APOD 3
## GO:0150118 0.0008049396 5.653920e-04 ARHGAP6/DLC1/APOD 3
## GO:0006957 0.0009360117 6.574574e-04 CFD/C3/CFH 3
## GO:1901889 0.0056080436 3.939106e-03 ARHGAP6/DLC1/APOD 3
## GO:0001953 0.0068931573 4.841774e-03 ARHGAP6/DLC1/APOD 3
## GO:0006958 0.0068931573 4.841774e-03 C1R/C1S/C3 3
## GO:0032970 0.0077888818 5.470934e-03 GSN/PDGFRB/ANK2/ARHGAP6/DLC1/CXCL12 6
## GO:0002455 0.0130178838 9.143800e-03 C1R/C1S/C3 3
##
## $induced
## ID Description GeneRatio
## GO:0019915 GO:0019915 lipid storage 8/56
## GO:0170062 GO:0170062 nutrient storage 8/56
## GO:0010889 GO:0010889 regulation of triglyceride storage 5/56
## GO:0030730 GO:0030730 triglyceride storage 5/56
## GO:0010883 GO:0010883 regulation of lipid storage 6/56
## GO:1905954 GO:1905954 positive regulation of lipid localization 7/56
## GO:1905952 GO:1905952 regulation of lipid localization 8/56
## GO:0034308 GO:0034308 primary alcohol metabolic process 6/56
## GO:0016042 GO:0016042 lipid catabolic process 9/56
## GO:0006641 GO:0006641 triglyceride metabolic process 6/56
## BgRatio RichFactor FoldEnrichment zScore pvalue
## GO:0019915 97/18805 0.08247423 27.695140 14.405703 4.329521e-10
## GO:0170062 97/18805 0.08247423 27.695140 14.405703 4.329521e-10
## GO:0010889 15/18805 0.33333333 111.934524 23.489785 5.725524e-10
## GO:0030730 18/18805 0.27777778 93.278770 21.406192 1.622525e-09
## GO:0010883 55/18805 0.10909091 36.633117 14.463207 1.371437e-08
## GO:1905954 113/18805 0.06194690 20.801991 11.538525 4.265558e-08
## GO:1905952 189/18805 0.04232804 14.213908 9.978128 8.442191e-08
## GO:0034308 103/18805 0.05825243 19.561373 10.323210 6.062461e-07
## GO:0016042 350/18805 0.02571429 8.634949 7.879764 8.478839e-07
## GO:0006641 110/18805 0.05454545 18.316558 9.954633 8.937458e-07
## p.adjust qvalue
## GO:0019915 2.467701e-07 1.777926e-07
## GO:0170062 2.467701e-07 1.777926e-07
## GO:0010889 2.467701e-07 1.777926e-07
## GO:0030730 5.244811e-07 3.778774e-07
## GO:0010883 3.546536e-06 2.555203e-06
## GO:1905954 9.192278e-06 6.622840e-06
## GO:1905952 1.559393e-05 1.123510e-05
## GO:0034308 9.798452e-05 7.059576e-05
## GO:0016042 1.155613e-04 8.325947e-05
## GO:0006641 1.155613e-04 8.325947e-05
## geneID Count
## GO:0019915 PPARG/ACACB/NRIP1/CIDEA/PNPLA2/PLIN5/LPL/ACVR1C 8
## GO:0170062 PPARG/ACACB/NRIP1/CIDEA/PNPLA2/PLIN5/LPL/ACVR1C 8
## GO:0010889 PPARG/CIDEA/PNPLA2/PLIN5/LPL 5
## GO:0030730 PPARG/CIDEA/PNPLA2/PLIN5/LPL 5
## GO:0010883 PPARG/ACACB/CIDEA/PNPLA2/PLIN5/LPL 6
## GO:1905954 PPARG/ACACB/CIDEA/ACSL1/PLIN5/ADIPOQ/LPL 7
## GO:1905952 PPARG/ACACB/CIDEA/ACSL1/PNPLA2/PLIN5/ADIPOQ/LPL 8
## GO:0034308 ADH1B/ALDH2/PNPLA2/PECR/RETSAT/LIPE 6
## GO:0016042 ACACB/CIDEA/PNPLA2/PLIN5/ADIPOQ/LIPE/LPL/PDE3B/PLAAT3 9
## GO:0006641 ACSL1/PNPLA2/PLIN5/LIPE/LPL/PLAAT3 6
Figure S6A needs to be calculated first. We obtain list with human transcription factors from public resource.
tfs <- read.table("https://humantfs.ccbr.utoronto.ca/download/v_1.01/TF_names_v_1.01.txt") %>%
pull(V1)
tf.filter <- intersect(names(res.filter), tfs)
cm.merged <- con$getJointCountMatrix() %>%
.[match(names(pseudotime), rownames(.)), colnames(.) %in% tf.filter] %>%
.[rowSums(.) > 0, ]
## Predict smoothend expression
pseudotime.both <- pseudotime[rownames(cm.merged)]
weights.both <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.both), rownames(.)), 1] + 1E-7
scFit <- cm.merged %>%
Matrix::t() %>%
tradeSeq::fitGAM(pseudotime = pseudotime.both, cellWeights = weights.both, verbose = T)
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
Smooth <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged), tidy = F, n=100)
# Average across replicates and scale
Smooth <- t(scale(t(Smooth)))
# Seriate the results
Smooth <- Smooth[ seriation::get_order(seriation::seriate(Smooth, method="PCA_angle")), ]
# Omit MALAT1
Smooth %<>% .[!rownames(.) == "MALAT1", ]
col_fun = circlize::colorRamp2(c(-4, 0, 4), c("navy", "white", "firebrick"))
Heatmap(Smooth,
col = col_fun,
cluster_columns=F,
cluster_rows=F,
show_column_names = F,
row_names_gp = grid::gpar(fontsize = 10)
)
We need the sds_obj
object from Figure 6a.
con <- qread("con_adipocytes_aspc.qs", nthreads = 10)
spc <- con$getDatasetPerCell()
vpc <- spc %>%
as.character() %>%
strsplit("_|!!") %>%
sget(3) %>%
setNames(names(spc))
anno.adipocytes <- qread("anno_major.qs") %>%
.[. == "Adipocytes"] %>%
factor()
anno.aspc <- qread("anno_aspc.qs")
anno <- factor(c(anno.adipocytes, anno.aspc))
cm.merged <- con$getJointCountMatrix()
mat.df <- c("ZEB1", "EBF2", "PPARG") %>%
{cm.merged[, colnames(cm.merged) %in% .]} %>%
as.matrix() %>%
as.data.frame() %>%
mutate(.,
visit = unname(vpc)[match(rownames(.),
names(vpc)
)
] %>%
factor(labels = c("Visit 1", "Visit 2", "Visit 3")),
anno = unname(anno)[match(rownames(.),
names(anno)
)
],
sample = unname(spc)[match(rownames(.),
names(spc)
)
]
) %>%
filter(!is.na(anno))
pseudotime_all <- qread("sds_obj.qs", nthreads = 10)@assays@data@listData$pseudotime %>%
as.data.frame() %>%
filter(!is.na(Lineage1)) %>%
{setNames(.$Lineage1, rownames(.))}
mat.df %<>% .[match(names(pseudotime_all), rownames(.), nomatch = F), ]
pseudotime_all %<>% .[rownames(mat.df)]
mat.df2 <- mat.df %>%
mutate(pseudotime = pseudotime_all) %>%
mutate(bin = cut(pseudotime, breaks = c(0, 0.005, 0.01, 0.0165, 0.023))) %>%
na.omit() %>%
mutate(bin = as.character(bin) %>%
strsplit(",") %>%
sget(2) %>%
gsub("]", "", ., fixed = T)) %>%
select(-anno, -pseudotime) %>%
reshape2::melt(id.vars = c("visit", "bin", "sample")) %>%
filter(value > 0) %>%
group_by(visit, variable, bin, sample) %>%
mutate(value = as.numeric(value)) %>%
summarize(mm = mean(value)) %>%
mutate(sex = meta$sex[match(sample, meta$sample)]) %>%
mutate(sex_visit = paste(sex, gsub("Visit", "visit", visit), sep = " "))
## `summarise()` has grouped output by 'visit', 'variable', 'bin'. You can
## override using the `.groups` argument.
mat.df2 %>%
pull(variable) %>%
unique() %>%
as.character() %>%
lapply(\(vv) {
plot.dat <- mat.df2 %>%
filter(variable == vv) %>%
ungroup() %>%
mutate(sex = meta$sex[match(.$sample, meta$sample)]) %>%
mutate(sex_visit = paste(sex, visit, sep = " ") %>% gsub("Visit", "visit", .))
# Kruskal-Wallis
sex.stat <- plot.dat %$%
split(., sex) %>%
lapply(\(x) split(x, x$bin)) %>%
lapply(lapply, \(x) kruskal.test(x$mm, g = x$visit)$p.value) %>%
lapply(sapply, gtools::stars.pval) %>%
{lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>%
`names<-`(c("Female","Male")) %>%
lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>%
{lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>%
bind_rows() %>%
mutate(ct = factor(ct, levels = unique(plot.dat$bin))) %>%
arrange(ct) %>%
mutate(sig = gsub(".", "", sig, fixed = T))
# Wilcoxon
stat.test <- plot.dat %>%
group_by(bin, sex) %>%
wilcox_test(mm~sex_visit, paired = F) %>%
filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
add_xy_position(x = "bin", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
arrange(desc(y.position))
# Plot
ggplot(plot.dat, aes(bin, mm)) +
geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
color = "black", size = 0.5, alpha = 0.2) +
theme_bw() +
labs(x = "", y = "Mean AUC", fill = "", title = vv) +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.text = element_text(color = "black")) +
scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
geom_text(inherit.aes = F, data = sex.stat, aes(ct, y = 1.4, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 0.2, hide.ns = F) # Wilcoxon
}) %>%
cowplot::plot_grid(plotlist = ., ncol = 3)
First, we need to integrate samples, calculate new diffusion map, and calculate new trajectory before plotting using the same genes identified for Loft cells only. These initial steps are not run here, load data below.
Create new Conos object
con.new = qread("con_adipocytes_aspc.qs", nthreads = 10)
anno <- getConosCluster(con.integration, "loft_transfer") %>%
.[. %in% c("Adipocytes", "FAPs")] %>%
gsub("FAPs", "ASPCs", .)
anno.hinte <- anno %>%
.[!grepl("!!", names(.))]
samples.processed <- con.integration$samples %>%
.[!grepl("vis", names(.))] %>%
lapply(\(sid) {
conos:::getRawCountMatrix(sid)
}) %>%
lapply(basicP2proc, n.cores = 32, get.largevis = F, get.tsne = F, make.geneknn = F)
con.new$addSamples(samples.processed)
con.new$buildGraph()
con.new$findCommunities()
con.new$embedGraph()
con.new$embedGraph(method = "UMAP")
Calculate diffusion map
data <- con.new
## Convert raw counts to Seurat
for (name in names(data$samples)) {
seu <- CreateSeuratObject(CreateAssayObject(counts = t(data$samples[[name]]$misc$rawCounts)))
seu$orig.ident <- substr(name, 0, regexpr("vis", name)-2)
seu$visit <- substr(name, regexpr("vis", name), nchar(name))
if (name == names(data$samples)[1]) {
final <- seu
} else {
final <- merge(final, seu)
}
}
## Merge annotation
final <- subset(final, cells = names(anno))
final <- NormalizeData(final)
anno.final <- anno[ match(colnames(final), names(anno))]
final$fine <- anno.final
final$coarse <- anno.final
final$dataset <- paste(final$orig.ident, final$visit, sep="_")
final <- subset(final, cells = names(which(!is.na(final$fine))))
full <- final
embeddings <- list()
for (nfeats in c(100)) {
# Recreate original object
final <- full
# ASPC
ASPC <- subset(final, coarse == "ASPCs")
VariableFeatures(ASPC) <- NULL
obj.list <- SplitObject(ASPC, split.by = "dataset")
feats_ASPC <- suppressWarnings(SelectIntegrationFeatures(obj.list, nfeatures = nfeats, verbose = FALSE))
# Adipocytes
Ad <- subset(final, coarse == "Adipocytes")
VariableFeatures(Ad) <- NULL
obj.list <- SplitObject(Ad, split.by = "dataset")
feats_Ad <- suppressWarnings(SelectIntegrationFeatures(obj.list, nfeatures = nfeats, verbose = FALSE))
# Combine
feats <- unique(c(feats_ASPC, feats_Ad))
## Embed
# PCA
VariableFeatures(final) <- feats
final <- ScaleData(final)
final <- RunPCA(final)
for (useHarmony in c(TRUE)) {
if (useHarmony) {
final <- suppressWarnings(RunHarmony(final, group.by.vars = "dataset", verbose = FALSE))
pcs <- final@reductions$harmony@cell.embeddings
} else {
pcs <- final@reductions$pca@cell.embeddings
}
for (dims in c(10)) {
dm <- destiny::DiffusionMap(pcs[,1:dims], verbose = FALSE, n_pcs = NA)
dm_ev <- dm@eigenvectors
colnames(dm_ev) <- paste("DC", c(1:20), sep="_")
embeddings[[(length(embeddings) + 1)]] <- dm_ev
names(embeddings)[length(embeddings)] <- paste(nfeats, useHarmony, dims, sep="_")
}
}
}
# Save embedding
qsave(embeddings[[1]], "diffusion_embedding.qs")
Create trajectory
# Calculate slingshot trajectory
emb <- qread("diffusion_embedding.qs")
anno.minor <- getConosCluster(con.integration, "loft_transfer_minor")
anno.sort <- anno.minor[rownames(emb)] %>%
.[!. %in% c("ATM", "cvEC", "Mast cells", "mono/mac", "Pericytes")]
sds_obj <- slingshot(emb[names(anno.sort), 1:2] %>%
`colnames<-`(c("UMAP1","UMAP2")),
anno.sort %>% unname(),
end.clus = "Adipocytes",
start.clus = "ASPC_DPP4")
qsave(sds_obj, "sds_obj_hinte.qs")
Load data
emb <- qread("diffusion_embedding.qs")
sds_obj <- qread("sds_obj_hinte.qs")
sds <- as.SlingshotDataSet(sds_obj)
res <- qread("pseudotime_res_novis2.qs", nthreads = 10) %>% .[!sapply(., is.null)]
# Here, we paste the order of genes so we don't have to calculate it
pseudoGeneOrder <- c("GPC3", "ITM2B", "CXCL14", "CXCL12", "FOS", "CD81", "B2M", "TMSB10", "SERPING1", "CLU", "CRIP1", "S100A4", "LUM", "ACTB", "S100A10", "LMNA", "C1S", "C3", "CD63", "C1R", "CST3", "IGFBP6", "PLAC9", "AEBP1", "S100A6", "MMP2", "TIMP2", "FYN", "SDC2", "UST", "FLRT2", "MEG8", "SMURF2", "AL110292.1", "PKD2", "MYO9A", "PALLD", "FTO", "MSC-AS1", "LDLRAD4", "TTC3", "MAP3K20", "FBXO11", "DIP2C", "ROCK2", "UPP2", "LINC02456", "PDXDC1", "ADIPOQ-AS1", "IGF1", "PLEKHH2", "C2CD2", "TBL1XR1", "SLC1A3", "C6", "MAP3K5", "RGS6", "LENG8")
Get genes
both <- res %>%
sapply(\(gene) {
ann <- gene$annova
(ann[2, "Pr(>Chisq)"] <= 0.05 & ann[3, "Pr(>Chisq)"] <= 0.05)
})
res.both <- res[both]
p.both <- res.both %>%
sapply(\(gene) gene$annova[3, "Pr(>Chisq)"])
tmp <- res.both %>%
lget("annova") %>%
lapply(dplyr::slice, 2:3) %>%
lapply(pull, AIC) %>%
.[sapply(., \(x) abs(x[2]) > abs(x[1]))] %>%
bind_rows() %>%
t() %>%
as.data.frame() %>%
setNames(c("reduced", "full")) %>%
mutate(diff = abs(full) - abs(reduced)) %>%
mutate(diff.frac = diff / abs(reduced))
res.filter <- res[
tmp %>%
filter(diff.frac >= 0.05) %>%
rownames()
]
anno <- getConosCluster(con.integration, "loft_transfer") %>%
.[. %in% c("Adipocytes", "FAPs")] %>%
gsub("FAPs", "ASPCs", .)
anno.hinte <- anno %>%
.[!grepl("!!", names(.))] %>%
factor()
pseudotime_hinte <- sds_obj@assays@data@listData$pseudotime[, 1] %>%
.[!is.na(.)] %>%
.[names(anno.hinte)[!anno.hinte %in% c("ATM", "cvEC", "Mast cells", "mono/mac", "Pericytes")]] %>%
.[!is.na(.)]
cm.merged.tmp <- con.integration$getJointCountMatrix() %>%
.[match(names(pseudotime_hinte), rownames(.)), colnames(.) %in% names(res.filter)]
visit = con.integration$getDatasetPerCell()[rownames(cm.merged.tmp)] %>%
as.character() %>%
.[!grepl("!!", .)] %>%
strsplit("_") %>%
sapply(\(x) if (length(x) == 1) x else x[2])
# t1
cm.merged.vis <- cm.merged.tmp[visit == "t1", ]
pseudotime.vis <- pseudotime_hinte[rownames(cm.merged.vis)]
weights.vis <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.vis), rownames(.)), 1] + 1E-7
scFit <- cm.merged.vis %>%
Matrix::t() %>%
tradeSeq::fitGAM(pseudotime = pseudotime.vis, cellWeights = weights.vis, verbose = TRUE)
Smooth3 <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged.vis), tidy = FALSE, n=100)
# t0
cm.merged.vis <- cm.merged.tmp[visit == "t0", ]
pseudotime.vis <- pseudotime_hinte[rownames(cm.merged.vis)]
weights.vis <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.vis), rownames(.)), 1] + 1E-7
scFit <- cm.merged.vis %>%
Matrix::t() %>%
tradeSeq::fitGAM(pseudotime = pseudotime.vis, cellWeights = weights.vis, verbose = TRUE)
Smooth1 <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged.vis), tidy = FALSE, n=100)
# lean
cm.merged.vis <- cm.merged.tmp[visit == "lean", ]
pseudotime.vis <- pseudotime_hinte[rownames(cm.merged.vis)]
weights.vis <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.vis), rownames(.)), 1] + 1E-7
scFit <- cm.merged.vis %>%
Matrix::t() %>%
tradeSeq::fitGAM(pseudotime = pseudotime.vis, cellWeights = weights.vis, verbose = TRUE)
Smoothl <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged.vis), tidy = FALSE, n=100)
# Combine smooth
Smooth <- cbind(Smooth3, Smooth1, Smoothl)
# Average across replicates and scale
Smooth <- t(scale(t(Smooth)))
Smooth %<>%
.[pseudoGeneOrder, ]
# Split smooth
Smooth33 <- Smooth[, 1:100]
Smooth11 <- Smooth[, 101:200]
Smoothll <- Smooth[, 201:300]
# t0
# Seriate the results
col_fun = circlize::colorRamp2(c(-4, 0, 4), c("navy", "white", "firebrick"))
p3 <- Heatmap(Smooth33,
col = col_fun,
cluster_columns=F,
cluster_rows=F,
show_column_names = F,
show_row_names = F,
show_heatmap_legend = T
)
# t0
p1 <- Heatmap(Smooth11,
col = col_fun,
cluster_columns=F,
cluster_rows=F,
show_column_names = F,
show_row_names = F,
show_heatmap_legend = F
)
# lean
gene.highlights <- c("CXCL14", "CXCL12", "CD81", "SERPING1", "CLU", "C1S", "C3", "C1R", "MMP2", "TIMP2", "FTO", "MAP3K20", "PLEKHH2", "TBL1XR1", "C6", "MAP3K5") %>%
{which(rownames(Smoothll) %in% .)}
row_anno <- rowAnnotation(foo = anno_mark(at = gene.highlights,
labels = rownames(Smoothll)[gene.highlights],
side = "left"))
pl <- Heatmap(Smoothll,
col = col_fun,
left_annotation = row_anno,
cluster_columns=F,
cluster_rows=F,
show_column_names = F,
show_row_names = F,
show_heatmap_legend = F
)
pl
p1
p3
tmp.regulons <- qread("regulons.qs")
pseudotime_all <- sds_obj@assays@data@listData$pseudotime %>%
as.data.frame() %>%
filter(!is.na(Lineage1)) %>%
{setNames(pull(., Lineage1), rownames(.))} %>%
.[names(tmp.regulons[[1]])]
spc <- con.integration$getDatasetPerCell()
tmp.regulons$PPARG %>%
{data.frame(value = unname(.), row.names = names(.))} %>%
mutate(sample = spc[match(rownames(.), names(spc))] %>% unname()) %>%
mutate(pseudo = pseudotime_all) %>%
mutate(bin = cut(pseudo, breaks = c(0, 0.01, 0.0165, 0.02, 0.026))) %>%
na.omit() %>%
mutate(bin = as.character(bin) %>%
strsplit(",") %>%
sget(2) %>%
gsub("]", "", ., fixed = TRUE)) %>%
mutate(visit = as.character(sample) %>%
strsplit("_") %>%
sapply(\(x) if (length(x) == 3) x[3] else if (length(x) == 2) x[2] else x),
sex = meta$sex[match(sample, meta$sample)],
study = ifelse(grepl("!!", rownames(.)), "Loft", "Hinte")) %>%
mutate(sex_visit = paste(sex, visit, sep = "_") %>%
gsub("NA_", "", .) %>%
factor() %>%
factor(levels = c(levels(.)[c(1:3, 5:9, 4)]))) %>%
arrange(bin) %>%
mutate(bin = factor(bin, labels = c("Early preadipocytes", "Mid preadipocytes", "Late preadipocytes", "Adipocyte"))) %>%
filter(study == "Hinte") %>%
group_by(bin, sex_visit, study, sample) %>%
summarize(mean_auc = mean(value)) %>%
mutate(sex_visit = factor(sex_visit, levels = c("lean", "t0", "t1"))) %>%
group_by(bin) %>% # Addition
mutate(mean_auc = mean_auc - mean(mean_auc[sex_visit == "t0"])) %>% # Addition
filter(!sex_visit == "t0") %>% # Addition
ggplot(aes(bin, mean_auc)) +
stat_summary(aes(fill = sex_visit), fun = "mean", position = position_dodge(width = 0.8), geom = "bar") +
geom_point(aes(col = sex_visit), position = position_dodge(width = 0.8)) +
theme_bw() +
theme(line = element_blank(),
axis.text = element_text(color = "black"),
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
scale_fill_manual(values = c(brewer.pal(9, "Greys")[c(3,6,9)])) +
scale_color_manual(values = c(brewer.pal(9, "Purples")[c(9,7,5)])) +
labs(title = "PPARG regulon activity", y = "Change from obese (t0) mean AUC", x = "", fill = "", col = "")
## `summarise()` has grouped output by 'bin', 'sex_visit', 'study'. You can
## override using the `.groups` argument.
These plots are presented in Liana.ipynb
.
Time to knit
Sys.time() - tt
## Time difference of 50.88765 mins
sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.2 LTS
##
## Matrix products: default
## BLAS/LAPACK: /opt/intel/oneapi/mkl/2025.1/lib/libmkl_gf_lp64.so.2; LAPACK version 3.12.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Berlin
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 grid stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] glmnet_4.1-8 RColorBrewer_1.1-3
## [3] corrplot_0.95 sccore_1.0.5
## [5] cowplot_1.1.3 circlize_0.4.16
## [7] slingshot_2.16.0 TrajectoryUtils_1.16.1
## [9] SingleCellExperiment_1.30.1 SummarizedExperiment_1.38.1
## [11] Biobase_2.68.0 GenomicRanges_1.60.0
## [13] GenomeInfoDb_1.44.0 IRanges_2.42.0
## [15] S4Vectors_0.46.0 BiocGenerics_0.54.0
## [17] generics_0.1.4 MatrixGenerics_1.20.0
## [19] matrixStats_1.5.0 princurve_2.1.6
## [21] harmony_1.2.3 Rcpp_1.0.14
## [23] Seurat_5.3.0 SeuratObject_5.1.0
## [25] sp_2.2-0 destiny_3.22.0
## [27] ComplexHeatmap_2.24.0 ggpubr_0.6.0
## [29] rstatix_0.7.2 ggplot2_3.5.2
## [31] cacoa_0.4.0 scHelper_0.0.5
## [33] pagoda2_1.0.12 magrittr_2.0.3
## [35] dplyr_1.1.4 qs_0.27.3
## [37] conos_1.5.2 igraph_2.1.4
## [39] Matrix_1.7-3
##
## loaded via a namespace (and not attached):
## [1] R.methodsS3_1.8.2 dichromat_2.0-0.1
## [3] nnet_7.3-20 goftest_1.2-3
## [5] Biostrings_2.76.0 vctrs_0.6.5
## [7] ggtangle_0.0.6 spatstat.random_3.4-1
## [9] RApiSerialize_0.1.4 digest_0.6.37
## [11] png_0.1-8 shape_1.4.6.1
## [13] proxy_0.4-27 registry_0.5-1
## [15] ggrepel_0.9.6 deldir_2.0-4
## [17] parallelly_1.44.0 magick_2.8.6
## [19] MASS_7.3-65 reshape2_1.4.4
## [21] httpuv_1.6.16 foreach_1.5.2
## [23] qvalue_2.40.0 withr_3.0.2
## [25] psych_2.5.3 xfun_0.52
## [27] ggfun_0.1.8 survival_3.8-3
## [29] memoise_2.0.1 hexbin_1.28.5
## [31] gson_0.1.0 clusterProfiler_4.16.0
## [33] ggsci_3.2.0 tidytree_0.4.6
## [35] zoo_1.8-14 GlobalOptions_0.1.2
## [37] gtools_3.9.5 pbapply_1.7-2
## [39] R.oo_1.27.1 DEoptimR_1.1-3-1
## [41] Formula_1.2-5 KEGGREST_1.48.0
## [43] promises_1.3.2 scatterplot3d_0.3-44
## [45] httr_1.4.7 globals_0.18.0
## [47] fitdistrplus_1.2-2 stringfish_0.16.0
## [49] rstudioapi_0.17.1 UCSC.utils_1.4.0
## [51] miniUI_0.1.2 DOSE_4.2.0
## [53] curl_6.2.2 ca_0.71.1
## [55] polyclip_1.10-7 quadprog_1.5-8
## [57] GenomeInfoDbData_1.2.14 SparseArray_1.8.0
## [59] RcppEigen_0.3.4.0.2 xtable_1.8-4
## [61] stringr_1.5.1 doParallel_1.0.17
## [63] fastMatMR_1.2.5 evaluate_1.0.3
## [65] S4Arrays_1.8.0 irlba_2.3.5.1
## [67] colorspace_2.1-1 ROCR_1.0-11
## [69] reticulate_1.42.0 spatstat.data_3.1-6
## [71] lmtest_0.9-40 viridis_0.6.5
## [73] ggtree_3.16.0 later_1.4.2
## [75] lattice_0.22-7 spatstat.geom_3.4-1
## [77] future.apply_1.11.3 robustbase_0.99-4-1
## [79] scattermore_1.2 triebeard_0.4.1
## [81] RcppAnnoy_0.0.22 xts_0.14.1
## [83] class_7.3-23 pillar_1.10.2
## [85] nlme_3.1-168 iterators_1.0.14
## [87] compiler_4.5.1 RSpectra_0.16-2
## [89] stringi_1.8.7 TSP_1.2-4
## [91] tensor_1.5 plyr_1.8.9
## [93] drat_0.2.5 crayon_1.5.3
## [95] abind_1.4-8 gridGraphics_0.5-1
## [97] locfit_1.5-9.12 org.Hs.eg.db_3.21.0
## [99] bit_4.6.0 pcaMethods_2.0.0
## [101] fastmatch_1.1-6 tradeSeq_1.22.0
## [103] codetools_0.2-20 TTR_0.24.4
## [105] bslib_0.9.0 e1071_1.7-16
## [107] GetoptLong_1.0.5 ggplot.multistats_1.0.1
## [109] plotly_4.10.4 mime_0.13
## [111] splines_4.5.1 fastDummies_1.7.5
## [113] sparseMatrixStats_1.20.0 brew_1.0-10
## [115] N2R_1.0.3 knitr_1.50
## [117] blob_1.2.4 utf8_1.2.5
## [119] clue_0.3-66 fs_1.6.6
## [121] listenv_0.9.1 DelayedMatrixStats_1.30.0
## [123] ggsignif_0.6.4 ggplotify_0.1.2
## [125] tibble_3.2.1 statmod_1.5.0
## [127] pkgconfig_2.0.3 tools_4.5.1
## [129] Rook_1.2 cachem_1.1.0
## [131] RSQLite_2.3.11 viridisLite_0.4.2
## [133] smoother_1.3 DBI_1.2.3
## [135] fastmap_1.2.0 rmarkdown_2.29
## [137] scales_1.4.0 pbmcapply_1.5.1
## [139] ica_1.0-3 broom_1.0.8
## [141] sass_0.4.10 patchwork_1.3.0
## [143] dotCall64_1.2 carData_3.0-5
## [145] RANN_2.6.2 farver_2.1.2
## [147] mgcv_1.9-3 yaml_2.3.10
## [149] ggthemes_5.1.0 cli_3.6.5
## [151] purrr_1.0.4 UCell_2.12.0
## [153] lifecycle_1.0.4 uwot_0.2.3
## [155] backports_1.5.0 BiocParallel_1.42.0
## [157] gtable_0.3.6 rjson_0.2.23
## [159] ggridges_0.5.6 progressr_0.15.1
## [161] limma_3.64.0 parallel_4.5.1
## [163] ape_5.8-1 edgeR_4.6.2
## [165] jsonlite_2.0.0 seriation_1.5.7
## [167] RcppHNSW_0.6.0 bit64_4.6.0-1
## [169] Rtsne_0.17 yulab.utils_0.2.0
## [171] coda.base_1.0.0 BiocNeighbors_2.2.0
## [173] spatstat.utils_3.1-4 ranger_0.17.0
## [175] urltools_1.7.3 RcppParallel_5.1.10
## [177] jquerylib_0.1.4 GOSemSim_2.34.0
## [179] spatstat.univar_3.1-3 R.utils_2.13.0
## [181] lazyeval_0.2.2 shiny_1.10.0
## [183] htmltools_0.5.8.1 enrichplot_1.28.2
## [185] GO.db_3.21.0 sctransform_0.4.2
## [187] glue_1.8.0 spam_2.11-1
## [189] XVector_0.48.0 VIM_6.2.2
## [191] treeio_1.32.0 mnormt_2.1.1
## [193] RMTstat_0.3.1 gridExtra_2.3
## [195] boot_1.3-31 R6_2.6.1
## [197] tidyr_1.3.1 DESeq2_1.48.1
## [199] vcd_1.4-13 labeling_0.4.3
## [201] cluster_2.1.8.1 aplot_0.2.5
## [203] DelayedArray_0.34.1 tidyselect_1.2.1
## [205] dendsort_0.3.4 car_3.1-3
## [207] AnnotationDbi_1.70.0 future_1.49.0
## [209] leidenAlg_1.1.5 KernSmooth_2.23-26
## [211] laeken_0.5.3 data.table_1.17.2
## [213] htmlwidgets_1.6.4 fgsea_1.34.0
## [215] rlang_1.1.6 spatstat.sparse_3.1-0
## [217] spatstat.explore_3.4-3 Cairo_1.6-2