Setup

library(conos)
library(qs)
library(dplyr)
library(magrittr)
library(pagoda2)
library(scHelper) # github.com/rrydbirk/scHelper
library(cacoa)

## Warning: replacing previous import 'ape::where' by 'dplyr::where' when loading
## 'cacoa'

library(ggplot2)
library(rstatix)
library(ggpubr)
library(ComplexHeatmap)
library(destiny)
library(Seurat)
library(harmony)
library(slingshot)
library(circlize)
library(cowplot)
library(sccore)
library(corrplot)
library(RColorBrewer)
library(glmnet)

meta <- qread("meta.qs")

# Palettes
pal.major <- RColorBrewer::brewer.pal(9, "Set1") %>% 
  setNames(c("Adipocytes", "Myeloid immune cells", "ASPCs", "ECs", "SMCs", "Lymphoid immune cells", "Mast cells", "Lymphatic ECs", "Pericytes")) %>% 
  {setNames(c(., ., sapply(unname(.), \(color) {
    hsv_vals <- rgb2hsv(col2rgb(color))
    new_hues <- hsv(pmin(1, hsv_vals[1] * 0.9), hsv_vals[2], hsv_vals[3])
  })), c(names(.), paste0(names(.), ", female"), paste0(names(.), ", male"))
  )} %>% 
  {setNames(c(., .[c("Lymphoid immune cells", "Myeloid immune cells")]), c(names(.), c("Lymphoid ICs", "Myeloid ICs")))}

tt <- Sys.time()

Figure 1

Load data

con.major <- qread("con_major.qs", nthreads = 10)
anno.major <- qread("anno_major.qs")

Figures 1b-1f

These figures were made in GraphPad Prism.

Figure 1g

con.major$plotGraph(groups = anno.major, 
                    plot.na = FALSE, 
                    size = 0.1,
                    alpha = 0.1, 
                    embedding = "UMAP_refined7",
                    show.labels = TRUE, 
                    font.size = 5) + 
  labs(x = "UMAP1", y = "UMAP2") + 
  theme(line = element_blank()) + 
  scale_color_manual(values = RColorBrewer::brewer.pal(9, "Set1"))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Figure 1h

cm.merged <- con.major$getJointCountMatrix() %>% 
  .[rownames(.) %in% names(anno.major), ]

c("ADIPOQ", "PLIN1", # Adipocytes
  "PDGFRA", "DCN", # ASPCs
  "MECOM", "ADGRL4", # ECs
  "PKHD1L1", "PROX1", # LECs
  "MYOCD", "MYH11", # SMCs
  "STEAP4", "GIPR", # Pericytes
  "IL7R", "CD3E", # Lymphoid
  "KIT", "CPA3", # Mast
  "MRC1", "F13A1") %>% # Myeloid
  sccore::dotPlot(., 
                  cm.merged, 
                  factor(anno.major[rownames(cm.merged)], 
                         levels = c("Adipocytes", 
                                    "ASPCs", 
                                    "ECs", 
                                    "Lymphatic ECs", 
                                    "SMCs", 
                                    "Pericytes", 
                                    "Lymphoid immune cells", 
                                    "Mast cells", 
                                    "Myeloid immune cells")), 
                  gene.order = ., 
                  cols = c("white","grey20"))

Figure 1i

# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major, 
                 cell.groups = anno.major, 
                 ref.level = "vis2", 
                 target.level = "vis3", 
                 sample.groups = con.major$samples %>% 
                   names() %>% 
                   strsplit("_") %>% 
                   sapply('[[', 3) %>% 
                   gsub(1, 2, .) %>% 
                   `names<-`(con.major$samples %>% 
                               names()),
                 sample.groups.palette = ggsci::pal_jama()(7))

cao$sample.groups <- con.major$samples %>% 
  names() %>% 
  strsplit("_") %>% 
  sapply('[[', 3) %>% 
  `names<-`(con.major$samples %>% 
              names()) %>% 
  gsub("vis", "Visit ", .)

p <- cao$plotCellGroupSizes(show.significance = TRUE, 
                            filter.empty.cell.types = FALSE)

plot.dat <- p$data %>% 
  mutate(sex = rep(meta$sex, 9)) %>% 
  mutate(sample = stringi::stri_dup(LETTERS[seq(14)], 3) %>% 
           paste(collapse = "") %>% 
           strsplit("") %>% 
           unlist() %>% 
           rep(9)) %>% 
  mutate(sex_visit = paste(sex, group, sep = " ") %>% 
           gsub("Vis", "vis", .),
         facet = ifelse(variable %in% c("SMCs", "Mast cells", "Lymphatic ECs", "Pericytes", "Lymphoid immune cells"), "low", "high"),
         variable = factor(variable, levels = c("Adipocytes", "ASPCs", "ECs", "Myeloid immune cells", "Lymphatic ECs", "SMCs", "Pericytes", "Lymphoid immune cells", "Mast cells")) %>% 
           renameAnnotation("Myeloid immune cells", "   Myeloid immune cells")) %>% 
  mutate(sex_visit_anno = paste0(sex_visit, "_", variable))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$variable)) %>%
  lapply(lapply, \(x) kruskal.test(x$value, g = x$group)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(ct = factor(ct, levels = unique(plot.dat$variable))) %>% 
  arrange(ct) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T),
         facet = ifelse(ct %in% c("SMCs", "Mast cells", "Lymphatic ECs", "Pericytes", "Lymphoid immune cells"), "low", "high"))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(var, sex) %>%
  wilcox_test(value~sex_visit, paired = T) %>%
  mutate(., p.adj.signif = apply(., 1, \(x) if(is.na(x[10])) "" else if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p), !var %in% c("Adipocytes", "Lymphoid immune cells")) %>% # Remove those not significant for KW
  add_xy_position(x = "var", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
  arrange(desc(y.position)) %>% 
  mutate(facet = ifelse(var %in% c("SMCs","Mast cells","Lymphatic ECs","Pericytes","Lymphoid immune cells"), "low", "high"))

# Plot
p.high <- plot.dat %>% 
  filter(facet == "high") %>% 
  ggplot(aes(variable, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "% cells per sample") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1)) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat %>% filter(facet == "high"), aes(ct, y = 65, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test %>% filter(facet == "high"), label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
  guides(fill = "none")

x.cor <- 4

p.low <- plot.dat %>% 
  filter(facet == "low") %>%
  ggplot(aes(variable, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "", fill = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1)) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = FALSE, data = sex.stat %>% filter(facet == "low"), aes(ct, y = 15, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test %>% filter(facet == "low") %>% mutate(x = x-x.cor, xmin = xmin-x.cor, xmax = xmax-x.cor), label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 2, hide.ns = FALSE) # Wilcoxon

pp <- plot_grid(plotlist = list(p.high, p.low), rel_widths = c(1,1.5))

pp

Figure 1k

size_data <- read.csv("Adipocyte_size.csv", sep = ",", header = TRUE)
sample_colours <- c("#F8B3BB", "#BD6563", "#7D2A25", "#ABD1D4", "#62649E", "#3E4282")

patient_averages <- size_data %>%
  group_by(ID, Visit, Sex) %>%
  summarise(Average_Diameter = mean(Diameter, na.rm = TRUE)) %>%
  ungroup() %>% 
  mutate(sex_visit = interaction(Visit, Sex) %>% 
           factor(labels = c("Female Visit 1", "Female Visit 2", "Female Visit 3", "Male Visit 1", "Male Visit 2", "Male Visit 3")))

## `summarise()` has grouped output by 'ID', 'Visit'. You can override using the
## `.groups` argument.

plot <- ggplot(patient_averages, aes(x = sex_visit, 
                                     y = Average_Diameter, 
                                     fill = sex_visit)) +
  geom_boxplot(position = position_dodge(width = 0.75), outlier.shape = NA) +
  geom_jitter(size = 1, width = 0.2, alpha = 0.3, show.legend = F) +  # Add individual points
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"), 
    text = element_text(family = "serif", size = 12),
    line = element_blank(),
    axis.text.x = element_blank()
  ) +
  scale_fill_manual(values = sample_colours) +
  theme(rect = element_rect(fill = "transparent")) +
  labs(fill = "", x = "", y = "Diameter (µm)")

plot

Figure 1m

# Load the CSV file
CLS <- read.delim("Human_WL_CLS_density_for_plotting.csv", sep = ",")

names(CLS)[names(CLS) == "CLS_density_.number_per_mm3."] <- "CLS_density"

# Define a helper function to map p-values to stars
pval_to_stars <- function(p) {
  if (p < 0.0001) {
    return("****")
  } else if (p < 0.001) {
    return("***")
  } else if (p < 0.01) {
    return("**")
  } else if (p < 0.05) {
    return("*")
  } else {
    return("ns")  # Not significant
  }
}

# Perform a Kruskal-Wallis test separately for each sex
kw_res_F <- kruskal.test(CLS_density ~ Visit, data = CLS %>% filter(Sex == "F"))
kw_res_M <- kruskal.test(CLS_density ~ Visit, data = CLS %>% filter(Sex == "M"))

# Extract Kruskal-Wallis p-values
kw_pval_F <- kw_res_F$p.value
kw_pval_M <- kw_res_M$p.value

# Perform paired Wilcoxon tests for each sex with Holm's correction
paired_wilcox <- function(CLS, sex_group) {
  data_sex <- CLS %>% filter(Sex == sex_group)
  pairwise_tests <- list(
    visit1_vs_visit2 = wilcox.test(
      x = data_sex %>% filter(Visit == "1") %>% pull(CLS_density),
      y = data_sex %>% filter(Visit == "2") %>% pull(CLS_density),
      paired = F
    )$p.value,
    visit1_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(Visit == "1") %>% pull(CLS_density),
      y = data_sex %>% filter(Visit == "3") %>% pull(CLS_density),
      paired = F
    )$p.value,
    visit2_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(Visit == "2") %>% pull(CLS_density),
      y = data_sex %>% filter(Visit == "3") %>% pull(CLS_density),
      paired = F
    )$p.value
  )
  corrected_pvals <- p.adjust(unlist(pairwise_tests), method = "holm")
  return(corrected_pvals)
}

wilcox_pvals_F <- paired_wilcox(CLS, "F")

## Warning in wilcox.test.default(x = data_sex %>% filter(Visit == "1") %>% :
## cannot compute exact p-value with ties

## Warning in wilcox.test.default(x = data_sex %>% filter(Visit == "2") %>% :
## cannot compute exact p-value with ties

wilcox_pvals_M <- paired_wilcox(CLS, "M")

## Warning in wilcox.test.default(x = data_sex %>% filter(Visit == "1") %>% :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = data_sex %>% filter(Visit == "1") %>% :
## cannot compute exact p-value with ties

# Map the Wilcoxon p-values to stars
wilcox_stars_F <- sapply(wilcox_pvals_F, pval_to_stars)
wilcox_stars_M <- sapply(wilcox_pvals_M, pval_to_stars)


plot <- ggplot(CLS, aes(x = interaction(Visit, Sex), y = CLS_density, fill = interaction(Visit, Sex))) +
  geom_boxplot(outlier.shape = NA) +  # Remove outliers
  geom_jitter(color = "grey", size = 1, width = 0.2, alpha = 0.5) +  # Add individual points
  labs(title = "CLS Density Across Visits and Genders", x = "Visit and Sex", y = "# CLS per mm3") +
  theme_bw() +
  theme(plot.title = element_text(size = 10)) +
  geom_signif(
    comparisons = list(
      c("1.F", "2.F"), 
      c("1.F", "3.F"), 
      c("2.F", "3.F"),
      c("1.M", "2.M"), 
      c("1.M", "3.M"), 
      c("2.M", "3.M")
    ),
    annotations = c(
      wilcox_stars_F["visit1_vs_visit2"],
      wilcox_stars_F["visit1_vs_visit3"],
      wilcox_stars_F["visit2_vs_visit3"],
      wilcox_stars_M["visit1_vs_visit2"],
      wilcox_stars_M["visit1_vs_visit3"],
      wilcox_stars_M["visit2_vs_visit3"]
    ),
    textsize = 4,
    map_signif_level = FALSE
  )


# Add Kruskal-Wallis p-values as text
plot <- plot + 
  annotate("text", x = 1, y = max(CLS$CLS_density) * 1.1, 
           label = paste("KW Female p-value:", format(kw_pval_F, digits = 3)), 
           hjust = 0, size = 3) +
  annotate("text", x = 4, y = max(CLS$CLS_density) * 1.1, 
           label = paste("KW Male p-value:", format(kw_pval_M, digits = 3)), 
           hjust = 0, size = 3) +
  scale_fill_manual(values = sample_colours)

plot

## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded

## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded

Figure 1o

size_data <- read.csv("2D_histology_averaged_data_output.csv",sep = ",", header = TRUE)
condition_levels <- c("Female Visit 1", "Female Visit 2", "Female Visit 3", "Male Visit 1", "Male Visit 2", "Male Visit 3")

# 1. Calculate per-patient averages for each visit
patient_averages <- size_data %>%
  group_by(Donor, Visit, Sex) %>%
  summarise(Vascular_Count = mean(Vascular_count_per_area, na.rm = TRUE)) %>%
  ungroup() %>% 
  mutate(sex_visit = interaction(Visit, Sex) %>% 
           factor(labels = c("Female Visit 1", "Female Visit 2", "Female Visit 3", "Male Visit 1", "Male Visit 2", "Male Visit 3")))

## `summarise()` has grouped output by 'Donor', 'Visit'. You can override using
## the `.groups` argument.

# 3. Create the boxplot
plot <- ggplot(patient_averages, aes(x = sex_visit, y = Vascular_Count, fill = sex_visit)) +
  geom_boxplot(position = position_dodge(width = 0.75), outlier.shape = NA) +
  geom_jitter(size = 1, width = 0.2, alpha = 0.3) +  # Add individual points
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"), 
    text = element_text(family = "serif", size = 12),
    axis.text.x = element_blank(),
    line = element_blank()
  ) +
  scale_fill_manual(values = sample_colours) +
  labs(x = "", fill = "", y = "Blood vessels per m2")

plot

Figure 2

Load data

con <- qread("con_vascular.qs", nthreads = 10)
anno.vascular <- qread("anno_vascular.qs")

# For 2d,e
cm.bulk <- read.delim("Bulk_norm_counts.txt", header = T) %>% 
  .[match(unique(.$hgnc_symbol), .$hgnc_symbol), ] %>% 
  `rownames<-`(.$hgnc_symbol) %>% 
  select(-hgnc_symbol, -ensembl_gene_id, -entrezgene_id) %>% 
  scale()

colnames(cm.bulk) %<>% 
  gsub("D", "Donor_", .) %>% 
  gsub("V", "vis", .)

# Metadata
metadata.all <- read.delim("Meta_Data_WL_Select.txt", h=T, dec = ",") %>% 
  mutate(sex = factor(sex, labels = c("Female", "Male")),
         donor = gsub("D", "Donor_", recordid) %>% 
           gsub("V", "vis", .))

Figure 2a

con$plotGraph(groups = anno.vascular, 
              plot.na = FALSE, 
              size = 0.2,
              alpha = 1, 
              embedding = "UMAP", 
              shuffle.colors = TRUE, 
              show.labels = TRUE, 
              font.size = 5) + 
  labs(x = "UMAP1", y = "UMAP2") + 
  theme(line = element_blank()) +
  scale_colour_manual(values = RColorBrewer::brewer.pal(9, "Purples")[-c(1,2)][c(1,4,7,2,6,3,5)])

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Figure 2b

cm.merged <- con$getJointCountMatrix() %>% 
  .[rownames(.) %in% names(anno.vascular), ]

c("PCSK5", "NEBL", "FN1", "CADM2", "BTNL9", "CEACAM1", "IL1R1", "VCAN", "ACKR1", "PKHD1L1", "MMRN1", "RELN", "MYH11", "ACTA2", "MYOCD", "COL25A1", "STEAP4", "NCKAP5") %>%  
  sccore::dotPlot(., 
                  cm.merged, 
                  anno.vascular %>% factor(levels = c("arEC", "cEC", "cvEC", "vEC", "lEC", "SMCs", "Pericytes")), 
                  gene.order = ., 
                  cols = c("white","purple3"))

Figure 2c

Here, we’re plotting proportions of total cells, so we load the Conos object and the annotation for all cells first.

con.major <- qread("con_major.qs", nthreads = 10)
anno.major <- qread("anno_major.qs")

# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major, 
                 cell.groups = anno.major, 
                 ref.level = "vis2", 
                 target.level = "vis3",
                 sample.groups = con.major$samples %>% 
                   names() %>% 
                   strsplit("_") %>% 
                   sapply('[[', 3) %>% 
                   gsub(1, 2, .) %>% 
                   `names<-`(con.major$samples %>% 
                               names()),
                 sample.groups.palette = ggsci::pal_jama()(7))

cao$sample.groups <- con.major$samples %>% 
  names() %>% 
  strsplit("_") %>% 
  sapply('[[', 3) %>% 
  `names<-`(con.major$samples %>% 
              names()) %>% 
  gsub("vis", "Visit ", .)

anno.comb <- anno.major[!names(anno.major) %in% names(anno.vascular)] %>% 
  {factor(c(., anno.vascular))}

p <- cao$plotCellGroupSizes(cell.groups = anno.comb,
                            show.significance = FALSE, 
                            filter.empty.cell.types = FALSE)

plot.dat <- p$data %>% 
  filter(variable %in% levels(anno.vascular)) %>% 
  mutate(sex = rep(meta$sex, anno.vascular %>% levels() %>% length())) %>% 
  mutate(sex_visit = paste(sex, group, sep = " ") %>% gsub("Vis", "vis", .),
         facet = ifelse(variable %in% c("cvEC", "lEC"), "low", "high"),
         variable = factor(variable, levels = c("arEC", "cEC", "vEC", "SMCs", "Pericytes", "cvEC", "lEC")) %>% 
           renameAnnotation("lEC", "         lEC"))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$variable)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$group)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(ct = factor(ct, levels = unique(plot.dat$variable))) %>% 
  arrange(ct) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T),
         facet = ifelse(ct %in% c("cvEC", "         lEC"), "low", "high"))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(var, sex) %>%
  wilcox_test(value~sex_visit, paired = T) %>%
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "var", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
  arrange(desc(y.position)) %>% 
  mutate(y.position = c(y.position[1]+1, y.position[2], y.position[3]+1, y.position[4], y.position[5]+1, y.position[6:7]),  # Manually adjust overlapping lines
         facet = ifelse(var %in% c("cvEC", "         lEC"), "low", "high"))

# Plot
p.high <- ggplot(plot.dat %>% filter(facet == "high"), aes(variable, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "% cells per sample") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat %>% filter(facet == "high"), aes(ct, y = 22.5, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test %>% filter(facet == "high"), label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
  guides(fill = "none")

p.low <- ggplot(plot.dat %>% filter(facet == "low"), aes(variable, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "", fill = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat %>% filter(facet == "low"), aes(ct, y = 4.5, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test %>% filter(facet == "low"), label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) # Wilcoxon

plot_grid(plotlist = list(p.high, p.low), rel_widths = c(1,0.8))

Figures 2d,e

Preparations

# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major, 
                 cell.groups = anno.major, 
                 ref.level = "vis2", 
                 target.level = "vis3", 
                 sample.groups = con.major$samples %>% 
                   names() %>% 
                   strsplit("_") %>% 
                   sapply('[[', 3) %>% 
                   gsub(1, 2, .) %>% 
                   `names<-`(con.major$samples %>% 
                               names()),
                 sample.groups.palette = ggsci::pal_jama()(7))

cao$sample.groups <- con.major$samples %>% 
  names() %>% 
  strsplit("_") %>% 
  sapply('[[', 3) %>% 
  `names<-`(con.major$samples %>% 
              names()) %>% 
  gsub("vis", "Visit ", .)

anno.comb <- anno.major[!names(anno.major) %in% names(anno.vascular)] %>% 
  {factor(c(., anno.vascular))}

Here, we’re showing the preparations to create models for prediction. This takes some time and is not run again.

# Get markers
markers.vascular.all <- con.major$getDifferentialGenes(groups = anno.comb, z.threshold = 1, upregulated.only = T, verbose = T)

# vEC
# Get proportions
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>% 
  {100 * ./rowSums(.)} %>% 
  as.data.frame() %$% 
  setNames(vEC, rownames(.))

# Extract markers. Please note, the marker metrics may change a little in each iteration
markers.all <- markers.vascular.all$vEC %>% 
  filter(Specificity > 0.9,
         AUC > 0.65) %>% 
  arrange(desc(AUC)) %>% 
  pull(Gene)

# Subset bulk data
cm.bulk.mat <- cm.bulk %>% 
  as.data.frame() %>% 
  filter(rownames(.) %in% markers.all) %>% 
  as.matrix() %>% 
  t()

# Now, train a final model on all data for visualization and gene selection
cm.test <- cm.bulk.mat %>% 
  .[rownames(.) %in% names(proportions), ]

lasso_cv <- cv.glmnet(cm.test, proportions, alpha = 1, nfolds = 40)
best_lambda <- lasso_cv$lambda.min
lasso_model <- glmnet(cm.test, proportions, alpha = 1, lambda = best_lambda)

# Predict on all samples using the final model
lasso.predict.all <- predict(lasso_model, newx = cm.test, s = best_lambda)

# Save model
list(lasso_cv = lasso_cv,
     lasso_model = lasso_model) %>%
  qsave("vEC_model.qs")

# cEC
## Get proportions
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>% 
  {100 * ./rowSums(.)} %>% 
  as.data.frame() %$% 
  setNames(cEC, rownames(.))

## Markers
markers.all <- markers.vacsular.all$cEC %>% 
  filter(Specificity > 0.9,
         AUC > 0.6) %>% 
  arrange(desc(AUC)) %>% 
  pull(Gene)

## Subset bulk data
cm.bulk.mat <- cm.bulk %>% 
  as.data.frame() %>% 
  filter(rownames(.) %in% markers.all) %>% 
  as.matrix() %>% 
  t()

# Now, train a final model on all data for visualization and gene selection
cm.test <- cm.bulk.mat %>% 
  .[rownames(.) %in% names(proportions), ]

lasso_cv <- cv.glmnet(cm.test, proportions, alpha = 1, nfolds = 40)
best_lambda <- lasso_cv$lambda.min
lasso_model <- glmnet(cm.test, proportions, alpha = 1, lambda = best_lambda)

# Predict on all samples using the final model
lasso.predict.all <- predict(lasso_model, newx = cm.test, s = best_lambda)

# Save model
list(lasso_cv = lasso_cv,
     lasso_model = lasso_model) %>%
  qsave("cEC_model.qs")

Figure 2d

# Load and prepare
tmp <- qread("cEC_model.qs")

lasso_model <- tmp$lasso_model
best_lambda <- tmp$lasso_cv$lambda.min

cm.bulk.mat <- cm.bulk %>% 
  as.data.frame() %>% 
  filter(rownames(.) %in% rownames(coef(lasso_model))) %>% 
  as.matrix() %>% 
  t()

proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>% 
  {100 * ./rowSums(.)} %>% 
  as.data.frame() %$% 
  setNames(cEC, rownames(.))

## Predict proportions
proportions.bulk <-  predict(lasso_model, newx = cm.bulk.mat, s = best_lambda)

## Prepare plot
plot.df <- proportions.bulk %>% 
  as.data.frame() %>%
  mutate(donor = rownames(.)) %>% 
  mutate(visit = strsplit(donor, "_") %>% sget(3),
         sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = "_"))

trend.trained <- proportions.bulk %>% 
  as.data.frame() %>%
  mutate(donor = rownames(.)) %>% 
  filter(donor %in% names(proportions)) %>%
  mutate(visit = strsplit(donor, "_") %>% sget(3),
         sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>% 
  group_by(visit, sex) %>% 
  summarize(s1 = median(s1)) %>%
  mutate(sex_visit = paste(sex, visit, sep = "_"))

## `summarise()` has grouped output by 'visit'. You can override using the
## `.groups` argument.

trend.untrained <- proportions.bulk %>% 
  as.data.frame() %>%
  mutate(donor = rownames(.)) %>% 
  filter(!donor %in% names(proportions)) %>%
  mutate(visit = strsplit(donor, "_") %>% sget(3),
         sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>% 
  group_by(visit, sex) %>% 
  summarize(s1 = median(s1)) %>% 
  mutate(sex_visit = paste(sex, visit, sep = "_"))

## `summarise()` has grouped output by 'visit'. You can override using the
## `.groups` argument.

plot.df %>% 
  ggplot() +
  geom_boxplot(mapping = aes(sex_visit, s1, fill = sex_visit), outliers = F) +
  geom_jitter(mapping = aes(sex_visit, s1, col = sex_visit), alpha = 0.3, col = "black", width = 0.3) +
  theme_bw() +
  labs(y = "Predicted proportions (%)", title = "Predicted vECs proportions", x = "", col = "Median trends", fill = "") +
  geom_line(mapping = aes(x = sex_visit, y = s1, group = sex, col = "Trained data"), data = trend.trained, linewidth = 1) +
  geom_line(mapping = aes(x = sex_visit, y = s1, group = sex, col = "Untrained data"), data = trend.untrained, linewidth = 1) +
  scale_color_manual(values = c("Trained data" = "black", "Untrained data" = "grey40")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  theme(line = element_blank(),
        axis.text = element_text(color = "black"),
        axis.text.x = element_blank())

Statistics

plot.df %>% 
  filter(sex == "Female") %>% 
  rstatix::kruskal_test(s1 ~ visit)

## # A tibble: 1 × 6
##   .y.       n statistic    df          p method        
## * <chr> <int>     <dbl> <int>      <dbl> <chr>         
## 1 s1       63      26.1     2 0.00000212 Kruskal-Wallis

plot.df %>% 
  filter(sex == "Female") %>% 
  rstatix::wilcox_test(s1 ~ visit)

## # A tibble: 3 × 9
##   .y.   group1 group2    n1    n2 statistic          p      p.adj p.adj.signif
## * <chr> <chr>  <chr>  <int> <int>     <dbl>      <dbl>      <dbl> <chr>       
## 1 s1    vis1   vis2      21    21       240 0.636      0.636      ns          
## 2 s1    vis1   vis3      21    21        43 0.00000129 0.00000387 ****        
## 3 s1    vis2   vis3      21    21        48 0.00000302 0.00000604 ****

plot.df %>% 
  filter(sex == "Male") %>% 
  rstatix::kruskal_test(s1 ~ visit)

## # A tibble: 1 × 6
##   .y.       n statistic    df       p method        
## * <chr> <int>     <dbl> <int>   <dbl> <chr>         
## 1 s1       30      11.9     2 0.00263 Kruskal-Wallis

plot.df %>% 
  filter(sex == "Male") %>% 
  rstatix::wilcox_test(s1 ~ visit)

## # A tibble: 3 × 9
##   .y.   group1 group2    n1    n2 statistic        p p.adj p.adj.signif
## * <chr> <chr>  <chr>  <int> <int>     <dbl>    <dbl> <dbl> <chr>       
## 1 s1    vis1   vis2      10    10        59 0.529    0.529 ns          
## 2 s1    vis1   vis3      10    10        15 0.007    0.014 *           
## 3 s1    vis2   vis3      10    10         8 0.000725 0.002 **

Figure 2e

# Load and prepare
tmp <- qread("vEC_model.qs")

lasso_model <- tmp$lasso_model
best_lambda <- tmp$lasso_cv$lambda.min

cm.bulk.mat <- cm.bulk %>% 
  as.data.frame() %>% 
  filter(rownames(.) %in% rownames(coef(lasso_model))) %>% 
  as.matrix() %>% 
  t()

proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>% 
  {100 * ./rowSums(.)} %>% 
  as.data.frame() %$% 
  setNames(vEC, rownames(.))

## Predict proportions
proportions.bulk <-  predict(lasso_model, newx = cm.bulk.mat, s = best_lambda)

plot.df <- proportions.bulk %>% 
  as.data.frame() %>%
  mutate(donor = rownames(.)) %>% 
  mutate(visit = strsplit(donor, "_") %>% sget(3),
         sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = "_"))

trend.trained <- proportions.bulk %>% 
  as.data.frame() %>%
  mutate(donor = rownames(.)) %>% 
  filter(donor %in% names(proportions)) %>%
  mutate(visit = strsplit(donor, "_") %>% sget(3),
         sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>% 
  group_by(visit, sex) %>% 
  summarize(s1 = median(s1)) %>%
  mutate(sex_visit = paste(sex, visit, sep = "_"))

## `summarise()` has grouped output by 'visit'. You can override using the
## `.groups` argument.

trend.untrained <- proportions.bulk %>% 
  as.data.frame() %>%
  mutate(donor = rownames(.)) %>% 
  filter(!donor %in% names(proportions)) %>%
  mutate(visit = strsplit(donor, "_") %>% sget(3),
         sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>% 
  group_by(visit, sex) %>% 
  summarize(s1 = median(s1)) %>% 
  mutate(sex_visit = paste(sex, visit, sep = "_"))

## `summarise()` has grouped output by 'visit'. You can override using the
## `.groups` argument.

plot.df %>% 
  ggplot() +
  geom_boxplot(mapping = aes(sex_visit, s1, fill = sex_visit), outliers = F) +
  geom_jitter(mapping = aes(sex_visit, s1, col = sex_visit), alpha = 0.3, col = "black", width = 0.3) +
  theme_bw() +
  labs(y = "Predicted proportions (%)", title = "Predicted cECs proportions", x = "", col = "Median trends", fill = "") +
  geom_line(mapping = aes(x = sex_visit, y = s1, group = sex, col = "Trained data"), data = trend.trained, linewidth = 1) +
  geom_line(mapping = aes(x = sex_visit, y = s1, group = sex, col = "Untrained data"), data = trend.untrained, linewidth = 1) +
  scale_color_manual(values = c("Trained data" = "black", "Untrained data" = "grey40")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  theme(line = element_blank(),
        axis.text = element_text(color = "black"),
        axis.text.x = element_blank())

Statistics

plot.df %>% 
  filter(sex == "Female") %>% 
  rstatix::kruskal_test(s1 ~ visit)

## # A tibble: 1 × 6
##   .y.       n statistic    df           p method        
## * <chr> <int>     <dbl> <int>       <dbl> <chr>         
## 1 s1       63      28.6     2 0.000000605 Kruskal-Wallis

plot.df %>% 
  filter(sex == "Female") %>% 
  rstatix::wilcox_test(s1 ~ visit)

## # A tibble: 3 × 9
##   .y.   group1 group2    n1    n2 statistic           p      p.adj p.adj.signif
## * <chr> <chr>  <chr>  <int> <int>     <dbl>       <dbl>      <dbl> <chr>       
## 1 s1    vis1   vis2      21    21       221 1           1          ns          
## 2 s1    vis1   vis3      21    21        40 0.000000754 0.00000151 ****        
## 3 s1    vis2   vis3      21    21        34 0.00000024  0.00000072 ****

plot.df %>% 
  filter(sex == "Male") %>% 
  rstatix::kruskal_test(s1 ~ visit)

## # A tibble: 1 × 6
##   .y.       n statistic    df       p method        
## * <chr> <int>     <dbl> <int>   <dbl> <chr>         
## 1 s1       30      16.8     2 0.00023 Kruskal-Wallis

plot.df %>% 
  filter(sex == "Male") %>% 
  rstatix::wilcox_test(s1 ~ visit)

## # A tibble: 3 × 9
##   .y.   group1 group2    n1    n2 statistic         p     p.adj p.adj.signif
## * <chr> <chr>  <chr>  <int> <int>     <dbl>     <dbl>     <dbl> <chr>       
## 1 s1    vis1   vis2      10    10        50 1         1         ns          
## 2 s1    vis1   vis3      10    10         6 0.000325  0.00065   ***         
## 3 s1    vis2   vis3      10    10         1 0.0000217 0.0000651 ****

Figure 3

Load data

con <- qread("con_myeloid.qs", nthreads = 10)
anno.immune <- qread("anno_immune.qs")

anno.myeloid <- anno.immune %>% 
  .[!. %in% c("NKT","NK","Tem","Th1","Th","Treg","B-cells")] %>% 
  factor(levels = c("ATM", "mono/mac", "Early LAM", "LAM", "cDC1", "cDC2", "mDC"))

Figure 3a

con$plotGraph(groups = anno.immune[!anno.immune == "B-cells"], 
              plot.na = FALSE, 
              size = 0.2,
              alpha = 1, 
              embedding = "largeVis", 
              show.labels = TRUE, 
              font.size = 5) + 
  labs(x = "largeVis1", y = "largeVis2") + 
  theme(line = element_blank()) +
  scale_colour_manual(values = c("#174D86", "#9EC3B5", "#356790", "#ADD0BA", "#628EA0", "#80A9AA", "#719CA5"))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Figure 3b

cm.merged <- con$getJointCountMatrix(raw = FALSE) %>% 
  .[rownames(.) %in% names(anno.myeloid), ]

c("LYVE1", "SELENOP", # ATM
  "FCN1","VCAN", # mono/mac
  "CYP27A1", "APOE", # Early LAM
  "LPL", "TREM2", # LAM
  "CLEC9A", "CADM1", # cDC1
  "CD1C", "CLEC10A", # cDC2
  "CCR7", "LAMP3") %>% # mDC
  sccore::dotPlot(., 
                  cm.merged, 
                  anno.myeloid, 
                  gene.order = ., 
                  cols = c("white","#377EB8"))

Figure 3c

Here, we’re plotting proportions of total cells, so we use the Conos object and the annotation for all cells first.

# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major, 
                 cell.groups = anno.major, 
                 ref.level = "vis2", 
                 target.level = "vis3", 
                 sample.groups = con.major$samples %>% 
                   names() %>% 
                   strsplit("_") %>% 
                   sapply('[[', 3) %>% 
                   gsub(1, 2, .) %>% 
                   `names<-`(con.major$samples %>% 
                               names()),
                 sample.groups.palette = ggsci::pal_jama()(7))

cao$sample.groups <- con.major$samples %>% 
  names() %>% 
  strsplit("_") %>% 
  sapply('[[', 3) %>% 
  `names<-`(con.major$samples %>% 
              names()) %>% 
  gsub("vis", "Visit ", .)

anno.comb <- anno.major[!names(anno.major) %in% names(anno.myeloid)] %>% 
  {factor(c(., anno.myeloid))}

p <- cao$plotCellGroupSizes(cell.groups = anno.comb,
                            show.significance = FALSE, 
                            filter.empty.cell.types = FALSE)

plot.dat <- p$data %>% 
  filter(variable %in% levels(anno.myeloid)) %>% 
  mutate(sex = rep(meta$sex, anno.myeloid %>% levels() %>% length()),
         variable = factor(variable)) %>% 
  mutate(sex_visit = paste(sex, group, sep = " ") %>% gsub("Vis", "vis", .),
         facet = ifelse(variable != "ATM", "low", "high"),
         variable = factor(variable, levels = c("ATM", "mono/mac", "Early LAM", "LAM", "cDC1", "cDC2", "mDC")) %>% 
           renameAnnotation("ATM", "         ATM"))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$variable)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$group)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(ct = factor(ct, levels = unique(plot.dat$variable))) %>% 
  arrange(ct) %>% 
  mutate(sig = gsub(".", "", sig, fixed = TRUE),
         facet = ifelse(ct != "         ATM", "low", "high"))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(var, sex) %>%
  wilcox_test(value~sex_visit, paired = TRUE) %>%
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "var", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
  arrange(desc(y.position)) %>% 
  mutate(facet = ifelse(var != "         ATM", "low", "high")) 

# Plot
p.high <- ggplot(plot.dat %>% filter(facet == "high"), aes(variable, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "% cells per sample") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1)) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat %>% filter(facet == "high"), aes(ct, y = 50, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test %>% filter(facet == "high"), label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
  guides(fill = "none")

x.cor = 1

p.low <- ggplot(plot.dat %>% filter(facet == "low"), aes(variable, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "", fill = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1)) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = FALSE, data = sex.stat %>% filter(facet == "low"), aes(ct, y = 10, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test %>% filter(facet == "low") %>% mutate(x = x-x.cor, xmin = xmin-x.cor, xmax = xmax-x.cor), label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 1, hide.ns = FALSE) # Wilcoxon

plot_grid(plotlist = list(p.high, p.low), rel_widths = c(1,4.5))

Figures 3d,e

Preparations

# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major, 
                 cell.groups = anno.major, 
                 ref.level = "vis2", 
                 target.level = "vis3", 
                 sample.groups = con.major$samples %>% 
                   names() %>% 
                   strsplit("_") %>% 
                   sapply('[[', 3) %>% 
                   gsub(1, 2, .) %>% 
                   `names<-`(con.major$samples %>% 
                               names()),
                 sample.groups.palette = ggsci::pal_jama()(7))

cao$sample.groups <- con.major$samples %>% 
  names() %>% 
  strsplit("_") %>% 
  sapply('[[', 3) %>% 
  `names<-`(con.major$samples %>% 
              names()) %>% 
  gsub("vis", "Visit ", .)

anno.comb <- anno.major[!names(anno.major) %in% names(anno.immune)] %>% 
  {factor(c(., anno.immune))}

Here, we’re showing the preparations to create models for prediction. This takes some time and is not run again.

# Get markers
markers.immune.all <- con.major$getDifferentialGenes(groups = anno.comb, z.threshold = 1, upregulated.only = T, verbose = T)

# vEC
# Get proportions
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>% 
  {100 * ./rowSums(.)} %>% 
  as.data.frame() %$% 
  setNames(`Early LAM`, rownames(.))

# Extract markers. Please note, the marker metrics may change a little in each iteration
markers.all <- markers.immune.all$`Early LAM` %>% 
  filter(Specificity > 0.9,
         AUC > 0.65) %>% 
  arrange(desc(AUC)) %>% 
  pull(Gene)

# Subset bulk data
cm.bulk.mat <- cm.bulk %>% 
  as.data.frame() %>% 
  filter(rownames(.) %in% markers.all) %>% 
  as.matrix() %>% 
  t()

# Now, train a final model on all data for visualization and gene selection
cm.test <- cm.bulk.mat %>% 
  .[rownames(.) %in% names(proportions), ]

lasso_cv <- cv.glmnet(cm.test, proportions, alpha = 1, nfolds = 5)
best_lambda <- lasso_cv$lambda.min
lasso_model <- glmnet(cm.test, proportions, alpha = 1, lambda = best_lambda)

# Predict on all samples using the final model
lasso.predict.all <- predict(lasso_model, newx = cm.test, s = best_lambda)

# Save model
list(lasso_cv = lasso_cv,
     lasso_model = lasso_model) %>%
  qsave("eLAM_model.qs")

# cEC
## Get proportions
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>% 
  {100 * ./rowSums(.)} %>% 
  as.data.frame() %$% 
  setNames(LAM, rownames(.))

## Markers
markers.all <- markers.immune.all$LAM %>% 
  filter(Specificity > 0.9,
         AUC > 0.7) %>% 
  arrange(desc(AUC)) %>% 
  pull(Gene)

## Subset bulk data
cm.bulk.mat <- cm.bulk %>% 
  as.data.frame() %>% 
  filter(rownames(.) %in% markers.all) %>% 
  as.matrix() %>% 
  t()

# Now, train a final model on all data for visualization and gene selection
cm.test <- cm.bulk.mat %>% 
  .[rownames(.) %in% names(proportions), ]

lasso_cv <- cv.glmnet(cm.test, proportions, alpha = 1, nfolds = 15)
best_lambda <- lasso_cv$lambda.min
lasso_model <- glmnet(cm.test, proportions, alpha = 1, lambda = best_lambda)

# Predict on all samples using the final model
lasso.predict.all <- predict(lasso_model, newx = cm.test, s = best_lambda)

# Save model
list(lasso_cv = lasso_cv,
     lasso_model = lasso_model) %>%
  qsave("LAM_model.qs")

Figure 3d

# Load and prepare
tmp <- qread("eLAM_model.qs")

lasso_model <- tmp$lasso_model
best_lambda <- tmp$lasso_cv$lambda.min

cm.bulk.mat <- cm.bulk %>% 
  as.data.frame() %>% 
  filter(rownames(.) %in% rownames(coef(lasso_model))) %>% 
  as.matrix() %>% 
  t()

proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>% 
  {100 * ./rowSums(.)} %>% 
  as.data.frame() %$% 
  setNames(`Early LAM`, rownames(.))

## Predict proportions
proportions.bulk <-  predict(lasso_model, newx = cm.bulk.mat, s = best_lambda)

## Prepare plot
plot.df <- proportions.bulk %>% 
  as.data.frame() %>%
  mutate(donor = rownames(.)) %>% 
  mutate(visit = strsplit(donor, "_") %>% sget(3),
         sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = "_"))

trend.trained <- proportions.bulk %>% 
  as.data.frame() %>%
  mutate(donor = rownames(.)) %>% 
  filter(donor %in% names(proportions)) %>%
  mutate(visit = strsplit(donor, "_") %>% sget(3),
         sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>% 
  group_by(visit, sex) %>% 
  summarize(s1 = median(s1)) %>%
  mutate(sex_visit = paste(sex, visit, sep = "_"))

## `summarise()` has grouped output by 'visit'. You can override using the
## `.groups` argument.

trend.untrained <- proportions.bulk %>% 
  as.data.frame() %>%
  mutate(donor = rownames(.)) %>% 
  filter(!donor %in% names(proportions)) %>%
  mutate(visit = strsplit(donor, "_") %>% sget(3),
         sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>% 
  group_by(visit, sex) %>% 
  summarize(s1 = median(s1)) %>% 
  mutate(sex_visit = paste(sex, visit, sep = "_"))

## `summarise()` has grouped output by 'visit'. You can override using the
## `.groups` argument.

plot.df %>% 
  ggplot() +
  geom_boxplot(mapping = aes(sex_visit, s1, fill = sex_visit), outliers = F) +
  geom_jitter(mapping = aes(sex_visit, s1, col = sex_visit), alpha = 0.3, col = "black", width = 0.3) +
  theme_bw() +
  labs(y = "Predicted proportions (%)", title = "Predicted eLAM proportions", x = "", col = "Median trends", fill = "") +
  geom_line(mapping = aes(x = sex_visit, y = s1, group = sex, col = "Trained data"), data = trend.trained, linewidth = 1) +
  geom_line(mapping = aes(x = sex_visit, y = s1, group = sex, col = "Untrained data"), data = trend.untrained, linewidth = 1) +
  scale_color_manual(values = c("Trained data" = "black", "Untrained data" = "grey40")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  theme(line = element_blank(),
        axis.text = element_text(color = "black"),
        axis.text.x = element_blank())

Statistics

plot.df %>% 
  filter(sex == "Female") %>% 
  rstatix::kruskal_test(s1 ~ visit)

## # A tibble: 1 × 6
##   .y.       n statistic    df         p method        
## * <chr> <int>     <dbl> <int>     <dbl> <chr>         
## 1 s1       63      21.0     2 0.0000276 Kruskal-Wallis

plot.df %>% 
  filter(sex == "Female") %>% 
  rstatix::wilcox_test(s1 ~ visit)

## # A tibble: 3 × 9
##   .y.   group1 group2    n1    n2 statistic         p     p.adj p.adj.signif
## * <chr> <chr>  <chr>  <int> <int>     <dbl>     <dbl>     <dbl> <chr>       
## 1 s1    vis1   vis2      21    21       230 0.823     0.823     ns          
## 2 s1    vis1   vis3      21    21       379 0.0000251 0.0000753 ****        
## 3 s1    vis2   vis3      21    21       376 0.0000379 0.0000758 ****

plot.df %>% 
  filter(sex == "Male") %>% 
  rstatix::kruskal_test(s1 ~ visit)

## # A tibble: 1 × 6
##   .y.       n statistic    df         p method        
## * <chr> <int>     <dbl> <int>     <dbl> <chr>         
## 1 s1       30      18.4     2 0.0000988 Kruskal-Wallis

plot.df %>% 
  filter(sex == "Male") %>% 
  rstatix::wilcox_test(s1 ~ visit)

## # A tibble: 3 × 9
##   .y.   group1 group2    n1    n2 statistic         p     p.adj p.adj.signif
## * <chr> <chr>  <chr>  <int> <int>     <dbl>     <dbl>     <dbl> <chr>       
## 1 s1    vis1   vis2      10    10        59 0.529     0.529     ns          
## 2 s1    vis1   vis3      10    10        99 0.0000217 0.0000651 ****        
## 3 s1    vis2   vis3      10    10        98 0.0000433 0.0000866 ****

Figure 3e

# Load and prepare
tmp <- qread("LAM_model.qs")

lasso_model <- tmp$lasso_model
best_lambda <- tmp$lasso_cv$lambda.min

cm.bulk.mat <- cm.bulk %>% 
  as.data.frame() %>% 
  filter(rownames(.) %in% rownames(coef(lasso_model))) %>% 
  as.matrix() %>% 
  t()

proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>% 
  {100 * ./rowSums(.)} %>% 
  as.data.frame() %$% 
  setNames(LAM, rownames(.))

## Predict proportions
proportions.bulk <-  predict(lasso_model, newx = cm.bulk.mat, s = best_lambda)

plot.df <- proportions.bulk %>% 
  as.data.frame() %>%
  mutate(donor = rownames(.)) %>% 
  mutate(visit = strsplit(donor, "_") %>% sget(3),
         sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = "_"))

trend.trained <- proportions.bulk %>% 
  as.data.frame() %>%
  mutate(donor = rownames(.)) %>% 
  filter(donor %in% names(proportions)) %>%
  mutate(visit = strsplit(donor, "_") %>% sget(3),
         sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>% 
  group_by(visit, sex) %>% 
  summarize(s1 = median(s1)) %>%
  mutate(sex_visit = paste(sex, visit, sep = "_"))

## `summarise()` has grouped output by 'visit'. You can override using the
## `.groups` argument.

trend.untrained <- proportions.bulk %>% 
  as.data.frame() %>%
  mutate(donor = rownames(.)) %>% 
  filter(!donor %in% names(proportions)) %>%
  mutate(visit = strsplit(donor, "_") %>% sget(3),
         sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>% 
  group_by(visit, sex) %>% 
  summarize(s1 = median(s1)) %>% 
  mutate(sex_visit = paste(sex, visit, sep = "_"))

## `summarise()` has grouped output by 'visit'. You can override using the
## `.groups` argument.

plot.df %>% 
  ggplot() +
  geom_boxplot(mapping = aes(sex_visit, s1, fill = sex_visit), outliers = F) +
  geom_jitter(mapping = aes(sex_visit, s1, col = sex_visit), alpha = 0.3, col = "black", width = 0.3) +
  theme_bw() +
  labs(y = "Predicted proportions (%)", title = "Predicted LAM  proportions", x = "", col = "Median trends", fill = "") +
  geom_line(mapping = aes(x = sex_visit, y = s1, group = sex, col = "Trained data"), data = trend.trained, linewidth = 1) +
  geom_line(mapping = aes(x = sex_visit, y = s1, group = sex, col = "Untrained data"), data = trend.untrained, linewidth = 1) +
  scale_color_manual(values = c("Trained data" = "black", "Untrained data" = "grey40")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  theme(line = element_blank(),
        axis.text = element_text(color = "black"),
        axis.text.x = element_blank())

Statistics

plot.df %>% 
  filter(sex == "Female") %>% 
  rstatix::kruskal_test(s1 ~ visit)

## # A tibble: 1 × 6
##   .y.       n statistic    df         p method        
## * <chr> <int>     <dbl> <int>     <dbl> <chr>         
## 1 s1       63      20.0     2 0.0000445 Kruskal-Wallis

plot.df %>% 
  filter(sex == "Female") %>% 
  rstatix::wilcox_test(s1 ~ visit)

## # A tibble: 3 × 9
##   .y.   group1 group2    n1    n2 statistic         p     p.adj p.adj.signif
## * <chr> <chr>  <chr>  <int> <int>     <dbl>     <dbl>     <dbl> <chr>       
## 1 s1    vis1   vis2      21    21       196 0.55      0.55      ns          
## 2 s1    vis1   vis3      21    21       367 0.00012   0.00024   ***         
## 3 s1    vis2   vis3      21    21       379 0.0000251 0.0000753 ****

plot.df %>% 
  filter(sex == "Male") %>% 
  rstatix::kruskal_test(s1 ~ visit)

## # A tibble: 1 × 6
##   .y.       n statistic    df       p method        
## * <chr> <int>     <dbl> <int>   <dbl> <chr>         
## 1 s1       30      17.5     2 0.00016 Kruskal-Wallis

plot.df %>% 
  filter(sex == "Male") %>% 
  rstatix::wilcox_test(s1 ~ visit)

## # A tibble: 3 × 9
##   .y.   group1 group2    n1    n2 statistic         p     p.adj p.adj.signif
## * <chr> <chr>  <chr>  <int> <int>     <dbl>     <dbl>     <dbl> <chr>       
## 1 s1    vis1   vis2      10    10        54 0.796     0.796     ns          
## 2 s1    vis1   vis3      10    10        96 0.00013   0.00026   ***         
## 3 s1    vis2   vis3      10    10        99 0.0000217 0.0000651 ****

Figure 3f

Preparations

cm.merged <- con$getJointCountMatrix(raw = TRUE) %>% 
  Matrix::t() %>% 
  .[, colnames(.) %in% names(anno.myeloid)]

# Calculate DEGs using Cacoa
con.vis13 <- con$samples %>% 
  .[!grepl("vis2", names(.))] %>% 
  Conos$new()

sample.groups <- con.vis13$samples %>% 
  names() %>% 
  `names<-`(ifelse(grepl("vis1", .), "visit1", "visit3"), .)

cao <- Cacoa$new(con.vis13, 
                 sample.groups, 
                 anno.myeloid, 
                 ref.level = "visit1", 
                 target.level = "visit3", 
                 n.cores = 10)

cao$estimateDEPerCellType(min.cell.frac = 0.1, 
                          min.cell.count = 5)

## Preparing matrices for DE

## Estimating DE per cell type

## DEs not calculated for 2 cell group(s): cDC1, mDC

de <- cao$test.results$de %>%
  lapply('[[', "res")

# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[colnames(cm.merged)]
anno.subtype <- anno.myeloid %>% 
  .[!is.na(.)]

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names()) %>% 
  .[grepl("ATM|Early LAM|LAM|mono/mac", .)]

# Create pseudo CM
cm.pseudo <- sccore::collapseCellsByType(cm.merged %>% Matrix::t(), 
                                         groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo %<>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_|!!") %>% 
                                   sget(3) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T)

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

# Initially, we used this approach to select genes. Since the clusters are not exactly reproducible, here we load the gene lists

# genes <- de[c("ATM","Early LAM","LAM","mono/mac")] %>%
#   {`names<-`(lapply(names(.), \(nn) mutate(.[[nn]], ct = nn)), names(.))} %>% 
#   lapply(filter, padj <= 0.05, abs(log2FoldChange) > 1) %>%
#   lapply(pull, Gene) %>% 
#   {sapply(names(.), \(nn) .[[nn]] %>% `names<-`(., rep(nn, length(.))))} %>% 
#   Reduce(c, .) %>% 
#   .[match(unique(.), .)]

genes <- read.delim("Macrophages_DEG_heatmap_genes.tsv") %$% 
  setNames(gene, cluster) %>%
  .[order(names(.))]

idx <- cm.pseudo %>% 
  colnames() %>% 
  data.frame(id = .) %>% 
  mutate(vis = strsplit(id, "_|!!") %>% sget(3),
         ct = strsplit(id, "!!") %>% sget(2)) %>% 
  mutate(ord = order(vis, ct))

x <- cm.pseudo %>% 
  .[match(genes, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
  .[, idx$ord] %>% 
  Matrix::t() %>%
  scale() %>%
  Matrix::t()

# Ordering
spc <- con$getDatasetPerCell()

sepc <- meta$sex[match(spc, meta$sample)] %>% 
  setNames(names(spc))

sex.df <- sepc %>% 
  {data.frame(nn = names(.), sex = unname(.))} %>% 
  mutate(nn = strsplit(nn, "!!") %>% sget(1)) %>% 
  .[match(unique(.$nn), .$nn), ] %>% 
  filter(!grepl("pool", nn))

ord <- data.frame(Visit = colnames(x) %>%
                    setNames(colnames(x)) %>% 
                    strsplit("_|!!|vis") %>%
                    sget(4)) %>% 
  mutate(., 
         donor = rownames(.) %>% strsplit("_vis") %>% sget(1)) %>% 
  mutate(., 
         Sex = sex.df$sex[match(.$donor, sex.df$nn)],
         ct = rownames(.) %>% strsplit("!!") %>% sget(2)) %>% 
  arrange(ct, Visit, Sex) %>% 
  rownames()

x <- x[, ord]

groups <- colnames(x) %>%
  `names<-`(colnames(x))

# Limit scale
x[x > 2] <- 2
x[x < -1.5] <- -1.5

Please note, the order of the clusters is not always reproducible.

# Create top annotation
tannot <- data.frame(Visit = groups[colnames(x)] %>%
                       strsplit("_|!!|vis") %>%
                       sget(4)) %>% 
  mutate(., donor = rownames(.) %>% strsplit("_vis") %>% sget(1)) %>% 
  mutate(., Sex = sex.df$sex[match(.$donor, sex.df$nn)]) %>% 
  select(-donor) %>% 
  {HeatmapAnnotation(df=.,
                     border=T,
                     col=list(Visit = ggsci::pal_jama()(7)[c(1:3)] %>% `names<-`(c(1:3)),
                              Sex = c("darkred","blue3") %>% setNames(c("Female","Male"))),
                     show_legend=T)}

# Create color palette
pal <- colorRampPalette(c('navy','grey95','firebrick'))(1024)
labeled.gene.subset <- NULL

# Plot
set.seed(1337)

ha <- ComplexHeatmap::Heatmap(x, 
                              name='Expression', 
                              col=pal, 
                              cluster_columns=FALSE, 
                              show_row_names=F, 
                              show_column_names=FALSE, 
                              top_annotation=tannot,
                              left_annotation=NULL,
                              border=TRUE,
                              show_column_dend = FALSE, 
                              show_row_dend = FALSE,
                              cluster_rows = F,
                              # row_km = 5, # Only used initially
                              # row_km_repeats = 500, # Only used initially
                              row_split = names(genes),
                              column_split = groups[colnames(x)] %>% strsplit("!!") %>% sapply('[[', 2) %>% unname() %>% factor(levels = c("ATM", "mono/mac", "Early LAM", "LAM")))

ht = draw(ha); row_order(ht) -> cls

Calculate GO enrichment

go <- cls %>% 
  lapply(\(y) clusterProfiler::enrichGO(rownames(x)[y],
                                        OrgDb = "org.Hs.eg.db", 
                                        keyType = "SYMBOL", 
                                        ont = "BP", 
                                        universe = rownames(cm.merged))) %>% 
  setNames(names(cls))

##

##

Save order for later

cls %>% 
  lapply(\(y) rownames(x)[y]) %>% 
  lapply(data.frame) %>% 
  lapply(setNames, "gene") %>% 
  bind_rows(.id = "cluster") %>%
  select(gene, cluster) %>% 
  write.table("Macrophages_DEG_heatmap_genes.tsv", sep = "\t")

Figures 3g,h

Preparations

cts <- c("Early LAM", "LAM", "mono/mac", "ATM")
cm.merged <- con$getJointCountMatrix(raw = TRUE)

# Get markers
markers <- con$getDifferentialGenes(groups = anno.myeloid %>% .[. %in% cts], 
                                    z.threshold = 1, 
                                    upregulated.only = TRUE, 
                                    append.specificity.metrics = TRUE, 
                                    append.auc = TRUE)

## Estimating marker genes per sample

## Aggregating marker genes

## Estimating specificity metrics

## All done!

genes <- markers$LAM %>% 
  filter(AUC > 0.8) %>% 
  pull(Gene)

# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[rownames(cm.merged)]
anno.subtype <- anno.myeloid[anno.myeloid %in% cts] %>% 
  .[!is.na(.)] %>% 
  factor()

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

cm.pseudo.tmp <- sccore::collapseCellsByType(cm.merged,
                                             groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo <- cm.pseudo.tmp %>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_|!!") %>% 
                                   sget(3) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo.tmp), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = TRUE) %>% 
  t()

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

Figure 3g

plot.dat <- cm.pseudo %>% 
  .[, colnames(.) %in% c("LPL")] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("ATM", "mono/mac", "Early LAM", "LAM")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = F) %>%
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = "LPL", fill = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 500, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) # Wilcoxon

Figure 3h

anno <- anno.myeloid %>% 
  .[. %in% cts] %>% 
  factor()

cm.clean <- cm.merged %>% 
  .[rownames(.) %in% names(anno), ]

spc <- con$getDatasetPerCell()

lam.scores <- list(LAM.activity = genes)
seu_obj <- CreateSeuratObject(counts = cm.clean %>% Matrix::t() %>% drop0(),
                              min.cells = 0, 
                              min.features = 0)
m.scores <- UCell::AddModuleScore_UCell(seu_obj, features = lam.scores, ncores = 32)

plot.df <- reshape2::melt(m.scores$LAM.activity_UCell) %>% 
  mutate(anno = anno[rownames(.)] %>% factor(levels = c("ATM", "mono/mac", "Early LAM", "LAM")), 
         visit = spc[rownames(.)] %>% as.character() %>% strsplit("_") %>% sget(3),
         donor = spc[rownames(.)] %>% as.character() %>% strsplit("_") %>% sapply(\(x) paste(x[1], x[2], sep = "_")),
         donor_vis = spc[rownames(.)]) %>% 
  mutate(sex = meta$sex[match(donor_vis, meta$sample)],
         donor_anno = paste(donor, anno, sep = "-")) %>%
  group_by(sex, visit, donor_anno, donor, anno) %>%
  summarize(value = mean(value)) %>% 
  ungroup() %>% 
  filter(visit != "vis2") %$% 
  split(., visit) %>% 
  {lapply(., filter, donor_anno %in% .[[2]]$donor_anno)} %>% 
  {rbind(data.frame(.[[2]], diff = .[[2]]$value - .[[1]]$value))}

## `summarise()` has grouped output by 'sex', 'visit', 'donor_anno', 'donor'. You
## can override using the `.groups` argument.

plot.df %>% 
  ggplot(aes(anno, diff, fill = sex)) +
  geom_boxplot() +
  geom_point(aes(col = sex), alpha = 0.3, position = position_dodge(width = 0.8), col = "black") +
  geom_hline(yintercept = 0, color = "black") +
  theme_bw() +
  labs(x = "", y = "Mean module score difference (V3 - V1)", title = "LAM signature post-surgery weight loss", fill = "", col = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c("darkred", "blue3"))

Figure 4

Load data

con <- qread("con_aspc.qs", nthreads = 10)
anno.aspc <- qread("anno_aspc.qs")

Figure 4a

con$plotGraph(groups = anno.aspc, 
              plot.na = FALSE, 
              size = 0.2,
              alpha = 1, 
              embedding = "largeVis",
              show.labels = TRUE, 
              font.size = 5) + 
  labs(x = "largeVis1", y = "largeVis2") + 
  theme(line = element_blank()) +
  scale_colour_manual(values = RColorBrewer::brewer.pal(5, "Greens")[-1][c(2,1,3,4)])

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Figure 4b

cm.merged <- con$getJointCountMatrix()

c("DPP4", "SEMA3C", "FBN1", # DPP4
  "CXCL14", "CFD", "C3", # CXCL14
  "PPARG", "ACACB", "SEMA3A", # PPARG
  "EPHA3", "MEOX2", "IGFBP7" # EPHA3
) %>% 
  sccore::dotPlot(., 
                  cm.merged, 
                  anno.aspc %>% factor(levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")), 
                  gene.order = ., 
                  cols = c("white","green4"))

Figure 4c

Here, we’re plotting proportions of total cells, so we use the Conos object and the annotation for all cells.

# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major, 
                 cell.groups = anno.major, 
                 ref.level = "vis2", 
                 target.level = "vis3", 
                 sample.groups = con.major$samples %>% 
                   names() %>% 
                   strsplit("_") %>% 
                   sapply('[[', 3) %>% 
                   gsub(1, 2, .) %>% 
                   `names<-`(con.major$samples %>% 
                               names()),
                 sample.groups.palette = ggsci::pal_jama()(7))

cao$sample.groups <- con.major$samples %>% 
  names() %>% 
  strsplit("_") %>% 
  sapply('[[', 3) %>% 
  `names<-`(con.major$samples %>% 
              names()) %>% 
  gsub("vis", "Visit ", .)

anno.comb <- anno.major[!names(anno.major) %in% names(anno.aspc)] %>% 
  {factor(c(., anno.aspc))}

p <- cao$plotCellGroupSizes(cell.groups = anno.comb,
                            show.significance = FALSE, 
                            filter.empty.cell.types = FALSE)

plot.dat <- p$data %>% 
  filter(variable %in% levels(anno.aspc)) %>% 
  mutate(sex = rep(meta$sex, anno.aspc %>% levels() %>% length()),
         variable = factor(variable)) %>% 
  mutate(sex_visit = paste(sex, group, sep = " ") %>% gsub("Vis", "vis", .),
         variable = factor(variable, levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$variable)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$group)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(ct = factor(ct, levels = unique(plot.dat$variable))) %>% 
  arrange(ct) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(var, sex) %>%
  wilcox_test(value~sex_visit, paired = T)

stat.test[1, "p.adj.signif"] <- "."
stat.test[15, "p.adj.signif"] <- "."

stat.test %<>% 
  filter(p.adj.signif != "ns", !is.na(p), !var %in% c("ASPC_CXCL14")) %>% # Remove those not significant for KW
  add_xy_position(x = "var", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
  arrange(desc(y.position))

# Plot
ggplot(plot.dat, aes(variable, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1), 
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "% cells per sample", fill = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1)) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(ct, y = 20, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 2, hide.ns = F) # Wilcoxon

Figure 4d

Preparations

# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major, 
                 cell.groups = anno.major, 
                 ref.level = "vis2", 
                 target.level = "vis3", 
                 sample.groups = con.major$samples %>% 
                   names() %>% 
                   strsplit("_") %>% 
                   sapply('[[', 3) %>% 
                   gsub(1, 2, .) %>% 
                   `names<-`(con.major$samples %>% 
                               names()),
                 sample.groups.palette = ggsci::pal_jama()(7))

cao$sample.groups <- con.major$samples %>% 
  names() %>% 
  strsplit("_") %>% 
  sapply('[[', 3) %>% 
  `names<-`(con.major$samples %>% 
              names()) %>% 
  gsub("vis", "Visit ", .)

anno.comb <- anno.major[!names(anno.major) %in% names(anno.aspc)] %>% 
  {factor(c(., anno.aspc))}

Here, we’re showing the preparations to create models for prediction. This takes some time and is not run again.

# Get markers
markers.aspc.all <- con.major$getDifferentialGenes(groups = anno.comb, z.threshold = 1, upregulated.only = T, verbose = T)

# Get proportions
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>% 
  {100 * ./rowSums(.)} %>% 
  as.data.frame() %$% 
  setNames(`Early LAM`, rownames(.))

# Extract markers. Please note, the marker metrics may change a little in each iteration
markers.all <- markers.aspc.all$ASPC_DPP4 %>% 
  filter(PAdj < 0.05,
         Specificity > 0.7) %>% 
  arrange(desc(AUC)) %>% 
  pull(Gene)

# Subset bulk data
cm.bulk.mat <- cm.bulk %>% 
  as.data.frame() %>% 
  filter(rownames(.) %in% markers.all) %>% 
  as.matrix() %>% 
  t()

# Now, train a final model on all data for visualization and gene selection
cm.test <- cm.bulk.mat %>% 
  .[rownames(.) %in% names(proportions), ]

lasso_cv <- cv.glmnet(cm.test, proportions, alpha = 1, nfolds = 5)
best_lambda <- lasso_cv$lambda.min
lasso_model <- glmnet(cm.test, proportions, alpha = 1, lambda = best_lambda)

# Predict on all samples using the final model
lasso.predict.all <- predict(lasso_model, newx = cm.test, s = best_lambda)

# Save model
list(lasso_cv = lasso_cv,
     lasso_model = lasso_model) %>%
  qsave("DPP4_model.qs")

# Load and prepare
tmp <- qread("DPP4_model.qs")

lasso_model <- tmp$lasso_model
best_lambda <- tmp$lasso_cv$lambda.min

cm.bulk.mat <- cm.bulk %>% 
  as.data.frame() %>% 
  filter(rownames(.) %in% rownames(coef(lasso_model))) %>% 
  as.matrix() %>% 
  t()

proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>% 
  {100 * ./rowSums(.)} %>% 
  as.data.frame() %$% 
  setNames(ASPC_DPP4, rownames(.))

## Predict proportions
proportions.bulk <-  predict(lasso_model, newx = cm.bulk.mat, s = best_lambda)

## Prepare plot
plot.df <- proportions.bulk %>% 
  as.data.frame() %>%
  mutate(donor = rownames(.)) %>% 
  mutate(visit = strsplit(donor, "_") %>% sget(3),
         sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = "_"))

trend.trained <- proportions.bulk %>% 
  as.data.frame() %>%
  mutate(donor = rownames(.)) %>% 
  filter(donor %in% names(proportions)) %>%
  mutate(visit = strsplit(donor, "_") %>% sget(3),
         sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>% 
  group_by(visit, sex) %>% 
  summarize(s1 = median(s1)) %>%
  mutate(sex_visit = paste(sex, visit, sep = "_"))

## `summarise()` has grouped output by 'visit'. You can override using the
## `.groups` argument.

trend.untrained <- proportions.bulk %>% 
  as.data.frame() %>%
  mutate(donor = rownames(.)) %>% 
  filter(!donor %in% names(proportions)) %>%
  mutate(visit = strsplit(donor, "_") %>% sget(3),
         sex = metadata.all$sex[match(donor, metadata.all$donor)]) %>% 
  group_by(visit, sex) %>% 
  summarize(s1 = median(s1)) %>% 
  mutate(sex_visit = paste(sex, visit, sep = "_"))

## `summarise()` has grouped output by 'visit'. You can override using the
## `.groups` argument.

plot.df %>% 
  ggplot() +
  geom_boxplot(mapping = aes(sex_visit, s1, fill = sex_visit), outliers = F) +
  geom_jitter(mapping = aes(sex_visit, s1, col = sex_visit), alpha = 0.3, col = "black", width = 0.3) +
  theme_bw() +
  labs(y = "Predicted proportions (%)", title = "Predicted ASPC_DPP4 proportions", x = "", col = "Median trends", fill = "") +
  geom_line(mapping = aes(x = sex_visit, y = s1, group = sex, col = "Trained data"), data = trend.trained, linewidth = 1) +
  geom_line(mapping = aes(x = sex_visit, y = s1, group = sex, col = "Untrained data"), data = trend.untrained, linewidth = 1) +
  scale_color_manual(values = c("Trained data" = "black", "Untrained data" = "grey40")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  theme(line = element_blank(),
        axis.text = element_text(color = "black"),
        axis.text.x = element_blank())

Statistics

plot.df %>% 
  filter(sex == "Female") %>% 
  rstatix::kruskal_test(s1 ~ visit)

## # A tibble: 1 × 6
##   .y.       n statistic    df        p method        
## * <chr> <int>     <dbl> <int>    <dbl> <chr>         
## 1 s1       63      16.6     2 0.000245 Kruskal-Wallis

plot.df %>% 
  filter(sex == "Female") %>% 
  rstatix::wilcox_test(s1 ~ visit)

## # A tibble: 3 × 9
##   .y.   group1 group2    n1    n2 statistic        p    p.adj p.adj.signif
## * <chr> <chr>  <chr>  <int> <int>     <dbl>    <dbl>    <dbl> <chr>       
## 1 s1    vis1   vis2      21    21        70 0.000073 0.000219 ***         
## 2 s1    vis1   vis3      21    21       106 0.003    0.007    **          
## 3 s1    vis2   vis3      21    21       280 0.139    0.139    ns

plot.df %>% 
  filter(sex == "Male") %>% 
  rstatix::kruskal_test(s1 ~ visit)

## # A tibble: 1 × 6
##   .y.       n statistic    df      p method        
## * <chr> <int>     <dbl> <int>  <dbl> <chr>         
## 1 s1       30      8.29     2 0.0159 Kruskal-Wallis

plot.df %>% 
  filter(sex == "Male") %>% 
  rstatix::wilcox_test(s1 ~ visit)

## # A tibble: 3 × 9
##   .y.   group1 group2    n1    n2 statistic     p p.adj p.adj.signif
## * <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl> <chr>       
## 1 s1    vis1   vis2      10    10        14 0.005 0.016 *           
## 2 s1    vis1   vis3      10    10        35 0.28  0.28  ns          
## 3 s1    vis2   vis3      10    10        75 0.063 0.126 ns

Figure 4e

Preparations

cm.merged <- con$getJointCountMatrix(raw = TRUE) %>% 
  Matrix::t() %>% 
  .[, colnames(.) %in% names(anno.aspc)]

# Calculate DEGs using Cacoa
con.vis13 <- con$samples %>% 
  .[!grepl("vis2", names(.))] %>% 
  Conos$new()

sample.groups <- con.vis13$samples %>% 
  names() %>% 
  `names<-`(ifelse(grepl("vis1", .), "visit1", "visit3"), .)

cao <- Cacoa$new(con.vis13, 
                 sample.groups, 
                 anno.aspc, 
                 ref.level = "visit1", 
                 target.level = "visit3", 
                 n.cores = 10)

cao$estimateDEPerCellType()

## Preparing matrices for DE

## Estimating DE per cell type

de <- cao$test.results$de %>%
  lapply('[[', "res")

# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[colnames(cm.merged)]
anno.subtype <- anno.aspc %>% 
  .[!is.na(.)]

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

# Create pseudo CM
cm.pseudo <- sccore::collapseCellsByType(cm.merged %>% Matrix::t(), 
                                         groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo %<>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_|!!") %>% 
                                   sget(3) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T)

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

# See comment for Figure 3f

# genes <- de %>%
#   {`names<-`(lapply(names(.), \(nn) mutate(.[[nn]], ct = nn)), names(.))} %>% 
#   lapply(filter, padj <= 0.05, abs(log2FoldChange) > 1) %>%
#   lapply(pull, Gene) %>% 
#   {sapply(names(.), \(nn) .[[nn]] %>% `names<-`(., rep(nn, length(.))))} %>% 
#   Reduce(c, .) %>% 
#   .[match(unique(.), .)]

genes <- read.delim("ASPC_DEG_heatmap_genes.tsv") %$% 
  setNames(gene, cluster) %>%
  .[order(names(.))]

idx <- cm.pseudo %>% 
  colnames() %>% 
  data.frame(id = .) %>% 
  mutate(vis = strsplit(id, "_|!!") %>% sget(3),
         ct = strsplit(id, "!!") %>% sget(2)) %>% 
  mutate(ord = order(vis, ct))

x <- cm.pseudo %>% 
  .[match(genes, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
  .[, idx$ord] %>% 
  Matrix::t() %>%
  scale() %>%
  Matrix::t()

# Ordering
spc <- con$getDatasetPerCell()

sepc <- meta$sex[match(spc, meta$sample)] %>% 
  setNames(names(spc))

sex.df <- sepc %>% 
  {data.frame(nn = names(.), sex = unname(.))} %>% 
  mutate(nn = strsplit(nn, "!!") %>% sget(1)) %>% 
  .[match(unique(.$nn), .$nn), ] %>% 
  filter(!grepl("pool", nn))

ord <- data.frame(Visit = colnames(x) %>%
                    setNames(colnames(x)) %>% 
                    strsplit("_|!!|vis") %>%
                    sget(4)) %>% 
  mutate(., 
         donor = rownames(.) %>% strsplit("_vis") %>% sget(1)) %>% 
  mutate(., 
         Sex = sex.df$sex[match(.$donor, sex.df$nn)],
         ct = rownames(.) %>% strsplit("!!") %>% sget(2)) %>% 
  arrange(ct, Visit, Sex) %>% 
  rownames()

x <- x[, ord]

groups <- colnames(x) %>%
  `names<-`(colnames(x))

# Limit scale
x[x > 2] <- 2
x[x < -1] <- -1

Please note, the order of the clusters is not always reproducible.

# Create top annotation
tannot <- data.frame(Visit = groups[colnames(x)] %>%
                       strsplit("_|!!|vis") %>%
                       sget(4)) %>% 
  mutate(., donor = rownames(.) %>% strsplit("_vis") %>% sget(1)) %>% 
  mutate(., Sex = sex.df$sex[match(.$donor, sex.df$nn)]) %>% 
  select(-donor) %>% 
  {HeatmapAnnotation(df=.,
                     border=TRUE,
                     col=list(Visit = ggsci::pal_jama()(7)[c(1:3)] %>% `names<-`(c(1:3)),
                              Sex = c("darkred","blue3") %>% setNames(c("Female","Male"))),
                     show_legend=TRUE)}

# Create color palette
pal <- colorRampPalette(c('navy','grey95','firebrick'))(1024)
labeled.gene.subset <- NULL

# Plot
set.seed(1337)

ha <- ComplexHeatmap::Heatmap(x, 
                              name='Expression', 
                              col=pal, 
                              cluster_columns=FALSE, 
                              show_row_names=FALSE, 
                              show_column_names=FALSE, 
                              top_annotation=tannot,
                              left_annotation=NULL,
                              border=TRUE,
                              show_column_dend = FALSE, 
                              show_row_dend = FALSE,
                              # row_km = 3, # Only used initially
                              # row_km_repeats = 100, # Only used initially
                              column_split = groups[colnames(x)] %>% strsplit("!!") %>% sapply('[[', 2) %>% unname() %>% factor(levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")),
                              row_split = names(genes))

ht = draw(ha); row_order(ht) -> cls

Calculate GO enrichment

go <- cls %>% 
  lapply(\(y) clusterProfiler::enrichGO(rownames(x)[y],
                                        OrgDb = "org.Hs.eg.db", 
                                        keyType = "SYMBOL", 
                                        ont = "BP", 
                                        universe = rownames(cm.merged))) %>% 
  setNames(names(cls))

Save order for later

cls %>% 
  lapply(\(y) rownames(x)[y]) %>% 
  lapply(data.frame) %>% 
  lapply(setNames, "gene") %>% 
  bind_rows(.id = "cluster") %>%
  select(gene, cluster) %>% 
  write.table("ASPC_DEG_heatmap_genes.tsv", sep = "\t")

Figure 4f

Please note, cluster number may be different.

genes.go <- clusterProfiler::bitr("GO:0097193", 
                                  fromType="GOALL", 
                                  toType="SYMBOL", 
                                  OrgDb='org.Hs.eg.db')$SYMBOL %>% 
  unique()

## 'select()' returned 1:many mapping between keys and columns

genes.cls <- x %>% 
  rownames() %>% 
  .[cls[[1]]] # Change if needed

genes <- intersect(genes.go, genes.cls)

cm.merged <- con$getJointCountMatrix(raw = T)

# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[rownames(cm.merged)]
anno.subtype <- anno.aspc %>% 
  .[!is.na(.)] %>% 
  factor()

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

cm.pseudo.tmp <- sccore::collapseCellsByType(cm.merged,
                                             groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo <- cm.pseudo.tmp %>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_|!!") %>% 
                                   sget(3) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo.tmp), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T) %>% 
  t()

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

plot.dat <- cm.pseudo %>%
  .[, colnames(.) %in% genes] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " ")) %>% 
  group_by(visit, sex, anno, sex_visit, sample) %>%
  summarize(value = mean(value))

## `summarise()` has grouped output by 'visit', 'sex', 'anno', 'sex_visit'. You
## can override using the `.groups` argument.

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = T) %>% 
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Mean normalized pseudobulk expression", title = "Apoptosis ontology gene expression") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 250, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
  guides(fill = "none")

Figure 4g

See Scenic.ipynb.

Figure 4h

See Scenic.ipynb for mat object calculation.

mat <- fastMatMR::fmm_to_mat("aspc_adi_auc_mat.mtx") %>% 
  `dimnames<-`(list(
    read.table("aspc_adi_auc_mat.rownames", header = T)[, 1],
    read.table("aspc_adi_auc_mat.colnames", header = T)[, 1]
  ))

spc <- con.major$getDatasetPerCell()
vpc <- spc %>% 
  as.character() %>% 
  strsplit("_|!!") %>% 
  sget(3) %>% 
  setNames(names(spc))

anno.comb <- factor(anno.aspc)

plot.dat <- c("JUN(+)", "ZEB1(+)") %>% # ZEB1 is just a random pick to make it easier to manipulate data, omitted later
  {mat[, colnames(mat) %in% .]} %>% 
  as.data.frame() %>%
  mutate(., 
         visit = unname(vpc)[match(rownames(.), 
                                   names(vpc) %>% 
                                     strsplit("!!") %>% 
                                     sapply(\(x) paste0(x[2],"!!",x[3]))
         )
         ],
         anno = unname(anno.comb)[match(rownames(.),
                                        names(anno.comb) %>% 
                                          strsplit("!!") %>% 
                                          sapply(\(x) paste0(x[2],"!!",x[3]))
         )
         ],
         sample = unname(spc)[match(rownames(.), 
                                    names(spc) %>% 
                                      strsplit("!!") %>% 
                                      sapply(\(x) paste0(x[2],"!!",x[3]))
         )
         ]
  ) %>% 
  filter(!is.na(anno)) %>% 
  reshape2::melt(id.vars = c("visit", "anno", "sample")) %>% 
  mutate(anno_vis = paste0(anno,"_",visit),
         anno_var = paste0(anno,"_",variable)) %>% 
  group_by(visit, anno, variable, sample) %>% 
  summarize(mm = median(value)) %>% 
  ungroup() %>% 
  filter(variable == "JUN(+)") %>% 
  mutate(anno = factor(anno, levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")),
         visit = factor(visit, levels = c("vis1", "vis2", "vis3"), labels = c("Visit 1", "Visit 2", "Visit 3")),
         sex = meta$sex[match(.$sample, meta$sample)],
         variable = factor(variable)) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " ") %>% gsub("Visit", "visit", .),
         anno = factor(anno, levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")))

## `summarise()` has grouped output by 'visit', 'anno', 'variable'. You can
## override using the `.groups` argument.

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$mm, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(ct = factor(ct, levels = unique(plot.dat$anno))) %>% 
  arrange(ct) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(anno, sex) %>%
  wilcox_test(mm~sex_visit, paired = TRUE) %>%
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
  arrange(desc(y.position))

# Plot
ggplot(plot.dat, aes(anno, mm)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1), 
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Regulon activity (mean AUC)", fill = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(ct, y = 6, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 2, hide.ns = F) # Wilcoxon

Figure 5

Load data

con <- qread("con_adipocytes.qs", nthreads = 10)
anno.adipocytes <- qread("anno_adipocytes_archetypes.qs")

missing.cells <- setdiff(rownames(con$embedding), 
                         names(anno.adipocytes)) %>% 
  {setNames(rep("", length(.)), .)} %>% 
  factor()

anno.adipocytes.ext <- factor(c(anno.adipocytes, missing.cells))

Figure 5a

con$plotGraph(groups = anno.adipocytes.ext, 
              show.labels = TRUE, 
              embedding = "largeVis") +
  labs(x = "largeVis1", y = "largeVis2") +
  theme(line = element_blank()) + 
  scale_colour_manual(values = c(RColorBrewer::brewer.pal(4, "Reds")[-1], "gray60"))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Figure 5b

cm.merged <- con$getJointCountMatrix()

# c("PLXDC2","CBLB","TCF4",
#   "GPAM","ACSL1","LPL","DGAT2",
#   "PRSS23","THSD7A","AL139317.5") %>% 
c("PRSS23","PDE5A","ADIPOQ", "DGAT2", "GPAM", "LPL", "RORA", "CLSTN2", "CRIM1", "ITGA1") %>% 
  sccore::dotPlot(., 
                  cm.merged, 
                  anno.adipocytes, 
                  gene.order = ., 
                  cols = c("white","red3"))

Figure 5c

Preparations

cm.merged <- con$getJointCountMatrix(raw = TRUE) %>% 
  Matrix::t() %>% 
  .[, colnames(.) %in% names(anno.adipocytes)]

# Calculate DEGs using Cacoa
con.vis13 <- con$samples %>% 
  .[!grepl("vis2", names(.))] %>% 
  Conos$new()

sample.groups <- con.vis13$samples %>% 
  names() %>% 
  `names<-`(ifelse(grepl("vis1", .), "visit1", "visit3"), .)

cao <- Cacoa$new(con.vis13, 
                 sample.groups, 
                 anno.adipocytes, 
                 ref.level = "visit1", 
                 target.level = "visit3", 
                 n.cores = 10)

cao$estimateDEPerCellType()

## Preparing matrices for DE

## Warning in estimateDEPerCellTypeInner(raw.mats = raw.mats, cell.groups =
## cell.groups, : Excluded 1 sample(s) due to 'min.cell.count'.

## Estimating DE per cell type

de <- cao$test.results$de %>%
  lapply('[[', "res")

# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[colnames(cm.merged)]
anno.subtype <- anno.adipocytes %>% 
  .[!is.na(.)]

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

# Create pseudo CM
cm.pseudo <- sccore::collapseCellsByType(cm.merged %>% Matrix::t(), 
                                         groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo %<>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_|!!") %>% 
                                   sget(3) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T)

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

# See comment for Figure 3f

# genes <- de %>%
#   {`names<-`(lapply(names(.), \(nn) mutate(.[[nn]], ct = nn)), names(.))} %>% 
#   lapply(filter, padj <= 0.05, abs(log2FoldChange) > 1) %>%
#   lapply(pull, Gene) %>% 
#   {sapply(names(.), \(nn) .[[nn]] %>% `names<-`(., rep(nn, length(.))))} %>% 
#   Reduce(c, .) %>% 
#   .[match(unique(.), .)]

genes <- read.delim("Archetypes_DEG_heatmap_genes.tsv") %$% 
  setNames(gene, cluster) %>%
  .[order(names(.))]

idx <- cm.pseudo %>% 
  colnames() %>% 
  data.frame(id = .) %>% 
  mutate(vis = strsplit(id, "_|!!") %>% sget(3),
         ct = strsplit(id, "!!") %>% sget(2)) %>% 
  mutate(ord = order(vis, ct))

x <- cm.pseudo %>% 
  .[match(genes, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
  .[, idx$ord] %>% 
  Matrix::t() %>%
  scale() %>%
  Matrix::t()

# Ordering
spc <- con$getDatasetPerCell()

sepc <- meta$sex[match(spc, meta$sample)] %>% 
  setNames(names(spc))

sex.df <- sepc %>% 
  {data.frame(nn = names(.), sex = unname(.))} %>% 
  mutate(nn = strsplit(nn, "!!") %>% sget(1)) %>% 
  .[match(unique(.$nn), .$nn), ] %>% 
  filter(!grepl("pool", nn))

ord <- data.frame(Visit = colnames(x) %>%
                    setNames(colnames(x)) %>% 
                    strsplit("_|!!|vis") %>%
                    sget(4)) %>% 
  mutate(., 
         donor = rownames(.) %>% strsplit("_vis") %>% sget(1)) %>% 
  mutate(., 
         Sex = sex.df$sex[match(.$donor, sex.df$nn)],
         ct = rownames(.) %>% strsplit("!!") %>% sget(2)) %>% 
  arrange(ct, Visit, Sex) %>% 
  rownames()

x <- x[, ord]

groups <- colnames(x) %>%
  `names<-`(colnames(x))

# Limit scale
x[x > 2] <- 2
x[x < -1] <- -1

Please note, the order of the clusters is not always reproducible, and the clusters them selves may also change slightly.

# Create top annotation
tannot <- data.frame(Visit = groups[colnames(x)] %>%
                       strsplit("_|!!|vis") %>%
                       sget(4)) %>% 
  mutate(., donor = rownames(.) %>% strsplit("_vis") %>% sget(1)) %>% 
  mutate(., Sex = sex.df$sex[match(.$donor, sex.df$nn)]) %>% 
  select(-donor) %>% 
  {HeatmapAnnotation(df=.,
                     border=TRUE,
                     col=list(Visit = ggsci::pal_jama()(7)[c(1:3)] %>% `names<-`(c(1:3)),
                              Sex = c("darkred","blue3") %>% setNames(c("Female","Male"))),
                     show_legend=TRUE)}

# Create color palette
pal <- colorRampPalette(c('navy','grey95','firebrick'))(1024)
labeled.gene.subset <- NULL

# Plot
set.seed(1337)

ha <- ComplexHeatmap::Heatmap(x, 
                              name='Expression', 
                              col=pal, 
                              cluster_columns=FALSE, 
                              show_row_names=FALSE, 
                              show_column_names=FALSE, 
                              top_annotation=tannot,
                              left_annotation=NULL,
                              border=TRUE,
                              show_column_dend = FALSE, 
                              show_row_dend = FALSE,
                              # row_km = 4, # Only used initially
                              # row_km_repeats = 200, # Only used initially
                              column_split = groups[colnames(x)] %>% strsplit("!!") %>% sapply('[[', 2) %>% unname() %>% factor(levels = c("CLSTN2_archetype", "DGAT2_archetype", "PRSS23_archetype")),
                              row_split = names(genes))

ht = draw(ha); row_order(ht) -> cls

Calculate GO enrichment

go <- cls %>% 
  lapply(\(y) clusterProfiler::enrichGO(rownames(x)[y],
                                        OrgDb = "org.Hs.eg.db", 
                                        keyType = "SYMBOL", 
                                        ont = "BP", 
                                        universe = rownames(cm.merged))) %>% 
  setNames(names(cls))

Save order for later

cls %>% 
  lapply(\(y) rownames(x)[y]) %>% 
  lapply(data.frame) %>% 
  lapply(setNames, "gene") %>% 
  bind_rows(.id = "cluster") %>%
  select(gene, cluster) %>% 
  write.table("Archetypes_DEG_heatmap_genes.tsv", sep = "\t")

Figure 5d

cm.merged <- con$getJointCountMatrix(raw = T)

# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[rownames(cm.merged)]
anno.subtype <- anno.adipocytes %>% 
  .[!is.na(.)] %>% 
  factor()

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

cm.pseudo.tmp <- sccore::collapseCellsByType(cm.merged,
                                             groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo <- cm.pseudo.tmp %>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_|!!") %>% 
                                   sget(3) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo.tmp), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T) %>% 
  t()

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

gene <- "DECR1"

plot.dat <- cm.pseudo %>% 
  .[, colnames(.) %in% gene] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("DGAT2_archetype", "CLSTN2_archetype", "PRSS23_archetype")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = F) %>%
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = gene, fill = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 200, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F)# Wilcoxon

gene <- "LIPA"

plot.dat <- cm.pseudo %>% 
  .[, colnames(.) %in% gene] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("DGAT2_archetype", "CLSTN2_archetype", "PRSS23_archetype")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = F) %>%
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = gene, fill = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 30, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F)# Wilcoxon

gene <- "PLIN5"

plot.dat <- cm.pseudo %>% 
  .[, colnames(.) %in% gene] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("DGAT2_archetype", "CLSTN2_archetype", "PRSS23_archetype")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = F) %>% 
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = gene, fill = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 200, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F)# Wilcoxon

Figure 6

Figure 6a

Calculate diffusion embeddings. Please note, this takes some time. We saved the embeddings in adipocytes_aspc_embeddings.qs.

## Load Conos object and annotation
data = qread("con_adipocytes_faps.qs", nthreads = 10)
anno.adipocytes <- qread("anno_major.qs") %>% 
  .[. == "Adipocytes"] %>% 
  factor()
anno.aspc <- qread("anno_aspc.qs")
anno <- factor(c(anno.adipocytes, anno.aspc))

## Convert raw counts to Seurat
for (name in names(data$samples)) {
  seu <- CreateSeuratObject(CreateAssayObject(counts = t(data$samples[[name]]$misc$rawCounts)))
  seu$orig.ident <- substr(name, 0, regexpr("vis", name)-2)
  seu$visit <- substr(name, regexpr("vis", name), nchar(name))
  if (name == names(data$samples)[1]) {
    final <- seu
  } else {
    final <- merge(final, seu)
  }
}

## Merge annotation
final <- subset(final, cells = names(anno))
final <- NormalizeData(final)
anno.final <- anno[ match(colnames(final), names(anno))]
final$fine <- anno.final
final$coarse <- substr(anno.final, 0, regexpr("_", anno.final)-1)
final$dataset <- paste(final$orig.ident, final$visit, sep="_")
final <- subset(final, cells = names(which(!is.na(final$fine))))
full <- final

embeddings <- list()
for (nfeats in c(25,50,100,250)) { 
  # Recreate original object
  final <- full
  # ASPC
  ASPC <- subset(final, coarse == "ASPC")
  VariableFeatures(ASPC) <- NULL
  obj.list <- SplitObject(ASPC, split.by = "dataset")
  feats_ASPC <- suppressWarnings(SelectIntegrationFeatures(obj.list, nfeatures = nfeats, verbose = FALSE))
  # Adipocytes
  Ad <- subset(final, coarse == "Adipocytes")
  VariableFeatures(Ad) <- NULL
  obj.list <- SplitObject(Ad, split.by = "dataset")
  feats_Ad <- suppressWarnings(SelectIntegrationFeatures(obj.list, nfeatures = nfeats, verbose = FALSE))
  # Combine
  feats <- unique(c(feats_ASPC, feats_Ad))
  ## Embed
  # PCA
  VariableFeatures(final) <- feats
  final <- ScaleData(final)
  final <- RunPCA(final)
  for (useHarmony in c(TRUE, FALSE)) {
    if (useHarmony) {
      final <- suppressWarnings(RunHarmony(final, group.by.vars = "dataset", verbose  = FALSE))
      pcs <- final@reductions$harmony@cell.embeddings
    } else {
      pcs <- final@reductions$pca@cell.embeddings
    }
    for (dims in c(5, 10, 15, 20)) {
      dm <- destiny::DiffusionMap(pcs[,1:dims], verbose = FALSE, n_pcs = NA)
      dm_ev <- dm@eigenvectors
      colnames(dm_ev) <- paste("DC", c(1:20), sep="_")
      embeddings[[(length(embeddings) + 1)]] <- dm_ev
      names(embeddings)[length(embeddings)] <- paste(nfeats, useHarmony, dims, sep="_")
    }
  }
}

Prepare data. We chose to continue with embedding no. 2. The sds_obj object is saved for Figure 6E.

# Load and prepare data
embeddings <- qread("adipocytes_aspc_embeddings.qs")
anno.adipocytes <- qread("anno_major.qs") %>% 
  .[. == "Adipocytes"] %>% 
  factor()
anno.aspc <- qread("anno_aspc.qs")
anno <- factor(c(anno.adipocytes, anno.aspc))

# Calculate slingshot trajectory
anno.sort <- anno[rownames(embeddings[[2]])] %>%
  as.character() %>%
  unname()

sds_obj <- slingshot(embeddings[[2]][, 1:2] %>% 
                       `colnames<-`(c("UMAP1","UMAP2")), 
                     anno.sort, 
                     end.clus = "Adipocytes", 
                     start.clus = "ASPC_DPP4")

qsave(sds_obj, "sds_obj.qs")


sds <- as.SlingshotDataSet(sds_obj)

Final plot

plot.df <- embeddings[[2]][,1:2] %>% 
  as.data.frame() %>% 
  `colnames<-`(c("UMAP1","UMAP2")) %>% 
  mutate(., annotation = anno[rownames(.)] %>% as.factor()) %>% 
  .[complete.cases(.),]

line.df <- lapply(1:length(sds@curves), \(lineage) {
  sds@curves[[lineage]]$s[,1:2] %>% 
    data.frame() %>% 
    mutate(lineage = lineage %>% as.factor())
}) %>% 
  setNames(sds@curves %>% names()) %>% 
  bind_rows() %>% 
  mutate(lineage = factor(lineage, labels = c("Lineage 1", "Lineage 2")))

ggplot(plot.df, aes(UMAP1, UMAP2, col = annotation)) + 
  geom_point(size = 0.3) +
  geom_path(data = line.df, aes(UMAP1, UMAP2, group = lineage, col = lineage), inherit.aes = F, linewidth = 1) + 
  theme_bw() + 
  theme(legend.position = "right",
        line = element_blank()) + 
  labs(col = "", x = "DC1", y = "DC2") +
  scale_colour_manual(values = c("#E41A1C", RColorBrewer::brewer.pal(5, "Greens")[-1][c(2,1,3,4)], "black", "grey40")) +
  dotSize(3) +
  ylim(c(-0.03, 0.013)) +
  xlim(c(-0.006, 0.0052))

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_path()`).

Figure 6b

# We need the `sds_obj` object from Figure 6a
sds_obj <- qread("sds_obj.qs")

pseudotime_all <- sds_obj@assays@data@listData$pseudotime %>% 
  as.data.frame() %>% 
  filter(!is.na(Lineage1)) %>% 
  {setNames(pull(., Lineage1), rownames(.))}

plot.df %>% 
  .[names(pseudotime_all), ] %>%
  mutate(pseudotime = pseudotime_all) %>%
  ggplot(aes(UMAP1, UMAP2, col = pseudotime)) +
  geom_point(size = 0.3) +
  geom_path(data = line.df %>% filter(lineage == "Lineage 1"), aes(UMAP1, UMAP2), inherit.aes = F, color = "black", linewidth = 1, alpha = 0.4) +
  theme_bw() +
  theme(line = element_blank()) +
  labs(col = "Pseudotime", x = "DC1", y = "DC2") +
  scale_color_continuous(low = "blue3", high = "orange") +
  ylim(c(-0.005, 0.013)) +
  xlim(c(-0.006, 0.0052))

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_path()`).

Figure 6c

# We need the `sds_obj` object from Figure 6a
sds_obj <- qread("sds_obj.qs")

pseudotime_all <- sds_obj@assays@data@listData$pseudotime %>% 
  as.data.frame() %>% 
  filter(!is.na(Lineage1)) %>% 
  {setNames(pull(., Lineage1), rownames(.))}

plot.df %>% 
  .[names(pseudotime_all), ] %>%
  mutate(pseudo = pseudotime_all) %>%
  mutate(bin = cut(pseudo, breaks = c(0, 0.005, 0.01, 0.0165, 0.023))) %>% 
  na.omit() %>% 
  mutate(bin = as.character(bin) %>% 
           strsplit(",") %>% 
           sget(2) %>% 
           gsub("]", "", ., fixed = TRUE)) %>%
  arrange(bin) %>% 
  mutate(bin = factor(bin, labels = c("Early preadipocytes", "Mid preadipocytes", "Late preadipocytes", "Adipocyte"))) %>% 
  ggplot(aes(UMAP1, UMAP2, col = bin)) +
  geom_point(size = 0.3) +
  geom_path(data = line.df %>% filter(lineage == "Lineage 1"), aes(UMAP1, UMAP2), inherit.aes = FALSE, color = "black", linewidth = 1, alpha = 0.4) +
  theme_bw() +
  theme(line = element_blank()) +
  labs(col = "Binned pseudotime", x = "DC1", y = "DC2") +
  scale_color_manual(values = colorRampPalette(c("pink","darkred"))(4)) +
  ylim(c(-0.005, 0.013)) +
  xlim(c(-0.006, 0.0052)) +
  dotSize(3)

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_path()`).

Figure 6d

See Scenic.ipynb for mat object calculation.

mat <- fastMatMR::fmm_to_mat("aspc_adi_auc_mat.mtx") %>%
  `dimnames<-`(list(
    read.table("aspc_adi_auc_mat.rownames", header = T)[, 1],
    read.table("aspc_adi_auc_mat.colnames", header = T)[, 1]
  ))

con.major <- qread("con_major.qs", nthreads = 10)
spc <- con.major$getDatasetPerCell()
vpc <- spc %>% 
  as.character() %>% 
  strsplit("_|!!") %>% 
  sget(3) %>% 
  setNames(names(spc))

anno.adipocytes <- qread("anno_major.qs") %>% 
  .[. == "Adipocytes"] %>% 
  factor()
anno.aspc <- qread("anno_aspc.qs")
anno <- factor(c(anno.adipocytes, anno.aspc))

# We need the `sds_obj` object from Figure 6a
sds_obj <- qread("sds_obj.qs")

mat.df <- c("ZEB1", "EBF3", "PPARG") %>%
  paste0("(+)") %>%
  {mat[, colnames(mat) %in% .]} %>% 
  as.data.frame() %>% 
  mutate(., 
         visit = unname(vpc)[match(rownames(.), 
                                   names(vpc) %>% 
                                     strsplit("!!") %>% 
                                     sapply(\(x) paste0(x[2],"!!",x[3]))
         )
         ] %>% 
           factor(labels = c("Visit 1", "Visit 2", "Visit 3")),
         anno = unname(anno)[match(rownames(.),
                                   names(anno) %>% 
                                     strsplit("!!") %>% 
                                     sapply(\(x) paste0(x[2],"!!",x[3]))
         )
         ],
         sample = unname(spc)[match(rownames(.), 
                                    names(spc) %>% 
                                      strsplit("!!") %>% 
                                      sapply(\(x) paste0(x[2],"!!",x[3]))
         )
         ]
  ) %>% 
  filter(!is.na(anno))

pseudotime_all <- sds_obj@assays@data@listData$pseudotime %>% 
  as.data.frame() %>% 
  filter(!is.na(Lineage1)) %>% 
  mutate(., 
         cid = rownames(.) %>% 
           strsplit("!!") %>% 
           sapply(\(x) paste0(x[2],"!!",x[3])))

mat.df %<>% .[match(pseudotime_all$cid, rownames(.), nomatch = F), ]

pseudotime_all %<>% .[match(rownames(mat.df), .$cid, nomatch = F), ]

mat.df2 <- mat.df %>% 
  mutate(pseudotime = pseudotime_all$Lineage1) %>%
  mutate(bin = cut(pseudotime, breaks = c(0, 0.005, 0.01, 0.0165, 0.023))) %>% 
  na.omit() %>% 
  mutate(bin = as.character(bin) %>% 
           strsplit(",") %>% 
           sget(2) %>% 
           gsub("]", "", ., fixed = T)) %>%
  select(-anno, -pseudotime) %>% 
  reshape2::melt(id.vars = c("visit", "bin", "sample"), variable.name = "regulon") %>%
  group_by(visit, regulon, bin, sample) %>%
  mutate(value = as.numeric(value)) %>% 
  summarize(mm = median(value))

## `summarise()` has grouped output by 'visit', 'regulon', 'bin'. You can override
## using the `.groups` argument.

mat.df2 %>% 
  pull(regulon) %>% 
  unique() %>% 
  as.character() %>% 
  lapply(\(vv) {
    plot.dat <- mat.df2 %>% 
      filter(regulon == vv) %>%
      ungroup() %>%
      mutate(sex = meta$sex[match(.$sample, meta$sample)]) %>% 
      mutate(sex_visit = paste(sex, visit, sep = " ") %>% gsub("Visit", "visit", .))
    
    # Kruskal-Wallis
    sex.stat <- plot.dat %$% 
      split(., sex) %>% 
      lapply(\(x) split(x, x$bin)) %>% 
      lapply(lapply, \(x) kruskal.test(x$mm, g = x$visit)$p.value) %>% 
      lapply(sapply, gtools::stars.pval) %>% 
      {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
      `names<-`(c("Female","Male")) %>% 
      lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>% 
      {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
      bind_rows() %>% 
      mutate(ct = factor(ct, levels = unique(plot.dat$bin))) %>% 
      arrange(ct) %>% 
      mutate(sig = gsub(".", "", sig, fixed = T))
    
    # Wilcoxon
    stat.test <- plot.dat %>%
      group_by(bin, sex) %>%
      wilcox_test(mm~sex_visit, paired = F)
    
    stat.test %<>% 
      filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
      add_xy_position(x = "bin", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
      arrange(desc(y.position))
    
    # Plot
    pp <- ggplot(plot.dat, aes(bin, mm)) + 
      geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
      geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1), 
                 color = "black", size = 0.5, alpha = 0.2) +
      theme_bw() +
      labs(x = "", y = "Mean AUC", fill = "", title = vv) +
      theme(line = element_blank(),
            axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
            axis.text = element_text(color = "black")) +
      scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
      geom_text(inherit.aes = F, data = sex.stat, aes(ct, y = 2.2, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
      stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 0.2, hide.ns = F) # Wilcoxon
    
    pp
  }) %>% 
  cowplot::plot_grid(plotlist = ., ncol = 3)

Figure 6e

Prepare data

We downsampled to 15k nuclei due to calculation times. We provide sds_obj_reduced.qs for slingshot pseudotime estimations for the downsampled dataset.

Here, we calculate for visit 1 and visit 3.

sds_obj <- qread("sds_obj_reduced.qs")

pseudotime <- sds_obj@assays@data@listData$pseudotime[, 1] %>% 
  .[!is.na(.)]

con <- qread("con_adipocytes_aspc.qs", nthreads = 10)

mat <- con$getJointCountMatrix() %>%
  .[match(names(pseudotime), rownames(.)), colSums(.) > 2e2]

mt <- mat %>%
  as.matrix() %>%
  as.data.frame() %>%
  tibble::rownames_to_column() %>%
  {message("Mutating"); mutate(.,
                               pseudotime = pseudotime[rowname],
                               visit = con$getDatasetPerCell()[rowname] %>%
                                 as.character() %>%
                                 strsplit("_") %>%
                                 sget(3),
                               donor = strsplit(rowname, "!!") %>%
                                 sget(1))} %>%
  .[complete.cases(.), ] %>%
  {message("Melting"); reshape2::melt(., id.vars = c("rowname", "pseudotime", "visit", "donor"))} %$%
  {message("Splitting"); split(., variable)}

The calculations take app. 11 h with 14k genes and 10k cells. We’ve added pre-calculated results as pseudotime_res_novis2.qs.

res <- mt %>% 
  sccore::plapply(\(y) {
    x = y %>% 
      filter(!visit == "vis2")
    
    fit_full <- gamm4(data = x, formula = value ~ s(pseudotime, by = factor(visit)), random = ~(1 | donor), REML = F)
    fit_reduced <- gamm4(data = x, formula = value ~ s(pseudotime), random = ~(1 | donor), REML = F)
    fit_none <- gamm4(data = x, formula = value ~ 1, random = ~(1 | donor), REML = F)
    
    ann <- anova(fit_full$mer, fit_reduced$mer, fit_none$mer)
    
    if (any(ann$`Pr(>Chisq)` <= 0.05, na.rm = T)) {
      residuals <- predict(fit_full$gam, se.fit = T)$fit %>% 
        unname()
      
      r.sq <- c(summary(fit_full$gam)$r.sq, summary(fit_reduced$gam)$r.sq) %>% 
        setNames(c("full", "reduced"))
      
      out <- list(annova = ann,
                  residuals = residuals,
                  r.sq = r.sq)
      
      return(out)
    }
  }, n.cores = 5, mc.preschedule = T, mc.cleanup = T, progress = F) # 10 cores fails, 5 is max tested that finishes

Plot

Load data. We need the sds_obj from Figure 6a. Also, for ease of use we provide slingshot pseudotime W/O visit 2 as pseudotime_novis2.qs.

res <- qread("pseudotime_res_novis2.qs", nthreads = 10) %>% 
  .[!sapply(., is.null)]

sds_obj <- qread("sds_obj.qs")

pseudotime <- qread("pseudotime_novis2.qs")

con <- qread("con_adipocytes_aspc.qs", nthreads = 10)

both <- res %>%
  sapply(\(gene) {
    ann <- gene$annova
    (ann[2, "Pr(>Chisq)"] <= 0.05 & ann[3, "Pr(>Chisq)"] <= 0.05)
  })

res.both <- res[both]

p.both <- res.both %>% 
  sapply(\(gene) gene$annova[3, "Pr(>Chisq)"])

tmp <- res.both %>% 
  lget("annova") %>% 
  lapply(dplyr::slice, 2:3) %>% 
  lapply(pull, AIC) %>%
  .[sapply(., \(x) abs(x[2]) > abs(x[1]))] %>% 
  bind_rows() %>% 
  t() %>% 
  as.data.frame() %>% 
  setNames(c("reduced", "full")) %>% 
  mutate(diff = abs(full) - abs(reduced)) %>% 
  mutate(diff.frac = diff / abs(reduced))

res.filter <- res[
  tmp %>% 
    filter(diff.frac >= 0.05) %>% 
    rownames()
]

cm.merged <- con$getJointCountMatrix() %>% 
  .[match(names(pseudotime), rownames(.)), colnames(.) %in% names(res.filter)]

visit = con$getDatasetPerCell()[rownames(cm.merged)] %>%
  as.character() %>%
  strsplit("_") %>%
  sget(3)

# Vis3
cm.merged.vis <- cm.merged[visit == "vis3", ]
pseudotime.vis <- pseudotime[rownames(cm.merged.vis)]
weights.vis <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.vis), rownames(.)), 1] + 1E-7

scFit <- cm.merged.vis %>% 
  Matrix::t() %>% 
  tradeSeq::fitGAM(pseudotime = pseudotime.vis, cellWeights = weights.vis, verbose = TRUE)

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

Smooth3 <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged.vis), tidy = FALSE, n=100)

# Vis1
cm.merged.vis <- cm.merged[visit == "vis1", ]
pseudotime.vis <- pseudotime[rownames(cm.merged.vis)]
weights.vis <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.vis), rownames(.)), 1] + 1E-7

scFit <- cm.merged.vis %>% 
  Matrix::t() %>% 
  tradeSeq::fitGAM(pseudotime = pseudotime.vis, cellWeights = weights.vis, verbose = TRUE)

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

Smooth1 <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged.vis), tidy = FALSE, n=100)

# Combine smooth
Smooth <- cbind(Smooth3, Smooth1)

# Average across replicates and scale
Smooth <- t(scale(t(Smooth)))

# Split smooth
Smooth33 <- Smooth[, 1:100]
Smooth11 <- Smooth[, 101:200]

# Vis3
# Seriate the results

Smooth33 <- Smooth33[ seriation::get_order(seriation::seriate(Smooth33, method="PCA_angle")), ]

## Registered S3 methods overwritten by 'registry':
##   method               from 
##   print.registry_field proxy
##   print.registry_entry proxy

col_fun = circlize::colorRamp2(c(-4, 0, 4), c("navy", "white", "firebrick"))

p3 <- Heatmap(Smooth33, 
              col = col_fun,
              cluster_columns=F, 
              cluster_rows=F, 
              show_column_names = F, 
              row_names_gp = grid::gpar(fontsize = 5)
)

# Vis1
Smooth11 %<>% .[p3@matrix %>% rownames(), ]

p1 <- Heatmap(Smooth11, 
              col = col_fun,
              cluster_columns=F, 
              cluster_rows=F, 
              show_column_names = F, 
              row_names_gp = grid::gpar(fontsize = 5)
)

p3

p1

Figure 7

Figures 7a-c

See Liana.ipynb.

Figure 7d

For calculation of data, see Liana.ipynb.

dat.raw <- read.delim("liana_res.csv",
                      sep = ",",
                      header = T) %>%
  mutate(visit = strsplit(sample, "_") %>%
           sget(3))

Function

lianaCircos <- function(df,
                        top.interactions = 30, 
                        text.size = 1, 
                        pal = RColorBrewer::brewer.pal(9, "Set1"), 
                        cell.types = c("Adipocytes", 
                                       "Myeloid immune cells", 
                                       "ASPCs", 
                                       "EC", 
                                       "SMCs", 
                                       "Lymphoid immune cells", 
                                       "Mast cells", 
                                       "Lymphatic ECs", 
                                       "Pericytes"),
                        big.gap = 5,
                        small.gap = 2,
                        arrow.width = 3,
                        link.ramp.rel = T,
                        link.sort = F,
                        scale = F,
                        arrow.head.width = 0.3,
                        arrow.head.length = 0.3,
                        link.ramp.col = c("navy", "grey", "firebrick")) {
  input_df <- df %>% 
    slice_max(order_by = score, n = top.interactions) %>% 
    mutate(source = paste0(source, " ")) %>% 
    mutate(source_lig = paste0(source, "|", ligand), 
           target_rec = paste0(target, "|", receptor))
  
  if (link.ramp.rel) {
    arr_wd <- rep(arrow.width, nrow(input_df))
  } else {
    arr_wd <- (((input_df$score - min(input_df$score))/(max(input_df$score) - min(input_df$score))) * (arrow.width)) + 1
  }
  
  # Colors and segments
  anno.col <- setNames(pal, 
                       cell.types) %>% 
    c(., "ASPCs " = unname(.["ASPCs"]))
  
  cell_cols <- anno.col[unique(c(unique(input_df$source), unique(input_df$target), "ASPCs "))]
  
  link_cols <- c()
  
  if (!link.ramp.rel) {
    for (i in input_df$source_lig) {
      link_cols <- c(link_cols, cell_cols[stringr::str_extract(i, 
                                                               "[^|]+")])
    }
  } else {
    input_df %<>% 
      arrange(score)
    
    df.down <- input_df %>% filter(score <= 0)
    link_down <- colorRampPalette(c(link.ramp.col[1], link.ramp.col[2]))(nrow(df.down))
    
    df.up <- input_df %>% filter(score > 0)
    link_up <- colorRampPalette(c(link.ramp.col[2], link.ramp.col[3]))(nrow(df.up))
    
    link_cols <- c(link_down, link_up)
  }
  
  segments <- unique(c(paste0(input_df$source, "|", input_df$ligand), 
                       paste0(input_df$target, "|", input_df$receptor)))
  
  grp <- stringr::str_extract(segments, "[^|]+") %>% 
    setNames(segments)
  
  # Redo colors
  cell_cols2 <- grp
  for (i in unique(grp)) {
    cell_cols2[cell_cols2 == i] <- cell_cols[i]
  }
  
  # Plot
  input_df %>% 
    select(source_lig, target_rec, score) %>%
    chordDiagram(directional = 1, 
                 group = grp,
                 scale = scale, 
                 diffHeight = 0.005, 
                 direction.type = c("arrows"),
                 link.arr.type = "triangle", 
                 annotationTrack = c(), 
                 preAllocateTracks = list(
                   list(track.height = 0.05),
                   list(track.height = 0.175),
                   list(track.height = 0.05)), 
                 big.gap = big.gap,
                 transparency = 1,
                 link.arr.lwd = arr_wd,
                 link.arr.col = link_cols,
                 link.arr.length = arrow.head.length,
                 link.arr.width = arrow.head.width,
                 small.gap = small.gap
    )
  
  circos.track(track.index = 2, panel.fun = function(x, y) {
    circos.text(CELL_META$xcenter, 
                CELL_META$ylim[1], 
                stringr::str_extract(CELL_META$sector.index, "[^|]+$"), 
                facing = "clockwise", 
                niceFacing = TRUE, 
                adj = c(0, 0.55), 
                cex = 1)
  }, bg.border = NA)
  
  # Split segments
  for (l in segments) {
    highlight.sector(l, track.index = 3, col = cell_cols2[l])
  }
  
  # Add ligand/receptor track
  ## Ligand
  highlight.sector(input_df$source_lig, 
                   track.index = 1, 
                   col = "black", 
                   text = "Ligands", 
                   cex = 1, 
                   text.col = "white", 
                   niceFacing = TRUE)
  ## Receptor
  highlight.sector(input_df$target_rec, 
                   track.index = 1, 
                   col = "white", 
                   text = "Receptors", 
                   cex = 1, 
                   text.col = "black", 
                   border = "black", 
                   niceFacing = TRUE)
  
  # Legends
  minmax <- input_df %>% 
    pull(score) %>% 
    {pmax(abs(min(.)), max(.))} %>% 
    formatC(digits = 1) %>% 
    as.numeric()
  
  col.range = c(-minmax, 0, minmax)
  lgd_links = Legend(at = col.range, 
                     col_fun = colorRamp2(col.range, link.ramp.col), 
                     title_position = "topleft", 
                     title = "Links")
  
  lgd_ct <- Legend(labels = unique(c(input_df$source, input_df$target)), 
                   title = "Cell type", 
                   type = "points", 
                   legend_gp = gpar(col = "transparent"), 
                   background = cell_cols[unique(c(input_df$source, input_df$target))])
  
  lgd_list_vertical = packLegend(lgd_ct, lgd_links)
  
  draw(lgd_list_vertical, 
       just = c("left", "bottom"), 
       x = unit(5, "mm"), 
       y = unit(5, "mm"))
  
  circos.clear()
}

Plot

dat.plot <- dat.raw %>% 
  dplyr::rename(ligand = ligand_complex,
                receptor = receptor_complex,
                score = lrscore) %>%
  filter(visit %in% c("visit1", "visit3"),
         source == "ASPCs",
         ligand %in% c("COL18A1", "NID1", "JAG1", "FGF2", "VEGFA", "BMP1", "ANGPT1", "MFGE8", "C3", "IGF1", "ANXA1", "ALCAM"),
         receptor %in% c("ITGB5", "ITGAV", "CD46", "NRP1", "BMPR2", "ITGB1", "IGF2R", "INSR", "EGFR")) %>% 
  group_by(visit, ligand, receptor, source, target) %>% 
  summarize(score = mean(score)) %>% 
  ungroup() %>% 
  arrange(visit, source, target, ligand, receptor)

## `summarise()` has grouped output by 'visit', 'ligand', 'receptor', 'source'.
## You can override using the `.groups` argument.

dat.vis1 <- dat.plot %>% 
  filter(visit == "visit1",
         score > 0.8648) %>% 
  mutate(lrst = paste0(ligand, receptor, source, target))

dat.vis3 <- dat.plot %>% 
  mutate(lrst = paste0(ligand, receptor, source, target)) %>% 
  filter(visit == "visit3",
         lrst %in% dat.vis1$lrst) 

dat.rel <- dat.vis3 %>% 
  mutate(score = score - dat.vis1$score)

dat.rel %>% 
  lianaCircos()

Figure 7e

Preparation

# Load Conos object
con <- qread("con_adipocytes_aspc.qs", nthreads = 10)

anno.faps <- qread("anno_aspc.qs")
anno <- factor(c(anno.adipocytes, anno.faps))

cm.merged <- con$getJointCountMatrix(raw = T) %>% 
  Matrix::t() %>% 
  .[, colnames(.) %in% names(anno)]

# Use bins instead of anno
spc <- con$getDatasetPerCell()

anno.bin <- qread("sds_obj.qs")@assays@data@listData$pseudotime %>% 
  as.data.frame() %>% 
  filter(!is.na(Lineage1)) %>% 
  {setNames(.$Lineage1, rownames(.))} %>% 
  {data.frame(pseudotime = unname(.), anno = anno[names(.)], cid = names(.))} %>% 
  mutate(bin = cut(pseudotime, breaks = c(0, 0.005, 0.01, 0.0165, 0.023))) %>% 
  na.omit() %>% 
  mutate(bin = as.character(bin) %>% 
           strsplit(",") %>% 
           sget(2) %>% 
           gsub("]", "", ., fixed = T),
         sample = spc[cid]) %>% 
  mutate(sex = meta$sex[match(sample, meta$sample)],
         visit = strsplit(as.character(sample), "_") %>% sget(3)) %>% 
  mutate(anno.final = paste(sample, bin, sex, sep = "!!"))

# Create pseudo CM
anno.final <- anno.bin %>% 
  pull(anno.final) %>% 
  setNames(anno.bin$cid)

cm.pseudo <- sccore::collapseCellsByType(cm.merged %>% Matrix::t(), 
                                         groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo %<>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_|!!") %>% 
                                   sget(3) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T)

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

genes <- "C3"

idx <- cm.pseudo %>% 
  colnames() %>% 
  data.frame(id = .) %>% 
  mutate(vis = strsplit(id, "_|!!") %>% sget(3),
         bin = strsplit(id, "!!") %>% sget(2),
         sex = strsplit(id, "!!") %>% sget(3)) %>% 
  mutate(ord = order(vis, bin, sex))

x <- cm.pseudo %>% 
  .[match(genes, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
  .[idx$ord]

plot.dat <- x %>% 
  {data.frame(sample = names(.), 
              value = unname(.))} %>% 
  mutate(bin = strsplit(sample, "!!") %>% 
           sget(2),
         visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(labels = c("Visit 1", "Visit 2", "Visit 3")),
         sex = strsplit(sample, "!!") %>% sget(3)) %>% 
  mutate(sex_visit = paste(sex, gsub("Visit", "visit", visit), sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$bin)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(ct = factor(ct, levels = unique(plot.dat$bin))) %>% 
  arrange(ct) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  group_by(bin, sex) %>%
  wilcox_test(value~sex_visit, paired = T)

stat.test %<>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "bin", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
  arrange(desc(y.position))

# Plot
ggplot(plot.dat, aes(bin, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1), 
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", fill = "", title = "C3") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(ct, y = 1.5e3, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 0.2, hide.ns = F)# Wilcoxon

Extended Data Figure 1

Load data

cms <- con.major$samples %>% 
  lget("misc") %>% 
  lget("rawCounts") %>% 
  lapply(Matrix::t)

spc <- con.major$getDatasetPerCell()

Extended Data Figure 1a

cms %>% 
  lapply(\(x) diff(x@p)) %>% 
  sapply(median) %>% 
  {data.frame(sample = names(.),
              value = unname(.))} %>% 
  mutate(visit = strsplit(sample, "_") %>% 
           sget(3)) %>% 
  mutate(sample = factor(sample, levels = sample)) %>%
  ggplot(aes(sample, value, fill = visit)) +
  geom_col() +
  theme_bw() +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1, size = 6)) +
  labs(x = "", y = "Median genes", fill = "") +
  scale_fill_manual(values = ggsci::pal_jama()(3))

cms %>% 
  lapply(sparseMatrixStats::colSums2) %>% 
  sapply(median) %>% 
  {data.frame(sample = names(.), 
              value = unname(.))} %>% 
  mutate(visit = strsplit(sample, "_") %>% 
           sget(3)) %>% 
  mutate(sample = factor(sample, levels = sample)) %>%
  ggplot(aes(sample, value, fill = visit)) +
  geom_col() +
  theme_bw() +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1, size = 6)) +
  labs(x = "", y = "Median UMIs", fill = "") +
  scale_fill_manual(values = ggsci::pal_jama()(3))

Extended Data Figure 1b

WATLAS <- read.delim("watlas_labels_l1_adipose_anno_super.tsv")
rownames(WATLAS)=WATLAS$X
WATLAS=WATLAS[2:21]

# Define the order of columns and rows
column_order <- c("Adipocyte", "FAP", "VEC", "LEC", "SMC","Pericyte", "B.cell", "CD4..T.cell", "CD8..T.cell", "ILC", "NK.cell", "Mast",  
                  "Macrophage",  "Monocyte", "DC", "pDC",
                  "Endometrium", "Mesothelial", "Plasmablast", 
                  "Schwann" )

row_order <- c("Adipocytes", "ASPC", "EC", "lEC", "SMCs", "Pericytes", "B-cells", "T-cells", "Mast cells", "Myeloid")
WATLAS_ordered <- WATLAS[, rev(column_order)] # Reorder columns
WATLAS_ordered <- WATLAS_ordered[row_order, ] # Reorder rows

corrplot(WATLAS_ordered %>% t() %>% t(), method="color", col.lim=c(0, 1),
         col = rev(COL2("RdBu",200)),
         is.corr = F, 
         tl.col = "black",
         tl.pos = "lower",
         outline = "#C0C0C0",
         addgrid.col= NA)

Extended Data Figure 1c

varToPlot <- spc %>% 
  as.character() %>% 
  strsplit("_vis") %>% 
  sget(1) %>% 
  setNames(names(spc)) %>% 
  factor()

con.major$plotGraph(groups = varToPlot, 
                    plot.na = F, 
                    size = 0.1,
                    alpha = 0.1, 
                    embedding = "UMAP_refined7",
                    mark.groups = F,
                    show.labels = T,
                    show.legend = T,
                    title = "Donor") +
  labs(x = "UMAP1", y = "UMAP2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3)

Extended Data Figure 1d

var <- meta$sex

varToPlot <- grepl.replace(spc %>% 
                             as.character(), 
                           meta$sample, 
                           var) %>% 
  setNames(names(spc)) %>% 
  factor(labels = unique(var))

con.major$plotGraph(groups = varToPlot, 
                    plot.na = F, 
                    size = 0.1,
                    alpha = 0.1, 
                    embedding = "UMAP_refined7",
                    mark.groups = F,
                    show.labels = T,
                    show.legend = T,
                    title = "Sex") +
  labs(x = "UMAP1", y = "UMAP2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3) +
  scale_color_manual(values = c("firebrick", "navy"))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Extended Data Figure 1e

varToPlot <- spc %>% 
  as.character() %>% 
  strsplit("_|!!") %>% 
  sget(3) %>% 
  setNames(names(spc)) %>% 
  factor()

con.major$plotGraph(groups = varToPlot, 
                    plot.na = F, 
                    size = 0.1,
                    alpha = 0.1, 
                    embedding = "UMAP_refined7",
                    mark.groups = F,
                    show.labels = T,
                    show.legend = T,
                    title = "Visit") +
  labs(x = "UMAP1", y = "UMAP2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3) +
  scale_color_manual(values = ggsci::pal_jama()(3))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Extended Data Figure 1f

Calculate

spc <- con.major$getDatasetPerCell()

anno.sex <- data.frame(anno = unname(anno.major), 
                       cid = names(anno.major)) %>% 
  mutate(donor = spc[match(cid, names(spc))]) %>% 
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(anno_sex = paste(anno, tolower(sex), sep = ", ")) %>% 
  pull(anno_sex, cid)

res.major <- c("vis1", "vis2", "vis3") %>% 
  sapply(\(vis) {
    con <- con.major$samples %>% 
      .[!grepl(vis, names(.))] %>%
      Conos$new()
    
    if (vis == "vis1") {
      vis.ref <- "vis2"
      vis.target <- "vis3" 
    } else {
      vis.ref <- "vis1"
      if (vis == "vis2") {
        vis.target <- "vis3"
      } else {
        vis.target <- "vis2"
      }
    }
    
    sample.groups <- con$samples %>%
      names() %>%
      `names<-`(ifelse(grepl(vis.ref, .), vis.ref, vis.target), .)
    
    cao <- Cacoa$new(con,
                     sample.groups,
                     anno.sex,
                     ref.level = vis.ref,
                     target.level = vis.target,
                     n.cores = 32)
    
    cao$estimateCellLoadings()
    
    return(cao)
  }, simplify = F, USE.NAMES = T)

Plot

visits <- c("vis1", "vis2", "vis3")

p <- visits %>% 
  lapply(\(vis) {
    comp <- visits[!visits == vis]
    
    dat.p <- res.major[[vis]]$test.results$coda$padj
    
    dat.plot <- res.major[[vis]]$test.results$coda$loadings %>% 
      tibble::rownames_to_column(var = "anno") %>% 
      reshape2::melt(id.var = "anno") %>% 
      mutate(anno = renameAnnotation(anno, "Myeloid immune cells", "Myeloid ICs") %>% 
               renameAnnotation("Lymphoid immune cells", "Lymphoid ICs"))
    
    yvar <- dat.plot %>%
      group_by(anno) %>%
      summarize(var = mean(value)) %>%
      mutate(padj = dat.p[match(anno, names(dat.p))]) %>%
      arrange(padj, desc(abs(var)))
    
    dat.plot %<>% 
      mutate(anno = factor(anno, levels = rev(unique(yvar$anno))))
    
    ymax <- dat.plot %>%
      pull(value) %>%
      abs() %>%
      max()
    
    out <- dat.plot %>% 
      ggplot(aes(anno, value, fill = anno)) + 
      geom_hline(yintercept = 0, col = "black") +
      geom_jitter(alpha = 0.2, size = 0.3, col = "grey") +
      geom_boxplot(outliers = F) +
      coord_flip() +
      ylim(c(-ymax, ymax)) +
      theme_bw() + 
      theme(line = element_blank(),
            axis.text = element_text(color = "black")) +
      guides(fill = "none") +
      labs(title = paste0(comp[1], " vs ", comp[2]), x = "", y = "Loadings [AU]") +
      scale_fill_manual(values = pal.major)
    
    return(out)
  })

p

## [[1]]

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## 
## [[2]]

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## 
## [[3]]

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

Statistics

for (vis in visits) {
  comp <- visits[!visits == vis]
  message("Comparison: ", comp)
  print(res.major[[vis]]$test.results$coda$padj)
}

## Comparison: vis2vis3

##    Myeloid immune cells, male  Myeloid immune cells, female 
##                    0.01598402                    0.02131202 
## Lymphoid immune cells, female              Mast cells, male 
##                    0.02397602                    0.01598402 
##                 ASPCs, female                     ECs, male 
##                    0.12787213                    0.12787213 
##            Mast cells, female            Adipocytes, female 
##                    0.18724133                    0.38716839 
##                   ASPCs, male                   ECs, female 
##                    0.33566434                    0.51788212 
##   Lymphoid immune cells, male                    SMCs, male 
##                    0.78428238                    0.65389156 
##             Pericytes, female               Pericytes, male 
##                    0.69100131                    0.69100131 
##                  SMCs, female              Adipocytes, male 
##                    0.78428238                    0.86913087

## Comparison: vis1vis3

##    Myeloid immune cells, male  Myeloid immune cells, female 
##                    0.06393606                    0.06393606 
## Lymphoid immune cells, female   Lymphoid immune cells, male 
##                    0.06393606                    0.10229770 
##                    SMCs, male                     ECs, male 
##                    0.07992008                    0.24508825 
##              Adipocytes, male                 ASPCs, female 
##                    0.59540460                    0.61968800 
##            Adipocytes, female               Pericytes, male 
##                    0.61449661                    0.59540460 
##             Pericytes, female              Mast cells, male 
##                    0.61968800                    0.61968800 
##                   ASPCs, male            Mast cells, female 
##                    0.61968800                    0.86313686 
##                   ECs, female                  SMCs, female 
##                    0.96103896                    0.96103896

## Comparison: vis1vis2

##            Mast cells, female                 ASPCs, female 
##                     0.3942724                     0.3942724 
##               Pericytes, male    Myeloid immune cells, male 
##                     0.3942724                     0.5178821 
##              Mast cells, male             Pericytes, female 
##                     0.4395604                     0.5461205 
##   Lymphoid immune cells, male                     ECs, male 
##                     0.5461205                     0.5461205 
##  Myeloid immune cells, female            Adipocytes, female 
##                     0.5461205                     0.5461205 
##              Adipocytes, male Lymphoid immune cells, female 
##                     0.5461205                     0.5778837 
##                    SMCs, male                   ASPCs, male 
##                     0.5461205                     0.7331335 
##                  SMCs, female                   ECs, female 
##                     0.7392607                     0.7331335

Extended Data Figure 2

We integrated our data with the data from Hinte et al. that were graciously provided by the authors. As these data are not public, here we will show how we integrated the data and performed label transferring, but we will not run those steps here.

## Load data
dat.nefa <- qread("scAT_NEFA.qs", nthreads = 10)
con <- qread("con_major.qs", nthreads = 10)

## Integrate
cm <- GetAssayData(dat.nefa, assay = "RNA", layer = "counts")
p2.list <- split(colnames(dat.nefa), dat.nefa$condition) %>% 
  lapply(\(cid) cm[, cid]) %>% 
  lapply(basicP2proc, n.cores = 32, get.largevis = F, get.tsne = F, make.geneknn = F)

con$addSamples(p2.list)
con$n.cores <- 64
con$buildGraph()
con$findCommunities(method = leiden.community, resolution = 1)
con$embedGraph(method = "UMAP")

nefa.ids <- cm %>% 
  colnames() %>% 
  `names<-`(rep("NEFA", length(.)), .)

hinte.cls <- dat.nefa$cellType %>% 
  setNames(colnames(dat.nefa))

hinte.cond <- dat.nefa$condition %>% 
  setNames(colnames(dat.nefa))

# Label transfer, minor
anno.minor <- c("adipocytes", "fap", "immune", "vascular") %>% 
  sapply(\(x) paste0("anno_",x,".qs")) %>% 
  lapply(qread) %>% 
  setNames(rep("", 4)) %>% 
  unlist() %>% 
  scHelper::collapseAnnotation("Adipocytes") %>% 
  scHelper::renameAnnotation(c("Th","Th1"), "CD4 T cells") %>% 
  scHelper::renameAnnotation("Tem", "CD8 T cells") %>% 
  scHelper::renameAnnotation("NKT", "NK-like T cells") %>% 
  scHelper::renameAnnotation("cEC2", "cvEC") %>% 
  .[unique(names(.))]

anno.minor %<>% 
  c(qread("anno_major.qs") %>% .[. == "Mast cells"]) %>% 
  factor()

# Major
anno <- qread("anno_major") %>% 
  scHelper::renameAnnotation("FAPs", "ASPCs") %>% 
  scHelper::renameAnnotation("Tissue-resident macrophages", "Myeloid immune cells") %>% 
  scHelper::renameAnnotation("Endothelial cells", "ECs") %>% 
  scHelper::renameAnnotation("Lymphatic endothelial cells", "Lymphatic ECs") %>% 
  .[!. == "SMCs"]

anno.vasc <- qread("anno_vascular.qs") %>% 
  .[. %in% c("SMCs","Pericytes")]

anno <- factor(c(anno, anno.vasc)) %>% 
  .[match(unique(names(.)), names(.))]

anno.major <- anno[names(anno.minor)]

# Fix annotation issues
anno.minor <- anno.minor[!is.na(anno.minor)]
anno.major <- anno.major[!is.na(anno.major)]
matched <- names(anno.minor)[ (names(anno.minor) %in% names(anno.major))]
anno.major <- anno.major[ names(anno.major) %in% matched]
anno.minor <- anno.minor[ names(anno.minor) %in% matched]
anno.major <- anno.major[ duplicated(names(anno.major)) == FALSE]
anno.minor <- anno.minor[ duplicated(names(anno.minor)) == FALSE]

qsave(anno.minor, "anno_minor.qs")

## Label transfer
loft.transfer <- con$propagateLabels(labels = anno.major)
loft.transfer.minor <- con$propagateLabels(labels = anno.minor)

# Loft annotations
con$clusters$loft_transfer <- list(groups = loft.transfer$labels)
con$clusters$loft_transfer_uncertainty <- list(groups = formatC(loft.transfer$uncertainty, 1) %>% {setNames(as.numeric(.), names(.))} %>% .[!grepl("!!", names(.))])
con$clusters$loft_transfer_minor <- list(groups = loft.transfer.minor$labels)
con$clusters$loft_transfer_minor_uncertainty <- list(groups = formatC(loft.transfer.minor$uncertainty, 1) %>% {setNames(as.numeric(.), names(.))} %>% .[!grepl("!!", names(.))])
con$clusters$loft_clusters <- list(groups = anno.major)
con$clusters$loft_clusters_minor <- list(groups = anno.minor)

# Hinte annotations
con$clusters$hinte_transfer <- list(groups = hinte.transfer$labels)
con$clusters$hinte_clusters <- list(groups = hinte.cls)
con$clusters$hinte_condition <- list(groups = hinte.cond)
con$clusters$hinte_clusters_major <- list(groups = con$clusters$hinte_clusters$groups %>%
                                            collapseAnnotation(c("APCs", "Endo", "Macro")) %>%
                                            renameAnnotation("Endo", "EndoCs") %>% 
                                            renameAnnotation("FAPs", "APCs"))

qsave(con, "hinte_integration.qs", nthreads = 10)

Load and prepare

con.integration <- qread("hinte_integration.qs")
loft.transfer <- con.integration$propagateLabels(anno.major)

## Stop after 21 iterations. Norm: 0.024024
## Min weight: 1.67017e-05, max weight: 0.367879, fading: (10, 0.1)

cm.merged <- con.integration$getJointCountMatrix()
spc <- con.integration$getDatasetPerCell()

Extended Data Figure 2a

con.integration$plotGraph(groups = loft.transfer$labels,
                          font.size = 5) +
  scale_color_manual(values = pal.major) +
  theme(line = element_blank())

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Extended Data Figure 2b

con.integration$plotGraph(groups = loft.transfer$labels %>% .[!grepl("!!", names(.))], 
                          plot.na = F,
                          font.size = 5) +
  scale_color_manual(values = pal.major) +
  theme(line = element_blank())

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Extended Data Figure 2c

# Calculate
cm.pseudo <- cm.merged %>%
  sccore::collapseCellsByType(spc) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo %<>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_") %>% 
                                   sget(1) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T)

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

# We rotate Hinte into Loft sample PCA space
cm.loft <- cm.pseudo[, grepl("vis", colnames(cm.pseudo))] %>% 
  as.data.frame() %>%
  filter(rowSums(.) > 0)
cm.hinte <- cm.pseudo[, !grepl("vis", colnames(cm.pseudo))] %>% 
  as.data.frame() %>%
  filter(rowSums(.) > 0)

genesToKeep <- intersect(rownames(cm.loft), rownames(cm.hinte))

pc.res <- 
  cm.loft[genesToKeep, ] %>% 
  t() %>% 
  prcomp(center = T, 
         scale = T)

pc.tmp <- cm.hinte[genesToKeep, ] %>% 
  as.data.frame() %>% 
  filter(rowSums(.) > 0) %>% 
  t() %>% 
  scale(pc.res$center, pc.res$scale) %*% pc.res$rotation

# Plot
dat.plot <- rbind(pc.res$x, pc.tmp) %>%
  data.frame() %>% 
  mutate(id = rownames(.)) %>% 
  mutate(type = grepl.replace(id, c("lean", "t0", "vis1", "vis2", "t1", "vis3"), c("lean", "obese", "obese", "short-term WL", "regressed", "regressed")) %>% factor(levels = c("lean", "obese", "short-term WL", "regressed")),
         sex = meta$sex[match(id, meta$sample)] %>% 
           as.character())

dat.plot$sex[is.na(dat.plot$sex)] <- "Hinte"

dat.plot %>% 
  mutate(sex = factor(sex, levels = c("Female", "Male", "Hinte"), labels = c("Loft, female", "Loft, male", "Hinte"))) %>% 
  ggplot(aes(PC1, PC2, col = type, shape = sex)) +
  geom_point(
    data = filter(dat.plot, sex == "Hinte"),
    aes(PC1, PC2), color = "yellow3", shape = 15, size = 5, alpha = 0.7) +
  geom_point(size = 3) +
  theme_bw() +
  scale_color_manual(values = c("purple", brewer.pal(9, "Greens")[c(3, 6, 9)])) +
  theme(line = element_blank()) +
  labs(shape = "Study and sex", col = "Sample type")

Variation

pc.res$x %>% 
  data.frame() %>% 
  mutate(var = pc.res$sdev^2 / sum(pc.res$sdev^2)) %>% 
  pull(var)

##  [1] 2.133072e-01 1.147448e-01 8.491969e-02 6.837396e-02 4.755415e-02
##  [6] 4.037000e-02 3.178713e-02 3.010272e-02 2.742972e-02 2.362458e-02
## [11] 2.092853e-02 1.954507e-02 1.843777e-02 1.685591e-02 1.585535e-02
## [16] 1.566574e-02 1.452216e-02 1.406167e-02 1.369267e-02 1.255830e-02
## [21] 1.197001e-02 1.154230e-02 1.093551e-02 1.070401e-02 9.689988e-03
## [26] 9.518977e-03 8.595509e-03 8.244777e-03 7.987228e-03 7.534686e-03
## [31] 6.949427e-03 6.798175e-03 6.464972e-03 5.915445e-03 5.336252e-03
## [36] 5.097727e-03 4.995801e-03 4.669091e-03 4.397231e-03 4.217893e-03
## [41] 4.097873e-03 4.575723e-32

Extended Data Figure 2d

pc.res <- 
  cm.hinte[genesToKeep, ] %>% 
  t() %>% 
  prcomp(center = T, 
         scale = T)

pc.tmp <- cm.loft[genesToKeep, ] %>% 
  as.data.frame() %>% 
  filter(rowSums(.) > 0) %>% 
  t() %>% 
  scale(pc.res$center, pc.res$scale) %*% pc.res$rotation

# Plot
dat.plot <- rbind(pc.res$x, pc.tmp) %>%
  data.frame() %>% 
  mutate(id = rownames(.)) %>% 
  mutate(type = grepl.replace(id, c("lean", "t0", "vis1", "vis2", "t1", "vis3"), c("lean", "obese", "obese", "short-term WL", "regressed", "regressed")) %>% factor(levels = c("lean", "obese", "short-term WL", "regressed")),
         sex = meta$sex[match(id, meta$sample)] %>% 
           as.character())

dat.plot$sex[is.na(dat.plot$sex)] <- "Hinte"

dat.plot %>% 
  mutate(sex = factor(sex, levels = c("Female", "Male", "Hinte"), labels = c("Loft, female", "Loft, male", "Hinte"))) %>% 
  ggplot(aes(PC1, PC2, col = type, shape = sex)) +
  geom_point(
    data = filter(dat.plot, sex == "Hinte"),
    aes(PC1, PC2), color = "yellow3", shape = 15, size = 5, alpha = 0.7) +
  geom_point(size = 3) +
  theme_bw() +
  scale_color_manual(values = c("purple", brewer.pal(9, "Greens")[c(3, 6, 9)])) +
  theme(line = element_blank()) +
  labs(shape = "Study and sex", col = "Sample type")

Variation

pc.res$x %>% 
  data.frame() %>% 
  mutate(var = pc.res$sdev^2 / sum(pc.res$sdev^2)) %>% 
  pull(var)

## [1] 3.975304e-01 2.327407e-01 2.067138e-01 1.630152e-01 2.705167e-31

Extended Data Figure 2e

tmp <- loft.transfer$labels %>%
  renameAnnotation("Lymphoid immune cells", "Lymphoid ICs") %>%
  renameAnnotation(c("Myeloid immune cells"), "Myeloid ICs") %>% 
  {data.frame(cid = names(.), anno = unname(.))} %>% 
  filter(!grepl("!!", cid)) %>% 
  mutate(sample = con.integration$clusters$hinte_condition$groups[match(cid, names(con.integration$clusters$hinte_condition$groups))])

tmp.sum <- tmp %>% 
  group_by(sample) %>% 
  summarize(total = n()) %>% 
  split(., .$sample)

tmp.sum.ct <- tmp %>% 
  group_by(sample, anno) %>% 
  summarize(total = n()) %>% 
  split(., .$sample)

## `summarise()` has grouped output by 'sample'. You can override using the
## `.groups` argument.

tmp.prop <- Map(\(ct, tot, an) setNames(ct$total / tot$total, an), ct = tmp.sum.ct, tot = tmp.sum, an = lapply(tmp.sum.ct, pull, anno))

df.plot <- tmp.prop %>% 
  {Map(\(nn, x) data.frame(sample = nn, anno = names(x), value = unname(x)), nn = names(.), x = .)} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno) %>% 
           factor(levels = levels(.)[c(1, 2, 5, 8, 3, 7, 6, 4, 9)]) %>% 
           renameAnnotation("Lymphatic ECs", "LECs"))

df.plot %>% 
  rbind(c("st_t0", "LECs", 0)) %>% # Adding an artificial 0 as LECs aren't detected in all conditions
  mutate(sample = strsplit(sample, "_") %>% sapply(\(x) if (length(x) == 1) x[1] else x[2]),
         anno = factor(anno, levels = c("Adipocytes", "ASPCs", "ECs", "Myeloid ICs", "LECs", "SMCs", "Pericytes", "Lymphoid ICs", "Mast cells")),
         sample = factor(sample, levels = c("lean", "t0", "t1"), labels = c("Lean", "Obese", "Regressed")),
         value = as.numeric(value)) %>% 
  ggplot(aes(anno, value)) +
  stat_summary(aes(fill = sample), fun = "mean", geom = "bar", position = position_dodge()) +
  geom_point(aes(col = sample), position = position_dodge(width = 0.9)) +
  theme_bw() + 
  labs(x = "", y = "% proportion per sample", title = "Loft to Hinte transfer, major annotation", fill = "", col = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1), 
        axis.text = element_text(colour = "black")) +
  scale_fill_manual(values = c(brewer.pal(9, "Greens")[c(3,6,9)])) +
  scale_color_manual(values = c(brewer.pal(9, "Purples")[c(9,7,5)]))

Extended Data Figure 2g

size_data <- read.csv("Adipocyte_size.csv",sep = ",", header = TRUE)
condition_levels <- c("F1", "F2", "F3", "M1", "M2", "M3")

ggplot(size_data, aes(x = Diameter, y = factor(Condition, levels = rev(condition_levels)), fill = Condition)) +
  ggridges::geom_density_ridges(scale = 0.9) +
  xlab(expression(paste("Diameter (µm)"))) + 
  ylab("Condition") + 
  ggtitle("Adipocyte diameter distribution") + 
  theme(legend.position="none", plot.title = element_text(hjust = 0.5, size = 14, face = "bold"), text = element_text(family = "serif", size = 12)) +
  scale_fill_manual(values = c("#F8B3BB", "#BD6563", "#7D2A25", "#ABD1D4", "#62649E", "#3E4282")) +
  theme(rect = element_rect(fill = "transparent"))

## Picking joint bandwidth of 3.16

Extended Data Figure 3

Load data

con <- qread("con_vascular.qs", nthreads = 10)
anno.vascular <- qread("anno_vascular.qs")

spc <- con$getDatasetPerCell()

Extended Data Figure 3a

varToPlot <- spc %>% 
  as.character() %>% 
  strsplit("_vis") %>% 
  sget(1) %>% 
  setNames(names(spc)) %>% 
  factor()

con$plotGraph(groups = varToPlot, 
              plot.na = F, 
              size = 0.3,
              alpha = 0.1, 
              embedding = "UMAP",
              mark.groups = F,
              show.labels = T,
              show.legend = T,
              title = "Donor") +
  labs(x = "UMAP2", y = "UMAP2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3)

Extended Data Figure 3b

var <- meta$sex

varToPlot <- grepl.replace(spc %>% 
                             as.character(), 
                           meta$sample, 
                           var) %>% 
  setNames(names(spc)) %>% 
  factor(labels = unique(var))

con$plotGraph(groups = varToPlot, 
              plot.na = F, 
              size = 0.1,
              alpha = 0.1, 
              embedding = "UMAP",
              mark.groups = F,
              show.labels = T,
              show.legend = T,
              title = "Sex") +
  labs(x = "UMAP1", y = "UMAP2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3) +
  scale_color_manual(values = c("firebrick", "navy"))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Extended Data Figure 3c

varToPlot <- spc %>% 
  as.character() %>% 
  strsplit("_|!!") %>% 
  sget(3) %>% 
  setNames(names(spc)) %>% 
  factor()

con$plotGraph(groups = varToPlot, 
              plot.na = F, 
              size = 0.3,
              alpha = 0.1, 
              embedding = "UMAP",
              mark.groups = F,
              show.labels = T,
              show.legend = T,
              title = "Visit") +
  labs(x = "UMAP1", y = "UMAP2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3) +
  scale_color_manual(values = ggsci::pal_jama()(3))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Extended Data Figure 3d

WATLAS <- read.delim("watlas_labels_l2_adipose_anno.tsv")
rownames(WATLAS)=WATLAS$X

# Define Vascular cells
column_order <- c("SOX5..VEC",  "BTNL9..VEC", "ACKR1..VEC", "LEC", "SMC", "Pericyte")
row_order <- c("arEC", "cEC", "cEC2","vEC", "lEC", "SMCs", "Pericytes")
WATLAS_ordered <- WATLAS[, rev(column_order)] # Reorder columns
WATLAS_ordered <- WATLAS_ordered[row_order, ] # Reorder rows

corrplot(WATLAS_ordered %>% t() %>% t(), method="color", col.lim=c(0, 1),
         col = rev(COL2("RdBu",200)),
         is.corr = F, 
         tl.col = "black",
         tl.pos = "lower",
         outline = "#C0C0C0",
         addgrid.col= NA)

Extended Data Figure 3e

Calculate

anno.minor <- qread("anno_minor.qs")

spc <- con.major$getDatasetPerCell() %>% 
  .[match(names(anno.minor), names(.))]

anno.minor.sex <- anno.minor %>% 
  data.frame(anno = ., cid = names(.), donor = unname(spc)) %>% 
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(anno_sex = paste(anno, sex, sep = "_")) %$% 
  setNames(anno_sex, cid) %>% 
  factor()

# Calculate
res.minor <- c("vis1", "vis2", "vis3") %>% 
  sapply(\(vis) {
    con <- con.major$samples %>% 
      .[!grepl(vis, names(.))] %>%
      Conos$new()
    
    if (vis == "vis1") {
      vis.ref <- "vis2"
      vis.target <- "vis3" 
    } else {
      vis.ref <- "vis1"
      if (vis == "vis2") {
        vis.target <- "vis3"
      } else {
        vis.target <- "vis2"
      }
    }
    
    sample.groups <- con$samples %>%
      names() %>%
      `names<-`(ifelse(grepl(vis.ref, .), vis.ref, vis.target), .)
    
    cao <- Cacoa$new(con,
                     sample.groups,
                     anno.minor.sex,
                     ref.level = vis.ref,
                     target.level = vis.target,
                     n.cores = 32)
    
    cao$estimateCellLoadings()
    
    return(cao)
  }, simplify = F, USE.NAMES = T)

Plot

vasc.pal <- RColorBrewer::brewer.pal(9, "Purples")[-c(1,2)][c(1,4,7,2,6,3,5)] %>% 
  setNames(levels(qread("anno_vascular.qs"))) %>% 
  {setNames(c(rep(., 3)), c(names(.), paste(names(.), "Female", sep = "_"), paste(names(.), "Male", sep = "_")))}

visits <- c("vis1", "vis2", "vis3")

p <- visits %>% 
  lapply(\(vis) {
    comp <- visits[!visits == vis]
    
    dat.p <- res.minor[[vis]]$test.results$coda$padj
    
    dat.plot <- res.minor[[vis]]$test.results$coda$loadings %>% 
      tibble::rownames_to_column(var = "anno") %>% 
      reshape2::melt(id.var = "anno") %>% 
      filter(anno %in% names(vasc.pal))
    
    yvar <- dat.plot %>%
      group_by(anno) %>%
      summarize(var = mean(value)) %>%
      mutate(padj = dat.p[match(anno, names(dat.p))]) %>%
      arrange(padj, desc(abs(var)))
    
    dat.plot %<>% 
      mutate(anno = factor(anno, levels = rev(unique(yvar$anno))))
    
    ymax <- dat.plot %>%
      pull(value) %>%
      abs() %>%
      max()
    
    out <- dat.plot %>% 
      ggplot(aes(anno, value, fill = anno)) + 
      geom_hline(yintercept = 0, col = "black") +
      geom_jitter(alpha = 0.2, size = 0.3, col = "grey") +
      geom_boxplot(outliers = F) +
      coord_flip() +
      ylim(c(-ymax, ymax)) +
      theme_bw() + 
      theme(line = element_blank(),
            axis.text = element_text(color = "black")) +
      guides(fill = "none") +
      labs(title = paste0(comp[1], " vs ", comp[2]), x = "", y = "Loadings [AU]") +
      scale_fill_manual(values = vasc.pal)
    
    return(out)
  })

p

## [[1]]

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## 
## [[2]]

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## 
## [[3]]

Statistics

for (vis in visits) {
  comp <- visits[!visits == vis]
  message("Comparison: ", comp)
  print(res.minor[[vis]]$test.results$coda$padj %>% .[names(.) %in% names(vasc.pal)])
}

## Comparison: vis2vis3

##       vEC_Female        cvEC_Male         lEC_Male         vEC_Male 
##       0.01731602       0.01731602       0.06611570       0.01731602 
##      SMCs_Female Pericytes_Female         cEC_Male       lEC_Female 
##       0.10389610       0.04155844       0.01731602       0.41558442 
##       cEC_Female        SMCs_Male      arEC_Female   Pericytes_Male 
##       0.04155844       0.03896104       0.10389610       0.08658009 
##      cvEC_Female        arEC_Male 
##       0.22134387       0.15112161

## Comparison: vis1vis3

##       vEC_Female         vEC_Male        SMCs_Male         lEC_Male 
##       0.00944510       0.00944510       0.00944510       0.03280930 
##         cEC_Male   Pericytes_Male      SMCs_Female       cEC_Female 
##       0.00944510       0.00944510       0.16783217       0.00944510 
##        arEC_Male Pericytes_Female        cvEC_Male       lEC_Female 
##       0.00944510       0.03055768       0.09937888       0.66955267 
##      arEC_Female      cvEC_Female 
##       0.66493506       0.82886003

## Comparison: vis1vis2

##        cvEC_Male      cvEC_Female         vEC_Male        SMCs_Male 
##        0.1558442        0.4917749        0.4917749        0.4917749 
##   Pericytes_Male      arEC_Female         lEC_Male       lEC_Female 
##        0.4917749        0.4917749        0.7713499        0.8255528 
## Pericytes_Female       vEC_Female        arEC_Male         cEC_Male 
##        0.7713499        0.7713499        0.8410637        0.8410637 
##      SMCs_Female       cEC_Female 
##        0.9660127        0.9890110

Extended Data Figure 3f

watlas_data <- read.delim("watlas_mapped_percent.tsv", stringsAsFactors = FALSE)
watlas_data_count <- read.delim("watlas_mapped_counts.tsv", stringsAsFactors = FALSE)
watlas_data_count <- watlas_data_count %>%
  rowwise() %>%
  mutate(row_sum = sum(c_across(where(is.numeric)), na.rm = TRUE))  # Sum across numeric columns
valid_samples <- watlas_data_count %>%
  filter(row_sum >= 1000) %>%
  pull(sample)  # Extracts the valid sample IDs

filtered_data <- watlas_data %>%
  filter(type == "sn", tissue == "AT", depot == "SAT", grepl("Hs", sample))
filtered_data <- filtered_data %>%
  filter(sample %in% valid_samples)

long_data_VEC <- filtered_data %>%
  select(wtstatus, arEC, cEC, vEC,  lEC, SMCs, Pericytes) %>%  # Select relevant columns
  tidyr::pivot_longer(cols = -wtstatus, names_to = "Cell_Type", values_to = "Percentage")

long_data_VEC %>% 
  mutate(Cell_Type = factor(Cell_Type, levels = c("arEC", "cEC", "vEC", "lEC", "Pericytes", "SMCs"))) %>% 
  ggplot(aes(x = Cell_Type, y = Percentage, fill = wtstatus)) +
  geom_boxplot(position = position_dodge(width = 0.8)) +
  geom_point(aes(group = wtstatus), position = position_jitterdodge(jitter.width = 0.3, dodge.width = 0.8), alpha = 0.3, ) +
  theme_bw() +
  labs(title = "Comparison of Cell Types in Lean vs Obese",
       x = "Cell Type",
       y = "Percentage",
       fill = "") +
  scale_fill_manual(values = c(lean = "#26547c", obese = "#ef476f")) +
  stat_compare_means(aes(group = wtstatus), method = "wilcox.test", label = "p.signif", ) +
  theme(axis.text = element_text(color = "black"),
        line = element_blank())

Extended Data Figure 3g

We integrated our data with the data from Hinte et al. that were graciously provided by the authors. As these data are not public, here we will only show how we plotted the data.

tmp <- con.integration$clusters$loft_transfer_minor$groups %>%
  {data.frame(cid = names(.), anno = unname(.))} %>% 
  filter(!grepl("!!", cid)) %>% 
  mutate(sample = con.integration$clusters$hinte_condition$groups[match(cid, names(con.integration$clusters$hinte_condition$groups))])

tmp.sum <- tmp %>% 
  group_by(sample) %>% 
  summarize(total = n()) %>% 
  split(., .$sample)

tmp.sum.ct <- tmp %>% 
  group_by(sample, anno) %>% 
  summarize(total = n()) %>% 
  split(., .$sample)

## `summarise()` has grouped output by 'sample'. You can override using the
## `.groups` argument.

tmp.prop <- Map(\(ct, tot, an) setNames(ct$total / tot$total, an), ct = tmp.sum.ct, tot = tmp.sum, an = lapply(tmp.sum.ct, pull, anno))

df.plot <- tmp.prop %>%
  {Map(\(nn, x) data.frame(sample = nn, anno = names(x), value = unname(x)), nn = names(.), x = .)} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(anno)),
         type = sapply(sample, \(x) if (grepl("lean", x)) "Lean" else if (grepl("t0", x)) "Obese" else "Regressed"))

cts <- c("arEC", "cEC", "vEC", "cvEC", "lEC", "Pericytes", "SMCs")

p <- df.plot %>% 
  filter(anno %in% cts) %>% 
  mutate(anno = factor(anno, levels = cts)) %>% 
  rbind(data.frame(sample = c("rg_t0", "rg_t1"), anno = c("lEC", "lEC"), value = c(0, 0), type = c("Obese", "Regressed"))) %>%
  ggplot(aes(anno, value)) + 
  stat_summary(mapping = aes(fill = type), geom = "bar", fun = "mean", position = "dodge") +
  geom_point(aes(col = type), position = position_dodge(width = 0.9)) +
  theme_bw() +
  labs(x = "", y = "% proportion per sample", title = "Loft to Hinte transfer, minor annotation", col = "", fill = "") +
  theme(axis.text = element_text(colour = "black"),
        line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  scale_fill_manual(values = brewer.pal(9, "Greens")[c(3, 6, 9)]) +
  scale_color_manual(values = brewer.pal(9, "Purples")[c(9, 7, 5)])

p

Extended Data Figures 3h,i

# Metadata
metadata.all <- read.delim("Meta_Data_WL_Select.txt", h=T, dec = ",") %>% 
  mutate(sex = factor(sex, labels = c("Female", "Male")),
         donor = gsub("D", "Donor_", recordid) %>% 
           gsub("V", "vis", .))

# Bulk data
cm.bulk <- read.delim("Bulk_norm_counts.txt", header = T) %>% 
  .[match(unique(.$hgnc_symbol), .$hgnc_symbol), ] %>% 
  `rownames<-`(.$hgnc_symbol) %>% 
  select(-hgnc_symbol, -ensembl_gene_id, -entrezgene_id) %>% 
  scale()

colnames(cm.bulk) %<>% 
  gsub("D", "Donor_", .) %>% 
  gsub("V", "vis", .)

Extended Data Figure 3h

See Figure 2d for model setup

# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major, 
                 cell.groups = anno.major, 
                 ref.level = "vis2", 
                 target.level = "vis3", 
                 sample.groups = con.major$samples %>% 
                   names() %>% 
                   strsplit("_") %>% 
                   sapply('[[', 3) %>% 
                   gsub(1, 2, .) %>% 
                   `names<-`(con.major$samples %>% 
                               names()),
                 sample.groups.palette = ggsci::pal_jama()(7))

cao$sample.groups <- con.major$samples %>% 
  names() %>% 
  strsplit("_") %>% 
  sapply('[[', 3) %>% 
  `names<-`(con.major$samples %>% 
              names()) %>% 
  gsub("vis", "Visit ", .)

anno.comb <- anno.major[!names(anno.major) %in% names(anno.vascular)] %>% 
  {factor(c(., anno.vascular))}

# Get proportions
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>% 
  {100 * ./rowSums(.)} %>% 
  as.data.frame() %$% 
  setNames(cEC, rownames(.))

tmp <- qread("cEC_model.qs")

lasso_model <- tmp$lasso_model
best_lambda <- tmp$lasso_cv$lambda.min

cm.bulk.mat <- cm.bulk %>% 
  as.data.frame() %>% 
  filter(rownames(.) %in% rownames(coef(lasso_model, s = best_lambda))) %>% 
  as.matrix() %>% 
  t()

# Now, train a final model on all data for visualization and gene selection
cm.test <- cm.bulk.mat %>% 
  .[rownames(.) %in% names(proportions), ]

lasso_cv <- cv.glmnet(cm.test, proportions, alpha = 1, nfolds = 40)

## Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
## fold

best_lambda <- lasso_cv$lambda.min
lasso_model <- glmnet(cm.test, proportions, alpha = 1, lambda = best_lambda)

# Predict on all samples using the final model
lasso.predict.all <- predict(lasso_model, newx = cm.test, s = best_lambda)
r2.all <- cor(proportions, lasso.predict.all)^2

data.frame(Proportions = proportions,
           Predictions = unname(lasso.predict.all)) %>% 
  ggplot(aes(Proportions, Predictions)) + 
  geom_point() +
  theme_bw() +
  theme(axis.text = element_text(colour = "black")) +
  labs(title = paste0("r2 = ", formatC(r2.all, digits = 3)), y = "Predicted proportions (%)", x = "Proportions (%)") +
  geom_abline(intercept = 0, slope = 1, colour = "firebrick")

Extended Data Figure 3i

See Figure 2e for model setup

# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major, 
                 cell.groups = anno.major, 
                 ref.level = "vis2", 
                 target.level = "vis3", 
                 sample.groups = con.major$samples %>% 
                   names() %>% 
                   strsplit("_") %>% 
                   sapply('[[', 3) %>% 
                   gsub(1, 2, .) %>% 
                   `names<-`(con.major$samples %>% 
                               names()),
                 sample.groups.palette = ggsci::pal_jama()(7))

cao$sample.groups <- con.major$samples %>% 
  names() %>% 
  strsplit("_") %>% 
  sapply('[[', 3) %>% 
  `names<-`(con.major$samples %>% 
              names()) %>% 
  gsub("vis", "Visit ", .)

anno.comb <- anno.major[!names(anno.major) %in% names(anno.vascular)] %>% 
  {factor(c(., anno.vascular))}

# Get proportions
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>% 
  {100 * ./rowSums(.)} %>% 
  as.data.frame() %$% 
  setNames(vEC, rownames(.))

tmp <- qread("vEC_model.qs")

lasso_model <- tmp$lasso_model
best_lambda <- tmp$lasso_cv$lambda.min

cm.bulk.mat <- cm.bulk %>% 
  as.data.frame() %>% 
  filter(rownames(.) %in% rownames(coef(lasso_model, s = best_lambda))) %>% 
  as.matrix() %>% 
  t()

# Now, train a final model on all data for visualization and gene selection
cm.test <- cm.bulk.mat %>% 
  .[rownames(.) %in% names(proportions), ]

lasso_cv <- cv.glmnet(cm.test, proportions, alpha = 1, nfolds = 40)

## Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
## fold

best_lambda <- lasso_cv$lambda.min
lasso_model <- glmnet(cm.test, proportions, alpha = 1, lambda = best_lambda)

# Predict on all samples using the final model
lasso.predict.all <- predict(lasso_model, newx = cm.test, s = best_lambda)
r2.all <- cor(proportions, lasso.predict.all)^2

data.frame(Proportions = proportions,
           Predictions = unname(lasso.predict.all)) %>% 
  ggplot(aes(Proportions, Predictions)) + 
  geom_point() +
  theme_bw() +
  theme(axis.text = element_text(colour = "black")) +
  labs(title = paste0("r2 = ", formatC(r2.all, digits = 3)), y = "Predicted proportions (%)", x = "Proportions (%)") +
  geom_abline(intercept = 0, slope = 1, colour = "firebrick")

Extended Data Figure 4

Extended Data Figure 4a

Load data

## Plot Individual genes from RNA-seq data
Bulk_norm_counts <- read.delim("Bulk_norm_counts.txt", h=T)
colData <- read.delim("Meta_Data_WL_Select.txt", h=T, dec = ",")

gene_of_interest <- "TNF"

# Subset norm_counts to get counts for the gene of interest
gene_counts <- Bulk_norm_counts[Bulk_norm_counts$hgnc_symbol == gene_of_interest, ]
rownames(gene_counts)=gene_counts$hgnc_symbol
gene_counts=gene_counts[4:96]

# Reshape gene_counts into long format and merge with colData
gene_counts_long <- gene_counts %>%
  tibble::rownames_to_column(var = "gene") %>%  # Create a 'gene' column from row names
  tidyr::pivot_longer(cols = -gene, names_to = "recordid", values_to = "count")

gene_counts_long <- gene_counts_long %>%
  left_join(colData, by = "recordid")  # Assuming recordid matches between gene_counts and colData

# Define a helper function to map p-values to stars
pval_to_stars <- function(p) {
  if (p < 0.0001) {
    return("****")
  } else if (p < 0.001) {
    return("***")
  } else if (p < 0.01) {
    return("**")
  } else if (p < 0.05) {
    return("*")
  } else {
    return("ns")  # Not significant
  }
}

# Perform a Kruskal-Wallis test separately for each gender
kw_res_F <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "F"))
kw_res_M <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "M"))

# Extract Kruskal-Wallis p-values
kw_pval_F <- kw_res_F$p.value
kw_pval_M <- kw_res_M$p.value

# Perform paired Wilcoxon tests for each gender with Holm's correction
paired_wilcox <- function(data, sex_group) {
  data_sex <- data %>% filter(sex == sex_group)
  pairwise_tests <- list(
    visit1_vs_visit2 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_2") %>% pull(count),
      paired = TRUE
    )$p.value,
    visit1_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
      paired = TRUE
    )$p.value,
    visit2_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_2") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
      paired = TRUE
    )$p.value
  )
  corrected_pvals <- p.adjust(unlist(pairwise_tests), method = "holm")
  return(corrected_pvals)
}

wilcox_pvals_F <- paired_wilcox(gene_counts_long, "F")
wilcox_pvals_M <- paired_wilcox(gene_counts_long, "M")

# Map the Wilcoxon p-values to stars
wilcox_stars_F <- sapply(wilcox_pvals_F, pval_to_stars)
wilcox_stars_M <- sapply(wilcox_pvals_M, pval_to_stars)

# Create the boxplot with significance annotations
plot <- ggplot(gene_counts_long, aes(x = interaction(visit, sex), y = count, fill = visit)) +
  geom_boxplot(outlier.shape = NA) +  # Remove outliers
  geom_jitter(color = "grey", size = 1, width = 0.2, alpha = 0.5) +  # Add individual points
  labs(title = "Gene Expression Across Visits and Genders", x = "Visit and Sex", y = "Gene Count") +
  theme_minimal() +
  theme(plot.title = element_text(size = 10)) +
  geom_signif(
    comparisons = list(
      c("visit_1.F", "visit_2.F"), 
      c("visit_1.F", "visit_3.F"), 
      c("visit_2.F", "visit_3.F"),
      c("visit_1.M", "visit_2.M"), 
      c("visit_1.M", "visit_3.M"), 
      c("visit_2.M", "visit_3.M")
    ),
    annotations = c(
      wilcox_stars_F["visit1_vs_visit2"],
      wilcox_stars_F["visit1_vs_visit3"],
      wilcox_stars_F["visit2_vs_visit3"],
      wilcox_stars_M["visit1_vs_visit2"],
      wilcox_stars_M["visit1_vs_visit3"],
      wilcox_stars_M["visit2_vs_visit3"]
    ),
    textsize = 4,
    map_signif_level = FALSE
  )

# Add Kruskal-Wallis p-values as text
plot <- plot + 
  annotate("text", x = 1, y = max(gene_counts_long$count) * 1.1, 
           label = paste("KW Female p-value:", format(kw_pval_F, digits = 3)), 
           hjust = 0, size = 3) +
  annotate("text", x = 4, y = max(gene_counts_long$count) * 1.1, 
           label = paste("KW Male p-value:", format(kw_pval_M, digits = 3)), 
           hjust = 0, size = 3)

# Display the plot
print(plot)

## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded

gene_of_interest <- "CCL3"

# Subset norm_counts to get counts for the gene of interest
gene_counts <- Bulk_norm_counts[Bulk_norm_counts$hgnc_symbol == gene_of_interest, ]
rownames(gene_counts)=gene_counts$hgnc_symbol
gene_counts=gene_counts[4:96]

# Reshape gene_counts into long format and merge with colData
gene_counts_long <- gene_counts %>%
  tibble::rownames_to_column(var = "gene") %>%  # Create a 'gene' column from row names
  tidyr::pivot_longer(cols = -gene, names_to = "recordid", values_to = "count")

gene_counts_long <- gene_counts_long %>%
  left_join(colData, by = "recordid")  # Assuming recordid matches between gene_counts and colData

# Define a helper function to map p-values to stars
pval_to_stars <- function(p) {
  if (p < 0.0001) {
    return("****")
  } else if (p < 0.001) {
    return("***")
  } else if (p < 0.01) {
    return("**")
  } else if (p < 0.05) {
    return("*")
  } else {
    return("ns")  # Not significant
  }
}

# Perform a Kruskal-Wallis test separately for each gender
kw_res_F <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "F"))
kw_res_M <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "M"))

# Extract Kruskal-Wallis p-values
kw_pval_F <- kw_res_F$p.value
kw_pval_M <- kw_res_M$p.value

# Perform paired Wilcoxon tests for each gender with Holm's correction
paired_wilcox <- function(data, sex_group) {
  data_sex <- data %>% filter(sex == sex_group)
  pairwise_tests <- list(
    visit1_vs_visit2 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_2") %>% pull(count),
      paired = TRUE
    )$p.value,
    visit1_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
      paired = TRUE
    )$p.value,
    visit2_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_2") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
      paired = TRUE
    )$p.value
  )
  corrected_pvals <- p.adjust(unlist(pairwise_tests), method = "holm")
  return(corrected_pvals)
}

wilcox_pvals_F <- paired_wilcox(gene_counts_long, "F")
wilcox_pvals_M <- paired_wilcox(gene_counts_long, "M")

# Map the Wilcoxon p-values to stars
wilcox_stars_F <- sapply(wilcox_pvals_F, pval_to_stars)
wilcox_stars_M <- sapply(wilcox_pvals_M, pval_to_stars)

# Create the boxplot with significance annotations
plot <- ggplot(gene_counts_long, aes(x = interaction(visit, sex), y = count, fill = visit)) +
  geom_boxplot(outlier.shape = NA) +  # Remove outliers
  geom_jitter(color = "grey", size = 1, width = 0.2, alpha = 0.5) +  # Add individual points
  labs(title = "Gene Expression Across Visits and Genders", x = "Visit and Sex", y = "Gene Count") +
  theme_minimal() +
  theme(plot.title = element_text(size = 10)) +
  geom_signif(
    comparisons = list(
      c("visit_1.F", "visit_2.F"), 
      c("visit_1.F", "visit_3.F"), 
      c("visit_2.F", "visit_3.F"),
      c("visit_1.M", "visit_2.M"), 
      c("visit_1.M", "visit_3.M"), 
      c("visit_2.M", "visit_3.M")
    ),
    annotations = c(
      wilcox_stars_F["visit1_vs_visit2"],
      wilcox_stars_F["visit1_vs_visit3"],
      wilcox_stars_F["visit2_vs_visit3"],
      wilcox_stars_M["visit1_vs_visit2"],
      wilcox_stars_M["visit1_vs_visit3"],
      wilcox_stars_M["visit2_vs_visit3"]
    ),
    textsize = 4,
    map_signif_level = FALSE
  )

# Add Kruskal-Wallis p-values as text
plot <- plot + 
  annotate("text", x = 1, y = max(gene_counts_long$count) * 1.1, 
           label = paste("KW Female p-value:", format(kw_pval_F, digits = 3)), 
           hjust = 0, size = 3) +
  annotate("text", x = 4, y = max(gene_counts_long$count) * 1.1, 
           label = paste("KW Male p-value:", format(kw_pval_M, digits = 3)), 
           hjust = 0, size = 3)

# Display the plot
print(plot)

## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded

gene_of_interest <- "CCL5"

# Subset norm_counts to get counts for the gene of interest
gene_counts <- Bulk_norm_counts[Bulk_norm_counts$hgnc_symbol == gene_of_interest, ]
rownames(gene_counts)=gene_counts$hgnc_symbol
gene_counts=gene_counts[4:96]

# Reshape gene_counts into long format and merge with colData
gene_counts_long <- gene_counts %>%
  tibble::rownames_to_column(var = "gene") %>%  # Create a 'gene' column from row names
  tidyr::pivot_longer(cols = -gene, names_to = "recordid", values_to = "count")

gene_counts_long <- gene_counts_long %>%
  left_join(colData, by = "recordid")  # Assuming recordid matches between gene_counts and colData

# Define a helper function to map p-values to stars
pval_to_stars <- function(p) {
  if (p < 0.0001) {
    return("****")
  } else if (p < 0.001) {
    return("***")
  } else if (p < 0.01) {
    return("**")
  } else if (p < 0.05) {
    return("*")
  } else {
    return("ns")  # Not significant
  }
}

# Perform a Kruskal-Wallis test separately for each gender
kw_res_F <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "F"))
kw_res_M <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "M"))

# Extract Kruskal-Wallis p-values
kw_pval_F <- kw_res_F$p.value
kw_pval_M <- kw_res_M$p.value

# Perform paired Wilcoxon tests for each gender with Holm's correction
paired_wilcox <- function(data, sex_group) {
  data_sex <- data %>% filter(sex == sex_group)
  pairwise_tests <- list(
    visit1_vs_visit2 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_2") %>% pull(count),
      paired = TRUE
    )$p.value,
    visit1_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
      paired = TRUE
    )$p.value,
    visit2_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_2") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
      paired = TRUE
    )$p.value
  )
  corrected_pvals <- p.adjust(unlist(pairwise_tests), method = "holm")
  return(corrected_pvals)
}

wilcox_pvals_F <- paired_wilcox(gene_counts_long, "F")
wilcox_pvals_M <- paired_wilcox(gene_counts_long, "M")

# Map the Wilcoxon p-values to stars
wilcox_stars_F <- sapply(wilcox_pvals_F, pval_to_stars)
wilcox_stars_M <- sapply(wilcox_pvals_M, pval_to_stars)

# Create the boxplot with significance annotations
plot <- ggplot(gene_counts_long, aes(x = interaction(visit, sex), y = count, fill = visit)) +
  geom_boxplot(outlier.shape = NA) +  # Remove outliers
  geom_jitter(color = "grey", size = 1, width = 0.2, alpha = 0.5) +  # Add individual points
  labs(title = "Gene Expression Across Visits and Genders", x = "Visit and Sex", y = "Gene Count") +
  theme_minimal() +
  theme(plot.title = element_text(size = 10)) +
  geom_signif(
    comparisons = list(
      c("visit_1.F", "visit_2.F"), 
      c("visit_1.F", "visit_3.F"), 
      c("visit_2.F", "visit_3.F"),
      c("visit_1.M", "visit_2.M"), 
      c("visit_1.M", "visit_3.M"), 
      c("visit_2.M", "visit_3.M")
    ),
    annotations = c(
      wilcox_stars_F["visit1_vs_visit2"],
      wilcox_stars_F["visit1_vs_visit3"],
      wilcox_stars_F["visit2_vs_visit3"],
      wilcox_stars_M["visit1_vs_visit2"],
      wilcox_stars_M["visit1_vs_visit3"],
      wilcox_stars_M["visit2_vs_visit3"]
    ),
    textsize = 4,
    map_signif_level = FALSE
  )

# Add Kruskal-Wallis p-values as text
plot <- plot + 
  annotate("text", x = 1, y = max(gene_counts_long$count) * 1.1, 
           label = paste("KW Female p-value:", format(kw_pval_F, digits = 3)), 
           hjust = 0, size = 3) +
  annotate("text", x = 4, y = max(gene_counts_long$count) * 1.1, 
           label = paste("KW Male p-value:", format(kw_pval_M, digits = 3)), 
           hjust = 0, size = 3)

# Display the plot
print(plot)

## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded

Extended Data Figure 4b-g

Load data

con <- qread("con_lymphoid.qs", nthreads = 10)
anno.lymphoid <- qread("anno_lymphoid.qs")

spc <- con$getDatasetPerCell()

Extended Data Figure 4b

con$plotGraph(groups = anno.lymphoid, 
              plot.na = F, 
              size = 0.8,
              alpha = 1, 
              embedding = "largeVis", 
              shuffle.colors = T, 
              show.labels = T, 
              font.size = 5,
              show.legend = T,
              mark.groups = T) + 
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  scale_colour_manual(values = RColorBrewer::brewer.pal(9, "YlOrBr")[-c(1,2)][c(1,4,5,2,6)]) +
  dotSize(3)

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Extended Data Figure 4c

varToPlot <- spc %>% 
  as.character() %>% 
  strsplit("_vis") %>% 
  sget(1) %>% 
  setNames(names(spc)) %>%
  .[names(.) %in% names(anno.lymphoid)]

con$plotGraph(groups = varToPlot, 
              plot.na = F, 
              size = 0.5,
              alpha = 0.8, 
              embedding = "largeVis",
              mark.groups = F,
              show.labels = T,
              show.legend = T,
              title = "Donor") +
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3)

Extended Data Figure 4d

var <- meta$sex

varToPlot <- grepl.replace(spc %>% 
                             as.character(), 
                           meta$sample, 
                           var) %>% 
  setNames(names(spc)) %>% 
  factor(labels = unique(var)) %>% 
  .[names(.) %in% names(anno.lymphoid)]

con$plotGraph(groups = varToPlot, 
              plot.na = F, 
              size = 0.5,
              alpha = 0.8, 
              embedding = "largeVis",
              mark.groups = F,
              show.labels = T,
              show.legend = T,
              title = "Sex") +
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3) +
  scale_color_manual(values = c("firebrick", "navy"))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Extended Data Figure 4e

varToPlot <- spc %>% 
  as.character() %>% 
  strsplit("_|!!") %>% 
  sget(3) %>% 
  setNames(names(spc)) %>% 
  factor() %>% 
  .[names(.) %in% names(anno.lymphoid)]

con$plotGraph(groups = varToPlot, 
              plot.na = F, 
              size = 0.5,
              alpha = 0.8, 
              embedding = "largeVis",
              mark.groups = F,
              show.labels = T,
              show.legend = T,
              title = "Visit") +
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3) +
  scale_color_manual(values = ggsci::pal_jama()(3))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Extended Data Figure 4f

cm.merged <- con$getJointCountMatrix() %>% 
  .[rownames(.) %in% names(anno.lymphoid), ]

c("FOXP3", "CTLA4",
  "IL7R", "CD4",
  "CD40LG", "CD8A",
  "GZMK", "GZMH",
  "GNLY", "KLRD1",
  "KLRF1", "NCAM1"
) %>% 
  sccore::dotPlot(., 
                  cm.merged, 
                  anno.lymphoid %>% 
                    factor(levels = c("Treg", "CD4 T cells", "CD8 T cells", "NK-like T cells", "NK")), 
                  gene.order = ., 
                  cols = c("white","yellow4"))

Extended Data Figure 4g

# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major, 
                 cell.groups = anno.major, 
                 ref.level = "vis2", 
                 target.level = "vis3",
                 sample.groups = con.major$samples %>% 
                   names() %>% 
                   strsplit("_") %>% 
                   sapply('[[', 3) %>% 
                   gsub(1, 2, .) %>% 
                   `names<-`(con.major$samples %>% 
                               names()),
                 sample.groups.palette = ggsci::pal_jama()(7))

cao$sample.groups <- con.major$samples %>% 
  names() %>% 
  strsplit("_") %>% 
  sapply('[[', 3) %>% 
  `names<-`(con.major$samples %>% 
              names()) %>% 
  gsub("vis", "Visit ", .)

anno.comb <- anno.major[!names(anno.major) %in% names(anno.lymphoid)] %>% 
  {factor(c(., anno.lymphoid))}

p <- cao$plotCellGroupSizes(cell.groups = anno.comb,
                            show.significance = F, 
                            filter.empty.cell.types = F)

plot.dat <- p$data %>% 
  filter(variable %in% levels(anno.lymphoid)) %>% 
  mutate(sex = rep(meta$sex, anno.lymphoid %>% levels() %>% length()),
         variable = factor(variable)) %>% 
  mutate(sex_visit = paste(sex, group, sep = " ") %>% gsub("Vis", "vis", .),
         variable = factor(variable, levels = c("Treg", "CD4 T cells", "CD8 T cells", "NK-like T cells", "NK")))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$variable)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$group)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(ct = factor(ct, levels = unique(plot.dat$variable))) %>% 
  arrange(ct) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(var, sex) %>%
  wilcox_test(value~sex_visit, paired = T) %>%
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "var", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
  arrange(desc(y.position))

# Plot
ggplot(plot.dat, aes(variable, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "% cells per sample", fill = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1)) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(ct, y = 5, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 1, hide.ns = F) # Wilcoxon

Extended Data Figure 5

Extended Data Figure 5a-c

Load data

con <- qread("con_myeloid.qs", nthreads = 10)
anno.immune <- qread("anno_immune.qs")

spc <- con$getDatasetPerCell()

Extended Data Figure 5a

varToPlot <- spc %>% 
  as.character() %>% 
  strsplit("_vis") %>% 
  sget(1) %>% 
  setNames(names(spc)) %>% 
  factor()

con$plotGraph(groups = varToPlot, 
              plot.na = F, 
              size = 0.3,
              alpha = 0.1, 
              embedding = "largeVis",
              mark.groups = F,
              show.labels = T,
              show.legend = T,
              title = "Donor") +
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3)

Extended Data Figure 5b

var <- meta$sex

varToPlot <- grepl.replace(spc %>% 
                             as.character(), 
                           meta$sample, 
                           var) %>% 
  setNames(names(spc)) %>% 
  factor(labels = unique(var))

con$plotGraph(groups = varToPlot, 
              plot.na = F, 
              size = 0.1,
              alpha = 0.1, 
              embedding = "largeVis",
              mark.groups = F,
              show.labels = T,
              show.legend = T,
              title = "Sex") +
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3) +
  scale_color_manual(values = c("firebrick", "navy"))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Extended Data Figure 5c

varToPlot <- spc %>% 
  as.character() %>% 
  strsplit("_|!!") %>% 
  sget(3) %>% 
  setNames(names(spc)) %>% 
  factor()

con$plotGraph(groups = varToPlot, 
              plot.na = F, 
              size = 0.3,
              alpha = 0.1, 
              embedding = "largeVis",
              mark.groups = F,
              show.labels = T,
              show.legend = T,
              title = "Visit") +
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3) +
  scale_color_manual(values = ggsci::pal_jama()(3))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Extended Data Figure 5d

WATLAS <- read.delim("watlas_labels_l2_adipose_anno.tsv")
rownames(WATLAS)=WATLAS$X

# Define MACS
column_order <- c("LYVE1..Macrophage",  "CYP27A1..Macrophage", "LPL..Macrophage")
row_order <- c("ATM", "mono/mac", "Early LAM", "LAM")
WATLAS_ordered <- WATLAS[, rev(column_order)] # Reorder columns
WATLAS_ordered <- WATLAS_ordered[row_order, ] # Reorder rows

corrplot(t(WATLAS_ordered), method="color", order = , col.lim=c(0.5, 1),
         col = rev(COL2("RdBu",200)),
         is.corr = F, 
         tl.col = "black",
         tl.pos = "lower",
         outline = "#C0C0C0",
         addgrid.col= NA)

Extended Data Figure 5e

Calculate

anno.minor <- qread("anno_minor.qs")

spc <- con.major$getDatasetPerCell() %>% 
  .[match(names(anno.minor), names(.))]

anno.minor.sex <- anno.minor %>% 
  data.frame(anno = ., cid = names(.), donor = unname(spc)) %>% 
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(anno_sex = paste(anno, sex, sep = "_")) %$% 
  setNames(anno_sex, cid) %>% 
  factor()

# Calculate
res.minor <- c("vis1", "vis2", "vis3") %>% 
  sapply(\(vis) {
    con <- con.major$samples %>% 
      .[!grepl(vis, names(.))] %>%
      Conos$new()
    
    if (vis == "vis1") {
      vis.ref <- "vis2"
      vis.target <- "vis3" 
    } else {
      vis.ref <- "vis1"
      if (vis == "vis2") {
        vis.target <- "vis3"
      } else {
        vis.target <- "vis2"
      }
    }
    
    sample.groups <- con$samples %>%
      names() %>%
      `names<-`(ifelse(grepl(vis.ref, .), vis.ref, vis.target), .)
    
    cao <- Cacoa$new(con,
                     sample.groups,
                     anno.minor.sex,
                     ref.level = vis.ref,
                     target.level = vis.target,
                     n.cores = 32)
    
    cao$estimateCellLoadings()
    
    return(cao)
  }, simplify = F, USE.NAMES = T)

Plot

myeloid.pal <- c("#174D86", "#9EC3B5", "#356790", "#ADD0BA", "#628EA0", "#80A9AA", "#719CA5") %>% 
  setNames(levels(qread("/work/02_data/09_export/anno_immune.qs") %>%  
                    .[!. %in% c("NKT","NK","Tem","Th1","Th","Treg","B-cells")] %>% 
                    factor(levels = c("ATM", "mono/mac", "Early LAM", "LAM", "cDC1", "cDC2", "mDC")))) %>%
  {setNames(c(rep(., 3)), c(names(.), paste(names(.), "Female", sep = "_"), paste(names(.), "Male", sep = "_")))} %>% 
  .[grepl("ATM|LAM|mono", names(.))]

visits <- c("vis1", "vis2", "vis3")

p <- visits %>% 
  lapply(\(vis) {
    comp <- visits[!visits == vis]
    
    dat.p <- res.minor[[vis]]$test.results$coda$padj
    
    dat.plot <- res.minor[[vis]]$test.results$coda$loadings %>% 
      tibble::rownames_to_column(var = "anno") %>% 
      reshape2::melt(id.var = "anno") %>% 
      filter(anno %in% names(myeloid.pal))
    
    yvar <- dat.plot %>%
      group_by(anno) %>%
      summarize(var = mean(value)) %>%
      mutate(padj = dat.p[match(anno, names(dat.p))]) %>%
      arrange(padj, desc(abs(var)))
    
    dat.plot %<>% 
      mutate(anno = factor(anno, levels = rev(unique(yvar$anno))))
    
    ymax <- dat.plot %>%
      pull(value) %>%
      abs() %>%
      max()
    
    out <- dat.plot %>% 
      ggplot(aes(anno, value, fill = anno)) + 
      geom_hline(yintercept = 0, col = "black") +
      geom_jitter(alpha = 0.2, size = 0.3, col = "grey") +
      geom_boxplot(outliers = F) +
      coord_flip() +
      ylim(c(-ymax, ymax)) +
      theme_bw() + 
      theme(line = element_blank(),
            axis.text = element_text(color = "black")) +
      guides(fill = "none") +
      labs(title = paste0(comp[1], " vs ", comp[2]), x = "", y = "Loadings [AU]") +
      scale_fill_manual(values = myeloid.pal)
    
    return(out)
  })

p

## [[1]]

## 
## [[2]]

## 
## [[3]]

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

Statistics

for (vis in visits) {
  comp <- visits[!visits == vis]
  message("Comparison: ", comp)
  print(res.minor[[vis]]$test.results$coda$padj %>% .[names(.) %in% names(myeloid.pal)])
}

## Comparison: vis2vis3

##   Early LAM_Male         LAM_Male       LAM_Female Early LAM_Female 
##       0.01731602       0.01731602       0.09647495       0.15112161 
##    mono/mac_Male  mono/mac_Female       ATM_Female         ATM_Male 
##       0.09590410       0.34909091       0.49350649       0.85248085

## Comparison: vis1vis3

##   Early LAM_Male         LAM_Male       LAM_Female Early LAM_Female 
##       0.00944510       0.01731602       0.03280930       0.16783217 
##    mono/mac_Male       ATM_Female         ATM_Male  mono/mac_Female 
##       0.44675325       0.82886003       0.82886003       0.98192004

## Comparison: vis1vis2

##   Early LAM_Male         ATM_Male Early LAM_Female       LAM_Female 
##        0.5085441        0.7316017        0.7713499        0.7713499 
##         LAM_Male       ATM_Female    mono/mac_Male  mono/mac_Female 
##        0.8410637        0.9660127        0.9890110        0.9890110

Extended Data Figure 5f

Load data

## Plot Individual genes from RNA-seq data
Bulk_norm_counts <- read.delim("Bulk_norm_counts.txt", h=T)
colData <- read.delim("Meta_Data_WL_Select.txt", h=T, dec = ",")

TREM2

gene_of_interest <- "TREM2"

# Subset norm_counts to get counts for the gene of interest
gene_counts <- Bulk_norm_counts[Bulk_norm_counts$hgnc_symbol == gene_of_interest, ]
rownames(gene_counts)=gene_counts$hgnc_symbol
gene_counts=gene_counts[4:96]

# Reshape gene_counts into long format and merge with colData
gene_counts_long <- gene_counts %>%
  tibble::rownames_to_column(var = "gene") %>%  # Create a 'gene' column from row names
  tidyr::pivot_longer(cols = -gene, names_to = "recordid", values_to = "count")

gene_counts_long <- gene_counts_long %>%
  left_join(colData, by = "recordid")  # Assuming recordid matches between gene_counts and colData

# Define a helper function to map p-values to stars
pval_to_stars <- function(p) {
  if (p < 0.0001) {
    return("****")
  } else if (p < 0.001) {
    return("***")
  } else if (p < 0.01) {
    return("**")
  } else if (p < 0.05) {
    return("*")
  } else {
    return("ns")  # Not significant
  }
}

# Perform a Kruskal-Wallis test separately for each gender
kw_res_F <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "F"))
kw_res_M <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "M"))

# Extract Kruskal-Wallis p-values
kw_pval_F <- kw_res_F$p.value
kw_pval_M <- kw_res_M$p.value

# Perform paired Wilcoxon tests for each gender with Holm's correction
paired_wilcox <- function(data, sex_group) {
  data_sex <- data %>% filter(sex == sex_group)
  pairwise_tests <- list(
    visit1_vs_visit2 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_2") %>% pull(count),
      paired = TRUE
    )$p.value,
    visit1_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
      paired = TRUE
    )$p.value,
    visit2_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_2") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
      paired = TRUE
    )$p.value
  )
  corrected_pvals <- p.adjust(unlist(pairwise_tests), method = "holm")
  return(corrected_pvals)
}

wilcox_pvals_F <- paired_wilcox(gene_counts_long, "F")
wilcox_pvals_M <- paired_wilcox(gene_counts_long, "M")

# Map the Wilcoxon p-values to stars
wilcox_stars_F <- sapply(wilcox_pvals_F, pval_to_stars)
wilcox_stars_M <- sapply(wilcox_pvals_M, pval_to_stars)

# Create the boxplot with significance annotations
plot <- ggplot(gene_counts_long, aes(x = interaction(visit, sex), y = count, fill = visit)) +
  geom_boxplot(outlier.shape = NA) +  # Remove outliers
  geom_jitter(color = "grey", size = 1, width = 0.2, alpha = 0.5) +  # Add individual points
  labs(title = "Gene Expression Across Visits and Genders", x = "Visit and Sex", y = "Gene Count") +
  theme_minimal() +
  theme(plot.title = element_text(size = 10)) +
  geom_signif(
    comparisons = list(
      c("visit_1.F", "visit_2.F"), 
      c("visit_1.F", "visit_3.F"), 
      c("visit_2.F", "visit_3.F"),
      c("visit_1.M", "visit_2.M"), 
      c("visit_1.M", "visit_3.M"), 
      c("visit_2.M", "visit_3.M")
    ),
    annotations = c(
      wilcox_stars_F["visit1_vs_visit2"],
      wilcox_stars_F["visit1_vs_visit3"],
      wilcox_stars_F["visit2_vs_visit3"],
      wilcox_stars_M["visit1_vs_visit2"],
      wilcox_stars_M["visit1_vs_visit3"],
      wilcox_stars_M["visit2_vs_visit3"]
    ),
    textsize = 4,
    map_signif_level = FALSE
  )

# Add Kruskal-Wallis p-values as text
plot <- plot + 
  annotate("text", x = 1, y = max(gene_counts_long$count) * 1.1, 
           label = paste("KW Female p-value:", format(kw_pval_F, digits = 3)), 
           hjust = 0, size = 3) +
  annotate("text", x = 4, y = max(gene_counts_long$count) * 1.1, 
           label = paste("KW Male p-value:", format(kw_pval_M, digits = 3)), 
           hjust = 0, size = 3)

# Display the plot
print(plot)

## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded

SPP1

gene_of_interest <- "SPP1"

# Subset norm_counts to get counts for the gene of interest
gene_counts <- Bulk_norm_counts[Bulk_norm_counts$hgnc_symbol == gene_of_interest, ]
rownames(gene_counts)=gene_counts$hgnc_symbol
gene_counts=gene_counts[4:96]

# Reshape gene_counts into long format and merge with colData
gene_counts_long <- gene_counts %>%
  tibble::rownames_to_column(var = "gene") %>%  # Create a 'gene' column from row names
  tidyr::pivot_longer(cols = -gene, names_to = "recordid", values_to = "count")

gene_counts_long <- gene_counts_long %>%
  left_join(colData, by = "recordid")  # Assuming recordid matches between gene_counts and colData

# Define a helper function to map p-values to stars
pval_to_stars <- function(p) {
  if (p < 0.0001) {
    return("****")
  } else if (p < 0.001) {
    return("***")
  } else if (p < 0.01) {
    return("**")
  } else if (p < 0.05) {
    return("*")
  } else {
    return("ns")  # Not significant
  }
}

# Perform a Kruskal-Wallis test separately for each gender
kw_res_F <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "F"))
kw_res_M <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "M"))

# Extract Kruskal-Wallis p-values
kw_pval_F <- kw_res_F$p.value
kw_pval_M <- kw_res_M$p.value

# Perform paired Wilcoxon tests for each gender with Holm's correction
paired_wilcox <- function(data, sex_group) {
  data_sex <- data %>% filter(sex == sex_group)
  pairwise_tests <- list(
    visit1_vs_visit2 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_2") %>% pull(count),
      paired = TRUE
    )$p.value,
    visit1_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
      paired = TRUE
    )$p.value,
    visit2_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_2") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
      paired = TRUE
    )$p.value
  )
  corrected_pvals <- p.adjust(unlist(pairwise_tests), method = "holm")
  return(corrected_pvals)
}

wilcox_pvals_F <- paired_wilcox(gene_counts_long, "F")
wilcox_pvals_M <- paired_wilcox(gene_counts_long, "M")

# Map the Wilcoxon p-values to stars
wilcox_stars_F <- sapply(wilcox_pvals_F, pval_to_stars)
wilcox_stars_M <- sapply(wilcox_pvals_M, pval_to_stars)

# Create the boxplot with significance annotations
plot <- ggplot(gene_counts_long, aes(x = interaction(visit, sex), y = count, fill = visit)) +
  geom_boxplot(outlier.shape = NA) +  # Remove outliers
  geom_jitter(color = "grey", size = 1, width = 0.2, alpha = 0.5) +  # Add individual points
  labs(title = "Gene Expression Across Visits and Genders", x = "Visit and Sex", y = "Gene Count") +
  theme_minimal() +
  theme(plot.title = element_text(size = 10)) +
  geom_signif(
    comparisons = list(
      c("visit_1.F", "visit_2.F"), 
      c("visit_1.F", "visit_3.F"), 
      c("visit_2.F", "visit_3.F"),
      c("visit_1.M", "visit_2.M"), 
      c("visit_1.M", "visit_3.M"), 
      c("visit_2.M", "visit_3.M")
    ),
    annotations = c(
      wilcox_stars_F["visit1_vs_visit2"],
      wilcox_stars_F["visit1_vs_visit3"],
      wilcox_stars_F["visit2_vs_visit3"],
      wilcox_stars_M["visit1_vs_visit2"],
      wilcox_stars_M["visit1_vs_visit3"],
      wilcox_stars_M["visit2_vs_visit3"]
    ),
    textsize = 4,
    map_signif_level = FALSE
  )

# Add Kruskal-Wallis p-values as text
plot <- plot + 
  annotate("text", x = 1, y = max(gene_counts_long$count) * 1.1, 
           label = paste("KW Female p-value:", format(kw_pval_F, digits = 3)), 
           hjust = 0, size = 3) +
  annotate("text", x = 4, y = max(gene_counts_long$count) * 1.1, 
           label = paste("KW Male p-value:", format(kw_pval_M, digits = 3)), 
           hjust = 0, size = 3)

# Display the plot
print(plot)

## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded

MMP9

gene_of_interest <- "MMP9"

# Subset norm_counts to get counts for the gene of interest
gene_counts <- Bulk_norm_counts[Bulk_norm_counts$hgnc_symbol == gene_of_interest, ]
rownames(gene_counts)=gene_counts$hgnc_symbol
gene_counts=gene_counts[4:96]

# Reshape gene_counts into long format and merge with colData
gene_counts_long <- gene_counts %>%
  tibble::rownames_to_column(var = "gene") %>%  # Create a 'gene' column from row names
  tidyr::pivot_longer(cols = -gene, names_to = "recordid", values_to = "count")

gene_counts_long <- gene_counts_long %>%
  left_join(colData, by = "recordid")  # Assuming recordid matches between gene_counts and colData

# Define a helper function to map p-values to stars
pval_to_stars <- function(p) {
  if (p < 0.0001) {
    return("****")
  } else if (p < 0.001) {
    return("***")
  } else if (p < 0.01) {
    return("**")
  } else if (p < 0.05) {
    return("*")
  } else {
    return("ns")  # Not significant
  }
}

# Perform a Kruskal-Wallis test separately for each gender
kw_res_F <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "F"))
kw_res_M <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "M"))

# Extract Kruskal-Wallis p-values
kw_pval_F <- kw_res_F$p.value
kw_pval_M <- kw_res_M$p.value

# Perform paired Wilcoxon tests for each gender with Holm's correction
paired_wilcox <- function(data, sex_group) {
  data_sex <- data %>% filter(sex == sex_group)
  pairwise_tests <- list(
    visit1_vs_visit2 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_2") %>% pull(count),
      paired = TRUE
    )$p.value,
    visit1_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
      paired = TRUE
    )$p.value,
    visit2_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_2") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
      paired = TRUE
    )$p.value
  )
  corrected_pvals <- p.adjust(unlist(pairwise_tests), method = "holm")
  return(corrected_pvals)
}

wilcox_pvals_F <- paired_wilcox(gene_counts_long, "F")
wilcox_pvals_M <- paired_wilcox(gene_counts_long, "M")

# Map the Wilcoxon p-values to stars
wilcox_stars_F <- sapply(wilcox_pvals_F, pval_to_stars)
wilcox_stars_M <- sapply(wilcox_pvals_M, pval_to_stars)

# Create the boxplot with significance annotations
plot <- ggplot(gene_counts_long, aes(x = interaction(visit, sex), y = count, fill = visit)) +
  geom_boxplot(outlier.shape = NA) +  # Remove outliers
  geom_jitter(color = "grey", size = 1, width = 0.2, alpha = 0.5) +  # Add individual points
  labs(title = "Gene Expression Across Visits and Genders", x = "Visit and Sex", y = "Gene Count") +
  theme_minimal() +
  theme(plot.title = element_text(size = 10)) +
  geom_signif(
    comparisons = list(
      c("visit_1.F", "visit_2.F"), 
      c("visit_1.F", "visit_3.F"), 
      c("visit_2.F", "visit_3.F"),
      c("visit_1.M", "visit_2.M"), 
      c("visit_1.M", "visit_3.M"), 
      c("visit_2.M", "visit_3.M")
    ),
    annotations = c(
      wilcox_stars_F["visit1_vs_visit2"],
      wilcox_stars_F["visit1_vs_visit3"],
      wilcox_stars_F["visit2_vs_visit3"],
      wilcox_stars_M["visit1_vs_visit2"],
      wilcox_stars_M["visit1_vs_visit3"],
      wilcox_stars_M["visit2_vs_visit3"]
    ),
    textsize = 4,
    map_signif_level = FALSE
  )

# Add Kruskal-Wallis p-values as text
plot <- plot + 
  annotate("text", x = 1, y = max(gene_counts_long$count) * 1.1, 
           label = paste("KW Female p-value:", format(kw_pval_F, digits = 3)), 
           hjust = 0, size = 3) +
  annotate("text", x = 4, y = max(gene_counts_long$count) * 1.1, 
           label = paste("KW Male p-value:", format(kw_pval_M, digits = 3)), 
           hjust = 0, size = 3)

# Display the plot
print(plot)

## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded

Extended Data Figure 5g-h

# Metadata
metadata.all <- read.delim("Meta_Data_WL_Select.txt", h=T, dec = ",") %>% 
  mutate(sex = factor(sex, labels = c("Female", "Male")),
         donor = gsub("D", "Donor_", recordid) %>% 
           gsub("V", "vis", .))

# Bulk data
cm.bulk <- read.delim("Bulk_norm_counts.txt", header = T) %>% 
  .[match(unique(.$hgnc_symbol), .$hgnc_symbol), ] %>% 
  `rownames<-`(.$hgnc_symbol) %>% 
  select(-hgnc_symbol, -ensembl_gene_id, -entrezgene_id) %>% 
  scale()

colnames(cm.bulk) %<>% 
  gsub("D", "Donor_", .) %>% 
  gsub("V", "vis", .)

Extended Data Figure 5g

See Figure 3d for model setup

# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major, 
                 cell.groups = anno.major, 
                 ref.level = "vis2", 
                 target.level = "vis3", 
                 sample.groups = con.major$samples %>% 
                   names() %>% 
                   strsplit("_") %>% 
                   sapply('[[', 3) %>% 
                   gsub(1, 2, .) %>% 
                   `names<-`(con.major$samples %>% 
                               names()),
                 sample.groups.palette = ggsci::pal_jama()(7))

cao$sample.groups <- con.major$samples %>% 
  names() %>% 
  strsplit("_") %>% 
  sapply('[[', 3) %>% 
  `names<-`(con.major$samples %>% 
              names()) %>% 
  gsub("vis", "Visit ", .)

anno.comb <- anno.major[!names(anno.major) %in% names(anno.immune)] %>% 
  {factor(c(., anno.immune))}

# Get proportions
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>% 
  {100 * ./rowSums(.)} %>% 
  as.data.frame() %$% 
  setNames(`Early LAM`, rownames(.))

tmp <- qread("eLAM_model.qs")

lasso_model <- tmp$lasso_model
best_lambda <- tmp$lasso_cv$lambda.min

cm.bulk.mat <- cm.bulk %>% 
  as.data.frame() %>% 
  filter(rownames(.) %in% rownames(coef(lasso_model, s = best_lambda))) %>% 
  as.matrix() %>% 
  t()

# Now, train a final model on all data for visualization and gene selection
cm.test <- cm.bulk.mat %>% 
  .[rownames(.) %in% names(proportions), ]

lasso_cv <- cv.glmnet(cm.test, proportions, alpha = 1, nfolds = 40)

## Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
## fold

best_lambda <- lasso_cv$lambda.min
lasso_model <- glmnet(cm.test, proportions, alpha = 1, lambda = best_lambda)

# Predict on all samples using the final model
lasso.predict.all <- predict(lasso_model, newx = cm.test, s = best_lambda)
r2.all <- cor(proportions, lasso.predict.all)^2

p <- data.frame(Proportions = proportions,
           Predictions = unname(lasso.predict.all)) %>% 
  ggplot(aes(Proportions, Predictions)) + 
  geom_point() +
  theme_bw() +
  theme(axis.text = element_text(colour = "black")) +
  labs(title = paste0("r2 = ", formatC(r2.all, digits = 3)), y = "Predicted proportions (%)", x = "Proportions (%)") +
  geom_abline(intercept = 0, slope = 1, colour = "firebrick")

p

Extended Data Figure 5h

See Figure 3e for model setup

# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major, 
                 cell.groups = anno.major, 
                 ref.level = "vis2", 
                 target.level = "vis3", 
                 sample.groups = con.major$samples %>% 
                   names() %>% 
                   strsplit("_") %>% 
                   sapply('[[', 3) %>% 
                   gsub(1, 2, .) %>% 
                   `names<-`(con.major$samples %>% 
                               names()),
                 sample.groups.palette = ggsci::pal_jama()(7))

cao$sample.groups <- con.major$samples %>% 
  names() %>% 
  strsplit("_") %>% 
  sapply('[[', 3) %>% 
  `names<-`(con.major$samples %>% 
              names()) %>% 
  gsub("vis", "Visit ", .)

anno.comb <- anno.major[!names(anno.major) %in% names(anno.immune)] %>% 
  {factor(c(., anno.immune))}

# Get proportions
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>% 
  {100 * ./rowSums(.)} %>% 
  as.data.frame() %$% 
  setNames(LAM, rownames(.))

tmp <- qread("LAM_model.qs")

lasso_model <- tmp$lasso_model
best_lambda <- tmp$lasso_cv$lambda.min

cm.bulk.mat <- cm.bulk %>% 
  as.data.frame() %>% 
  filter(rownames(.) %in% rownames(coef(lasso_model, s = best_lambda))) %>% 
  as.matrix() %>% 
  t()

# Now, train a final model on all data for visualization and gene selection
cm.test <- cm.bulk.mat %>% 
  .[rownames(.) %in% names(proportions), ]

lasso_cv <- cv.glmnet(cm.test, proportions, alpha = 1, nfolds = 40)

## Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
## fold

best_lambda <- lasso_cv$lambda.min
lasso_model <- glmnet(cm.test, proportions, alpha = 1, lambda = best_lambda)

# Predict on all samples using the final model
lasso.predict.all <- predict(lasso_model, newx = cm.test, s = best_lambda)
r2.all <- cor(proportions, lasso.predict.all)^2

data.frame(Proportions = proportions,
           Predictions = unname(lasso.predict.all)) %>% 
  ggplot(aes(Proportions, Predictions)) + 
  geom_point() +
  theme_bw() +
  theme(axis.text = element_text(colour = "black")) +
  labs(title = paste0("r2 = ", formatC(r2.all, digits = 3)), y = "Predicted proportions (%)", x = "Proportions (%)") +
  geom_abline(intercept = 0, slope = 1, colour = "firebrick")

Extended Data Figure 5i

watlas_data <- read.delim("watlas_mapped_percent.tsv", stringsAsFactors = FALSE)
watlas_data_count <- read.delim("watlas_mapped_counts.tsv", stringsAsFactors = FALSE)
watlas_data_count <- watlas_data_count %>%
  rowwise() %>%
  mutate(row_sum = sum(c_across(where(is.numeric)), na.rm = TRUE))  # Sum across numeric columns
valid_samples <- watlas_data_count %>%
  filter(row_sum >= 1000) %>%
  pull(sample)  # Extracts the valid sample IDs

filtered_data <- watlas_data %>%
  filter(type == "sn", tissue == "AT", depot == "SAT", grepl("Hs", sample))
filtered_data <- filtered_data %>%
  filter(sample %in% valid_samples)

long_data_mac <- filtered_data %>%
  select(wtstatus, ATM, Early.LAM, LAM, mono.mac) %>%  # Select relevant columns
  tidyr::pivot_longer(cols = -wtstatus, names_to = "Cell_Type", values_to = "Percentage")

long_data_mac %>% 
  ggplot(aes(x = Cell_Type, y = Percentage, fill = wtstatus)) +
  geom_boxplot(position = position_dodge(width = 0.8)) + 
  geom_point(aes(group = wtstatus), position = position_jitterdodge(jitter.width = 0.3, dodge.width = 0.8), alpha = 0.3, ) +
  theme_bw() +
  labs(title = "Comparison of Cell Types in Lean vs Obese",
       x = "Cell Type",
       y = "Percentage",
       fill = "") +
  scale_fill_manual(values = c(lean = "#26547c", obese = "#ef476f")) +
  stat_compare_means(aes(group = wtstatus), method = "wilcox.test", label = "p.signif", ) +
  theme(axis.text = element_text(color = "black"),
        line = element_blank())

Extended Data Figure 5j

We integrated our data with the data from Hinte et al. that were graciously provided by the authors. As these data are not public, here we will only show how we plotted the data.

tmp <- con.integration$clusters$loft_transfer_minor$groups %>%
  {data.frame(cid = names(.), anno = unname(.))} %>% 
  filter(!grepl("!!", cid)) %>% 
  mutate(sample = con.integration$clusters$hinte_condition$groups[match(cid, names(con.integration$clusters$hinte_condition$groups))])

tmp.sum <- tmp %>% 
  group_by(sample) %>% 
  summarize(total = n()) %>% 
  split(., .$sample)

tmp.sum.ct <- tmp %>% 
  group_by(sample, anno) %>% 
  summarize(total = n()) %>% 
  split(., .$sample)

## `summarise()` has grouped output by 'sample'. You can override using the
## `.groups` argument.

tmp.prop <- Map(\(ct, tot, an) setNames(ct$total / tot$total, an), ct = tmp.sum.ct, tot = tmp.sum, an = lapply(tmp.sum.ct, pull, anno))

df.plot <- tmp.prop %>%
  {Map(\(nn, x) data.frame(sample = nn, anno = names(x), value = unname(x)), nn = names(.), x = .)} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(anno)),
         type = sapply(sample, \(x) if (grepl("lean", x)) "Lean" else if (grepl("t0", x)) "Obese" else "Regressed"))

cts <- c("ATM", "mono/mac", "Early LAM", "LAM")

df.plot %>% 
  filter(anno %in% cts) %>% 
  mutate(anno = factor(anno, levels = cts)) %>% 
  ggplot(aes(anno, value)) + 
  stat_summary(mapping = aes(fill = type), geom = "bar", fun = "mean", position = "dodge") +
  geom_point(aes(col = type), position = position_dodge(width = 0.9)) +
  theme_bw() +
  labs(x = "", y = "% proportion per sample", title = "Loft to Hinte transfer, minor annotation", col = "", fill = "") +
  theme(axis.text = element_text(colour = "black"),
        line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  scale_fill_manual(values = brewer.pal(9, "Greens")[c(3, 6, 9)]) +
  scale_color_manual(values = brewer.pal(9, "Purples")[c(9, 7, 5)])

Extended Data Figure 5k

Preparations

Load data for S3H-J first !!!

cts <- c("Early LAM", "LAM", "mono/mac", "ATM")

cm.merged <- con$getJointCountMatrix(raw = T)

# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[rownames(cm.merged)]
anno.subtype <- anno.immune[anno.immune %in% cts] %>% 
  .[!is.na(.)] %>% 
  factor()

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names()) %>%
  gsub("Foam-like Mac", "Early LAM", .)

cm.pseudo.tmp <- sccore::collapseCellsByType(cm.merged,
                                             groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo <- cm.pseudo.tmp %>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_|!!") %>% 
                                   sget(3) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo.tmp), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T) %>% 
  t()

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

plot.dat <- cm.pseudo %>% 
  .[, colnames(.) %in% c("HLA-B")] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("ATM", "mono/mac", "Early LAM", "LAM")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = F) %>%
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = "HLA-B") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 400, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) # Wilcoxon

Extended Data Figure 5l

Load and prepare

cm.merged.raw <- con.integration$getJointCountMatrix(raw = T)
cts <- c("ATM", "mono/mac", "Early LAM", "LAM")
anno <- getConosCluster(con.integration, "loft_transfer_minor")
anno.subtype <- anno[anno %in% cts]

# Create sample-wise annotation
anno.donor <- con.integration$getDatasetPerCell()[rownames(cm.merged.raw)] %>% 
  .[!grepl("!!", names(.))] %>% 
  factor()

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

cm.pseudo.tmp <- sccore::collapseCellsByType(cm.merged.raw,
                                             groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo <- cm.pseudo.tmp %>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("!!") %>% 
                                   sget(1) %>%
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo.tmp), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = TRUE)

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

idx <- cm.pseudo %>% 
  colnames() %>% 
  data.frame(id = .) %>% 
  mutate(type = strsplit(id, "_|!!") %>% 
           sapply(\(x) if (length(x) == 3) x[2] else x[1]),
         ct = strsplit(id, "!!") %>% sget(2) %>% 
           factor(levels = cts)) %>% 
  mutate(ord = order(ct, type))

genes.order <- read.delim("Macrophages_DEG_heatmap_genes.tsv") %$% 
  setNames(gene, cluster) %>%
  .[order(names(.))]

x <- cm.pseudo %>% 
  .[genes.order, ] %>%
  .[rowSums(.) > 0, idx$ord] %>% 
  Matrix::t() %>%
  scale() %>%
  Matrix::t()

row.order <- names(genes.order)[genes.order %in% rownames(x)]

groups <- colnames(x) %>%
  `names<-`(colnames(x))

# Limit scale
x[x > 2] <- 2
x[x < -1.5] <- -1.5

# Create top annotation
tannot <- idx %$%
  .[match(colnames(x), id), ] %>% 
  select(type) %>% 
  dplyr::rename(Type = type) %$% 
  {HeatmapAnnotation(df=.,
                     border=T,
                     col=list(Type = setNames(brewer.pal(9, "Greens")[c(3, 6, 9)], unique(Type))), 
                     show_legend=T)}

# Create color palette
pal <- colorRampPalette(c('navy','grey95','firebrick'))(1024)
labeled.gene.subset <- NULL

# Plot
set.seed(1337)

ha <- ComplexHeatmap::Heatmap(x, 
                              name='Expression', 
                              col=pal, 
                              cluster_columns=FALSE, 
                              cluster_rows = F,
                              show_row_names=FALSE, 
                              show_column_names=FALSE, 
                              top_annotation=tannot,
                              left_annotation=NULL,
                              border=TRUE,
                              show_column_dend = FALSE, 
                              show_row_dend = FALSE,
                              row_split = row.order,
                              # row_km = 5,
                              # row_km_repeats = 500,
                              column_split = idx %$% 
                                .[match(colnames(x), id), ] %>% 
                                pull(ct) %>% 
                                factor(levels = cts) %>% 
                                levels() %>%
                                {c("C1", rep(.[1], 4), "C2", rep(.[2], 4), "C3", rep(.[3], 4), "C4", rep(.[4], 4))} %>%
                                factor(., levels = unique(.)),
                              column_gap = unit(rep(0:1, 4), "mm") %>% .[-length(.)],
                              border_gp = gpar(col = "black", lwd = 1.5))

ht = draw(ha)

Extended Data Figure 5m

We integrated our data with the data from Hinte et al. that were graciously provided by the authors. As these data are not public, here we will only show how we plotted the data.

cts <- c("Early LAM", "LAM", "mono/mac", "ATM")
anno <- getConosCluster(con.integration, "loft_transfer_minor") %>% 
  .[. %in% cts] %>% 
  factor()
cm.clean <- con.integration$getJointCountMatrix(raw = T) %>%
  .[rownames(.) %in% names(anno), ]
spc <- con.integration$getDatasetPerCell()

markers <- con$getDifferentialGenes(groups = anno, 
                                    z.threshold = 1, 
                                    upregulated.only = TRUE, 
                                    append.specificity.metrics = TRUE, 
                                    append.auc = TRUE)

## Estimating marker genes per sample

## Aggregating marker genes

## Estimating specificity metrics

## All done!

genes <- markers$LAM %>% 
  filter(AUC > 0.8) %>% 
  pull(Gene)

lam.scores <- list(LAM.activity = genes)
seu_obj <- CreateSeuratObject(counts = cm.clean %>% Matrix::t() %>% drop0(),
                              min.cells = 0, 
                              min.features = 0)
m.scores <- UCell::AddModuleScore_UCell(seu_obj, features = lam.scores, ncores = 32)

tmp <- reshape2::melt(m.scores$LAM.activity_UCell) %>% 
  mutate(anno = anno[rownames(.)],
         group = spc[rownames(.)] %>% as.character() %>% strsplit("_") %>% sapply(\(x) if (length(x) == 3) x[3] else if (length(x) == 2) paste(x[1], x[2], sep = "_") else x[1])) %>% 
  mutate(type = grepl.replace(group, unique(group), c("obese", "short-term WL", "regressed", "lean", "obese", "regressed", "obese", "regressed"))) %>% 
  mutate(anno = anno[rownames(.)] %>% factor(levels = c("ATM", "mono/mac", "Early LAM", "LAM"))) %>% 
  mutate(type_group = paste(type, group, sep = "_") %>% factor(levels = c("obese_rg_t0", "obese_st_t0", "obese_vis1", "short-term WL_vis2", "lean_lean", "regressed_rg_t1", "regressed_st_t1", "regressed_vis3"))) %>% 
  group_by(group, type, anno) %>%
  summarize(value = mean(value)) %>%
  mutate(group = factor(group)) %>% 
  arrange(group, anno)

## `summarise()` has grouped output by 'group', 'type'. You can override using the
## `.groups` argument.

dat.plot1 <- tmp %>% 
  filter(!group %in% c("lean")) %$% 
  split(., type) %>% 
  {rbind(data.frame(.[[2]], diff = .[[2]]$value - .[[1]]$value), data.frame(.[[3]], diff = .[[3]]$value - .[[1]]$value[.[[1]]$group == "vis1"]))}  %>%
  mutate(group = factor(group, labels = c("Hinte_rg", "Hinte_st", "Loft_short-term WL", "Loft_long-term WL"))) %>% 
  filter(group %in% c("Hinte_rg", "Hinte_st")) %>% 
  mutate(group = factor(group),
         group2 = "t1-t0")

dat.plot2 <- tmp %>% 
  filter(group %in% c("lean", "rg_t0", "st_t0")) %>% 
  mutate(group = factor(group)) %$% 
  split(., group) %>% 
  {rbind(data.frame(.[[2]], diff = .[[1]]$value - .[[2]]$value), data.frame(.[[3]], diff = .[[1]]$value - .[[3]]$value))} %>%
  mutate(group = factor(group, labels = c("lean_rg", "lean_st"))) %>%
  mutate(group = factor(group),
         group2 = "lean-t0")

dat.plot <- rbind(dat.plot1, dat.plot2) %>% 
  mutate(group = as.character(group) %>% strsplit("_") %>% sget(2) %>% factor())

dat.plot %>% 
  ggplot(aes(anno, diff, fill = group2)) +
  stat_summary(geom = "bar", fun = "mean", position = "dodge") +
  geom_point(aes(col = group, group = group2), position = position_dodge(width = 0.8)) +
  geom_hline(yintercept = 0, color = "black") +
  theme_bw() +
  labs(x = "", y = "Difference in mean UCell module scores for\ntop gene marker expression", title = "LAM top markers module scores", fill = "Difference", col = "Dataset") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c("grey70", "grey40")) +
  scale_color_manual(values = brewer.pal(9, "Purples")[c(5,9)])

Extended Data Figure 6

Extended Data Figure 6a-c

Load data

con <- qread("con_aspc.qs", nthreads = 10)
anno.aspc <- qread("anno_aspc.qs")

spc <- con$getDatasetPerCell()

Extended Data Figure 6a

varToPlot <- spc %>% 
  as.character() %>% 
  strsplit("_vis") %>% 
  sget(1) %>% 
  setNames(names(spc)) %>% 
  factor()

con$plotGraph(groups = varToPlot, 
              plot.na = F, 
              size = 0.3,
              alpha = 0.1, 
              embedding = "largeVis",
              mark.groups = F,
              show.labels = T,
              show.legend = T,
              title = "Donor") +
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3)

Extended Data Figure 6b

var <- meta$sex

varToPlot <- grepl.replace(spc %>% 
                             as.character(), 
                           meta$sample, 
                           var) %>% 
  setNames(names(spc)) %>% 
  factor(labels = unique(var))

con$plotGraph(groups = varToPlot, 
              plot.na = F, 
              size = 0.1,
              alpha = 0.1, 
              embedding = "largeVis",
              mark.groups = F,
              show.labels = T,
              show.legend = T,
              title = "Sex") +
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3) +
  scale_color_manual(values = c("firebrick", "navy"))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Extended Data Figure 6c

varToPlot <- spc %>% 
  as.character() %>% 
  strsplit("_|!!") %>% 
  sget(3) %>% 
  setNames(names(spc)) %>% 
  factor()

con$plotGraph(groups = varToPlot, 
              plot.na = F, 
              size = 0.3,
              alpha = 0.1, 
              embedding = "largeVis",
              mark.groups = F,
              show.labels = T,
              show.legend = T,
              title = "Visit") +
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3) +
  scale_color_manual(values = ggsci::pal_jama()(3))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Extended Data Figure 6d

FAP_sub <- read.delim("fap.tsv")
rownames(FAP_sub)=FAP_sub$anno
FAP_sub=FAP_sub[2:5]

# Define ASPCs 
column_order <- c("FAP2","FAP1", "FAP4","FAP3")
row_order <- c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")
FAP_ordered <- FAP_sub[, rev(column_order)] # Reorder columns
FAP_ordered <- FAP_ordered[row_order, ] # Reorder rows

corrplot(t(t(FAP_ordered)), method="color", order = , col.lim=c(-1, 1.09),
         col = rev(COL2("RdBu",200)),
         is.corr = F, 
         tl.col = "black",
         tl.pos = "lower",
         outline = "#C0C0C0",
         addgrid.col= NA)

Extended Data Figure 6e

WATLAS <- read.delim("watlas_labels_l2_adipose_anno.tsv")
rownames(WATLAS)=WATLAS$X

# Define ASPCs 
column_order <- c("DPP4..FAP",  "CXCL14..FAP", "PPARG..FAP", "ICAM1..FAP")
row_order <- c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")
WATLAS_ordered <- WATLAS[, rev(column_order)] # Reorder columns
WATLAS_ordered <- WATLAS_ordered[row_order, ] # Reorder rows

corrplot(t(WATLAS_ordered), method="color", order = , col.lim=c(0.4, 1),
         col = rev(COL2("RdBu",200)),
         is.corr = F, 
         tl.col = "black",
         tl.pos = "lower",
         outline = "#C0C0C0",
         addgrid.col= NA)

Extended Data Figure 6f

Calculate

anno.minor <- qread("anno_minor.qs")

spc <- con.major$getDatasetPerCell() %>% 
  .[match(names(anno.minor), names(.))]

anno.minor.sex <- anno.minor %>% 
  data.frame(anno = ., cid = names(.), donor = unname(spc)) %>% 
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(anno_sex = paste(anno, sex, sep = "_")) %$% 
  setNames(anno_sex, cid) %>% 
  factor()

# Calculate
res.minor <- c("vis1", "vis2", "vis3") %>% 
  sapply(\(vis) {
    con <- con.major$samples %>% 
      .[!grepl(vis, names(.))] %>%
      Conos$new()
    
    if (vis == "vis1") {
      vis.ref <- "vis2"
      vis.target <- "vis3" 
    } else {
      vis.ref <- "vis1"
      if (vis == "vis2") {
        vis.target <- "vis3"
      } else {
        vis.target <- "vis2"
      }
    }
    
    sample.groups <- con$samples %>%
      names() %>%
      `names<-`(ifelse(grepl(vis.ref, .), vis.ref, vis.target), .)
    
    cao <- Cacoa$new(con,
                     sample.groups,
                     anno.minor.sex,
                     ref.level = vis.ref,
                     target.level = vis.target,
                     n.cores = 32)
    
    cao$estimateCellLoadings()
    
    return(cao)
  }, simplify = F, USE.NAMES = T)

Plot

aspc.pal <- RColorBrewer::brewer.pal(5, "Greens")[-1][c(2,1,3,4)] %>% 
  setNames(levels(qread("/work/02_data/09_export/anno_aspc.qs"))) %>% 
  {setNames(c(rep(., 3)), c(names(.), paste(names(.), "Female", sep = "_"), paste(names(.), "Male", sep = "_")))}

visits <- c("vis1", "vis2", "vis3")

p <- visits %>% 
  lapply(\(vis) {
    comp <- visits[!visits == vis]
    
    dat.p <- res.minor[[vis]]$test.results$coda$padj
    
    dat.plot <- res.minor[[vis]]$test.results$coda$loadings %>% 
      tibble::rownames_to_column(var = "anno") %>% 
      reshape2::melt(id.var = "anno") %>% 
      filter(anno %in% names(aspc.pal))
    
    yvar <- dat.plot %>%
      group_by(anno) %>%
      summarize(var = mean(value)) %>%
      mutate(padj = dat.p[match(anno, names(dat.p))]) %>%
      arrange(padj, desc(abs(var)))
    
    dat.plot %<>% 
      mutate(anno = factor(anno, levels = rev(unique(yvar$anno))))
    
    ymax <- dat.plot %>%
      pull(value) %>%
      abs() %>%
      max()
    
    out <- dat.plot %>% 
      ggplot(aes(anno, value, fill = anno)) + 
      geom_hline(yintercept = 0, col = "black") +
      geom_jitter(alpha = 0.2, size = 0.3, col = "grey") +
      geom_boxplot(outliers = F) +
      coord_flip() +
      ylim(c(-ymax, ymax)) +
      theme_bw() + 
      theme(line = element_blank(),
            axis.text = element_text(color = "black")) +
      guides(fill = "none") +
      labs(title = paste0(comp[1], " vs ", comp[2]), x = "", y = "Loadings [AU]") +
      scale_fill_manual(values = aspc.pal)
    
    return(out)
  })

p

## [[1]]

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## 
## [[2]]

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## 
## [[3]]

Statistics

for (vis in visits) {
  comp <- visits[!visits == vis]
  message("Comparison: ", comp)
  print(res.minor[[vis]]$test.results$coda$padj %>% .[names(.) %in% names(aspc.pal)])
}

## Comparison: vis2vis3

##    ASPC_PPARG_Male    ASPC_EPHA3_Male   ASPC_DPP4_Female  ASPC_EPHA3_Female 
##          0.1038961          0.1038961          0.8524809          0.9595111 
##  ASPC_PPARG_Female ASPC_CXCL14_Female     ASPC_DPP4_Male   ASPC_CXCL14_Male 
##          0.9376623          0.9400124          0.9439072          0.9595111

## Comparison: vis1vis3

##   ASPC_DPP4_Female     ASPC_DPP4_Male  ASPC_PPARG_Female    ASPC_PPARG_Male 
##         0.00944510         0.00944510         0.02770563         0.03055768 
##  ASPC_EPHA3_Female    ASPC_EPHA3_Male   ASPC_CXCL14_Male ASPC_CXCL14_Female 
##         0.04155844         0.08500590         0.57448434         0.73008073

## Comparison: vis1vis2

##   ASPC_DPP4_Female     ASPC_DPP4_Male  ASPC_PPARG_Female    ASPC_EPHA3_Male 
##          0.2077922          0.2077922          0.4363636          0.5085441 
##  ASPC_EPHA3_Female ASPC_CXCL14_Female    ASPC_PPARG_Male   ASPC_CXCL14_Male 
##          0.7713499          0.8410637          0.8255528          0.9256198

Extended Data Figure 6g

See Figure 4d for model setup

# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major, 
                 cell.groups = anno.major, 
                 ref.level = "vis2", 
                 target.level = "vis3", 
                 sample.groups = con.major$samples %>% 
                   names() %>% 
                   strsplit("_") %>% 
                   sapply('[[', 3) %>% 
                   gsub(1, 2, .) %>% 
                   `names<-`(con.major$samples %>% 
                               names()),
                 sample.groups.palette = ggsci::pal_jama()(7))

cao$sample.groups <- con.major$samples %>% 
  names() %>% 
  strsplit("_") %>% 
  sapply('[[', 3) %>% 
  `names<-`(con.major$samples %>% 
              names()) %>% 
  gsub("vis", "Visit ", .)

anno.comb <- anno.major[!names(anno.major) %in% names(anno.aspc)] %>% 
  {factor(c(., anno.aspc))}

# Get proportions
proportions <- cao$.__enclos_env__$private$extractCodaData(cell.groups = anno.comb, ret.groups = F) %>% 
  {100 * ./rowSums(.)} %>% 
  as.data.frame() %$% 
  setNames(ASPC_DPP4, rownames(.))

tmp <- qread("DPP4_model.qs")

lasso_model <- tmp$lasso_model
best_lambda <- tmp$lasso_cv$lambda.min

cm.bulk.mat <- cm.bulk %>% 
  as.data.frame() %>% 
  filter(rownames(.) %in% rownames(coef(lasso_model, s = best_lambda))) %>% 
  as.matrix() %>% 
  t()

# Now, train a final model on all data for visualization and gene selection
cm.test <- cm.bulk.mat %>% 
  .[rownames(.) %in% names(proportions), ]

lasso_cv <- cv.glmnet(cm.test, proportions, alpha = 1, nfolds = 40)

## Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
## fold

best_lambda <- lasso_cv$lambda.min
lasso_model <- glmnet(cm.test, proportions, alpha = 1, lambda = best_lambda)

# Predict on all samples using the final model
lasso.predict.all <- predict(lasso_model, newx = cm.test, s = best_lambda)
r2.all <- cor(proportions, lasso.predict.all)^2

data.frame(Proportions = proportions,
           Predictions = unname(lasso.predict.all)) %>% 
  ggplot(aes(Proportions, Predictions)) + 
  geom_point() +
  theme_bw() +
  theme(axis.text = element_text(colour = "black")) +
  labs(title = paste0("r2 = ", formatC(r2.all, digits = 3)), y = "Predicted proportions (%)", x = "Proportions (%)") +
  geom_abline(intercept = 0, slope = 1, colour = "firebrick")

Extended Data Figure 7

Extended Data Figure 7a,d

Prepare data

cm.merged <- con$getJointCountMatrix(raw = T)

# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[rownames(cm.merged)]
anno.subtype <- anno.aspc %>% 
  .[!is.na(.)] %>% 
  factor()

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

cm.pseudo.tmp <- sccore::collapseCellsByType(cm.merged,
                                             groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo <- cm.pseudo.tmp %>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_|!!") %>% 
                                   sget(3) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo.tmp), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T) %>% 
  t()

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

Extended Data Figure 7a

gene = "HSP90B1"

plot.dat <- cm.pseudo %>% 
  .[, colnames(.) %in% gene] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = T) %>% 
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = gene) +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 300, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
  guides(fill = "none")

Extended Data Figure 7d

gene = "JUN"

plot.dat <- cm.pseudo %>% 
  .[, colnames(.) %in% gene] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = T) %>% 
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = gene) +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 600, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
  guides(fill = "none")

gene = "FOS"

plot.dat <- cm.pseudo %>% 
  .[, colnames(.) %in% gene] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = F) %>% 
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = gene) +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 2e3, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
  guides(fill = "none")

gene = "FOSB"

plot.dat <- cm.pseudo %>% 
  .[, colnames(.) %in% gene] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = T) %>% 
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = gene) +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 1.6e3, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
  guides(fill = "none")

Extended Data Figure 7b

cts <- c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")
anno <- getConosCluster(con.integration, "loft_transfer_minor")
anno.subtype <- anno[anno %in% cts]

# Create sample-wise annotation
anno.donor <- con.integration$getDatasetPerCell()[rownames(cm.merged.raw)] %>% 
  .[!grepl("!!", names(.))] %>% 
  factor()

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

cm.pseudo.tmp <- sccore::collapseCellsByType(cm.merged.raw,
                                             groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo <- cm.pseudo.tmp %>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("!!") %>% 
                                   sget(1) %>%
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo.tmp), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = TRUE)

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

idx <- cm.pseudo %>% 
  colnames() %>% 
  data.frame(id = .) %>% 
  mutate(type = strsplit(id, "_|!!") %>% 
           sapply(\(x) if (length(x) == 4) x[2] else x[1]),
         ct = strsplit(id, "!!") %>% sget(2) %>% 
           factor(levels = cts)) %>% 
  mutate(ord = order(ct, type))

genes.order <- read.delim("ASPC_DEG_heatmap_genes.tsv") %$% 
  setNames(gene, cluster) %>%
  .[order(names(.))]

x <- cm.pseudo %>% 
  .[genes.order, ] %>%
  .[rowSums(.) > 0, idx$ord] %>% 
  Matrix::t() %>%
  scale() %>%
  Matrix::t()

row.order <- names(genes.order)[genes.order %in% rownames(x)]

groups <- colnames(x) %>%
  `names<-`(colnames(x))

# Limit scale
x[x > 2] <- 2
x[x < -1.5] <- -1.5

# Create top annotation
tannot <- idx %$%
  .[match(colnames(x), id), ] %>% 
  select(type) %>% 
  dplyr::rename(Type = type) %$% 
  {HeatmapAnnotation(df=.,
                     border=T,
                     col=list(Type = setNames(brewer.pal(9, "Greens")[c(3, 6, 9)], unique(Type))), 
                     show_legend=T)}

# Create color palette
pal <- colorRampPalette(c('navy','grey95','firebrick'))(1024)
labeled.gene.subset <- NULL

# Plot
set.seed(1337)

ha <- ComplexHeatmap::Heatmap(x, 
                              name='Expression', 
                              col=pal, 
                              cluster_columns=FALSE, 
                              cluster_rows = F,
                              show_row_names=FALSE, 
                              show_column_names=FALSE, 
                              top_annotation=tannot,
                              left_annotation=NULL,
                              border=TRUE,
                              show_column_dend = FALSE, 
                              show_row_dend = FALSE,
                              row_split = row.order,
                              column_split = idx %$% 
                                .[match(colnames(x), id), ] %>% 
                                pull(ct) %>% 
                                factor(levels = cts) %>% 
                                levels() %>%
                                {c("C1", rep(.[1], 4), "C2", rep(.[2], 4), "C3", rep(.[3], 4), "C4", rep(.[4], 4))} %>%
                                factor(., levels = unique(.)),
                              column_gap = unit(rep(0:1, 4), "mm") %>% .[-length(.)],
                              border_gp = gpar(col = "black", lwd = 1.5))

ht = draw(ha)

Extended Data Figure 7c

# We extracted genes from the selected GO term using this snippet
# genes.go <- clusterProfiler::bitr("GO:0097193", 
#                                   fromType="GOALL", 
#                                   toType="SYMBOL", 
#                                   OrgDb='org.Hs.eg.db')$SYMBOL %>% 
#   unique()

# Then, we intersected with the DEG genes plotted in our heatmap. The resulting genes are the following:
genes <- c("CLU", "CRIP1", "GRINA", "HNRNPK", "MMP2", "P4HB", "CXCL12", "TMBIM6", "ARL6IP5")

cts <- c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")
anno <- getConosCluster(con.integration, "loft_transfer_minor")
anno.subtype <- anno[anno %in% cts] %>% 
  factor()
cm.merged.sub <- cm.merged.raw %>% 
  .[rownames(.) %in% names(anno.subtype), ]

cm.clean <- cm.merged.sub[rownames(cm.merged.sub) %in% names(anno.subtype), ]
lam.scores <- list(LAM.activity = genes)
seu_obj <- CreateSeuratObject(counts = cm.clean %>% Matrix::t() %>% drop0(),
                              min.cells = 0, 
                              min.features = 0)
m.scores <- UCell::AddModuleScore_UCell(seu_obj, features = lam.scores, ncores = 32)

Prepare

cm.sub <- m.scores$LAM.activity_UCell

# Create sample-wise annotation
anno.donor <- con.integration$getDatasetPerCell()[names(cm.sub)]
anno.subtype <- anno.subtype %>% 
  .[!is.na(.)] %>% 
  factor()

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

# Prepare plot
plot.tmp <- cm.sub %>% 
  data.frame(cid = names(.), value = .) %>% 
  mutate(donor_anno = anno.final[cid]) %>% 
  mutate(donor = strsplit(donor_anno, "!!") %>% 
           sget(1),
         anno = strsplit(donor_anno, "!!") %>% 
           sget(2),
         sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(visit = strsplit(donor, "_") %>% 
           sapply(\(x) if (length(x) == 3) x[3] else if (length(x) == 2) x[2] else x[1]),
         type = strsplit(donor, "_") %>% sget(1)) %>% 
  mutate(sex_visit = paste(sex, visit, sep = "_") %>% 
           gsub("NA_", "", .) %>% 
           factor(levels = c("Female_vis1", "Female_vis2", "Female_vis3", "Male_vis1", "Male_vis2", "Male_vis3", "lean", "t0", "t1"))) %>% 
  group_by(donor_anno, donor, anno, sex, sex_visit, type, visit) %>% 
  summarize(value = mean(value))

## `summarise()` has grouped output by 'donor_anno', 'donor', 'anno', 'sex',
## 'sex_visit', 'type'. You can override using the `.groups` argument.

plot.tmp %>% 
  filter(sex_visit %in% c("lean", "t0", "t1")) %>% 
  mutate(anno = factor(anno, levels = cts)) %>% 
  ggplot(aes(anno, value, fill = sex_visit)) +
  stat_summary(geom = "bar", fun = "mean", position = "dodge") +
  geom_point(aes(col = type, group = sex_visit), position = position_dodge(width = 0.9)) +
  theme_bw() + 
  theme(axis.text = element_text(color = "black"),
        line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  labs(x = "", y = "Mean UCell module score per sample", fill = "Type", title = "'Intrinsic apoptotic signaling pathway' - cluster 1", col  = "Dataset") +
  scale_fill_manual(values = brewer.pal(9, "Greens")[c(3, 6, 9)]) +
  scale_color_manual(values = brewer.pal(9, "Purples")[c(9, 7, 5)])

Extended Data Figure 7e

See Scenic.ipynb for mat object calculation.

mat <- fastMatMR::fmm_to_mat("aspc_adi_auc_mat.mtx") %>% 
  `dimnames<-`(list(
    read.table("aspc_adi_auc_mat.rownames", header = T)[, 1],
    read.table("aspc_adi_auc_mat.colnames", header = T)[, 1]
  ))

con.major <- qread("con_major.qs", nthreads = 10)
spc <- con.major$getDatasetPerCell()
vpc <- spc %>% 
  as.character() %>% 
  strsplit("_|!!") %>% 
  sget(3) %>% 
  setNames(names(spc))

anno.adipocytes <- qread("anno_major.qs") %>% 
  .[. == "Adipocytes"] %>% 
  factor()
anno.comb <- factor(c(anno.adipocytes, anno.aspc))

plot.dat.tmp <- c("FOS(+)", "FOSB(+)") %>% # ZEB1 is just a random pick to make it easier to manipulate data, omitted later
  {mat[, colnames(mat) %in% .]} %>% 
  as.data.frame() %>%
  mutate(., 
         visit = unname(vpc)[match(rownames(.), 
                                   names(vpc) %>% 
                                     strsplit("!!") %>% 
                                     sapply(\(x) paste0(x[2],"!!",x[3]))
         )
         ],
         anno = unname(anno.comb)[match(rownames(.),
                                        names(anno.comb) %>% 
                                          strsplit("!!") %>% 
                                          sapply(\(x) paste0(x[2],"!!",x[3]))
         )
         ],
         sample = unname(spc)[match(rownames(.), 
                                    names(spc) %>% 
                                      strsplit("!!") %>% 
                                      sapply(\(x) paste0(x[2],"!!",x[3]))
         )
         ]
  ) %>% 
  filter(!is.na(anno)) %>% 
  reshape2::melt(id.vars = c("visit", "anno", "sample")) %>% 
  mutate(anno_vis = paste0(anno,"_",visit),
         anno_var = paste0(anno,"_",variable)) %>% 
  group_by(visit, anno, variable, sample) %>% 
  summarize(mm = median(value)) %>% 
  ungroup() %>% 
  filter(#variable == "JUN(+)",
    !anno == "Adipocytes") %>% 
  mutate(anno = factor(anno, levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3", "Adipocytes")),
         visit = factor(visit, levels = c("vis1", "vis2", "vis3"), labels = c("Visit 1", "Visit 2", "Visit 3")),
         sex = meta$sex[match(.$sample, meta$sample)],
         variable = factor(variable)) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " ") %>% gsub("Visit", "visit", .),
         anno = factor(anno, levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")))

## `summarise()` has grouped output by 'visit', 'anno', 'variable'. You can
## override using the `.groups` argument.

plot.dat <- plot.dat.tmp %>% 
  filter(variable == "FOS(+)")

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$mm, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(ct = factor(ct, levels = unique(plot.dat$anno))) %>% 
  arrange(ct) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(anno, sex) %>%
  wilcox_test(mm~sex_visit, paired = TRUE) %>%
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
  arrange(desc(y.position))

# Plot
ggplot(plot.dat, aes(anno, mm)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1), 
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Mean AUC", fill = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(ct, y = 6, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 2, hide.ns = F) # Wilcoxon

plot.dat <- plot.dat.tmp %>% 
  filter(variable == "FOSB(+)")

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$mm, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(ct = factor(ct, levels = unique(plot.dat$anno))) %>% 
  arrange(ct) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(anno, sex) %>%
  wilcox_test(mm~sex_visit, paired = TRUE) %>%
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
  arrange(desc(y.position))

# Plot
ggplot(plot.dat, aes(anno, mm)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1), 
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Mean AUC", fill = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(ct, y = 6, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 2, hide.ns = F) # Wilcoxon

Extended Data Figure 7f

Here, we’re showing how we calculated regulon activity from the output files from Scenic (see Scenic.ipynb). This is not run here.

reg <- read.table("aspc_adi_macro_reg.csv", sep = ",", header = T, skip = 1) %>% 
  `colnames<-`(., c(.[1, 1:2], colnames(.)[-c(1,2)])) %>% 
  dplyr::slice(-1)

tfs <- c("FOS", "FOSB", "JUN", "JUND", "JUNB", "ZEB1", "EBF1", "PPARG", "EBF3", "EBF2")

geneSets <- reg %>% 
  filter(TF %in% tfs) %$% 
  split(., TF) %>% 
  lapply(filter, AUC == max(AUC)) %>% 
  lapply(pull, TargetGenes) %>%
  lapply(gsub, pattern = "\\[|\\(|\\'|\b'|\\]|\\)", replacement = "") %>% 
  lapply(strsplit, ", ") %>% 
  lget(1) %>% 
  lapply(\(x) x[seq(1, length(x)-1, 2)])

anno <- getConosCluster(con.integration, "loft_transfer_minor")

anno.sub <- anno %>% 
  .[. %in% c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3", "Adipocytes")]

cm.sub <- cm.merged[rownames(cm.merged) %in% names(anno.sub), ] %>% 
  Matrix::t()

auc <- AUCell_run(cm.sub, geneSets)

tmp.regulons <- auc@assays@data@listData$AUC %>%
  {lapply(split(., rownames(.)), `names<-`, colnames(.))}

qsave(tmp.regulons, "regulons.qs")
qsave(geneSets, "genesets.qs")

cm <- qread("regulons.qs")[c("FOS", "JUN", "JUND", "JUNB")] %>%
  as.data.frame()

spc.hinte <- con.integration$getDatasetPerCell() %>% 
  .[!grepl("!!", names(.))]

anno.sub <- anno %>% 
  .[. %in% c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3", "Adipocytes")]

cg <- anno.sub[!anno.sub == "Adipocytes"] %>% 
  .[match(names(spc.hinte), names(.), nomatch = F)]

spc.final <- spc.hinte[names(cg)] %>% 
  {setNames(strsplit(as.character(.), "_") %>% sapply(\(x) if (length(x) == 1) x else x[2]), names(.))}

cg.visit <- data.frame(anno = unname(cg), sample = unname(spc.final), row.names = names(cg)) %>% 
  mutate(anno_sample = paste(anno, sample, sep = "_")) %$% 
  setNames(anno_sample, rownames(.)) %>% 
  factor(levels = rev(c("ASPC_DPP4_lean", "ASPC_DPP4_t0", "ASPC_DPP4_t1", "ASPC_CXCL14_lean", "ASPC_CXCL14_t0", "ASPC_CXCL14_t1", "ASPC_PPARG_lean", "ASPC_PPARG_t0", "ASPC_PPARG_t1", "ASPC_EPHA3_lean", "ASPC_EPHA3_t0", "ASPC_EPHA3_t1")))

dotPlot(c("FOS", "JUN", "JUND", "JUNB"), cm, cg.visit, gene.order = rev(c("FOS", "JUN", "JUND", "JUNB")), ) +
  coord_flip() +
  guides(size = "none", col = "none") +
  guides(size = "legend", col = "colorbar") +
  labs(size = "Fraction of cells in group (%)", col = "Regulon activity (mean AUC)", x = "", y = "") +
  scale_color_gradient(low = "grey", high = "firebrick")

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Extended Data Figure 8

Load data

con <- qread("con_adipocytes.qs", nthreads = 10)
anno.adipocytes <- qread("anno_adipocytes_archetypes.qs")
spc <- con$getDatasetPerCell()

Extended Data Figure 8a

varToPlot <- spc %>% 
  as.character() %>% 
  strsplit("_vis") %>% 
  sget(1) %>% 
  setNames(names(spc)) %>% 
  factor()

con$plotGraph(groups = varToPlot, 
              plot.na = F, 
              size = 0.3,
              alpha = 0.1, 
              embedding = "largeVis",
              mark.groups = F,
              show.labels = T,
              show.legend = T,
              title = "Donor") +
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3)

Extended Data Figure 8b

var <- meta$sex

varToPlot <- grepl.replace(spc %>% 
                             as.character(), 
                           meta$sample, 
                           var) %>% 
  setNames(names(spc)) %>% 
  factor(labels = unique(var))

con$plotGraph(groups = varToPlot, 
              plot.na = F, 
              size = 0.3,
              alpha = 0.1, 
              embedding = "largeVis",
              mark.groups = F,
              show.labels = T,
              show.legend = T,
              title = "Sex") +
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3) +
  scale_color_manual(values = c("firebrick", "navy"))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Extended Data Figure 8c

varToPlot <- spc %>% 
  as.character() %>% 
  strsplit("_|!!") %>% 
  sget(3) %>% 
  setNames(names(spc)) %>% 
  factor()

con$plotGraph(groups = varToPlot, 
              plot.na = F, 
              size = 0.3,
              alpha = 0.1, 
              embedding = "largeVis",
              mark.groups = F,
              show.labels = T,
              show.legend = T,
              title = "Visit") +
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3) +
  scale_color_manual(values = ggsci::pal_jama()(3))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Extended Data Figure 8d

Please note, the clustering algorithm holds some randomness, clusters will not always be the same.

con$findCommunities(name = "leiden1", resolution = 0.5)
con$findCommunities(name = "leiden2", resolution = 0.5)
con$findCommunities(name = "leiden3", resolution = 0.5)
con$findCommunities(name = "leiden4", resolution = 0.5)

seq(4) %>% 
  lapply(\(x) {
    con$plotGraph(embedding = "largeVis", clustering = paste0("leiden",x), font.size = 6) +
      theme(line = element_blank()) +
      scale_color_manual(values = RColorBrewer::brewer.pal(4, "Reds")[-1])
  }) %>% 
  cowplot::plot_grid(plotlist = ., ncol = 2)

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Extended Data Figure 8e

Please note, the clustering algorithm holds some randomness, stabilities will not always be the same.

con$findCommunities(resolution = 0.5, test.stability = T, name = "l05stab")

## running 100 subsampling iterations ...

## done

## calculating flat stability stats ...

## adjusted Rand ...

## done

## calculating hierarchical stability stats ...

## upper clustering ...

## clusterTree Jaccard ...

## done

plot_grid(plotlist = list(
  con$plotClusterStability("l05stab", "ari") +
    theme_bw() +
    theme(line = element_blank(),
          axis.text.x = element_blank()) + 
    geom_boxplot(aes(col = "#E41A1C"), notch = T) +
    labs(x = " \n "),
  con$plotClusterStability("l05stab", "hjc") + 
    theme_bw() +
    theme(line = element_blank()) + 
    scale_color_manual(values = RColorBrewer::brewer.pal(4, "Reds")[-1])
), ncol = 2, rel_widths = c(1.3,3))

## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the conos package.
##   Please report the issue at <https://github.com/kharchenkolab/conos/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Extended Data Figure 8f

Please note, the clustering algorithm holds some randomness, stabilities will not always be the same.

con$findCommunities(resolution = 1, test.stability = T, name = "l1stab")

## running 100 subsampling iterations ...

## done

## calculating flat stability stats ...

## adjusted Rand ...

## done

## calculating hierarchical stability stats ...

## upper clustering ...

## clusterTree Jaccard ...

## done

plot_grid(plotlist = list(
  con$plotClusterStability("l05stab", "ari") +
    theme_bw() +
    theme(line = element_blank(),
          axis.text.x = element_blank()) + 
    geom_boxplot(aes(col = "#E41A1C"), notch = T) +
    labs(x = " \n "),
  con$plotClusterStability("l1stab", "hjc") + 
    theme_bw() +
    theme(line = element_blank()) + 
    scale_color_manual(values = RColorBrewer::brewer.pal(8, "Reds")[-1])
), ncol = 2, rel_widths = c(1.3,3))

Extended Data Figure 8g

cm.merged <- con$getJointCountMatrix()

c("PRSS23","PDE5A","ADIPOQ", "DGAT2", "GPAM", "LPL", "RORA", "CLSTN2", "CRIM1", "ITGA1") %>% 
  sccore::dotPlot(., 
                  cm.merged, 
                  getConosCluster(con, "leiden0.5"), 
                  gene.order = ., 
                  cols = c("white","red3"))

Extended Data Figure 8h-i

Preparation

cm.merged <- con$getJointCountMatrix(raw = T)

# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[rownames(cm.merged)]
anno.subtype <- anno.adipocytes %>% 
  .[!is.na(.)] %>% 
  factor()

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

cm.pseudo.tmp <- sccore::collapseCellsByType(cm.merged,
                                             groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo <- cm.pseudo.tmp %>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_|!!") %>% 
                                   sget(3) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo.tmp), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T) %>% 
  t()

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

Extended Data Figure 8h

gene = "MAP3K5"

plot.dat <- cm.pseudo %>% 
  .[, colnames(.) %in% gene] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("DGAT2_archetype", "CLSTN2_archetype", "PRSS23_archetype")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = F) %>%
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = gene) +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 200, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
  guides(fill = "none")

gene = "AKT3"

plot.dat <- cm.pseudo %>% 
  .[, colnames(.) %in% gene] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("DGAT2_archetype", "CLSTN2_archetype", "PRSS23_archetype")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = F) %>%
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = gene) +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 100, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
  guides(fill = "none")

Extended Data Figure 8i

gene = "ADAMTS10"

plot.dat <- cm.pseudo %>% 
  .[, colnames(.) %in% gene] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("DGAT2_archetype", "CLSTN2_archetype", "PRSS23_archetype")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = F) %>%
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = gene) +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 200, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
  guides(fill = "none")

Extended Data Figure 8j

First, we integrate adipocytes from both studies. This is not run here, and the data are not shared.

con.adipocytes <- qread("con_adipocytes.qs", nthreads = 10)
anno.adi <- qread("anno_adipocytes_archetypes.qs")

anno <- getConosCluster(con.integration, "loft_transfer")

anno.hinte <- anno %>%
  .[!grepl("!!", names(.))] %>%
  .[. == "Adipocytes"]

samples.processed <- con.integration$samples %>%
  .[!grepl("vis", names(.))] %>%
  lapply(conos:::getRawCountMatrix) %>%
  lapply(\(cm) cm[, colnames(cm) %in% names(anno.hinte)]) %>%
  lapply(basicP2proc, n.cores = 32, get.largevis = F, get.tsne = F, make.geneknn = F)

con.adipocytes$addSamples(samples.processed)
con.adipocytes$buildGraph()
con.adipocytes$findCommunities(resolution = 0.5)
con.adipocytes$embedGraph()
anno.archetypes <- con.adipocytes$propagateLabels(anno.adi)

qsave(con.adipocytes, "con_adipocytes_integrated.qs", nthreads = 10)
qsave(anno.archetypes, "anno_archetypes_integrated.qs")

Load final data

anno.archetypes <- qread("anno_archetypes_integrated.qs")
con.adipocytes <- qread("con_adipocytes_integrated.qs", nthreads = 10)

# We pick top1k cells per subtype based on the uncertainty parameter from the label propagation
anno.df <- anno.archetypes %$% 
  data.frame(uncertainty = unname(uncertainty),
             label = unname(labels),
             cid = names(labels)) %>% 
  filter(!grepl("!!", cid)) %$% 
  split(., label) %>% 
  lapply(arrange, uncertainty) %>% 
  lapply(dplyr::slice, seq(1e3)) %>% 
  bind_rows()

anno.archetypes.hinte <- anno.df %$% 
  setNames(label, cid) %>% 
  factor()

anno.subtype <- anno.archetypes.hinte

# Create sample-wise annotation
anno.donor <- con.integration$getDatasetPerCell()[rownames(cm.merged.raw)] %>% 
  .[!grepl("!!", names(.))] %>% 
  factor()

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

cm.pseudo.tmp <- sccore::collapseCellsByType(cm.merged.raw,
                                             groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo <- cm.pseudo.tmp %>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("!!") %>% 
                                   sget(1) %>%
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo.tmp), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = TRUE)

## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

idx <- cm.pseudo %>% 
  colnames() %>% 
  data.frame(id = .) %>% 
  mutate(type = strsplit(id, "_|!!") %>% 
           sapply(\(x) if (length(x) == 3) x[1] else x[2]),
         ct = strsplit(id, "!!") %>% sget(2)) %>% 
  mutate(ord = order(ct, type))

# Order genereted in Figure 5c
genes.order <- read.delim("Archetypes_DEG_heatmap_genes.tsv") %$% 
  setNames(gene, cluster %>% factor(labels = seq(4, 1, -1)) %>% as.character()) %>% 
  .[order(names(.))]

x <- cm.pseudo %>% 
  .[genes.order, ] %>%
  .[rowSums(.) > 0, idx$ord] %>%
  Matrix::t() %>%
  scale() %>%
  Matrix::t()

groups <- colnames(x) %>%
  `names<-`(colnames(x))

# Limit scale
x[x > 2] <- 2
x[x < -1] <- -1

# Create top annotation
tannot <- idx %$%
  .[match(colnames(x), id), ] %>% 
  select(type) %>% 
  dplyr::rename(Type = type) %$% 
  {HeatmapAnnotation(df=.,
                     border=T,
                     col=list(Type = setNames(brewer.pal(9, "Greens")[c(3, 6, 9)], unique(Type))), 
                     show_legend=T)}

# Create color palette
pal <- colorRampPalette(c('navy','grey95','firebrick'))(1024)
labeled.gene.subset <- NULL

# Plot
set.seed(1337)

ha <- ComplexHeatmap::Heatmap(x, 
                              name='Expression', 
                              col=pal, 
                              cluster_columns=FALSE, 
                              cluster_rows = F,
                              show_row_names=FALSE, 
                              show_column_names=FALSE, 
                              top_annotation=tannot,
                              left_annotation=NULL,
                              border=TRUE,
                              show_column_dend = FALSE, 
                              show_row_dend = FALSE,
                              row_split = names(genes.order)[genes.order %in% rownames(x)],
                              column_split = anno.archetypes.hinte %>% 
                                levels() %>%
                                {c("C1", rep(.[1], 4), "C2", rep(.[2], 4), "C3", rep(.[3], 4))} %>%
                                factor(., levels = unique(.)),
                              column_gap = unit(rep(0:1, 3), "mm") %>% .[-length(.)],
                              border_gp = gpar(col = "black", lwd = 1.5))

ht = draw(ha)

Extended Data Figure 8k

We integrated our data with the data from Hinte et al. that were graciously provided by the authors. As these data are not public, here we will only show how we plotted the data.

genes.order <- read.delim("Archetypes_DEG_heatmap_genes.tsv") %$% 
  setNames(gene, cluster %>% factor(labels = seq(4, 1, -1)) %>% as.character()) %>% 
  .[order(names(.))] %>%
  {split(unname(.), names(.))}

cts <- anno.archetypes.hinte %>% 
  levels()

anno <- getConosCluster(con.integration, "loft_transfer_minor")
anno.subtype <- anno[names(anno) %in% names(anno.archetypes.hinte)] %>% 
  factor()

cm.merged.sub <- cm.merged.raw %>% 
  .[rownames(.) %in% names(anno.subtype), ]

cm.clean <- cm.merged.sub[rownames(cm.merged.sub) %in% names(anno.subtype), ]

cluster.scores <- genes.order %>% 
  lapply(\(x) {
    cls.scores <- list(cluster.scores = x)
    seu_obj <- CreateSeuratObject(counts = cm.clean %>% Matrix::t() %>% drop0(),
                                  min.cells = 0, 
                                  min.features = 0)
    m.scores <- UCell::AddModuleScore_UCell(seu_obj, features = cls.scores, ncores = 32)
    
    return(m.scores)
  })

Prepare

cm.sub <- cluster.scores %>% 
  lget("cluster.scores_UCell") %>% 
  lapply(\(x) mutate(x, cid = rownames(x))) %>% 
  bind_rows(.id = "cluster")

# Create sample-wise annotation
anno.donor <- con.integration$getDatasetPerCell()[unique(cm.sub$cid)]
anno.subtype <- anno.archetypes.hinte %>% 
  .[!is.na(.)] %>% 
  factor()

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

# Prepare plot
plot.tmp <- cm.sub %>% 
  dplyr::rename(value = cluster.scores_UCell) %>% 
  mutate(donor_anno = anno.final[cid]) %>% 
  mutate(donor = strsplit(donor_anno, "!!") %>% 
           sget(1),
         anno = strsplit(donor_anno, "!!") %>% 
           sget(2)) %>% 
  mutate(visit = strsplit(donor, "_") %>% 
           sapply(\(x) if (length(x) == 2) x[2] else x[1])) %>%
  group_by(donor_anno, donor, anno, visit, cluster) %>% 
  summarize(value = median(value))

## `summarise()` has grouped output by 'donor_anno', 'donor', 'anno', 'visit'. You
## can override using the `.groups` argument.

plot.tmp %>% 
  filter(visit %in% c("lean", "t0", "t1")) %>% 
  ggplot(aes(anno, value, fill = visit)) +
  # geom_boxplot() +
  facet_grid(~cluster) +
  stat_summary(geom = "bar", fun = mean, position = "dodge") +
  geom_point(aes(col = visit), position = position_dodge(width = 0.9)) +
  theme_bw() + 
  theme(axis.text = element_text(color = "black"),
        line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  labs(x = "", y = "Mean UCell module score per sample", fill = "", title = "", col = "") +
  scale_fill_manual(values = brewer.pal(9, "Greens")[c(3, 6, 9)]) +
  scale_color_manual(values = brewer.pal(9, "Purples")[c(9, 7, 5)])

Extended Data Figure 8l

gene <- "PLIN5"

plot.dat <- cm.pseudo %>% 
  t() %>% 
  .[, colnames(.) %in% gene] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!") %>% 
           sget(1) %>% 
           strsplit("_") %>% 
           sapply(\(x) if (length(x) == 1) x[1] else x[2]) %>% 
           factor(levels = c("lean", "t0", "t1")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("DGAT2_archetype", "CLSTN2_archetype", "PRSS23_archetype"))) %>%
  reshape2::melt(id.vars = c("sample", "visit", "anno"))

plot.dat %>% 
  ggplot(aes(anno, value, fill = visit)) + 
  stat_summary(fun = "mean", geom = "bar", position = position_dodge()) +
  geom_point(aes(col = visit),
             position = position_dodge(width = 0.9)) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = gene, fill = "", col = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(brewer.pal(9, "Greens")[c(3,6,9)])) +
  scale_color_manual(values = c(brewer.pal(9, "Purples")[c(9,7,5)]))

gene <- "ADAMTS10"

plot.dat <- cm.pseudo %>% 
  t() %>% 
  .[, colnames(.) %in% gene] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!") %>% 
           sget(1) %>% 
           strsplit("_") %>% 
           sapply(\(x) if (length(x) == 1) x[1] else x[2]) %>% 
           factor(levels = c("lean", "t0", "t1")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("DGAT2_archetype", "CLSTN2_archetype", "PRSS23_archetype"))) %>%
  reshape2::melt(id.vars = c("sample", "visit", "anno"))

plot.dat %>% 
  ggplot(aes(anno, value, fill = visit)) + 
  stat_summary(fun = "mean", geom = "bar", position = position_dodge()) +
  geom_point(aes(col = visit),
             position = position_dodge(width = 0.9)) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = gene, fill = "", col = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(brewer.pal(9, "Greens")[c(3,6,9)])) +
  scale_color_manual(values = c(brewer.pal(9, "Purples")[c(9,7,5)]))

gene <- "MAP3K5"

plot.dat <- cm.pseudo %>% 
  t() %>% 
  .[, colnames(.) %in% gene] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!") %>% 
           sget(1) %>% 
           strsplit("_") %>% 
           sapply(\(x) if (length(x) == 1) x[1] else x[2]) %>% 
           factor(levels = c("lean", "t0", "t1")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("DGAT2_archetype", "CLSTN2_archetype", "PRSS23_archetype"))) %>%
  reshape2::melt(id.vars = c("sample", "visit", "anno"))

plot.dat %>% 
  ggplot(aes(anno, value, fill = visit)) + 
  stat_summary(fun = "mean", geom = "bar", position = position_dodge()) +
  geom_point(aes(col = visit),
             position = position_dodge(width = 0.9)) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = gene, fill = "", col = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(brewer.pal(9, "Greens")[c(3,6,9)])) +
  scale_color_manual(values = c(brewer.pal(9, "Purples")[c(9,7,5)]))

Extended Data Figure 9

Extended Data Figures 9a-c

Preparations

We downsampled to 15k nuclei due to calculation times. We provide sds_obj_reduced.qs for slingshot pseudotime estimations for the downsampled dataset.

Here, we calculate for visit 1 and visit 3.

sds_obj <- qread("sds_obj_reduced.qs")

pseudotime <- sds_obj@assays@data@listData$pseudotime[, 1] %>% 
  .[!is.na(.)]

con <- qread("con_adipocytes_aspc.qs", nthreads = 10)

mat <- con$getJointCountMatrix() %>%
  .[match(names(pseudotime), rownames(.)), colSums(.) > 2e2]

mt <- mat %>%
  as.matrix() %>%
  as.data.frame() %>%
  tibble::rownames_to_column() %>%
  {message("Mutating"); mutate(.,
                               pseudotime = pseudotime[rowname],
                               visit = con$getDatasetPerCell()[rowname] %>%
                                 as.character() %>%
                                 strsplit("_") %>%
                                 sget(3),
                               donor = strsplit(rowname, "!!") %>%
                                 sget(1))} %>%
  .[complete.cases(.), ] %>%
  {message("Melting"); reshape2::melt(., id.vars = c("rowname", "pseudotime", "visit", "donor"))} %$%
  {message("Splitting"); split(., variable)}

The calculations take app. 11 h with 14k genes and 10k cells. We’ve added pre-calculated results as pseudotime_res_novis2.qs.

res <- mt %>% 
  sccore::plapply(\(y) {
    x = y %>% 
      filter(!visit == "vis2")
    
    fit_full <- gamm4(data = x, formula = value ~ s(pseudotime, by = factor(visit)), random = ~(1 | donor), REML = F)
    fit_reduced <- gamm4(data = x, formula = value ~ s(pseudotime), random = ~(1 | donor), REML = F)
    fit_none <- gamm4(data = x, formula = value ~ 1, random = ~(1 | donor), REML = F)
    
    ann <- anova(fit_full$mer, fit_reduced$mer, fit_none$mer)
    
    if (any(ann$`Pr(>Chisq)` <= 0.05, na.rm = T)) {
      residuals <- predict(fit_full$gam, se.fit = T)$fit %>% 
        unname()
      
      r.sq <- c(summary(fit_full$gam)$r.sq, summary(fit_reduced$gam)$r.sq) %>% 
        setNames(c("full", "reduced"))
      
      out <- list(annova = ann,
                  residuals = residuals,
                  r.sq = r.sq)
      
      return(out)
    }
  }, n.cores = 5, mc.preschedule = T, mc.cleanup = T, progress = F) # 10 cores fails, 5 is max tested that finishes

Extended Data Figures 9a,b

Load data. For ease, we also provide pseudotime W/O visit 2 nuclei. We need the sds_obj from Figure 6a. Also, for ease of use we provide slingshot pseudotime W/O visit 2 as pseudotime_novis2.qs.

res <- qread("pseudotime_res_novis2.qs", nthreads = 10) %>% 
  .[!sapply(., is.null)]

sds_obj <- qread("sds_obj.qs")

pseudotime <- qread("pseudotime_novis2.qs")

con <- qread("con_adipocytes_aspc.qs", nthreads = 10)

both <- res %>%
  sapply(\(gene) {
    ann <- gene$annova
    (ann[2, "Pr(>Chisq)"] <= 0.05 & ann[3, "Pr(>Chisq)"] <= 0.05)
  })

res.both <- res[both]

rsq.both <- res.both %>% 
  sapply(\(gene) gene$r.sq[1]) %>% # Ranking by r2 for pseudotime
  sort(decreasing = T)

res.filter <- res.both %>% 
  .[rsq.both %>% .[. > 0.2] %>% names() %>% gsub(".full", "", .)]

cm.merged <- con$getJointCountMatrix() %>% 
  .[match(names(pseudotime), rownames(.)), colnames(.) %in% names(res.filter)] %>% 
  .[rowSums(.) > 0, ]

## Predict smoothend expression 
pseudotime.both <- pseudotime[rownames(cm.merged)]
weights.both <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.both), rownames(.)), 1] + 1E-7

scFit <- cm.merged %>% 
  Matrix::t() %>% 
  tradeSeq::fitGAM(pseudotime = pseudotime.both, cellWeights = weights.both, verbose = T)

Smooth <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged), tidy = F, n=100)

# Average across replicates and scale
Smooth <- t(scale(t(Smooth)))

# Seriate the results
Smooth <- Smooth[ seriation::get_order(seriation::seriate(Smooth, method="PCA_angle")), ]

# Omit MALAT1
Smooth %<>% .[!rownames(.) == "MALAT1", ]

# Create heatmap
gene.subset <- which(rownames(Smooth) %in% c("PDGFRB", "DCN", "FBN1", "LPL", "ADIPOQ"))
labels <- rownames(Smooth)[gene.subset]
lannot <- rowAnnotation(link = anno_mark(at = gene.subset,
                                         labels = labels,
                                         labels_gp = grid::gpar(fontsize = 7)))

col_fun = circlize::colorRamp2(c(-4, 0, 4), c("navy", "white", "firebrick"))

Heatmap(Smooth, 
        col = col_fun,
        cluster_columns=F, 
        cluster_rows=F, 
        show_column_names = F, 
        show_row_names = F,
        left_annotation = lannot)

repressed.start <- 1
tempinduced.start = which(rownames(Smooth) == "SMOC2")
tempinduced.end <- which(rownames(Smooth) == "APOD")
repressed.end <- tempinduced.start - 1
induced.start <- tempinduced.end + 1
induced.end <- nrow(Smooth)

go.list <- Smooth %>% 
  rownames() %>% 
  {
    list(
      repressed = .[repressed.start : repressed.end],
      tempinduced = .[tempinduced.start : tempinduced.end],
      induced = .[induced.start : induced.end]
    )
  } %>% 
  lapply(\(x) clusterProfiler::enrichGO(x, 
                                        OrgDb = org.Hs.eg.db::org.Hs.eg.db, 
                                        keyType = "SYMBOL", 
                                        ont = "BP", 
                                        universe = rownames(con$samples[[1]])))

go.list %>% 
  lapply(getElement, "result") %>% 
  lapply(head, 10)

## $repressed
##                    ID
## GO:0030198 GO:0030198
## GO:0043062 GO:0043062
## GO:0045229 GO:0045229
## GO:0031589 GO:0031589
## GO:0085029 GO:0085029
## GO:0048251 GO:0048251
## GO:0061448 GO:0061448
## GO:0071560 GO:0071560
## GO:0071230 GO:0071230
## GO:0071559 GO:0071559
##                                                              Description
## GO:0030198                             extracellular matrix organization
## GO:0043062                          extracellular structure organization
## GO:0045229                 external encapsulating structure organization
## GO:0031589                                       cell-substrate adhesion
## GO:0085029                                 extracellular matrix assembly
## GO:0048251                                        elastic fiber assembly
## GO:0061448                                 connective tissue development
## GO:0071560 cellular response to transforming growth factor beta stimulus
## GO:0071230                      cellular response to amino acid stimulus
## GO:0071559                   response to transforming growth factor beta
##            GeneRatio   BgRatio RichFactor FoldEnrichment    zScore       pvalue
## GO:0030198    23/128 339/18805 0.06784661       9.967621 13.793269 9.082953e-17
## GO:0043062    23/128 340/18805 0.06764706       9.938304 13.768812 9.689092e-17
## GO:0045229    23/128 341/18805 0.06744868       9.909160 13.744457 1.033348e-16
## GO:0031589    19/128 361/18805 0.05263158       7.732319 10.692205 4.915608e-12
## GO:0085029     8/128  56/18805 0.14285714      20.987723 12.400661 3.991196e-09
## GO:0048251     5/128  11/18805 0.45454545      66.779119 18.065556 6.039213e-09
## GO:0061448    13/128 291/18805 0.04467354       6.563171  7.917632 9.654499e-08
## GO:0071560    13/128 308/18805 0.04220779       6.200918  7.618707 1.860436e-07
## GO:0071230     8/128  92/18805 0.08695652      12.775136  9.372699 2.131588e-07
## GO:0071559    13/128 315/18805 0.04126984       6.063120  7.502078 2.407654e-07
##                p.adjust       qvalue
## GO:0030198 8.108337e-14 6.635182e-14
## GO:0043062 8.108337e-14 6.635182e-14
## GO:0045229 8.108337e-14 6.635182e-14
## GO:0031589 2.892835e-09 2.367253e-09
## GO:0085029 1.879055e-06 1.537661e-06
## GO:0048251 2.369384e-06 1.938905e-06
## GO:0061448 3.246670e-05 2.656802e-05
## GO:0071560 5.474333e-05 4.479734e-05
## GO:0071230 5.575287e-05 4.562347e-05
## GO:0071559 5.667618e-05 4.637902e-05
##                                                                                                                                                    geneID
## GO:0030198 ADAMTS5/CST3/DPP4/MFAP4/LTBP4/MMP2/COL1A1/ADAMTSL1/COL3A1/SH3PXD2B/TNXB/COL1A2/RECK/ANTXR1/COL14A1/APP/CCDC80/HMCN1/LUM/ELN/FBLN1/PDGFRA/FBLN5
## GO:0043062 ADAMTS5/CST3/DPP4/MFAP4/LTBP4/MMP2/COL1A1/ADAMTSL1/COL3A1/SH3PXD2B/TNXB/COL1A2/RECK/ANTXR1/COL14A1/APP/CCDC80/HMCN1/LUM/ELN/FBLN1/PDGFRA/FBLN5
## GO:0045229 ADAMTS5/CST3/DPP4/MFAP4/LTBP4/MMP2/COL1A1/ADAMTSL1/COL3A1/SH3PXD2B/TNXB/COL1A2/RECK/ANTXR1/COL14A1/APP/CCDC80/HMCN1/LUM/ELN/FBLN1/PDGFRA/FBLN5
## GO:0031589                             ACTG1/S100A10/CCN2/AXL/ITGA11/CD63/COL1A1/CD34/ITGBL1/COL3A1/PXN/TNXB/FBLN2/CARMIL1/ANTXR1/CD44/CCDC80/FBLN1/FBLN5
## GO:0085029                                                                                                MFAP4/LTBP4/COL3A1/TNXB/COL1A2/ANTXR1/ELN/FBLN5
## GO:0048251                                                                                                                  MFAP4/LTBP4/COL3A1/TNXB/FBLN5
## GO:0061448                                                                    TIMP1/CRIP1/EBF2/CCN2/ECM1/COL1A1/CD34/COL3A1/SH3PXD2B/ZEB1/CD44/PDGFD/GLI3
## GO:0071560                                                                     SMURF2/FBN1/HTRA3/LTBP4/CILP/COL1A1/COL3A1/PXN/COL1A2/ZEB1/PDGFD/IGF1R/FYN
## GO:0071230                                                                                                MMP2/COL1A1/COL3A1/COL1A2/ZEB1/PDGFD/PDGFRA/FYN
## GO:0071559                                                                     SMURF2/FBN1/HTRA3/LTBP4/CILP/COL1A1/COL3A1/PXN/COL1A2/ZEB1/PDGFD/IGF1R/FYN
##            Count
## GO:0030198    23
## GO:0043062    23
## GO:0045229    23
## GO:0031589    19
## GO:0085029     8
## GO:0048251     5
## GO:0061448    13
## GO:0071560    13
## GO:0071230     8
## GO:0071559    13
## 
## $tempinduced
##                    ID
## GO:0006956 GO:0006956
## GO:0006959 GO:0006959
## GO:0051895 GO:0051895
## GO:0150118 GO:0150118
## GO:0006957 GO:0006957
## GO:1901889 GO:1901889
## GO:0001953 GO:0001953
## GO:0006958 GO:0006958
## GO:0032970 GO:0032970
## GO:0002455 GO:0002455
##                                                               Description
## GO:0006956                                          complement activation
## GO:0006959                                        humoral immune response
## GO:0051895                 negative regulation of focal adhesion assembly
## GO:0150118    negative regulation of cell-substrate junction organization
## GO:0006957                     complement activation, alternative pathway
## GO:1901889                  negative regulation of cell junction assembly
## GO:0001953                    negative regulation of cell-matrix adhesion
## GO:0006958                       complement activation, classical pathway
## GO:0032970                     regulation of actin filament-based process
## GO:0002455 humoral immune response mediated by circulating immunoglobulin
##            GeneRatio   BgRatio RichFactor FoldEnrichment    zScore       pvalue
## GO:0006956      5/34  65/18805 0.07692308      42.545249 14.279537 1.086271e-07
## GO:0006959      7/34 309/18805 0.02265372      12.529507  8.697049 1.110972e-06
## GO:0051895      3/34  16/18805 0.18750000     103.704044 17.491078 2.975747e-06
## GO:0150118      3/34  16/18805 0.18750000     103.704044 17.491078 2.975747e-06
## GO:0006957      3/34  18/18805 0.16666667      92.181373 16.471553 4.325377e-06
## GO:1901889      3/34  34/18805 0.08823529      48.801903 11.873037 3.109821e-05
## GO:0001953      3/34  40/18805 0.07500000      41.481618 10.907732 5.096604e-05
## GO:0006958      3/34  40/18805 0.07500000      41.481618 10.907732 5.096604e-05
## GO:0032970      6/34 392/18805 0.01530612       8.465636  6.357238 6.478737e-05
## GO:0002455      3/34  54/18805 0.05555556      30.727124  9.310188 1.257581e-04
##                p.adjust       qvalue                              geneID Count
## GO:0006956 0.0001175345 8.255658e-05                  C1R/CFD/C1S/C3/CFH     5
## GO:0006959 0.0006010359 4.221694e-04    C1R/CFD/C1S/C3/CFH/CXCL12/CXCL14     7
## GO:0051895 0.0008049396 5.653920e-04                   ARHGAP6/DLC1/APOD     3
## GO:0150118 0.0008049396 5.653920e-04                   ARHGAP6/DLC1/APOD     3
## GO:0006957 0.0009360117 6.574574e-04                          CFD/C3/CFH     3
## GO:1901889 0.0056080436 3.939106e-03                   ARHGAP6/DLC1/APOD     3
## GO:0001953 0.0068931573 4.841774e-03                   ARHGAP6/DLC1/APOD     3
## GO:0006958 0.0068931573 4.841774e-03                          C1R/C1S/C3     3
## GO:0032970 0.0077888818 5.470934e-03 GSN/PDGFRB/ANK2/ARHGAP6/DLC1/CXCL12     6
## GO:0002455 0.0130178838 9.143800e-03                          C1R/C1S/C3     3
## 
## $induced
##                    ID                               Description GeneRatio
## GO:0019915 GO:0019915                             lipid storage      8/56
## GO:0170062 GO:0170062                          nutrient storage      8/56
## GO:0010889 GO:0010889        regulation of triglyceride storage      5/56
## GO:0030730 GO:0030730                      triglyceride storage      5/56
## GO:0010883 GO:0010883               regulation of lipid storage      6/56
## GO:1905954 GO:1905954 positive regulation of lipid localization      7/56
## GO:1905952 GO:1905952          regulation of lipid localization      8/56
## GO:0034308 GO:0034308         primary alcohol metabolic process      6/56
## GO:0016042 GO:0016042                   lipid catabolic process      9/56
## GO:0006641 GO:0006641            triglyceride metabolic process      6/56
##              BgRatio RichFactor FoldEnrichment    zScore       pvalue
## GO:0019915  97/18805 0.08247423      27.695140 14.405703 4.329521e-10
## GO:0170062  97/18805 0.08247423      27.695140 14.405703 4.329521e-10
## GO:0010889  15/18805 0.33333333     111.934524 23.489785 5.725524e-10
## GO:0030730  18/18805 0.27777778      93.278770 21.406192 1.622525e-09
## GO:0010883  55/18805 0.10909091      36.633117 14.463207 1.371437e-08
## GO:1905954 113/18805 0.06194690      20.801991 11.538525 4.265558e-08
## GO:1905952 189/18805 0.04232804      14.213908  9.978128 8.442191e-08
## GO:0034308 103/18805 0.05825243      19.561373 10.323210 6.062461e-07
## GO:0016042 350/18805 0.02571429       8.634949  7.879764 8.478839e-07
## GO:0006641 110/18805 0.05454545      18.316558  9.954633 8.937458e-07
##                p.adjust       qvalue
## GO:0019915 2.467701e-07 1.777926e-07
## GO:0170062 2.467701e-07 1.777926e-07
## GO:0010889 2.467701e-07 1.777926e-07
## GO:0030730 5.244811e-07 3.778774e-07
## GO:0010883 3.546536e-06 2.555203e-06
## GO:1905954 9.192278e-06 6.622840e-06
## GO:1905952 1.559393e-05 1.123510e-05
## GO:0034308 9.798452e-05 7.059576e-05
## GO:0016042 1.155613e-04 8.325947e-05
## GO:0006641 1.155613e-04 8.325947e-05
##                                                           geneID Count
## GO:0019915       PPARG/ACACB/NRIP1/CIDEA/PNPLA2/PLIN5/LPL/ACVR1C     8
## GO:0170062       PPARG/ACACB/NRIP1/CIDEA/PNPLA2/PLIN5/LPL/ACVR1C     8
## GO:0010889                          PPARG/CIDEA/PNPLA2/PLIN5/LPL     5
## GO:0030730                          PPARG/CIDEA/PNPLA2/PLIN5/LPL     5
## GO:0010883                    PPARG/ACACB/CIDEA/PNPLA2/PLIN5/LPL     6
## GO:1905954              PPARG/ACACB/CIDEA/ACSL1/PLIN5/ADIPOQ/LPL     7
## GO:1905952       PPARG/ACACB/CIDEA/ACSL1/PNPLA2/PLIN5/ADIPOQ/LPL     8
## GO:0034308                   ADH1B/ALDH2/PNPLA2/PECR/RETSAT/LIPE     6
## GO:0016042 ACACB/CIDEA/PNPLA2/PLIN5/ADIPOQ/LIPE/LPL/PDE3B/PLAAT3     9
## GO:0006641                    ACSL1/PNPLA2/PLIN5/LIPE/LPL/PLAAT3     6

Extended Data Figure 9c

Figure S6A needs to be calculated first. We obtain list with human transcription factors from public resource.

tfs <- read.table("https://humantfs.ccbr.utoronto.ca/download/v_1.01/TF_names_v_1.01.txt") %>% 
  pull(V1)

tf.filter <- intersect(names(res.filter), tfs)

cm.merged <- con$getJointCountMatrix() %>% 
  .[match(names(pseudotime), rownames(.)), colnames(.) %in% tf.filter] %>% 
  .[rowSums(.) > 0, ]

## Predict smoothend expression 
pseudotime.both <- pseudotime[rownames(cm.merged)]
weights.both <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.both), rownames(.)), 1] + 1E-7

scFit <- cm.merged %>% 
  Matrix::t() %>% 
  tradeSeq::fitGAM(pseudotime = pseudotime.both, cellWeights = weights.both, verbose = T)

## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf

Smooth <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged), tidy = F, n=100)

# Average across replicates and scale
Smooth <- t(scale(t(Smooth)))

# Seriate the results
Smooth <- Smooth[ seriation::get_order(seriation::seriate(Smooth, method="PCA_angle")), ]

# Omit MALAT1
Smooth %<>% .[!rownames(.) == "MALAT1", ]

col_fun = circlize::colorRamp2(c(-4, 0, 4), c("navy", "white", "firebrick"))

Heatmap(Smooth, 
        col = col_fun,
        cluster_columns=F, 
        cluster_rows=F, 
        show_column_names = F, 
        row_names_gp = grid::gpar(fontsize = 10)
)

Extended Data Figure 9d

We need the sds_obj object from Figure 6a.

con <- qread("con_adipocytes_aspc.qs", nthreads = 10)
spc <- con$getDatasetPerCell()
vpc <- spc %>% 
  as.character() %>% 
  strsplit("_|!!") %>% 
  sget(3) %>% 
  setNames(names(spc))

anno.adipocytes <- qread("anno_major.qs") %>% 
  .[. == "Adipocytes"] %>% 
  factor()
anno.aspc <- qread("anno_aspc.qs")
anno <- factor(c(anno.adipocytes, anno.aspc))

cm.merged <- con$getJointCountMatrix()

mat.df <- c("ZEB1", "EBF2", "PPARG") %>%
  {cm.merged[, colnames(cm.merged) %in% .]} %>% 
  as.matrix() %>% 
  as.data.frame() %>% 
  mutate(., 
         visit = unname(vpc)[match(rownames(.), 
                                   names(vpc)
         )
         ] %>% 
           factor(labels = c("Visit 1", "Visit 2", "Visit 3")),
         anno = unname(anno)[match(rownames(.),
                                   names(anno)
         )
         ],
         sample = unname(spc)[match(rownames(.), 
                                    names(spc)
         )
         ]
  ) %>% 
  filter(!is.na(anno))

pseudotime_all <- qread("sds_obj.qs", nthreads = 10)@assays@data@listData$pseudotime %>% 
  as.data.frame() %>% 
  filter(!is.na(Lineage1)) %>% 
  {setNames(.$Lineage1, rownames(.))}

mat.df %<>% .[match(names(pseudotime_all), rownames(.), nomatch = F), ]

pseudotime_all %<>% .[rownames(mat.df)]

mat.df2 <- mat.df %>% 
  mutate(pseudotime = pseudotime_all) %>% 
  mutate(bin = cut(pseudotime, breaks = c(0, 0.005, 0.01, 0.0165, 0.023))) %>% 
  na.omit() %>% 
  mutate(bin = as.character(bin) %>% 
           strsplit(",") %>% 
           sget(2) %>% 
           gsub("]", "", ., fixed = T)) %>%
  select(-anno, -pseudotime) %>%
  reshape2::melt(id.vars = c("visit", "bin", "sample")) %>% 
  filter(value > 0) %>% 
  group_by(visit, variable, bin, sample) %>% 
  mutate(value = as.numeric(value)) %>% 
  summarize(mm = mean(value)) %>% 
  mutate(sex = meta$sex[match(sample, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, gsub("Visit", "visit", visit), sep = " "))

## `summarise()` has grouped output by 'visit', 'variable', 'bin'. You can
## override using the `.groups` argument.

mat.df2 %>% 
  pull(variable) %>% 
  unique() %>% 
  as.character() %>% 
  lapply(\(vv) {
    plot.dat <- mat.df2 %>% 
      filter(variable == vv) %>%
      ungroup() %>%
      mutate(sex = meta$sex[match(.$sample, meta$sample)]) %>% 
      mutate(sex_visit = paste(sex, visit, sep = " ") %>% gsub("Visit", "visit", .))
    
    # Kruskal-Wallis
    sex.stat <- plot.dat %$% 
      split(., sex) %>% 
      lapply(\(x) split(x, x$bin)) %>% 
      lapply(lapply, \(x) kruskal.test(x$mm, g = x$visit)$p.value) %>% 
      lapply(sapply, gtools::stars.pval) %>% 
      {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
      `names<-`(c("Female","Male")) %>% 
      lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>% 
      {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
      bind_rows() %>% 
      mutate(ct = factor(ct, levels = unique(plot.dat$bin))) %>% 
      arrange(ct) %>% 
      mutate(sig = gsub(".", "", sig, fixed = T))
    
    # Wilcoxon
    stat.test <- plot.dat %>%
      group_by(bin, sex) %>%
      wilcox_test(mm~sex_visit, paired = F) %>% 
      filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
      add_xy_position(x = "bin", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
      arrange(desc(y.position))
    
    # Plot
    ggplot(plot.dat, aes(bin, mm)) + 
      geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
      geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1), 
                 color = "black", size = 0.5, alpha = 0.2) +
      theme_bw() +
      labs(x = "", y = "Mean AUC", fill = "", title = vv) +
      theme(line = element_blank(),
            axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
            axis.text = element_text(color = "black")) +
      scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
      geom_text(inherit.aes = F, data = sex.stat, aes(ct, y = 1.4, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
      stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 0.2, hide.ns = F) # Wilcoxon
  }) %>% 
  cowplot::plot_grid(plotlist = ., ncol = 3)

Extended Data Figure 9e

First, we need to integrate samples, calculate new diffusion map, and calculate new trajectory before plotting using the same genes identified for Loft cells only. These initial steps are not run here, load data below.

Create new Conos object

con.new = qread("con_adipocytes_aspc.qs", nthreads = 10)
anno <- getConosCluster(con.integration, "loft_transfer") %>% 
  .[. %in% c("Adipocytes", "FAPs")] %>% 
  gsub("FAPs", "ASPCs", .)

anno.hinte <- anno %>% 
  .[!grepl("!!", names(.))]

samples.processed <- con.integration$samples %>% 
  .[!grepl("vis", names(.))] %>% 
  lapply(\(sid) {
    conos:::getRawCountMatrix(sid)
  }) %>% 
  lapply(basicP2proc, n.cores = 32, get.largevis = F, get.tsne = F, make.geneknn = F)

con.new$addSamples(samples.processed)
con.new$buildGraph()
con.new$findCommunities()
con.new$embedGraph()
con.new$embedGraph(method = "UMAP")

Calculate diffusion map

data <- con.new

## Convert raw counts to Seurat
for (name in names(data$samples)) {
  seu <- CreateSeuratObject(CreateAssayObject(counts = t(data$samples[[name]]$misc$rawCounts)))
  seu$orig.ident <- substr(name, 0, regexpr("vis", name)-2)
  seu$visit <- substr(name, regexpr("vis", name), nchar(name))
  if (name == names(data$samples)[1]) {
    final <- seu
  } else {
    final <- merge(final, seu)
  }
}

## Merge annotation
final <- subset(final, cells = names(anno))
final <- NormalizeData(final)
anno.final <- anno[ match(colnames(final), names(anno))]
final$fine <- anno.final
final$coarse <- anno.final
final$dataset <- paste(final$orig.ident, final$visit, sep="_")
final <- subset(final, cells = names(which(!is.na(final$fine))))
full <- final

embeddings <- list()
for (nfeats in c(100)) { 
  # Recreate original object
  final <- full
  # ASPC
  ASPC <- subset(final, coarse == "ASPCs")
  VariableFeatures(ASPC) <- NULL
  obj.list <- SplitObject(ASPC, split.by = "dataset")
  feats_ASPC <- suppressWarnings(SelectIntegrationFeatures(obj.list, nfeatures = nfeats, verbose = FALSE))
  # Adipocytes
  Ad <- subset(final, coarse == "Adipocytes")
  VariableFeatures(Ad) <- NULL
  obj.list <- SplitObject(Ad, split.by = "dataset")
  feats_Ad <- suppressWarnings(SelectIntegrationFeatures(obj.list, nfeatures = nfeats, verbose = FALSE))
  # Combine
  feats <- unique(c(feats_ASPC, feats_Ad))
  ## Embed
  # PCA
  VariableFeatures(final) <- feats
  final <- ScaleData(final)
  final <- RunPCA(final)
  for (useHarmony in c(TRUE)) {
    if (useHarmony) {
      final <- suppressWarnings(RunHarmony(final, group.by.vars = "dataset", verbose  = FALSE))
      pcs <- final@reductions$harmony@cell.embeddings
    } else {
      pcs <- final@reductions$pca@cell.embeddings
    }
    for (dims in c(10)) {
      dm <- destiny::DiffusionMap(pcs[,1:dims], verbose = FALSE, n_pcs = NA)
      dm_ev <- dm@eigenvectors
      colnames(dm_ev) <- paste("DC", c(1:20), sep="_")
      embeddings[[(length(embeddings) + 1)]] <- dm_ev
      names(embeddings)[length(embeddings)] <- paste(nfeats, useHarmony, dims, sep="_")
    }
  }
}

# Save embedding
qsave(embeddings[[1]], "diffusion_embedding.qs")

Create trajectory

# Calculate slingshot trajectory
emb <- qread("diffusion_embedding.qs")
anno.minor <- getConosCluster(con.integration, "loft_transfer_minor")
anno.sort <- anno.minor[rownames(emb)] %>%
  .[!. %in% c("ATM", "cvEC", "Mast cells", "mono/mac", "Pericytes")]

sds_obj <- slingshot(emb[names(anno.sort), 1:2] %>%
                       `colnames<-`(c("UMAP1","UMAP2")),
                     anno.sort %>% unname(),
                     end.clus = "Adipocytes",
                     start.clus = "ASPC_DPP4")

qsave(sds_obj, "sds_obj_hinte.qs")

Load data

emb <- qread("diffusion_embedding.qs")
sds_obj <- qread("sds_obj_hinte.qs")
sds <- as.SlingshotDataSet(sds_obj)
res <- qread("pseudotime_res_novis2.qs", nthreads = 10) %>% .[!sapply(., is.null)]

# Here, we paste the order of genes so we don't have to calculate it
pseudoGeneOrder <- c("GPC3", "ITM2B", "CXCL14", "CXCL12", "FOS", "CD81", "B2M", "TMSB10", "SERPING1", "CLU", "CRIP1", "S100A4", "LUM", "ACTB", "S100A10", "LMNA", "C1S", "C3", "CD63", "C1R", "CST3", "IGFBP6", "PLAC9", "AEBP1", "S100A6", "MMP2", "TIMP2", "FYN", "SDC2", "UST", "FLRT2", "MEG8", "SMURF2", "AL110292.1", "PKD2", "MYO9A", "PALLD", "FTO", "MSC-AS1", "LDLRAD4", "TTC3", "MAP3K20", "FBXO11", "DIP2C", "ROCK2", "UPP2", "LINC02456", "PDXDC1", "ADIPOQ-AS1", "IGF1", "PLEKHH2", "C2CD2", "TBL1XR1", "SLC1A3", "C6", "MAP3K5", "RGS6", "LENG8")

Get genes

both <- res %>%
  sapply(\(gene) {
    ann <- gene$annova
    (ann[2, "Pr(>Chisq)"] <= 0.05 & ann[3, "Pr(>Chisq)"] <= 0.05)
  })

res.both <- res[both]

p.both <- res.both %>% 
  sapply(\(gene) gene$annova[3, "Pr(>Chisq)"])

tmp <- res.both %>% 
  lget("annova") %>% 
  lapply(dplyr::slice, 2:3) %>% 
  lapply(pull, AIC) %>%
  .[sapply(., \(x) abs(x[2]) > abs(x[1]))] %>% 
  bind_rows() %>% 
  t() %>% 
  as.data.frame() %>% 
  setNames(c("reduced", "full")) %>% 
  mutate(diff = abs(full) - abs(reduced)) %>% 
  mutate(diff.frac = diff / abs(reduced))

res.filter <- res[
  tmp %>% 
    filter(diff.frac >= 0.05) %>% 
    rownames()
]

anno <- getConosCluster(con.integration, "loft_transfer") %>% 
  .[. %in% c("Adipocytes", "FAPs")] %>% 
  gsub("FAPs", "ASPCs", .)

anno.hinte <- anno %>% 
  .[!grepl("!!", names(.))] %>% 
  factor()

pseudotime_hinte <- sds_obj@assays@data@listData$pseudotime[, 1] %>% 
  .[!is.na(.)] %>% 
  .[names(anno.hinte)[!anno.hinte %in% c("ATM", "cvEC", "Mast cells", "mono/mac", "Pericytes")]] %>% 
  .[!is.na(.)]

cm.merged.tmp <- con.integration$getJointCountMatrix() %>% 
  .[match(names(pseudotime_hinte), rownames(.)), colnames(.) %in% names(res.filter)]

visit = con.integration$getDatasetPerCell()[rownames(cm.merged.tmp)] %>%
  as.character() %>%
  .[!grepl("!!", .)] %>% 
  strsplit("_") %>%
  sapply(\(x) if (length(x) == 1) x else x[2])

# t1
cm.merged.vis <- cm.merged.tmp[visit == "t1", ]
pseudotime.vis <- pseudotime_hinte[rownames(cm.merged.vis)]
weights.vis <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.vis), rownames(.)), 1] + 1E-7

scFit <- cm.merged.vis %>% 
  Matrix::t() %>% 
  tradeSeq::fitGAM(pseudotime = pseudotime.vis, cellWeights = weights.vis, verbose = TRUE)

Smooth3 <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged.vis), tidy = FALSE, n=100)

# t0
cm.merged.vis <- cm.merged.tmp[visit == "t0", ]
pseudotime.vis <- pseudotime_hinte[rownames(cm.merged.vis)]
weights.vis <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.vis), rownames(.)), 1] + 1E-7

scFit <- cm.merged.vis %>% 
  Matrix::t() %>% 
  tradeSeq::fitGAM(pseudotime = pseudotime.vis, cellWeights = weights.vis, verbose = TRUE)

Smooth1 <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged.vis), tidy = FALSE, n=100)

# lean
cm.merged.vis <- cm.merged.tmp[visit == "lean", ]
pseudotime.vis <- pseudotime_hinte[rownames(cm.merged.vis)]
weights.vis <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.vis), rownames(.)), 1] + 1E-7

scFit <- cm.merged.vis %>% 
  Matrix::t() %>% 
  tradeSeq::fitGAM(pseudotime = pseudotime.vis, cellWeights = weights.vis, verbose = TRUE)

Smoothl <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged.vis), tidy = FALSE, n=100)

# Combine smooth
Smooth <- cbind(Smooth3, Smooth1, Smoothl)

# Average across replicates and scale
Smooth <- t(scale(t(Smooth)))

Smooth %<>% 
  .[pseudoGeneOrder, ]

# Split smooth
Smooth33 <- Smooth[, 1:100]
Smooth11 <- Smooth[, 101:200]
Smoothll <- Smooth[, 201:300]

# t0
# Seriate the results

col_fun = circlize::colorRamp2(c(-4, 0, 4), c("navy", "white", "firebrick"))

p3 <- Heatmap(Smooth33, 
              col = col_fun,
              cluster_columns=F, 
              cluster_rows=F, 
              show_column_names = F, 
              show_row_names = F,
              show_heatmap_legend = T
)

# t0
p1 <- Heatmap(Smooth11, 
              col = col_fun,
              cluster_columns=F, 
              cluster_rows=F, 
              show_column_names = F, 
              show_row_names = F,
              show_heatmap_legend = F
)

# lean
gene.highlights <- c("CXCL14", "CXCL12", "CD81", "SERPING1", "CLU", "C1S", "C3", "C1R", "MMP2", "TIMP2", "FTO", "MAP3K20", "PLEKHH2", "TBL1XR1", "C6", "MAP3K5") %>% 
  {which(rownames(Smoothll) %in% .)}

row_anno <- rowAnnotation(foo = anno_mark(at = gene.highlights, 
                                          labels = rownames(Smoothll)[gene.highlights],
                                          side = "left"))


pl <- Heatmap(Smoothll, 
              col = col_fun,
              left_annotation = row_anno,
              cluster_columns=F, 
              cluster_rows=F, 
              show_column_names = F, 
              show_row_names = F,
              show_heatmap_legend = F
)

pl

p1

p3

Extended Data Figure 9f

tmp.regulons <- qread("regulons.qs")

pseudotime_all <- sds_obj@assays@data@listData$pseudotime %>% 
  as.data.frame() %>% 
  filter(!is.na(Lineage1)) %>% 
  {setNames(pull(., Lineage1), rownames(.))} %>% 
  .[names(tmp.regulons[[1]])]

spc <- con.integration$getDatasetPerCell()

tmp.regulons$PPARG %>% 
  {data.frame(value = unname(.), row.names = names(.))} %>% 
  mutate(sample = spc[match(rownames(.), names(spc))] %>% unname()) %>%
  mutate(pseudo = pseudotime_all) %>%
  mutate(bin = cut(pseudo, breaks = c(0, 0.01, 0.0165, 0.02, 0.026))) %>% 
  na.omit() %>% 
  mutate(bin = as.character(bin) %>% 
           strsplit(",") %>% 
           sget(2) %>% 
           gsub("]", "", ., fixed = TRUE)) %>%
  mutate(visit = as.character(sample) %>% 
           strsplit("_") %>% 
           sapply(\(x) if (length(x) == 3) x[3] else if (length(x) == 2) x[2] else x),
         sex = meta$sex[match(sample, meta$sample)],
         study = ifelse(grepl("!!", rownames(.)), "Loft", "Hinte")) %>%
  mutate(sex_visit = paste(sex, visit, sep = "_") %>% 
           gsub("NA_", "", .) %>% 
           factor() %>% 
           factor(levels = c(levels(.)[c(1:3, 5:9, 4)]))) %>%
  arrange(bin) %>% 
  mutate(bin = factor(bin, labels = c("Early preadipocytes", "Mid preadipocytes", "Late preadipocytes", "Adipocyte"))) %>% 
  filter(study == "Hinte") %>%
  group_by(bin, sex_visit, study, sample) %>%
  summarize(mean_auc = mean(value)) %>%
  mutate(sex_visit = factor(sex_visit, levels = c("lean", "t0", "t1"))) %>% 
  group_by(bin) %>% # Addition
  mutate(mean_auc = mean_auc - mean(mean_auc[sex_visit == "t0"])) %>% # Addition
  filter(!sex_visit == "t0") %>% # Addition
  ggplot(aes(bin, mean_auc)) +
  stat_summary(aes(fill = sex_visit), fun = "mean", position = position_dodge(width = 0.8), geom = "bar") +
  geom_point(aes(col = sex_visit), position = position_dodge(width = 0.8)) + 
  theme_bw() +
  theme(line = element_blank(),
        axis.text = element_text(color = "black"),
        axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
  scale_fill_manual(values = c(brewer.pal(9, "Greys")[c(3,6,9)])) +
  scale_color_manual(values = c(brewer.pal(9, "Purples")[c(9,7,5)])) +
  labs(title = "PPARG regulon activity", y = "Change from obese (t0) mean AUC", x = "", fill = "", col = "")

## `summarise()` has grouped output by 'bin', 'sex_visit', 'study'. You can
## override using the `.groups` argument.

Extended Data Figure 10

These plots are presented in Liana.ipynb.

Session info

Time to knit

Sys.time() - tt

## Time difference of 50.88765 mins

sessionInfo()

## R version 4.5.1 (2025-06-13)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.2 LTS
## 
## Matrix products: default
## BLAS/LAPACK: /opt/intel/oneapi/mkl/2025.1/lib/libmkl_gf_lp64.so.2;  LAPACK version 3.12.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Berlin
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    grid      stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] glmnet_4.1-8                RColorBrewer_1.1-3         
##  [3] corrplot_0.95               sccore_1.0.5               
##  [5] cowplot_1.1.3               circlize_0.4.16            
##  [7] slingshot_2.16.0            TrajectoryUtils_1.16.1     
##  [9] SingleCellExperiment_1.30.1 SummarizedExperiment_1.38.1
## [11] Biobase_2.68.0              GenomicRanges_1.60.0       
## [13] GenomeInfoDb_1.44.0         IRanges_2.42.0             
## [15] S4Vectors_0.46.0            BiocGenerics_0.54.0        
## [17] generics_0.1.4              MatrixGenerics_1.20.0      
## [19] matrixStats_1.5.0           princurve_2.1.6            
## [21] harmony_1.2.3               Rcpp_1.0.14                
## [23] Seurat_5.3.0                SeuratObject_5.1.0         
## [25] sp_2.2-0                    destiny_3.22.0             
## [27] ComplexHeatmap_2.24.0       ggpubr_0.6.0               
## [29] rstatix_0.7.2               ggplot2_3.5.2              
## [31] cacoa_0.4.0                 scHelper_0.0.5             
## [33] pagoda2_1.0.12              magrittr_2.0.3             
## [35] dplyr_1.1.4                 qs_0.27.3                  
## [37] conos_1.5.2                 igraph_2.1.4               
## [39] Matrix_1.7-3               
## 
## loaded via a namespace (and not attached):
##   [1] R.methodsS3_1.8.2         dichromat_2.0-0.1        
##   [3] nnet_7.3-20               goftest_1.2-3            
##   [5] Biostrings_2.76.0         vctrs_0.6.5              
##   [7] ggtangle_0.0.6            spatstat.random_3.4-1    
##   [9] RApiSerialize_0.1.4       digest_0.6.37            
##  [11] png_0.1-8                 shape_1.4.6.1            
##  [13] proxy_0.4-27              registry_0.5-1           
##  [15] ggrepel_0.9.6             deldir_2.0-4             
##  [17] parallelly_1.44.0         magick_2.8.6             
##  [19] MASS_7.3-65               reshape2_1.4.4           
##  [21] httpuv_1.6.16             foreach_1.5.2            
##  [23] qvalue_2.40.0             withr_3.0.2              
##  [25] psych_2.5.3               xfun_0.52                
##  [27] ggfun_0.1.8               survival_3.8-3           
##  [29] memoise_2.0.1             hexbin_1.28.5            
##  [31] gson_0.1.0                clusterProfiler_4.16.0   
##  [33] ggsci_3.2.0               tidytree_0.4.6           
##  [35] zoo_1.8-14                GlobalOptions_0.1.2      
##  [37] gtools_3.9.5              pbapply_1.7-2            
##  [39] R.oo_1.27.1               DEoptimR_1.1-3-1         
##  [41] Formula_1.2-5             KEGGREST_1.48.0          
##  [43] promises_1.3.2            scatterplot3d_0.3-44     
##  [45] httr_1.4.7                globals_0.18.0           
##  [47] fitdistrplus_1.2-2        stringfish_0.16.0        
##  [49] rstudioapi_0.17.1         UCSC.utils_1.4.0         
##  [51] miniUI_0.1.2              DOSE_4.2.0               
##  [53] curl_6.2.2                ca_0.71.1                
##  [55] polyclip_1.10-7           quadprog_1.5-8           
##  [57] GenomeInfoDbData_1.2.14   SparseArray_1.8.0        
##  [59] RcppEigen_0.3.4.0.2       xtable_1.8-4             
##  [61] stringr_1.5.1             doParallel_1.0.17        
##  [63] fastMatMR_1.2.5           evaluate_1.0.3           
##  [65] S4Arrays_1.8.0            irlba_2.3.5.1            
##  [67] colorspace_2.1-1          ROCR_1.0-11              
##  [69] reticulate_1.42.0         spatstat.data_3.1-6      
##  [71] lmtest_0.9-40             viridis_0.6.5            
##  [73] ggtree_3.16.0             later_1.4.2              
##  [75] lattice_0.22-7            spatstat.geom_3.4-1      
##  [77] future.apply_1.11.3       robustbase_0.99-4-1      
##  [79] scattermore_1.2           triebeard_0.4.1          
##  [81] RcppAnnoy_0.0.22          xts_0.14.1               
##  [83] class_7.3-23              pillar_1.10.2            
##  [85] nlme_3.1-168              iterators_1.0.14         
##  [87] compiler_4.5.1            RSpectra_0.16-2          
##  [89] stringi_1.8.7             TSP_1.2-4                
##  [91] tensor_1.5                plyr_1.8.9               
##  [93] drat_0.2.5                crayon_1.5.3             
##  [95] abind_1.4-8               gridGraphics_0.5-1       
##  [97] locfit_1.5-9.12           org.Hs.eg.db_3.21.0      
##  [99] bit_4.6.0                 pcaMethods_2.0.0         
## [101] fastmatch_1.1-6           tradeSeq_1.22.0          
## [103] codetools_0.2-20          TTR_0.24.4               
## [105] bslib_0.9.0               e1071_1.7-16             
## [107] GetoptLong_1.0.5          ggplot.multistats_1.0.1  
## [109] plotly_4.10.4             mime_0.13                
## [111] splines_4.5.1             fastDummies_1.7.5        
## [113] sparseMatrixStats_1.20.0  brew_1.0-10              
## [115] N2R_1.0.3                 knitr_1.50               
## [117] blob_1.2.4                utf8_1.2.5               
## [119] clue_0.3-66               fs_1.6.6                 
## [121] listenv_0.9.1             DelayedMatrixStats_1.30.0
## [123] ggsignif_0.6.4            ggplotify_0.1.2          
## [125] tibble_3.2.1              statmod_1.5.0            
## [127] pkgconfig_2.0.3           tools_4.5.1              
## [129] Rook_1.2                  cachem_1.1.0             
## [131] RSQLite_2.3.11            viridisLite_0.4.2        
## [133] smoother_1.3              DBI_1.2.3                
## [135] fastmap_1.2.0             rmarkdown_2.29           
## [137] scales_1.4.0              pbmcapply_1.5.1          
## [139] ica_1.0-3                 broom_1.0.8              
## [141] sass_0.4.10               patchwork_1.3.0          
## [143] dotCall64_1.2             carData_3.0-5            
## [145] RANN_2.6.2                farver_2.1.2             
## [147] mgcv_1.9-3                yaml_2.3.10              
## [149] ggthemes_5.1.0            cli_3.6.5                
## [151] purrr_1.0.4               UCell_2.12.0             
## [153] lifecycle_1.0.4           uwot_0.2.3               
## [155] backports_1.5.0           BiocParallel_1.42.0      
## [157] gtable_0.3.6              rjson_0.2.23             
## [159] ggridges_0.5.6            progressr_0.15.1         
## [161] limma_3.64.0              parallel_4.5.1           
## [163] ape_5.8-1                 edgeR_4.6.2              
## [165] jsonlite_2.0.0            seriation_1.5.7          
## [167] RcppHNSW_0.6.0            bit64_4.6.0-1            
## [169] Rtsne_0.17                yulab.utils_0.2.0        
## [171] coda.base_1.0.0           BiocNeighbors_2.2.0      
## [173] spatstat.utils_3.1-4      ranger_0.17.0            
## [175] urltools_1.7.3            RcppParallel_5.1.10      
## [177] jquerylib_0.1.4           GOSemSim_2.34.0          
## [179] spatstat.univar_3.1-3     R.utils_2.13.0           
## [181] lazyeval_0.2.2            shiny_1.10.0             
## [183] htmltools_0.5.8.1         enrichplot_1.28.2        
## [185] GO.db_3.21.0              sctransform_0.4.2        
## [187] glue_1.8.0                spam_2.11-1              
## [189] XVector_0.48.0            VIM_6.2.2                
## [191] treeio_1.32.0             mnormt_2.1.1             
## [193] RMTstat_0.3.1             gridExtra_2.3            
## [195] boot_1.3-31               R6_2.6.1                 
## [197] tidyr_1.3.1               DESeq2_1.48.1            
## [199] vcd_1.4-13                labeling_0.4.3           
## [201] cluster_2.1.8.1           aplot_0.2.5              
## [203] DelayedArray_0.34.1       tidyselect_1.2.1         
## [205] dendsort_0.3.4            car_3.1-3                
## [207] AnnotationDbi_1.70.0      future_1.49.0            
## [209] leidenAlg_1.1.5           KernSmooth_2.23-26       
## [211] laeken_0.5.3              data.table_1.17.2        
## [213] htmlwidgets_1.6.4         fgsea_1.34.0             
## [215] rlang_1.1.6               spatstat.sparse_3.1-0    
## [217] spatstat.explore_3.4-3    Cairo_1.6-2

Manuscript figures

Rasmus Rydbirk

2025-07-07

Setup

Figure 1

Load data

Figures 1b-1f

Figure 1g

Figure 1h

Figure 1i

Figure 1k

Figure 1m

Figure 1o

Figure 2

Load data

Figure 2a

Figure 2b

Figure 2c

Figures 2d,e

Figure 2d

Figure 2e

Figure 3

Load data

Figure 3a

Figure 3b

Figure 3c

Figures 3d,e

Figure 3d

Figure 3e

Figure 3f

Figures 3g,h

Figure 3g

Figure 3h

Figure 4

Load data

Figure 4a

Figure 4b

Figure 4c

Figure 4d

Figure 4e

Figure 4f

Figure 4g

Figure 4h

Figure 5

Load data

Figure 5a

Figure 5b

Figure 5c

Figure 5d

Figure 6

Figure 6a

Figure 6b

Figure 6c

Figure 6d

Figure 6e

Prepare data

Plot

Figure 7

Figures 7a-c

Figure 7d

Figure 7e

Extended Data Figure 1

Load data

Extended Data Figure 1a

Extended Data Figure 1b

Extended Data Figure 1c

Extended Data Figure 1d

Extended Data Figure 1e

Extended Data Figure 1f

Extended Data Figure 2

Load and prepare

Extended Data Figure 2a

Extended Data Figure 2b

Extended Data Figure 2c

Extended Data Figure 2d

Extended Data Figure 2e

Extended Data Figure 2g

Extended Data Figure 3

Load data

Extended Data Figure 3a