Setup

library(magrittr)
library(dplyr)
library(CRMetrics) # github.com/khodosevichlab/CRMetrics
library(sccore)
library(qs)
library(scHelper) # github.com/rrydbirk/scHelper

QC per seq run

crm <- CRMetrics$new(data.path = "counts_vis1-2/", 
                     verbose = TRUE, 
                     n.cores = 32, 
                     pal = c(ggsci::pal_npg()(10), ggsci::pal_d3()(3)))
crm$addCms(add.metadata = FALSE)
crm$addDetailedMetrics()

Plot summary metrics

metrics <- crm$selectMetrics(ids = c(1,2,3,4,6,18,19))
crm$plotSummaryMetrics(metrics = metrics)
## Using 'sample' for 'comp.group'
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.

crm$plotDetailedMetrics()
## Using 'sample' for 'comp.group'
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.

Initial embedding

crm$doPreprocessing()
crm$createEmbedding()
crm$plotEmbedding()

Per visit

atlas.meta <- qread("ATLAS_meta.qs")
cell.names <- atlas.meta %>%
  rownames() %>% 
  gsub("_", "!!", .) %>% 
  sapply(\(x) paste0(x,"-1"))

visits <- atlas.meta %>% 
  pull(visit) %>% 
  `names<-`(cell.names)

crm$plotEmbedding(groups = visits, 
                  plot.na = FALSE, 
                  title = "Visit", 
                  mark.groups = FALSE, 
                  show.legend = TRUE) + 
  dotSize(3)

Per donor

donor <- atlas.meta %>% 
  pull(donor) %>% 
  `names<-`(cell.names)

crm$plotEmbedding(groups = donor, 
                  plot.na = FALSE, 
                  title = "Donors", 
                  mark.groups = FALSE)

CellBender

crm$prepareCellbender()
## 2024-11-18 21:52:56.840112 Started run using 13 cores
## 2024-11-18 21:52:56.849491 Loading HDF5 Cell Ranger outputs
## 2024-11-18 21:52:56.976368 Loading 13 count matrices using 13 cores
## 2024-11-18 21:53:39.92825 Done!
## 2024-11-18 21:53:39.929728 Calculating UMI counts per sample
## 2024-11-18 21:53:57.78629 Plotting
## 2024-11-18 21:53:58.169064 Done!
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.

crm$getTotalDroplets()
##  9972  9973  9975  9976  9978  9979  9980  9981  9983  9984  9985  9986  9987 
## 47364 43458 41700 15624 15297 14661 19689 19437 18780 27312 33774 29907 31023
crm$saveCellbenderScript()
crm$saveCellbenderScript(file = "cellbender_script2.sh", samples = "9972", args = "--learning-rate = 0.00001") # Rerun with new settings

Scripts are run in terminal.

Plots

For some samples, we lose a lot of cells.

crm$plotCbCells()
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.

All looks good.

crm$plotCbTraining()
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.

A bit many cells in the first 2-3 samples, everything’s fine.

crm$plotCbCellProbs()
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.

crm$plotCbAmbExp()
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.

crm$plotCbAmbGenes()
## Warning in crm$plotCbAmbGenes(): Palette has 13 colors but there are 15 genes,
## omitting palette.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.

Load and plot cleaned CMs

Let’s load the corrected count matrices from CellBender and plot metrics for these.

crm$data.path <- "counts_vis1-2/"
crm$cms <- NULL
crm$detailed.metrics <- NULL
crm$addCms(cellbender = T)
## 2024-11-18 21:55:15.56019 Loading 13 count matrices using 13 cores
## 2024-11-18 21:55:22.353111 Done!
## Warning in crm$addCms(cellbender = T): Overwriting metadata
## Warning in crm$addCms(cellbender = T): Consider updating embedding by setting $cms.preprocessed <- NULL and $con <- NULL, and running $doPreprocessing() and $createEmbedding().
crm$addSummaryFromCms()
## Warning in crm$addSummaryFromCms(): Overwriting existing summary metrics
## 2024-11-18 21:55:22.360764 Calculating 13 summaries using 13 cores
## 2024-11-18 21:55:32.398345 Done!
crm$plotSummaryMetrics()
## Using 'sample' for 'comp.group'
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.

crm$addDetailedMetrics()
## 2024-11-18 21:55:36.43147 Counting using 13 cores
## 2024-11-18 21:55:49.202377 Creating table
## 2024-11-18 21:55:49.307545 Done!
crm$plotDetailedMetrics()
## Using 'sample' for 'comp.group'
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.

QC per donor

Get demultiplexed CBs

These are the results for demultiplexed cell barcodes.

donor_cb <- data.frame(cell = cell.names,
                       donor_id = atlas.meta %>% 
                         pull(donor))
cm.merge <- crm$cms %>% 
  lapply(\(cm) cm[,colnames(cm) %in% donor_cb$cell]) %>% 
  conos:::mergeCountMatrices()

cb.per.donor <- donor_cb %$%
  split(cell, donor_id)

cms <- cb.per.donor %>% 
  lapply(\(donor) cm.merge[,colnames(cm.merge) %in% donor])

Create embedding

Let’s create an embedding of all cells per donor instead of seq run

crm <- CRMetrics$new(cms = cms, n.cores = n.cores, pal = c(ggsci::pal_npg()(10), ggsci::pal_d3()(3)))
crm$addSummaryFromCms()
crm$addDetailedMetrics()
crm$doPreprocessing()
crm$createEmbedding()

Clusters

crm$plotEmbedding(title = "Clusters")

We can plot cells per donor.

spc <- crm$con$getDatasetPerCell()
crm$plotEmbedding(groups = spc, mark.groups = F, title = "Donors")

Per seq

seqs <- spc %>% 
  names() %>% 
  strsplit("!!") %>% 
  sapply('[[', 2) %>% 
  `names<-`(spc %>% names())

crm$plotEmbedding(groups = seqs, mark.groups = F, title = "Seq runs")

Per visit

visits <- atlas.meta %>% 
  nrow() %>% 
  seq() %>% 
  sapply(\(x) paste0(atlas.meta$donor[x],"!!",gsub("_", "!!", rownames(atlas.meta)[x]),"-1"))

visits %<>%
  `names<-`(atlas.meta %>% 
              pull(visit), .)

crm$plotEmbedding(groups = visits, mark.groups = F, title = "Visits")

Depth

We’ll highlight all cells with UMI count < 1,000. We find some cells with low depth, but they are sparse and evenly distributed.

crm$plotEmbedding(depth = T, depth.cutoff = 1e3)

Instead, let’s look at a cutoff of 600 UMIs/cell. Now, we can see how some of the cells cluster at intersections between clusters which is more likely to be problematic. Nevertheless, we have few cells with low depth.

crm$plotEmbedding(depth = T, depth.cutoff = 6e2)

Similarly, we plot cells per donor with UMI count < 600. We see the cells are evenly distributed across donors.

crm$plotDepth(cutoff = 6e2)

Mitochondrial fraction

Let’s plot cells with high (>5%) mitochondrial fraction. There’s not a lot, valiDrops caught them during demultiplexing. However, we do see some distinct “clusters” or patches of cells with low mito. fraction.

crm$plotEmbedding(mito.frac = T, mito.cutoff = 0.05)

If we plot mito. fraction per donor, we see that cells with high fractions are evenly distributed across donors.

crm$plotMitoFraction()
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.

Doublets

Let’s look at doublets. We already removed some doublets during demultiplexing, but there may be more.

crm$detectDoublets(export = T, method = "doubletdetection", data.path = "doubletdetection/visit1-2/")
## Found 14 results for doubletdetection
## Detected 19 possible doublets out of 63249 cells.
crm$addDoublets(method = "doubletdetection", data.path = "doubletdetection/visit1-2/")

We highlight possible doublets. In this case, we find almost no additional doublets.

crm$plotEmbedding(doublet.method = "doubletdetection")

Filter cells

We plot all cells to be filtered

crm$plotFilteredCells(type = "embedding", depth = T, depth.cutoff = 6e2, mito.frac = T, mito.cutoff = 0.05, doublet.method = "doubletdetection")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

We show proportion of cells to be filtered per donor

crm$plotFilteredCells(type = "bar", depth = T, depth.cutoff = 6e2, mito.frac = T, mito.cutoff = 0.05, doublet.method = "doubletdetection")
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.

Finally, an overview of specific filters per donor. All looks good, no bad samples

crm$plotFilteredCells(type = "tile", depth = T, depth.cutoff = 6e2, mito.frac = T, mito.cutoff = 0.05, doublet.method = "doubletdetection")
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.

We do the filtering.

crm$filterCms(depth.cutoff = 6e2, mito.cutoff = 0.05, doublets = "doubletdetection")
## Filtering based on: depth.cutoff = 600; mito.cutoff = 0.05 and species = human; mito.cutoff = 0.05 and species = mouse; doublet method = doubletdetection
## 2024-11-18 21:57:10.720802 Preparing filter
## 2024-11-18 21:57:12.664355 Removing 1196 of 63249 cells (1.89%)

Remove mito. and ribo. genes

Now, we remove mito. and ribo. genes since we’ve seen they skew several downstream analyses.

genes.to.omit <- crm$cms.filtered %>% 
  lapply(rownames) %>% 
  Reduce(union, .) %>% 
  .[grepl("MT-|RPS|RPL", .)] %>% 
  .[!. %in% c("TRPS1","PURPL")]

crm$cms.filtered %<>% lapply(\(x) x[!rownames(x) %in% genes.to.omit, ])

Split objects per visit

cm.merge <- crm$cms.filtered %>% 
  lapply(\(cm) cm[,colnames(cm) %in% names(visits)]) %>% 
  conos:::mergeCountMatrices()

cms.vis1 <- spc[visits[visits == "Visit1"] %>% names()] %>%
  split(names(.), .) %>% 
  lapply(\(donor) cm.merge[,colnames(cm.merge) %in% donor])

cms.vis2 <- spc[visits[visits == "Visit2"] %>% names()] %>% 
  split(names(.), .) %>% 
  lapply(\(donor) cm.merge[,colnames(cm.merge) %in% donor])