library(magrittr)
library(dplyr)
library(CRMetrics) # github.com/khodosevichlab/CRMetrics
library(sccore)
library(qs)
library(scHelper) # github.com/rrydbirk/scHelper
crm <- CRMetrics$new(data.path = "counts_vis1-2/",
verbose = TRUE,
n.cores = 32,
pal = c(ggsci::pal_npg()(10), ggsci::pal_d3()(3)))
crm$addCms(add.metadata = FALSE)
crm$addDetailedMetrics()
metrics <- crm$selectMetrics(ids = c(1,2,3,4,6,18,19))
crm$plotSummaryMetrics(metrics = metrics)
## Using 'sample' for 'comp.group'
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
crm$plotDetailedMetrics()
## Using 'sample' for 'comp.group'
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
crm$doPreprocessing()
crm$createEmbedding()
crm$plotEmbedding()
atlas.meta <- qread("ATLAS_meta.qs")
cell.names <- atlas.meta %>%
rownames() %>%
gsub("_", "!!", .) %>%
sapply(\(x) paste0(x,"-1"))
visits <- atlas.meta %>%
pull(visit) %>%
`names<-`(cell.names)
crm$plotEmbedding(groups = visits,
plot.na = FALSE,
title = "Visit",
mark.groups = FALSE,
show.legend = TRUE) +
dotSize(3)
donor <- atlas.meta %>%
pull(donor) %>%
`names<-`(cell.names)
crm$plotEmbedding(groups = donor,
plot.na = FALSE,
title = "Donors",
mark.groups = FALSE)
crm$prepareCellbender()
## 2024-11-18 21:52:56.840112 Started run using 13 cores
## 2024-11-18 21:52:56.849491 Loading HDF5 Cell Ranger outputs
## 2024-11-18 21:52:56.976368 Loading 13 count matrices using 13 cores
## 2024-11-18 21:53:39.92825 Done!
## 2024-11-18 21:53:39.929728 Calculating UMI counts per sample
## 2024-11-18 21:53:57.78629 Plotting
## 2024-11-18 21:53:58.169064 Done!
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
crm$getTotalDroplets()
## 9972 9973 9975 9976 9978 9979 9980 9981 9983 9984 9985 9986 9987
## 47364 43458 41700 15624 15297 14661 19689 19437 18780 27312 33774 29907 31023
crm$saveCellbenderScript()
crm$saveCellbenderScript(file = "cellbender_script2.sh", samples = "9972", args = "--learning-rate = 0.00001") # Rerun with new settings
Scripts are run in terminal.
For some samples, we lose a lot of cells.
crm$plotCbCells()
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
All looks good.
crm$plotCbTraining()
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
A bit many cells in the first 2-3 samples, everything’s fine.
crm$plotCbCellProbs()
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
crm$plotCbAmbExp()
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
crm$plotCbAmbGenes()
## Warning in crm$plotCbAmbGenes(): Palette has 13 colors but there are 15 genes,
## omitting palette.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
Let’s load the corrected count matrices from CellBender and plot metrics for these.
crm$data.path <- "counts_vis1-2/"
crm$cms <- NULL
crm$detailed.metrics <- NULL
crm$addCms(cellbender = T)
## 2024-11-18 21:55:15.56019 Loading 13 count matrices using 13 cores
## 2024-11-18 21:55:22.353111 Done!
## Warning in crm$addCms(cellbender = T): Overwriting metadata
## Warning in crm$addCms(cellbender = T): Consider updating embedding by setting $cms.preprocessed <- NULL and $con <- NULL, and running $doPreprocessing() and $createEmbedding().
crm$addSummaryFromCms()
## Warning in crm$addSummaryFromCms(): Overwriting existing summary metrics
## 2024-11-18 21:55:22.360764 Calculating 13 summaries using 13 cores
## 2024-11-18 21:55:32.398345 Done!
crm$plotSummaryMetrics()
## Using 'sample' for 'comp.group'
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
crm$addDetailedMetrics()
## 2024-11-18 21:55:36.43147 Counting using 13 cores
## 2024-11-18 21:55:49.202377 Creating table
## 2024-11-18 21:55:49.307545 Done!
crm$plotDetailedMetrics()
## Using 'sample' for 'comp.group'
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
These are the results for demultiplexed cell barcodes.
donor_cb <- data.frame(cell = cell.names,
donor_id = atlas.meta %>%
pull(donor))
cm.merge <- crm$cms %>%
lapply(\(cm) cm[,colnames(cm) %in% donor_cb$cell]) %>%
conos:::mergeCountMatrices()
cb.per.donor <- donor_cb %$%
split(cell, donor_id)
cms <- cb.per.donor %>%
lapply(\(donor) cm.merge[,colnames(cm.merge) %in% donor])
Let’s create an embedding of all cells per donor instead of seq run
crm <- CRMetrics$new(cms = cms, n.cores = n.cores, pal = c(ggsci::pal_npg()(10), ggsci::pal_d3()(3)))
crm$addSummaryFromCms()
crm$addDetailedMetrics()
crm$doPreprocessing()
crm$createEmbedding()
crm$plotEmbedding(title = "Clusters")
We can plot cells per donor.
spc <- crm$con$getDatasetPerCell()
crm$plotEmbedding(groups = spc, mark.groups = F, title = "Donors")
seqs <- spc %>%
names() %>%
strsplit("!!") %>%
sapply('[[', 2) %>%
`names<-`(spc %>% names())
crm$plotEmbedding(groups = seqs, mark.groups = F, title = "Seq runs")
visits <- atlas.meta %>%
nrow() %>%
seq() %>%
sapply(\(x) paste0(atlas.meta$donor[x],"!!",gsub("_", "!!", rownames(atlas.meta)[x]),"-1"))
visits %<>%
`names<-`(atlas.meta %>%
pull(visit), .)
crm$plotEmbedding(groups = visits, mark.groups = F, title = "Visits")
We’ll highlight all cells with UMI count < 1,000. We find some cells with low depth, but they are sparse and evenly distributed.
crm$plotEmbedding(depth = T, depth.cutoff = 1e3)
Instead, let’s look at a cutoff of 600 UMIs/cell. Now, we can see how some of the cells cluster at intersections between clusters which is more likely to be problematic. Nevertheless, we have few cells with low depth.
crm$plotEmbedding(depth = T, depth.cutoff = 6e2)
Similarly, we plot cells per donor with UMI count < 600. We see the cells are evenly distributed across donors.
crm$plotDepth(cutoff = 6e2)
Let’s plot cells with high (>5%) mitochondrial fraction. There’s not a lot, valiDrops caught them during demultiplexing. However, we do see some distinct “clusters” or patches of cells with low mito. fraction.
crm$plotEmbedding(mito.frac = T, mito.cutoff = 0.05)
If we plot mito. fraction per donor, we see that cells with high fractions are evenly distributed across donors.
crm$plotMitoFraction()
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
Let’s look at doublets. We already removed some doublets during demultiplexing, but there may be more.
crm$detectDoublets(export = T, method = "doubletdetection", data.path = "doubletdetection/visit1-2/")
## Found 14 results for doubletdetection
## Detected 19 possible doublets out of 63249 cells.
crm$addDoublets(method = "doubletdetection", data.path = "doubletdetection/visit1-2/")
We highlight possible doublets. In this case, we find almost no additional doublets.
crm$plotEmbedding(doublet.method = "doubletdetection")
We plot all cells to be filtered
crm$plotFilteredCells(type = "embedding", depth = T, depth.cutoff = 6e2, mito.frac = T, mito.cutoff = 0.05, doublet.method = "doubletdetection")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
We show proportion of cells to be filtered per donor
crm$plotFilteredCells(type = "bar", depth = T, depth.cutoff = 6e2, mito.frac = T, mito.cutoff = 0.05, doublet.method = "doubletdetection")
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
Finally, an overview of specific filters per donor. All looks good, no bad samples
crm$plotFilteredCells(type = "tile", depth = T, depth.cutoff = 6e2, mito.frac = T, mito.cutoff = 0.05, doublet.method = "doubletdetection")
## Warning in plot_theme(plot): The `legend.text.align` theme element is not
## defined in the element hierarchy.
## Warning in plot_theme(plot): The `legend.title.align` theme element is not
## defined in the element hierarchy.
We do the filtering.
crm$filterCms(depth.cutoff = 6e2, mito.cutoff = 0.05, doublets = "doubletdetection")
## Filtering based on: depth.cutoff = 600; mito.cutoff = 0.05 and species = human; mito.cutoff = 0.05 and species = mouse; doublet method = doubletdetection
## 2024-11-18 21:57:10.720802 Preparing filter
## 2024-11-18 21:57:12.664355 Removing 1196 of 63249 cells (1.89%)
Now, we remove mito. and ribo. genes since we’ve seen they skew several downstream analyses.
genes.to.omit <- crm$cms.filtered %>%
lapply(rownames) %>%
Reduce(union, .) %>%
.[grepl("MT-|RPS|RPL", .)] %>%
.[!. %in% c("TRPS1","PURPL")]
crm$cms.filtered %<>% lapply(\(x) x[!rownames(x) %in% genes.to.omit, ])
Split objects per visit
cm.merge <- crm$cms.filtered %>%
lapply(\(cm) cm[,colnames(cm) %in% names(visits)]) %>%
conos:::mergeCountMatrices()
cms.vis1 <- spc[visits[visits == "Visit1"] %>% names()] %>%
split(names(.), .) %>%
lapply(\(donor) cm.merge[,colnames(cm.merge) %in% donor])
cms.vis2 <- spc[visits[visits == "Visit2"] %>% names()] %>%
split(names(.), .) %>%
lapply(\(donor) cm.merge[,colnames(cm.merge) %in% donor])