## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----eval=FALSE--------------------------------------------------------------- # suppressMessages(library(SingleCellExperiment)) # sce = SingleCellExperiment(assays = list(counts = cell.rna.raw), colData = cellmd) # # define the dsb normalized values as logcounts to use a common SingleCellExperiment / Bioconductor convention # adt = SummarizedExperiment( # assays = list( # 'counts' = as.matrix(cell.adt.raw), # 'logcounts' = as.matrix(cell.adt.dsb) # ) # ) # altExp(sce, "CITE") = adt ## ----eval = FALSE------------------------------------------------------------- # library(reticulate); sc = import("scanpy") # # # merge dsb-normalized protein and raw RNA data # combined_dat = rbind(cell.rna.raw, cell.adt.dsb) # s[["combined_data"]] = CreateAssayObject(data = combined_dat) # # # create Anndata Object # adata_seurat = sc$AnnData( # X = t(GetAssayData(s,assay = "combined_data")), # obs = seurat@meta.data, # var = GetAssay(seurat)[[]] # ) ## ----eval = FALSE------------------------------------------------------------- # # suggested workflow if isotype controls are not included # dsb_rescaled = DSBNormalizeProtein(cell_protein_matrix = cells_citeseq_mtx, # empty_drop_matrix = empty_drop_citeseq_mtx, # # do not denoise each cell's technical component # denoise.counts = FALSE) ## ----eval = FALSE------------------------------------------------------------- # # dsb_rescaled = dsb::DSBNormalizeProtein(cell_protein_matrix = cells_citeseq_mtx, # empty_drop_matrix = empty_drop_citeseq_mtx, # # denoise with background mean only # denoise.counts = TRUE, # use.isotype.control = FALSE) # ## ----eval=FALSE--------------------------------------------------------------- # # raw = Read10X see above -- path to cell ranger outs/raw_feature_bc_matrix ; # # # partial thresholding to slightly subset negative drops include all with 5 unique mRNAs # seurat_object = CreateSeuratObject(raw, min.genes = 5) # # # demultiplex (positive.quantile can be tuned to dataset depending on size) # seurat_object = HTODemux(seurat_object, assay = "HTO", positive.quantile = 0.99) # Idents(seurat_object) = "HTO_classification.global" # # # subset empty drop/background and cells # neg_object = subset(seurat_object, idents = "Negative") # singlet_object = subset(seurat_object, idents = "Singlet") # # ## (QC the negative object to filter out cells with high RNA content) # # quick example below, different crteria can be used # # this step depends on dataset; see main vignette for more principled filtering # neg_object = subset(seurat_object, idents = "Negative", nGene < 80) # # # # non sparse CITEseq data store more efficiently in a regular matrix # neg_adt_matrix = GetAssayData(neg_object, assay = "CITE", slot = 'counts') %>% as.matrix() # positive_adt_matrix = GetAssayData(singlet_object, assay = "CITE", slot = 'counts') %>% as.matrix() # # # normalize the data with dsb # dsb_norm_prot = DSBNormalizeProtein( # cell_protein_matrix = cells_mtx_rawprot, # empty_drop_matrix = negative_mtx_rawprot, # denoise.counts = TRUE, # use.isotype.control = TRUE, # isotype.control.name.vec = rownames(cells_mtx_rawprot)[30:32]) # # # now add the normalized dat back to the object (the singlets defined above as "object") # singlet_object[["CITE"]] = CreateAssayObject(data = dsb_norm_prot) # # # proceed with same tutorial workflow shown above. ## ----------------------------------------------------------------------------- library(dsb) result.list = DSBNormalizeProtein( cell_protein_matrix = cells_citeseq_mtx[ ,1:50], empty_drop_matrix = empty_drop_citeseq_mtx, denoise.counts = TRUE, use.isotype.control = TRUE, isotype.control.name.vec = rownames(cells_citeseq_mtx)[67:70], return.stats = TRUE ) ## ----------------------------------------------------------------------------- names(result.list) ## ----------------------------------------------------------------------------- names(result.list$protein_stats) ## ----------------------------------------------------------------------------- head(result.list$technical_stats) ## ----eval = FALSE------------------------------------------------------------- # dsb_norm_prot = DSBNormalizeProtein( # cell_protein_matrix = cells_citeseq_mtx, # empty_drop_matrix = empty_drop_citeseq_mtx, # denoise.counts = TRUE, # use.isotype.control = TRUE, # isotype.control.name.vec = rownames(cells_citeseq_mtx)[67:70], # # implement Quantile clipping # quantile.clipping = TRUE # # high and low otlier quantile across proteins to clip # # the `quantile.clip` parameter can be adjusted: # quantile.clip = c(0.001, 0.9995) # ) ## ----eval = FALSE------------------------------------------------------------- # # dsb_norm_prot = DSBNormalizeProtein( # cell_protein_matrix = cells_citeseq_mtx, # empty_drop_matrix = empty_drop_citeseq_mtx, # denoise.counts = TRUE, # use.isotype.control = TRUE, # isotype.control.name.vec = rownames(cells_citeseq_mtx)[67:70], # quantile.clipping = TRUE, # scale.factor = 'mean.subtract' # ) # ## ----eval = FALSE------------------------------------------------------------- # # find outliers # pheatmap::pheatmap(apply(dsb_norm_prot, 1, function(x){ # quantile(x,c(0.9999, 0.99, 0.98, 0.95, 0.0001, 0.01, 0.1)) # })) # ## ----eval=FALSE--------------------------------------------------------------- # # dsb_object = DSBNormalizeProtein(cell_protein_matrix = dsb::cells_citeseq_mtx, # empty_drop_matrix = dsb::empty_drop_citeseq_mtx, # denoise.counts = TRUE, # isotype.control.name.vec = rownames(dsb::cells_citeseq_mtx)[67:70], # return.stats = TRUE) # d = as.data.frame(dsb_object$dsb_stats) # # # test correlation of background mean with the inferred dsb technical component # cor(d$cellwise_background_mean, d$dsb_technical_component) # # # test average isotype control value correlation with the background mean # isotype_names = rownames(dsb::cells_citeseq_mtx)[67:70] # cor(rowMeans(d[,isotype_names]), d$cellwise_background_mean) # ## ----eval = FALSE------------------------------------------------------------- # library(Seurat) # for Read10X helper function # # # path_to_reads = here("data/") # umi.files = list.files(path_to_reads, full.names=T, pattern = "10x" ) # umi.list = lapply(umi.files, function(x) Read10X(data.dir = paste0(x,"/outs/raw_feature_bc_matrix/"))) # prot = rna = list() # for (i in 1:length(umi.list)) { # prot[[i]] = umi.list[[i]]`Antibody Capture` # rna[[i]] = umi.list[[i]]`Gene Expression` # colnames(prot[[i]]) = paste0(colnames(prot[[i]]),"_", i ) # colnames(rna[[i]]) = paste0(colnames(rna[[i]]),"_", i ) # } # prot = do.call(cbind, prot) # rna = do.call(cbind, rna) # # proceed with step 1 in tutorial - define background and cell containing drops for dsb #