ROSMAP snRNA-seq: QC to Clustering

Example pipeline to perform snRNA-seq data analysis using the ROSMAP data

First, dowload the snRNA-seq count matrix and associated meta tables into a folder, say, “Data”. In this tutorial, we will download the filtered data provided by Mathys et al from the Synapse at https://www.synapse.org/#!Synapse:syn18681734. After downloading, the Data folder should have the following files: filtered_column_metadata.txt, filtered_count_matrix.mtx, and filtered_gene_row_names.txt. Additionally, we can also download files 41586_2019_1195_MOESM3_ESM.xlsx and 41586_2019_1195_MOESM5_ESM.xlsx from the Supplementary Tables 1 &3 of the Mathys et al paper at https://www.nature.com/articles/s41586-019-1195-2#additional-information, as well as a mapping key file “snRNAseqPFC_BA10_id_mapping.csv” (https://www.synapse.org/#!Synapse:syn18694015) and a biospecimen meta file “snRNAseqPFC_BA10_biospecimen_metadata.csv” (https://www.synapse.org/#!Synapse:syn18642936) to add in more sample demographic information.

Load necessary libraries and set up input and output locations.

library(ggplot2)
library(Seurat)
library(dplyr)
library(patchwork)
library(Matrix)
library(openxlsx)
indir = 'Data/'
outdir = "Output/"
if(!file.exists(outdir)) dir.create(outdir, recursive = TRUE)

Let us read in sample meta data.

key = read.csv(paste0(indir, 'snRNAseqPFC_BA10_id_mapping.csv'), as.is = TRUE, header = TRUE)
key = unique(key[, c('projid', 'Subject')])
natsup1 = read.xlsx(paste0(indir, '41586_2019_1195_MOESM3_ESM.xlsx'), sheet = "S1")
natsup1$Diagnosis = c(YES = 'AD', NO = 'Control')[natsup1[, 'pathologic.diagnosis.of.AD']]
natsup3 = read.xlsx(paste0(indir, '41586_2019_1195_MOESM5_ESM.xlsx'), sheet = 1)
natsup = data.frame(natsup1, natsup3[match(natsup1$Subject, natsup3$Subject), !colnames(natsup3) %in% colnames(natsup1)], stringsAsFactors = FALSE)
biospecimen = read.csv(paste0(indir, 'snRNAseqPFC_BA10_biospecimen_metadata.csv'), as.is=TRUE, header = TRUE)
natsup$projid = key[match(natsup$Subject, key$Subject), 'projid']
natsup$individualID = biospecimen[match(natsup$projid, biospecimen$projid), 'individualID']
head(natsup, 5)

##   Subject study          age_death educ msex      gpath amyloid     plaq_n
## 1    ROS1  ROS                 90+   23    0 0.01718225       0 0.00000000
## 2    ROS2  ROS                 90+   24    0 0.03055274       0 0.00000000
## 3    ROS3  ROS  79.115674195756327   18    0 0.08986113       0 0.00000000
## 4    ROS4  ROS                 90+   18    0 0.03985507       0 0.03098783
## 5    ROS5  ROS  88.925393566050644   16    0 0.02080884       0 0.00000000
##   cogdx pathologic.diagnosis.of.AD Diagnosis        nft   tangles
## 1     1                         NO   Control 0.02975843 0.3504917
## 2     1                         NO   Control 0.09166858 0.8630875
## 3     1                         NO   Control 0.11891828 0.2748281
## 4     4                         NO   Control 0.08918715 0.6956833
## 5     1                         NO   Control 0.06252178 0.8345100
##   cogn_global_lv gpath_3neocort amyloid.group caa_4gp ceradsc braaksc
## 1     1.18041075    0.012015007           low       0       4       1
## 2     0.10671595    0.000000000           low       0       4       3
## 3     1.19261623    0.082914650           low       1       4       3
## 4    -1.70813203    0.039258059           low       3       3       2
## 5    -0.02834499    0.009967699           low       0       4       4
##   niareagansc pathology.group   projid individualID
## 1           3    no-pathology 20207013     R2880377
## 2           3    no-pathology 21412626     R4739508
## 3           3    no-pathology 20956867     R8725848
## 4           3    no-pathology 20104101     R4567280
## 5           3    no-pathology 21142003     R7288382

Write compiled sample meta table into a file.

write.table(natsup, paste0(outdir, "meta.samples.tsv"), quote = FALSE, sep = '\t', row.names = FALSE, col.names = TRUE)

Now read in single cell data.

meta = read.table(paste0(indir, 'filtered_column_metadata.txt'), as.is = TRUE, header = TRUE)
colnames(meta)[colnames(meta) == 'TAG'] = 'Barcode'
meta = data.frame(meta, natsup[match(meta$projid, natsup$projid), colnames(natsup) != 'projid'], stringsAsFactors = FALSE)
rownames(meta) = meta$Barcode
#read in count matrix
dat = readMM(paste0(indir, 'filtered_count_matrix.mtx'))
features = scan(paste0(indir, 'filtered_gene_row_names.txt'), what = character())
rownames(dat) = features
colnames(dat) = meta$Barcode

Create Seurat object from the count matrix.

SeuratObject = CreateSeuratObject(counts = dat, project = "SeuratObjectProject", min.cells = 3, min.features = 200, names.field=1, names.delim='.', meta.data=meta)
SeuratObject@meta.data$orig.ident = NULL
SeuratObject@assays$RNA@meta.features = data.frame(Geneid = rownames(SeuratObject), Symbol = rownames(SeuratObject), row.names = rownames(SeuratObject), stringsAsFactors = FALSE)
rm(dat)

Note that the mitochondrial reads has been removed by the original study from the count matrix we are using here. However to check the fraction of mitochondrial reads in a new dataset, execute a command like the following

SeuratObject = PercentageFeatureSet(SeuratObject, pattern = "^MT-", col.name = "percent.mt")
summary(SeuratObject[[]]$percent.mt)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0       0       0       0

Now we can check the data qulity by first plotting the distribution of number of features and total read counts.

meta1 = SeuratObject@meta.data
write.table(meta1, file=paste0(outdir, 'meta.seurat.raw.tsv'), quote = FALSE, sep = '\t', row.names = FALSE)
meta1 = meta1[rev(order(meta1$Diagnosis)), ]
meta1$sid = unsplit(lapply(split(meta1, meta1$Diagnosis), function(x) paste0(c(AD = 'AD', Control = 'Ct')[x$Diagnosis[1]], as.integer(factor(x$projid, level = unique(x$projid))))), meta1$Diagnosis)
meta1$sid = factor(meta1$sid, levels = unique(meta1$sid))
theme_bw2 = function() theme_bw() + theme(panel.grid = element_blank())
p1 = ggplot(meta1, aes(x = sid, y = nFeature_RNA, fill = Diagnosis)) + geom_violin(width = 0.8, scale = 'width') + theme_bw2() + theme(legend.position = 'none', axis.text.x = element_text(angle = 45, size = 6, hjust = 1), axis.text.y = element_text(size = 7)) + xlab(NULL)
p2 = ggplot(meta1, aes(x = sid, y = nCount_RNA, fill = Diagnosis)) + geom_violin(width = 0.8, scale = 'width') + theme_bw2() + theme(legend.position = 'none', axis.text.x = element_text(angle = 45, size = 6, hjust = 1), axis.text.y = element_text(size = 7)) + xlab(NULL)
print(p1 / p2)

We can also use FeatureScatter to visualize feature-feature relationships per donor.

SeuratObject@meta.data$projid = factor(SeuratObject@meta.data$projid)
p3 = FeatureScatter(SeuratObject, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", pt.size=0.1, group.by='projid') + theme(title=element_text(size=8), legend.text=element_text(size=6), legend.title=element_text(size=7))
print(p3)

Perform some simple quality control (QC) by first filtering out cells that have unique gene counts over 6,000 or less than 200. The filter on percent.mt has no actual effect here and is just used for illustration purpose.

SeuratObject

## An object of class Seurat 
## 17775 features across 70634 samples within 1 assay 
## Active assay: RNA (17775 features)

SeuratObject = subset(SeuratObject, subset = nFeature_RNA > 200 & nFeature_RNA < 6000 & percent.mt < 5)
SeuratObject

## An object of class Seurat 
## 17775 features across 69787 samples within 1 assay 
## Active assay: RNA (17775 features)

After QC, normalize the count data using Seurat’s SCTransform function. This may take a while to finish.

SeuratObject = SCTransform(SeuratObject, verbose = TRUE)

## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |==========================================                            |  59%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |==========================================                            |  59%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================| 100%

Extract gene-level statistics and make plots such as the detection rate.

genes = SeuratObject@assays$RNA@meta.features[rownames(SeuratObject), ]
genes = data.frame(genes, SeuratObject@assays[['SCT']]@meta.features, row.names = rownames(genes), stringsAsFactors = FALSE)
hist(genes[, 'sct.detection_rate'])

Perform dimensionality reduction by PCA.

SeuratObject = RunPCA(SeuratObject, verbose = FALSE)
#Plot PCA elbow
ElbowPlot(SeuratObject, ndims = 50)

After inspecting the elbow plot, we choose principal components 1:30 for UMAP/tSEN visualization and clustering.

SeuratObject = RunUMAP(SeuratObject, dims = 1:30, verbose = FALSE)
SeuratObject = RunTSNE(SeuratObject, dims = 1:30, verbose = FALSE)
SeuratObject = FindNeighbors(SeuratObject, verbose = FALSE)
SeuratObject = FindClusters(SeuratObject, resolution = 0.8, verbose = FALSE)

Plot UMAP.

p1 <- DimPlot(SeuratObject, reduction = "umap", label = TRUE)

## Warning: Using `as.character()` on a quosure is deprecated as of rlang 0.3.0.
## Please use `as_label()` or `as_name()` instead.
## This warning is displayed once per session.

print(p1)

Save the SeuratObject into a file.

saveRDS(SeuratObject, file = paste0(outdir, 'SeuratObject.RDS'))

A precomputed version of the SeuratObject file is available at https://www.synapse.org/#!Synapse:syn26477245

Session info

sessionInfo()

## R version 3.6.0 (2019-04-26)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS/LAPACK: /hpc/packages/minerva-common/intel/parallel_studio_xe_2018/compilers_and_libraries_2018.1.163/linux/mkl/lib/intel64_lin/libmkl_gf_lp64.so
## 
## locale:
## [1] C
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] openxlsx_4.1.4  Matrix_1.3-2    patchwork_1.1.1 dplyr_1.0.6    
## [5] Seurat_3.1.4    ggplot2_3.3.3  
## 
## loaded via a namespace (and not attached):
##   [1] TH.data_1.0-10      Rtsne_0.15          colorspace_2.0-1   
##   [4] ellipsis_0.3.2      ggridges_0.5.1      farver_2.1.0       
##   [7] leiden_0.3.1        listenv_0.8.0       npsurv_0.4-0       
##  [10] ggrepel_0.9.1       RSpectra_0.16-0     fansi_0.5.0        
##  [13] mvtnorm_1.0-11      codetools_0.2-16    splines_3.6.0      
##  [16] mnormt_1.5-5        lsei_1.2-0          knitr_1.26         
##  [19] TFisher_0.2.0       jsonlite_1.7.2      ica_1.0-2          
##  [22] cluster_2.0.8       png_0.1-7           uwot_0.1.5         
##  [25] sctransform_0.2.1   compiler_3.6.0      httr_1.4.1         
##  [28] assertthat_0.2.1    lazyeval_0.2.2      htmltools_0.5.0    
##  [31] tools_3.6.0         rsvd_1.0.2          igraph_1.2.6       
##  [34] gtable_0.3.0        glue_1.4.2          RANN_2.6.1         
##  [37] reshape2_1.4.4      rappdirs_0.3.3      Rcpp_1.0.6         
##  [40] Biobase_2.46.0      vctrs_0.3.8         multtest_2.42.0    
##  [43] gdata_2.18.0        ape_5.3             nlme_3.1-139       
##  [46] gbRd_0.4-11         lmtest_0.9-37       xfun_0.12          
##  [49] stringr_1.4.0       globals_0.14.0      lifecycle_1.0.0    
##  [52] irlba_2.3.3         gtools_3.8.1        future_1.21.0      
##  [55] MASS_7.3-51.4       zoo_1.8-6           scales_1.1.1       
##  [58] parallel_3.6.0      sandwich_2.5-1      RColorBrewer_1.1-2 
##  [61] yaml_2.2.1          reticulate_1.20     pbapply_1.4-3      
##  [64] gridExtra_2.3       stringi_1.6.2       mutoss_0.1-12      
##  [67] plotrix_3.7-7       caTools_1.18.0      BiocGenerics_0.32.0
##  [70] zip_2.0.4           bibtex_0.4.2.1      Rdpack_0.11-1      
##  [73] rlang_0.4.11        pkgconfig_2.0.3     bitops_1.0-6       
##  [76] evaluate_0.14       lattice_0.20-38     ROCR_1.0-7         
##  [79] purrr_0.3.4         labeling_0.4.2      htmlwidgets_1.5.1  
##  [82] cowplot_1.1.1       tidyselect_1.1.1    parallelly_1.25.0  
##  [85] RcppAnnoy_0.0.14    plyr_1.8.6          magrittr_2.0.1     
##  [88] R6_2.5.0            gplots_3.0.3        generics_0.1.0     
##  [91] multcomp_1.4-11     DBI_1.1.0           pillar_1.6.1       
##  [94] withr_2.4.2         sn_1.5-4            fitdistrplus_1.0-14
##  [97] survival_2.44-1.1   tibble_3.1.2        future.apply_1.7.0 
## [100] tsne_0.1-3          crayon_1.4.1        KernSmooth_2.23-15 
## [103] utf8_1.2.1          plotly_4.9.2.1      rmarkdown_1.12     
## [106] grid_3.6.0          data.table_1.12.8   blob_1.2.0         
## [109] metap_1.2           digest_0.6.27       tidyr_1.1.3        
## [112] numDeriv_2016.8-1.1 RcppParallel_4.4.4  stats4_3.6.0       
## [115] munsell_0.5.0       viridisLite_0.4.0

ROSMAP snRNA-seq: QC to Clustering

Minghui Wang

November 23, 2021

Example pipeline to perform snRNA-seq data analysis using the ROSMAP data

Session info