Automatic ClustAssess pipeline. Generating the ClustAssess shiny-app • ClustAssess

ClustAssess provide the option to run the entire stability pipeline automatically, by choosing the parameters based on their highest stability. Another useful feature is the option to create, based on this object, a shiny app that user can interact with and perform the assessment of the PhenoGraph configuration and of the clusters.

library(Seurat)
#> Loading required package: SeuratObject
#> Loading required package: sp
#> 
#> Attaching package: 'SeuratObject'
#> The following object is masked from 'package:base':
#> 
#>     intersect
library(ClustAssess)
library(SeuratData)
packageVersion("ClustAssess") # should be 1.0.0
#> [1] '1.0.0'

Process the PBMC 3k Seurat object similarly to the Stability-based parameter assessment vignette.

InstallData("pbmc3k")
#> Warning: The following packages are already installed and will not be
#> reinstalled: pbmc3k
data("pbmc3k")
pbmc3k <- UpdateSeuratObject(pbmc3k)
#> Validating object structure
#> Updating object slots
#> Ensuring keys are in the proper structure
#> Warning: Assay RNA changing from Assay to Assay
#> Ensuring keys are in the proper structure
#> Ensuring feature names don't have underscores or pipes
#> Updating slots in RNA
#> Validating object structure for Assay 'RNA'
#> Object representation is consistent with the most current Seurat version
pbmc3k <- PercentageFeatureSet(pbmc3k, pattern = "^MT-", col.name = "percent.mito")
pbmc3k <- PercentageFeatureSet(pbmc3k, pattern = "^RP[SL][[:digit:]]", col.name = "percent.rp")
# remove MT and RP genes
all.index <- seq_len(nrow(pbmc3k))
MT.index <- grep(pattern = "^MT-", x = rownames(pbmc3k), value = FALSE)
RP.index <- grep(pattern = "^RP[SL][[:digit:]]", x = rownames(pbmc3k), value = FALSE)
pbmc3k <- pbmc3k[!((all.index %in% MT.index) | (all.index %in% RP.index)), ]
pbmc3k <- subset(pbmc3k, nFeature_RNA < 2000 & nCount_RNA < 2500 & percent.mito < 7 & percent.rp > 7)
pbmc3k <- NormalizeData(pbmc3k, verbose = FALSE)
pbmc3k <- FindVariableFeatures(pbmc3k, selection.method = "vst", nfeatures = 3000, verbose = FALSE)

features <- dimnames(pbmc3k@assays$RNA)[[1]]
var_features <- pbmc3k@assays[["RNA"]]@var.features
n_abundant <- 3000
most_abundant_genes <- rownames(pbmc3k@assays$RNA)[order(Matrix::rowSums(pbmc3k@assays$RNA),
    decreasing = TRUE
)]

pbmc3k <- ScaleData(pbmc3k, features = features, verbose = FALSE)

pbmc3k <- RunPCA(pbmc3k,
    npcs = 30,
    approx = FALSE,
    verbose = FALSE,
    features = intersect(most_abundant_genes, pbmc3k@assays$RNA@var.features)
)
pbmc3k <- RunUMAP(pbmc3k,
    reduction = "pca",
    dims = 1:30,
    n.neighbors = 30,
    min.dist = 0.3,
    metric = "cosine",
    verbose = FALSE
)
#> Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
#> To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
#> This message will be shown once per session

Initialise the parallel backend.

RhpcBLASctl::blas_set_num_threads(1)
ncores <- 1
my_cluster <- parallel::makeCluster(
    ncores,
    type = "PSOCK"
)

doParallel::registerDoParallel(cl = my_cluster)

Define the feature sets of interest.

features <- dimnames(pbmc3k@assays$RNA)[[1]]
var_features <- pbmc3k@assays[["RNA"]]@var.features
n_abundant <- 3000
most_abundant_genes <- rownames(pbmc3k@assays$RNA)[order(Matrix::rowSums(pbmc3k@assays$RNA),
    decreasing = TRUE
)]

steps <- seq(from = 500, to = 3000, by = 500)
ma_hv_genes_intersection_sets <- sapply(steps, function(x) intersect(most_abundant_genes[1:x], var_features[1:x]))
ma_hv_genes_intersection <- Reduce(union, ma_hv_genes_intersection_sets)
ma_hv_steps <- sapply(ma_hv_genes_intersection_sets, length)

Apply the automatic assessment.

automm_output <- automatic_stability_assessment(
    expression_matrix = pbmc3k@assays$RNA@scale.data,
    n_repetitions = 10,
    n_neigh_sequence = seq(from = 5, to = 50, by = 5),
    resolution_sequence = seq(from = 0.1, to = 1, by = 0.1),
    features_sets = list(
        "HV" = var_features,
        "MA" = most_abundant_genes[1:3000]
    ),
    steps = list(
        "HV" = steps,
        "MA" = steps
    ),
    n_top_configs = 2,
    umap_arguments = list(
        min_dist = 0.3,
        n_neighbors = 30,
        metric = "cosine"
    ),
    save_temp = FALSE,
    verbose = TRUE
)
#> [2024-04-11 12:11:31.323848] Assessing the stability of the dimensionality reduction
#> [1] "HV"
#> [1] "MA"
#> [2024-04-11 12:17:17.542542] Choosing the top 2
#> [2024-04-11 12:17:17.80667] Assessing the stability of the connected components
#> [2024-04-11 12:17:21.221886] Assessing the stability of the graph construction parameters
#> [2024-04-11 12:18:33.948796] Assessing the stability of the graph clustering method
#> [1] "HV 500"
#> [1] "HV 1000"
#> [1] "MA 2000"
#> [1] "MA 3000"

Close the connections opened when using multiple cores.

foreach::registerDoSEQ()

Create the shiny app based on the ClustAssess output. You should also specify either a seurat object or a normalized expression matrix. Note: Please make sure that the directory mentioned in the parameter project_folder is empty / doesn’t exist.

# generate using a seurat object
write_shiny_app(
    object = pbmc3k,
    assay_name = "RNA",
    clustassess_object = automm_output,
    project_folder = "clustassess_app_dir_seurat",
    shiny_app_title = "PBMC 3k dataset"
)

# generate using a normalized expression matrix
write_shiny_app(
    object = pbmc3k@assays$RNA@data,
    metadata = pbmc3k@meta.data,
    clustassess_object = automm_output,
    project_folder = "clustassess_app_dir_expr",
    shiny_app_title = "PBMC 3k dataset"
)

The app can be run using the following command.

shiny::runApp("clustassess_app_dir_seurat")

Session info

sessionInfo()
#> R version 4.3.3 (2024-02-29 ucrt)
#> Platform: x86_64-w64-mingw32/x64 (64-bit)
#> Running under: Windows 11 x64 (build 22631)
#> 
#> Matrix products: default
#> 
#> 
#> locale:
#> [1] LC_COLLATE=Romanian_Romania.utf8  LC_CTYPE=Romanian_Romania.utf8   
#> [3] LC_MONETARY=Romanian_Romania.utf8 LC_NUMERIC=C                     
#> [5] LC_TIME=Romanian_Romania.utf8    
#> 
#> time zone: Europe/Bucharest
#> tzcode source: internal
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] ClustAssess_1.0.0             Seurat_5.0.3                 
#> [3] SeuratObject_5.0.1            sp_2.1-3                     
#> [5] pbmcMultiome.SeuratData_0.1.4 pbmc3k.SeuratData_3.1.4      
#> [7] SeuratData_0.2.2.9001         devtools_2.4.5               
#> [9] usethis_2.2.3                
#> 
#> loaded via a namespace (and not attached):
#>   [1] RColorBrewer_1.1-3     jsonlite_1.8.8         magrittr_2.0.3        
#>   [4] spatstat.utils_3.0-4   rmarkdown_2.26         fs_1.6.3              
#>   [7] ragg_1.3.0             vctrs_0.6.5            ROCR_1.0-11           
#>  [10] memoise_2.0.1          spatstat.explore_3.2-7 progress_1.2.3        
#>  [13] htmltools_0.5.8        sass_0.4.9             sctransform_0.4.1     
#>  [16] parallelly_1.37.1      KernSmooth_2.23-22     bslib_0.7.0           
#>  [19] htmlwidgets_1.6.4      desc_1.4.3             ica_1.0-3             
#>  [22] plyr_1.8.9             plotly_4.10.4          zoo_1.8-12            
#>  [25] cachem_1.0.8           igraph_2.0.3           iterators_1.0.14      
#>  [28] mime_0.12              lifecycle_1.0.4        pkgconfig_2.0.3       
#>  [31] Matrix_1.6-5           R6_2.5.1               fastmap_1.1.1         
#>  [34] fitdistrplus_1.1-11    future_1.33.2          shiny_1.8.1.1         
#>  [37] digest_0.6.35          colorspace_2.1-0       patchwork_1.2.0       
#>  [40] tensor_1.5             RSpectra_0.16-1        irlba_2.3.5.1         
#>  [43] pkgload_1.3.4          textshaping_0.3.7      progressr_0.14.0      
#>  [46] fansi_1.0.6            spatstat.sparse_3.0-3  httr_1.4.7            
#>  [49] polyclip_1.10-6        abind_1.4-5            compiler_4.3.3        
#>  [52] remotes_2.5.0          doParallel_1.0.17      fastDummies_1.7.3     
#>  [55] pkgbuild_1.4.4         MASS_7.3-60.0.1        rappdirs_0.3.3        
#>  [58] sessioninfo_1.2.2      tools_4.3.3            lmtest_0.9-40         
#>  [61] httpuv_1.6.15          future.apply_1.11.2    goftest_1.2-3         
#>  [64] glue_1.7.0             nlme_3.1-164           promises_1.2.1        
#>  [67] grid_4.3.3             Rtsne_0.17             cluster_2.1.6         
#>  [70] reshape2_1.4.4         generics_0.1.3         gtable_0.3.4          
#>  [73] spatstat.data_3.0-4    tidyr_1.3.1            hms_1.1.3             
#>  [76] data.table_1.15.4      utf8_1.2.4             BiocGenerics_0.48.1   
#>  [79] spatstat.geom_3.2-9    RcppAnnoy_0.0.22       foreach_1.5.2         
#>  [82] ggrepel_0.9.5          RANN_2.6.1             pillar_1.9.0          
#>  [85] stringr_1.5.1          spam_2.10-0            RcppHNSW_0.6.0        
#>  [88] later_1.3.2            splines_4.3.3          dplyr_1.1.4           
#>  [91] lattice_0.22-5         survival_3.5-8         deldir_2.0-4          
#>  [94] tidyselect_1.2.1       miniUI_0.1.1.1         pbapply_1.7-2         
#>  [97] knitr_1.45             gridExtra_2.3          scattermore_1.2       
#> [100] RhpcBLASctl_0.23-42    xfun_0.43              SharedObject_1.16.0   
#> [103] matrixStats_1.2.0      stringi_1.8.3          lazyeval_0.2.2        
#> [106] yaml_2.3.8             evaluate_0.23          codetools_0.2-19      
#> [109] tibble_3.2.1           cli_3.6.2              uwot_0.1.16.9000      
#> [112] xtable_1.8-4           reticulate_1.35.0      systemfonts_1.0.6     
#> [115] munsell_0.5.1          jquerylib_0.1.4        Rcpp_1.0.12           
#> [118] globals_0.16.3         spatstat.random_3.2-3  png_0.1-8             
#> [121] parallel_4.3.3         ellipsis_0.3.2         pkgdown_2.0.7         
#> [124] ggplot2_3.5.0          prettyunits_1.2.0      dotCall64_1.1-1       
#> [127] profvis_0.3.8          urlchecker_1.0.1       listenv_0.9.1         
#> [130] viridisLite_0.4.2      scales_1.3.0           ggridges_0.5.6        
#> [133] crayon_1.5.2           leiden_0.4.3.1         purrr_1.0.2           
#> [136] rlang_1.1.3            cowplot_1.1.3