vignettes/stability-pipeline-shiny.Rmd
stability-pipeline-shiny.RmdClustAssess provide the option to run the entire
stability pipeline automatically, by choosing the parameters based on
their highest stability. Another useful feature is the option to create,
based on this object, a shiny app that user can interact with and
perform the assessment of the PhenoGraph configuration and of the
clusters.
library(Seurat)
#> Loading required package: SeuratObject
#> Loading required package: sp
#>
#> Attaching package: 'SeuratObject'
#> The following object is masked from 'package:base':
#>
#> intersect
library(ClustAssess)
library(SeuratData)
packageVersion("ClustAssess") # should be 1.0.0
#> [1] '1.0.0'Process the PBMC 3k Seurat object similarly to the
Stability-based parameter assessment vignette.
InstallData("pbmc3k")
#> Warning: The following packages are already installed and will not be
#> reinstalled: pbmc3k
data("pbmc3k")
pbmc3k <- UpdateSeuratObject(pbmc3k)
#> Validating object structure
#> Updating object slots
#> Ensuring keys are in the proper structure
#> Warning: Assay RNA changing from Assay to Assay
#> Ensuring keys are in the proper structure
#> Ensuring feature names don't have underscores or pipes
#> Updating slots in RNA
#> Validating object structure for Assay 'RNA'
#> Object representation is consistent with the most current Seurat version
pbmc3k <- PercentageFeatureSet(pbmc3k, pattern = "^MT-", col.name = "percent.mito")
pbmc3k <- PercentageFeatureSet(pbmc3k, pattern = "^RP[SL][[:digit:]]", col.name = "percent.rp")
# remove MT and RP genes
all.index <- seq_len(nrow(pbmc3k))
MT.index <- grep(pattern = "^MT-", x = rownames(pbmc3k), value = FALSE)
RP.index <- grep(pattern = "^RP[SL][[:digit:]]", x = rownames(pbmc3k), value = FALSE)
pbmc3k <- pbmc3k[!((all.index %in% MT.index) | (all.index %in% RP.index)), ]
pbmc3k <- subset(pbmc3k, nFeature_RNA < 2000 & nCount_RNA < 2500 & percent.mito < 7 & percent.rp > 7)
pbmc3k <- NormalizeData(pbmc3k, verbose = FALSE)
pbmc3k <- FindVariableFeatures(pbmc3k, selection.method = "vst", nfeatures = 3000, verbose = FALSE)
features <- dimnames(pbmc3k@assays$RNA)[[1]]
var_features <- pbmc3k@assays[["RNA"]]@var.features
n_abundant <- 3000
most_abundant_genes <- rownames(pbmc3k@assays$RNA)[order(Matrix::rowSums(pbmc3k@assays$RNA),
decreasing = TRUE
)]
pbmc3k <- ScaleData(pbmc3k, features = features, verbose = FALSE)
pbmc3k <- RunPCA(pbmc3k,
npcs = 30,
approx = FALSE,
verbose = FALSE,
features = intersect(most_abundant_genes, pbmc3k@assays$RNA@var.features)
)
pbmc3k <- RunUMAP(pbmc3k,
reduction = "pca",
dims = 1:30,
n.neighbors = 30,
min.dist = 0.3,
metric = "cosine",
verbose = FALSE
)
#> Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
#> To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
#> This message will be shown once per sessionInitialise the parallel backend.
RhpcBLASctl::blas_set_num_threads(1)
ncores <- 1
my_cluster <- parallel::makeCluster(
ncores,
type = "PSOCK"
)
doParallel::registerDoParallel(cl = my_cluster)Define the feature sets of interest.
features <- dimnames(pbmc3k@assays$RNA)[[1]]
var_features <- pbmc3k@assays[["RNA"]]@var.features
n_abundant <- 3000
most_abundant_genes <- rownames(pbmc3k@assays$RNA)[order(Matrix::rowSums(pbmc3k@assays$RNA),
decreasing = TRUE
)]
steps <- seq(from = 500, to = 3000, by = 500)
ma_hv_genes_intersection_sets <- sapply(steps, function(x) intersect(most_abundant_genes[1:x], var_features[1:x]))
ma_hv_genes_intersection <- Reduce(union, ma_hv_genes_intersection_sets)
ma_hv_steps <- sapply(ma_hv_genes_intersection_sets, length)Apply the automatic assessment.
automm_output <- automatic_stability_assessment(
expression_matrix = pbmc3k@assays$RNA@scale.data,
n_repetitions = 10,
n_neigh_sequence = seq(from = 5, to = 50, by = 5),
resolution_sequence = seq(from = 0.1, to = 1, by = 0.1),
features_sets = list(
"HV" = var_features,
"MA" = most_abundant_genes[1:3000]
),
steps = list(
"HV" = steps,
"MA" = steps
),
n_top_configs = 2,
umap_arguments = list(
min_dist = 0.3,
n_neighbors = 30,
metric = "cosine"
),
save_temp = FALSE,
verbose = TRUE
)
#> [2024-04-11 12:11:31.323848] Assessing the stability of the dimensionality reduction
#> [1] "HV"
#> [1] "MA"
#> [2024-04-11 12:17:17.542542] Choosing the top 2
#> [2024-04-11 12:17:17.80667] Assessing the stability of the connected components
#> [2024-04-11 12:17:21.221886] Assessing the stability of the graph construction parameters
#> [2024-04-11 12:18:33.948796] Assessing the stability of the graph clustering method
#> [1] "HV 500"
#> [1] "HV 1000"
#> [1] "MA 2000"
#> [1] "MA 3000"Close the connections opened when using multiple cores.
foreach::registerDoSEQ()Create the shiny app based on the ClustAssess output. You should also
specify either a seurat object or a normalized expression matrix.
Note: Please make sure that the directory mentioned in the
parameter project_folder is empty / doesn’t exist.
# generate using a seurat object
write_shiny_app(
object = pbmc3k,
assay_name = "RNA",
clustassess_object = automm_output,
project_folder = "clustassess_app_dir_seurat",
shiny_app_title = "PBMC 3k dataset"
)
# generate using a normalized expression matrix
write_shiny_app(
object = pbmc3k@assays$RNA@data,
metadata = pbmc3k@meta.data,
clustassess_object = automm_output,
project_folder = "clustassess_app_dir_expr",
shiny_app_title = "PBMC 3k dataset"
)The app can be run using the following command.
shiny::runApp("clustassess_app_dir_seurat")Session info
sessionInfo()
#> R version 4.3.3 (2024-02-29 ucrt)
#> Platform: x86_64-w64-mingw32/x64 (64-bit)
#> Running under: Windows 11 x64 (build 22631)
#>
#> Matrix products: default
#>
#>
#> locale:
#> [1] LC_COLLATE=Romanian_Romania.utf8 LC_CTYPE=Romanian_Romania.utf8
#> [3] LC_MONETARY=Romanian_Romania.utf8 LC_NUMERIC=C
#> [5] LC_TIME=Romanian_Romania.utf8
#>
#> time zone: Europe/Bucharest
#> tzcode source: internal
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] ClustAssess_1.0.0 Seurat_5.0.3
#> [3] SeuratObject_5.0.1 sp_2.1-3
#> [5] pbmcMultiome.SeuratData_0.1.4 pbmc3k.SeuratData_3.1.4
#> [7] SeuratData_0.2.2.9001 devtools_2.4.5
#> [9] usethis_2.2.3
#>
#> loaded via a namespace (and not attached):
#> [1] RColorBrewer_1.1-3 jsonlite_1.8.8 magrittr_2.0.3
#> [4] spatstat.utils_3.0-4 rmarkdown_2.26 fs_1.6.3
#> [7] ragg_1.3.0 vctrs_0.6.5 ROCR_1.0-11
#> [10] memoise_2.0.1 spatstat.explore_3.2-7 progress_1.2.3
#> [13] htmltools_0.5.8 sass_0.4.9 sctransform_0.4.1
#> [16] parallelly_1.37.1 KernSmooth_2.23-22 bslib_0.7.0
#> [19] htmlwidgets_1.6.4 desc_1.4.3 ica_1.0-3
#> [22] plyr_1.8.9 plotly_4.10.4 zoo_1.8-12
#> [25] cachem_1.0.8 igraph_2.0.3 iterators_1.0.14
#> [28] mime_0.12 lifecycle_1.0.4 pkgconfig_2.0.3
#> [31] Matrix_1.6-5 R6_2.5.1 fastmap_1.1.1
#> [34] fitdistrplus_1.1-11 future_1.33.2 shiny_1.8.1.1
#> [37] digest_0.6.35 colorspace_2.1-0 patchwork_1.2.0
#> [40] tensor_1.5 RSpectra_0.16-1 irlba_2.3.5.1
#> [43] pkgload_1.3.4 textshaping_0.3.7 progressr_0.14.0
#> [46] fansi_1.0.6 spatstat.sparse_3.0-3 httr_1.4.7
#> [49] polyclip_1.10-6 abind_1.4-5 compiler_4.3.3
#> [52] remotes_2.5.0 doParallel_1.0.17 fastDummies_1.7.3
#> [55] pkgbuild_1.4.4 MASS_7.3-60.0.1 rappdirs_0.3.3
#> [58] sessioninfo_1.2.2 tools_4.3.3 lmtest_0.9-40
#> [61] httpuv_1.6.15 future.apply_1.11.2 goftest_1.2-3
#> [64] glue_1.7.0 nlme_3.1-164 promises_1.2.1
#> [67] grid_4.3.3 Rtsne_0.17 cluster_2.1.6
#> [70] reshape2_1.4.4 generics_0.1.3 gtable_0.3.4
#> [73] spatstat.data_3.0-4 tidyr_1.3.1 hms_1.1.3
#> [76] data.table_1.15.4 utf8_1.2.4 BiocGenerics_0.48.1
#> [79] spatstat.geom_3.2-9 RcppAnnoy_0.0.22 foreach_1.5.2
#> [82] ggrepel_0.9.5 RANN_2.6.1 pillar_1.9.0
#> [85] stringr_1.5.1 spam_2.10-0 RcppHNSW_0.6.0
#> [88] later_1.3.2 splines_4.3.3 dplyr_1.1.4
#> [91] lattice_0.22-5 survival_3.5-8 deldir_2.0-4
#> [94] tidyselect_1.2.1 miniUI_0.1.1.1 pbapply_1.7-2
#> [97] knitr_1.45 gridExtra_2.3 scattermore_1.2
#> [100] RhpcBLASctl_0.23-42 xfun_0.43 SharedObject_1.16.0
#> [103] matrixStats_1.2.0 stringi_1.8.3 lazyeval_0.2.2
#> [106] yaml_2.3.8 evaluate_0.23 codetools_0.2-19
#> [109] tibble_3.2.1 cli_3.6.2 uwot_0.1.16.9000
#> [112] xtable_1.8-4 reticulate_1.35.0 systemfonts_1.0.6
#> [115] munsell_0.5.1 jquerylib_0.1.4 Rcpp_1.0.12
#> [118] globals_0.16.3 spatstat.random_3.2-3 png_0.1-8
#> [121] parallel_4.3.3 ellipsis_0.3.2 pkgdown_2.0.7
#> [124] ggplot2_3.5.0 prettyunits_1.2.0 dotCall64_1.1-1
#> [127] profvis_0.3.8 urlchecker_1.0.1 listenv_0.9.1
#> [130] viridisLite_0.4.2 scales_1.3.0 ggridges_0.5.6
#> [133] crayon_1.5.2 leiden_0.4.3.1 purrr_1.0.2
#> [136] rlang_1.1.3 cowplot_1.1.3