This guide walks through using the package on the count and density data from a MIBI TNBC data set.
library(ggplot2)
library(grid)
library(gridExtra)
library(ggpubr)
library(MASS)
library(DHARMa)
library(glmmTMB)
library(ComplexHeatmap)
library(circlize)
library(moments)
library(tidyverse)
The data is stored in counts_and_dens - a data frame
with 31 rows and 16 variables. The variables consist of:

- patient_id: a unique identifier for each patient (one sample/row per patient).
- Tumour_type: classification of the sample, either mixed or compartmentalized.
- Cell counts: B_cells, Cyto_T_cell, Helper_T_cell, Macrophage, Neutrophil, T_cell, Tumor_cell.
- Cell densities: the same cell types as above, with .dens appended to each column name.

The function dataSummary() can give us an overview of
the data to help decide which functions are appropriate for our
data.
dataSummary(data = counts_and_dens,
            test_column = "Tumour_type",
            id_col = "patient_id",
            draw_heatmap = TRUE)
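The remaining examples refer to two vectors of column names, count_cols and dens_cols, which are not defined in this section; a minimal sketch of how they could be built, assuming the column names described above:
# assumed column-name vectors (hypothetical; based on the variable description above)
cell_types <- c("B_cells", "Cyto_T_cell", "Helper_T_cell",
                "Macrophage", "Neutrophil", "T_cell", "Tumor_cell")
count_cols <- cell_types                   # count columns
dens_cols <- paste0(cell_types, ".dens")   # density columns (.dens appended)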
First we will use the test functions on the original data.
Since the data consists of one sample per patient, we will use the Wilcoxon rank sum test rather than a paired Wilcoxon test. We will also fit Negative Binomial and Poisson generalised linear models.
# wilcoxon rank sum
wilcox_res <- testWilcox(data = counts_and_dens,
                         test_column = "Tumour_type",
                         cols = count_cols)
## Wilcoxon rank sum test with continuity correction
## Formula: col ~ Tumour_type
## Paired: FALSE
## Alternative: two.sided
## Exact: FALSE
## Comparisons:
## 1: compartmentalized-mixed
wilcox_res
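For reference, the per-column test corresponds to a standard Wilcoxon rank sum test with continuity correction; an illustrative base-R call for a single cell type (a sketch, not the package's internal code):
# illustrative only: an equivalent base-R test for one count column (unpaired by default)
wilcox.test(B_cells ~ Tumour_type, data = counts_and_dens,
            exact = FALSE, alternative = "two.sided")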
# Negative Binomial GLM
nbinom_res <- testSimpleModel(data = counts_and_dens,
                              test_column = "Tumour_type",
                              cols = count_cols,
                              family = "nbinom")
## glm.nb
## Formula: col ~ Tumour_type
## Family: Negative Binomial(0.3484)
## Link: log
## Comparisons:
## 1: compartmentalized-mixed
nbinom_res
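The output above reports glm.nb with a log link, i.e. a negative binomial GLM as provided by MASS; an illustrative single-column fit (a sketch, not the package's internal code):
# illustrative only: a negative binomial GLM for one count column
nb_fit <- MASS::glm.nb(B_cells ~ Tumour_type, data = counts_and_dens)
summary(nb_fit)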
poisson_res <- testSimpleModel(data = counts_and_dens,
                               test_column = "Tumour_type",
                               cols = count_cols,
                               family = "poisson")
## glm
## Formula: col ~ Tumour_type
## Family: poisson
## Link: log
## Comparisons:
## 1: compartmentalized-mixed
poisson_res
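Similarly, the Poisson results correspond to a standard GLM with family = poisson and a log link; an illustrative single-column fit (again only a sketch):
# illustrative only: a Poisson GLM for one count column
pois_fit <- glm(B_cells ~ Tumour_type, data = counts_and_dens, family = poisson)
summary(pois_fit)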
We are now going to combine all the results and compare the p-values.
We could combine the results first and then extract the p-values;
however, it is better practice to extract the p-values first, because for
larger data sets the glm (or glmmTMB) objects
can be large and manipulating the combined data frames becomes
time consuming.
tests <- c("wilcox", "nbinom", "poisson")
res_list <- list(wilcox=wilcox_res, nbinom=nbinom_res, poisson=poisson_res)
for (test in tests) {
  res_list[[test]] <- res_list[[test]] |>
    rename_with(\(x) gsub(".dens", "", x, fixed = TRUE)) |> # remove .dens from column names
    getPvalues(cols = count_cols) |> # extract p-values
    pivot_longer(cols = all_of(count_cols), names_to = "cell_type", values_to = test) # pivot longer, ready to combine results
}
# combine into a single data-frame
all_pvalues <- reduce(res_list, \(acc, nxt) left_join(acc, nxt, by = c("cell_type", "group1", "group2", "group1.n", "group2.n")))
all_pvalues
Since there is only one comparison, we do not need to adjust the
p-values (if needed, we could use adjustPvalues()).
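If there were several comparisons, the adjustment could also be done per test column; an illustrative sketch using base R's p.adjust (whether adjustPvalues() uses the same method is not shown here):
# illustrative only: Benjamini-Hochberg adjustment of one p-value column
all_pvalues$wilcox_adj <- p.adjust(all_pvalues$wilcox, method = "BH")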
The function pvalueHeatmap() can be used to visualize
the results.
pvalueHeatmap(all_pvalues,
              cols = c("wilcox", "nbinom", "poisson"),
              row_name_col = "cell_type",
              draw_heatmap = TRUE,
              row_split = NULL, # since there is only one comparison we will remove the default split
              row_names_side = "left",
              column_title = "Original data\nun-adjusted p-values",
              column_title_gp = gpar(fontsize = 20, fontface = "bold"))
shuffleData() returns a list of 2 objects:

- shuffled_df: a copy of the original data frame with additional columns appended that are randomly shuffled copies of test_column, together with the corresponding copies of id_col.
- shuffle_names: a character vector with the names of the appended columns (only the names of the shuffles of test_column).

set.seed(123)
shuffle_res <- shuffleData(data = counts_and_dens,
                           test_column = "Tumour_type",
                           id_col = "patient_id",
                           n = 100)
shuffled_df <- shuffle_res$shuffled_df
shuffle_names <- shuffle_res$shuffle_names
shuffled_df
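Conceptually, each appended column is just a random permutation of the original group labels; a minimal sketch of a single shuffle (not the package's implementation):
# illustrative only: one random permutation of the group labels
one_shuffle <- sample(counts_and_dens$Tumour_type)
table(one_shuffle, counts_and_dens$Tumour_type)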
The function shuffleSummaryStats() returns a data-frame
with summary statistics (min, max, mean, median, sd, var, skewness,
kurtosis, cv) for each shuffle (split by group).
shuffleSummaryStats(shuffled_df, shuffle_names, c(count_cols, dens_cols))
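The same statistics can be computed directly for a single column of the original data using base R together with the moments package loaded above; an illustrative per-group summary (not the package's output format):
# illustrative only: per-group summary statistics for one count column
tapply(counts_and_dens$B_cells, counts_and_dens$Tumour_type, function(x) {
  c(min = min(x), max = max(x), mean = mean(x), median = median(x),
    sd = sd(x), var = var(x), cv = sd(x) / mean(x),
    skewness = moments::skewness(x), kurtosis = moments::kurtosis(x))
})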
We will use the function testMulti() to test the
shuffled data. Note: this function currently only works for the
unpaired Wilcoxon and GLM functions.
wilcox_res_shuffle <- testMulti(data = shuffled_df,
                                test_columns = shuffle_names,
                                test_func = testWilcox,
                                cols = dens_cols,
                                show_test_details = FALSE)
nbinom_res_shuffle <- testMulti(data = shuffled_df,
                                test_columns = shuffle_names,
                                test_func = testSimpleModel,
                                cols = count_cols,
                                family = "nbinom",
                                show_test_details = FALSE)
poisson_res_shuffle <- testMulti(data = shuffled_df,
                                 test_columns = shuffle_names,
                                 test_func = testSimpleModel,
                                 cols = count_cols,
                                 family = "poisson",
                                 show_test_details = FALSE)
shuffle_res_list <- list(wilcox=wilcox_res_shuffle, nbinom=nbinom_res_shuffle, poisson=poisson_res_shuffle)
for (test in tests) {
  shuffle_res_list[[test]] <- shuffle_res_list[[test]] |>
    rename_with(\(x) gsub(".dens", "", x, fixed = TRUE)) |> # remove .dens from column names
    getPvalues(cols = count_cols) |> # extract p-values
    pivot_longer(cols = all_of(count_cols), names_to = "cell_type", values_to = test) # pivot longer, ready to combine results
}
# combine into a single data-frame
all_shuffle_pvalues <- reduce(shuffle_res_list, \(acc, nxt) left_join(acc, nxt, by = c("shuffleID", "cell_type", "group1", "group2", "group1.n", "group2.n")))
all_shuffle_pvalues
Again, as there is only one comparison, we do not need to adjust the p-values.
The function countSignificant() will count the number of
p-values less than a given significance level (default=0.05). We want to
group our counts by cell type and comparison (group1 &
group2), so we will use the grouping argument
of countSignificant().
sigcounts <- countSignificant(all_shuffle_pvalues,
                              cols = c("wilcox", "nbinom", "poisson"),
                              grouping = c("cell_type", "group1", "group2"))
sigcounts
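For intuition, the same tally can be reproduced with a grouped summary of the shuffle p-values; an illustrative dplyr sketch (not the package's internal code):
# illustrative only: count p-values below 0.05 per cell type and comparison
all_shuffle_pvalues |>
  group_by(cell_type, group1, group2) |>
  summarise(across(c(wilcox, nbinom, poisson), \(p) sum(p < 0.05)), .groups = "drop")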
sigcountHeatmap(sigcounts,
                n = 100,
                row_name_col = "cell_type",
                draw_heatmap = TRUE,
                row_split = NULL,
                column_title = "Random shuffle\nunadjusted p-values",
                column_title_gp = gpar(fontsize = 20, fontface = "bold"))
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Monterey 12.5.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Australia/Melbourne
## tzcode source: internal
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1
## [4] dplyr_1.1.4 purrr_1.0.2 readr_2.1.4
## [7] tidyr_1.3.0 tibble_3.2.1 tidyverse_2.0.0
## [10] moments_0.14.1 circlize_0.4.15 ComplexHeatmap_2.18.0
## [13] glmmTMB_1.1.8 DHARMa_0.4.6 MASS_7.3-60
## [16] ggpubr_0.6.0 gridExtra_2.3 ggplot2_3.4.4
##
## loaded via a namespace (and not attached):
## [1] sandwich_3.1-0 rlang_1.1.2 magrittr_2.0.3
## [4] clue_0.3-65 GetoptLong_1.0.5 multcomp_1.4-25
## [7] matrixStats_1.2.0 compiler_4.3.2 mgcv_1.9-1
## [10] png_0.1-8 vctrs_0.6.5 pkgconfig_2.0.3
## [13] shape_1.4.6 crayon_1.5.2 fastmap_1.1.1
## [16] magick_2.8.2 backports_1.4.1 utf8_1.2.4
## [19] rmarkdown_2.25 tzdb_0.4.0 nloptr_2.0.3
## [22] bit_4.0.5 xfun_0.41 cachem_1.0.8
## [25] jsonlite_1.8.8 highr_0.10 broom_1.0.5
## [28] parallel_4.3.2 cluster_2.1.6 R6_2.5.1
## [31] bslib_0.6.1 stringi_1.8.3 RColorBrewer_1.1-3
## [34] car_3.1-2 boot_1.3-28.1 jquerylib_0.1.4
## [37] numDeriv_2016.8-1.1 estimability_1.4.1 Rcpp_1.0.11
## [40] iterators_1.0.14 knitr_1.45 zoo_1.8-12
## [43] IRanges_2.36.0 Matrix_1.6-4 splines_4.3.2
## [46] timechange_0.2.0 tidyselect_1.2.0 rstudioapi_0.15.0
## [49] abind_1.4-5 yaml_2.3.8 doParallel_1.0.17
## [52] TMB_1.9.10 codetools_0.2-19 lattice_0.22-5
## [55] withr_2.5.2 coda_0.19-4 evaluate_0.23
## [58] survival_3.5-7 pillar_1.9.0 carData_3.0-5
## [61] foreach_1.5.2 stats4_4.3.2 generics_0.1.3
## [64] vroom_1.6.5 S4Vectors_0.40.2 hms_1.1.3
## [67] munsell_0.5.0 scales_1.3.0 minqa_1.2.6
## [70] xtable_1.8-4 glue_1.6.2 emmeans_1.9.0
## [73] tools_4.3.2 lme4_1.1-35.1 ggsignif_0.6.4
## [76] mvtnorm_1.2-4 colorspace_2.1-0 nlme_3.1-164
## [79] cli_3.6.2 fansi_1.0.6 gtable_0.3.4
## [82] rstatix_0.7.2 sass_0.4.8 digest_0.6.33
## [85] BiocGenerics_0.48.1 TH.data_1.1-2 rjson_0.2.21
## [88] htmltools_0.5.7 lifecycle_1.0.4 GlobalOptions_0.1.2
## [91] bit64_4.0.5