Libraries
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6.9000 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
##
## count
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:purrr':
##
## reduce
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
##
## Attaching package: 'tidySummarizedExperiment'
## The following object is masked from 'package:IRanges':
##
## slice
## The following object is masked from 'package:S4Vectors':
##
## rename
## The following object is masked from 'package:matrixStats':
##
## count
## The following objects are masked from 'package:dplyr':
##
## bind_cols, bind_rows, count
## The following object is masked from 'package:stats':
##
## filter
## ========================================
## tidybulk version 1.9.2
## If you use TIDYBULK in published research, please cite:
##
## Mangiola et al. tidybulk: an R tidy framework for modular
## transcriptomic data analysis. Genome Biology 2021.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(tidybulk))
## ========================================
##
## Attaching package: 'tidybulk'
## The following objects are masked from 'package:tidySummarizedExperiment':
##
## bind_cols, bind_rows, se
## The following object is masked from 'package:S4Vectors':
##
## rename
## The following objects are masked from 'package:dplyr':
##
## bind_cols, bind_rows
## The following object is masked from 'package:stats':
##
## filter
## Loading required package: DelayedArray
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:S4Vectors':
##
## expand
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'DelayedArray'
## The following object is masked from 'package:purrr':
##
## simplify
## The following objects are masked from 'package:base':
##
## aperm, apply, rowsum, scale, sweep
## Loading required package: rhdf5
##
## Attaching package: 'HDF5Array'
## The following object is masked from 'package:rhdf5':
##
## h5ls
counts = readRDS("PPCG_RNA_count/counts_deconvolution.rds")
(
counts |>
pivot_sample() |>
pivot_longer(
names_to= c("method", "cell_type"),
values_to = "proportion",
cols=contains("__"),
names_sep ="__"
) %>%
ggplot(aes(x = .sample, y = proportion, fill = cell_type)) +
geom_bar(stat = "identity") +
facet_wrap(method~Center, scale="free", ncol=6) +
guides(fill=guide_legend(ncol=10)) +
theme_multipanel +
theme(legend.position = "bottom", axis.text.x = element_blank())
) |> plotly::ggplotly()
ggMarginal(
counts |>
pivot_sample() |>
ggplot(aes(purity.scores, deconvolution__epithelial, color = Center)) +
geom_point(size=0.2) +
geom_smooth(method="lm")+
geom_abline(linetype="dashed", alpha=0.3) +
theme_multipanel,
groupFill = TRUE,
type = "boxplot"
)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# RNA
counts |>
select(-TMM) |>
identify_abundant() |>
scale_abundance()|>
reduce_dimensions(.abundance=raw.counts_scaled, method="PCA") |>
pivot_sample() |>
ggplot(aes(PC1, PC2, color=subtype)) +
geom_point() +
facet_wrap(~ Center, scale="free") +
theme_multipanel
## No group or design set. Assuming all samples belong to one group.
## tidybulk says: the sample with largest library size PPCG2179a_RNAseq was chosen as reference for scaling
## Getting the 500 most variable genes
## Fraction of variance explained by the selected principal components
## # A tibble: 2 × 2
## `Fraction of variance` PC
## <dbl> <int>
## 1 0.196 1
## 2 0.0967 2
## tidybulk says: to access the raw results do `attr(..., "internals")$PCA`
## This message is displayed once per session.
# Tissue composition
counts |>
pivot_sample() |>
pivot_longer(
names_to= c("method", "cell_type"),
values_to = "proportion",
cols=contains("__"),
names_sep ="__"
) |>
# Filter abundant cell types
nest(data = -cell_type) |>
mutate(median_proportion = map_dbl(data, ~ .x |> pull(proportion) |> median())) |>
arrange(desc(median_proportion)) |>
slice(1:15) |>
unnest(data) |>
# Transform
reduce_dimensions(
.sample, cell_type, proportion,
transform = identity, center=TRUE,
method="PCA"
) |>
ggplot(aes(PC1, PC2, color=subtype)) +
geom_point() +
facet_wrap(~ Center, scale="free") +
theme_multipanel
## Warning in eval(dots[[i]][[action]], env, env): tidybulk says: highly abundant
## transcripts were not identified (i.e. identify_abundant()) or filtered (i.e.,
## keep_abundant), therefore this operation will be performed on unfiltered
## data. In rare occasions this could be wanted. In standard whole-transcriptome
## workflows is generally unwanted.
## Getting the 15 most variable genes
## Warning in as_mapper(.f2)(.x):
## tidybulk says: In PCA correlation there is < 100 genes that have non NA values is all samples.
## The correlation calculation would not be reliable,
## we suggest to partition the dataset for sample clusters.
##
## Fraction of variance explained by the selected principal components
## # A tibble: 2 × 2
## `Fraction of variance` PC
## <dbl> <int>
## 1 0.263 1
## 2 0.127 2