## Loading required package: ggplot2
##
## microbiome R package (microbiome.github.com)
##
##
##
## Copyright (C) 2011-2020 Leo Lahti,
## Sudarshan Shetty et al. <microbiome.github.io>
##
## Attaching package: 'microbiome'
## The following object is masked from 'package:ggplot2':
##
## alpha
## The following object is masked from 'package:base':
##
## transform
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Welcome to compositions, a package for compositional data analysis.
## Find an intro with "? compositions"
##
## Attaching package: 'compositions'
## The following objects are masked from 'package:stats':
##
## cor, cov, dist, var
## The following objects are masked from 'package:base':
##
## %*%, norm, scale, scale.default
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:compositions':
##
## normalize, var
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:microbiome':
##
## coverage
## The following object is masked from 'package:phyloseq':
##
## distance
## The following object is masked from 'package:grDevices':
##
## windows
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
##
## count
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
## The following object is masked from 'package:phyloseq':
##
## sampleNames
## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## .default = col_double(),
## X1 = col_character()
## )
## i Use `spec()` for the full column specifications.
anyNA(ASV_data)
## [1] FALSE
## 'data.frame': 20 obs. of 2 variables:
## $ Trt1: chr "a" "a" "b" "b" ...
## $ Trt2: chr "x" "y" "x" "y" ...
ASV=otu_table(ASV_data, taxa_are_rows=FALSE)
SAM=sample_data(SAM_data)
ps <-phyloseq(ASV, SAM)
One of the major advantages of using phyloseq is that you can filter, subset, and merge samples or taxa simultaneously across all files in the phyloseq object. These rely on quantitative or logical functions and include: * phyloseq::subset_samples * phyloseq::subset_taxa * phyloseq::prune_samples * phyloseq::prune_taxa * phyloseq::merge_samples * phyloseq::merge_taxa We will use basic examples of these today
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 232 taxa and 19 samples ]
## sample_data() Sample Data: [ 19 samples by 2 sample variables ]
The number of samples has been reduced from 20 to 19.
we can further subset treatments into a new ps object
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 232 taxa and 9 samples ]
## sample_data() Sample Data: [ 9 samples by 2 sample variables ]
myASV<- otu_table(ps)
myEnv <- sample_data(ps)
sample_names(ps)
## [1] "F3D0" "F3D1" "F3D141" "F3D142" "F3D143" "F3D144" "F3D145" "F3D146"
## [9] "F3D147" "F3D148" "F3D149" "F3D150" "F3D2" "F3D3" "F3D5" "F3D6"
## [17] "F3D7" "F3D8" "F3D9"
sample_sums(ps)
## F3D0 F3D1 F3D141 F3D142 F3D143 F3D144 F3D145 F3D146 F3D147 F3D148 F3D149
## 6528 5017 4863 2521 2518 3488 5820 3879 13006 9935 10653
## F3D150 F3D2 F3D3 F3D5 F3D6 F3D7 F3D8 F3D9
## 4240 16835 5491 3716 6679 4217 4547 6015
median(sample_sums(ps))
## [1] 5017
mean(sample_sums(ps))
## [1] 6314.105
ntaxa(ps)
## [1] 232
summarize_phyloseq(ps)
## Compositional = NO2
## 1] Min. number of reads = 25182] Max. number of reads = 168353] Total number of reads = 1199684] Average number of reads = 6314.10526315795] Median number of reads = 50177] Sparsity = 0.6322595281306726] Any OTU sum to 1 or less? YES8] Number of singletons = 149] Percent of OTUs that are singletons
## (i.e. exactly one read detected across all samples)010] Number of sample variables are: 2Trt1Trt22
## [[1]]
## [1] "1] Min. number of reads = 2518"
##
## [[2]]
## [1] "2] Max. number of reads = 16835"
##
## [[3]]
## [1] "3] Total number of reads = 119968"
##
## [[4]]
## [1] "4] Average number of reads = 6314.1052631579"
##
## [[5]]
## [1] "5] Median number of reads = 5017"
##
## [[6]]
## [1] "7] Sparsity = 0.632259528130672"
##
## [[7]]
## [1] "6] Any OTU sum to 1 or less? YES"
##
## [[8]]
## [1] "8] Number of singletons = 14"
##
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n (i.e. exactly one read detected across all samples)0"
##
## [[10]]
## [1] "10] Number of sample variables are: 2"
##
## [[11]]
## [1] "Trt1" "Trt2"
top_taxa(ps, n= 10)
## [1] "ASV1" "ASV2" "ASV3" "ASV4" "ASV5" "ASV6" "ASV7" "ASV8" "ASV9"
## [10] "ASV10"
###ASVs by rank abundance, number of ranks defined by n = x
ASV_for_clr <- otu_table(ps)
ASV_clr <- clr(ASV_for_clr)
ASV_clr <- as.data.frame(ASV_clr)
ps_clr <-ps
otu_table(ps_clr) <-otu_table(ASV_clr, taxa_are_rows = FALSE)
ps_clr
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 232 taxa and 19 samples ]
## sample_data() Sample Data: [ 19 samples by 2 sample variables ]
write.csv(ASV_clr, "C:/Users/jjhackma/Desktop/test_data/HW3_Hackman/HW3_Hackman/ASV_clr_transformed.csv")
colnames(SAM)
## [1] "Trt1" "Trt2"
### examine sample data to be sure to understand experimenal design
ps_ds <- phyloseq_to_deseq2(ps, ~Trt1 + Trt2)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
ps_ds = estimateSizeFactors(ps_ds)
ps_ds = estimateDispersions(ps_ds, fitType = "parametric")
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
plotDispEsts(ps_ds)
ps_ra = transform_sample_counts(ps, function(x){sqrt(x)})
min(ps_clr@otu_table)
## [1] -2.746116
The value -2.746116 appears to be the lowest value from the clr transformation. for claritys sake i'm just going to transform all negative values to zero
data_positive <-ps_clr@otu_table
data_positive[data_positive < 0] <- 0
min(data_positive)
## [1] 0
ps_ds_mean = estimateSizeFactors(ps_ds)
ps_ds_local = estimateSizeFactors(ps_ds)
ps_ds_mean = estimateDispersions(ps_ds_mean, fitType = "mean")
## found already estimated dispersions, replacing these
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
ps_ds_local = estimateDispersions(ps_ds_local, fitType = "local")
## found already estimated dispersions, replacing these
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
plotDispEsts(ps_ds_mean)
plotDispEsts(ps_ds_local)
plotDispEsts(ps_ds)
#rerun deseq with no treatment design and compare outputs
ps_ds_no_des <- phyloseq_to_deseq2(ps, ~1)
## converting counts to integer mode
ps_ds_no_des = estimateSizeFactors(ps_ds_no_des)
ps_ds_no_des = estimateDispersions(ps_ds_no_des, fitType = "parametric")
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
plotDispEsts(ps_ds_no_des)
plotDispEsts(ps_ds)
## reading in asv data from Wagner
wagasv <- read_csv("Wk3_Wagner_sm_ASV_data.csv")
## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## .default = col_double()
## )
## i Use `spec()` for the full column specifications.
wagsam <- read_csv("Wk3_Wagner_sm_SAM_data.csv")
## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## X1 = col_character(),
## Name = col_character(),
## Plant_ID = col_character(),
## Type = col_character(),
## Experiment = col_character(),
## Cohort = col_double(),
## Harvested = col_double(),
## Age = col_double(),
## Site = col_character(),
## Treatment = col_character(),
## Line = col_character(),
## Genotype = col_character(),
## Block = col_character(),
## oldPlate = col_character(),
## newPlate = col_character(),
## Analysis = col_character()
## )
wagtax <- read.csv("Wk3_Wagner_sm_TAX_data.csv", row.names=1, header=TRUE, sep=",")
wagasv <- data.frame(wagasv, row.names=1)
wagsam <- data.frame(wagsam, row.names=1)
###creating ps object
TAXP <-tax_table(as.matrix(wagtax))
ASVP=otu_table(wagasv, taxa_are_rows = TRUE)
SAMP=sample_data(wagsam)
ps_wag <- phyloseq(ASVP, SAMP, TAXP)
ps_wag
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 10919 taxa and 292 samples ]
## sample_data() Sample Data: [ 292 samples by 15 sample variables ]
## tax_table() Taxonomy Table: [ 10919 taxa by 7 taxonomic ranks ]
I reduced these data to a more manageable size by: removing unidentified taxa (phyloseq::subset_taxa) limiting to root samples in 2011 (phyloseq::subset_samples) removing taxa with less than 20 reads (phyloseq::prune_taxa) #6 practice subsetting and filtering data
ps_wag_sub <-subset_samples(ps_wag, Type == "root")
ps_wag_sub
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 10919 taxa and 154 samples ]
## sample_data() Sample Data: [ 154 samples by 15 sample variables ]
## tax_table() Taxonomy Table: [ 10919 taxa by 7 taxonomic ranks ]
###subtracted 138 samples
ps_wag_sub_jam <-subset_samples(ps_wag_sub, Genotype == "JAM")
ps_wag_sub_jam
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 10919 taxa and 29 samples ]
## sample_data() Sample Data: [ 29 samples by 15 sample variables ]
## tax_table() Taxonomy Table: [ 10919 taxa by 7 taxonomic ranks ]
ps_wag_sub_phylum <-subset_taxa(ps_wag_sub_jam, Phylum == "Cyanobacteria")
ps_wag_sub_phylum
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 215 taxa and 29 samples ]
## sample_data() Sample Data: [ 29 samples by 15 sample variables ]
## tax_table() Taxonomy Table: [ 215 taxa by 7 taxonomic ranks ]
ps_wag_sub_phylum_abundant <- prune_taxa(taxa_sums(ps_wag_sub_phylum)>1000, ps_wag_sub_phylum)
ps_wag_sub_phylum_abundant
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 2 taxa and 29 samples ]
## sample_data() Sample Data: [ 29 samples by 15 sample variables ]
## tax_table() Taxonomy Table: [ 2 taxa by 7 taxonomic ranks ]
tax_table(ps_wag_sub_phylum_abundant)
## Taxonomy Table: [2 taxa by 7 taxonomic ranks]:
## taxonomy
## 7 "Root;k__Bacteria;p__Cyanobacteria;c__Chloroplast;o__Streptophyta;f__"
## 34255 "Root;k__Bacteria;p__Cyanobacteria;c__Chloroplast;o__Streptophyta;f__"
## Kingdom Phylum Class Order Family Confidence
## 7 "Bacteria" "Cyanobacteria" "Chloroplast" "Streptophyta" NA "1.00"
## 34255 "Bacteria" "Cyanobacteria" "Chloroplast" "Streptophyta" NA "1.00"