Question: Do rooms in a building look more similar the same kind of room in another building: Do restrooms look like restrooms, or do kitchens look like kitchens, no matter where you live? Or, do regional differences in microbial communities make kitchens and restrooms that are in the same location look more like each other?
set.seed(942)
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.0-10
library(ape)
library(labdsv)
## Loading required package: mgcv
## Loading required package: nlme
## This is mgcv 1.8-3. For overview type 'help("mgcv-package")'.
## Loading required package: MASS
##
## Attaching package: 'labdsv'
##
## The following object is masked from 'package:stats':
##
## density
library(phyloseq)
### These just for debugging, not evaluated.
# save.image('~/Dropbox/FloresJeonLeff.RData')
# load('~/Dropbox/FloresJeonLeff.RData')
# setwd("~/Dropbox/PersonalBEMA/FloresJeonLeffStudy")
Below is a custom function (QiimeIn) written by James Meadow for reading in OTU tables.
QiimeIn <- function ( file='') {
tmp.table <- read.table ( file, sep='\t', head=TRUE, row.names=1, comment.char='%', skip=1 )
tax.numb <- dim(tmp.table)[2] ## last col is taxon names
taxa.names <- as.character(tmp.table[,tax.numb]) ## save taxon names
tmp.to.t <- tmp.table[,1:tax.numb-1] ## trim names from table
name <- t(tmp.to.t) ## transpose after trimming names
preview <- name[1:5,1:5]
samples <- row.names(name)
taxa.tmp <- cbind(colnames(name),taxa.names)
taxa <- data.frame(taxa.tmp)
names(taxa)[1] <- 'qiime.id'
row.names(taxa) <- taxa.tmp[, 1]
dimensions <- dim(name)
table.info <- list( preview, dimensions, samples, taxa, name )
names(table.info)[[1]] <- 'Preview'
names(table.info)[[2]] <- 'Dimensions'
names(table.info)[[3]] <- 'Samples'
names(table.info)[[4]] <- 'Taxa'
names(table.info)[[5]] <- 'Table'
invisible(table.info)
}
Read in OTU table using custom function.
FJL.list <- QiimeIn('data/Ftable.from_biom_w_taxonomy.txt')
Read in mapping file, after removing ‘#’ (needed for Qiime scripts) from the header.
FJL.map <- read.delim('data/mergedMapF_no_source_no_kit.txt', head=TRUE, row.names=1, sep='\t')
Read in unweighted unifrac distance matrix, generated with Qiime script. Preview table, and convert to matrix.
unifrac.dist <- read.table('data/unweighted_unifrac_dm.txt', header=T)
unifrac.dist[1:5,1:5]
## KiB54.630662 KiC69.630743 KiB70.630661 KiB40.630629
## KiB54.630662 0.0000 0.6981 0.6727 0.6479
## KiC69.630743 0.6981 0.0000 0.6550 0.7115
## KiB70.630661 0.6727 0.6550 0.0000 0.6470
## KiB40.630629 0.6479 0.7115 0.6470 0.0000
## KiB36.630609 0.6819 0.6775 0.6682 0.6931
## KiB36.630609
## KiB54.630662 0.6819
## KiC69.630743 0.6775
## KiB70.630661 0.6682
## KiB40.630629 0.6931
## KiB36.630609 0.0000
unifrac.dist.mat <- as.matrix(unifrac.dist)
Checkpoints to make sure otu table looks correct and everything is lined up.
names(FJL.list)
FJL.list$Preview
FJL.list$Dimensions
FJL.list$Samples
FJL.list$Taxa
Extract the OTU table and taxon table into two different objects.
FJL.table <- FJL.list$Table
FJL.taxa <- FJL.list$Taxa
Make sure that the OTU table and map line up. If they don’t, reorder using the next section of code.
identical(row.names(FJL.map), row.names(FJL.table)) #False
cbind(row.names(FJL.map), row.names(FJL.table)) #Not matching
all(row.names(FJL.map) %in% row.names(FJL.table)) #True
all(row.names(FJL.table) %in% row.names(FJL.map)) #True
Reorder both the map and the OTU table so that they have the same rows and columns.
FJL.map <- FJL.map[order(row.names(FJL.map)), ]
FJL.table <- FJL.table[row.names(FJL.map), ]
# an alternative way of doing it (without the two linked together):
# FJL.map <- FJL.map[order(row.names(FJL.map)), ]
# FJL.table <- FJL.table[order(row.names(FJL.table)), ]
Reorder the Unifrac distance matrix so that it has the same rows and columns as the map.
unifrac.dist.mat <- unifrac.dist.mat[row.names(FJL.map), row.names(FJL.map)]
Make groups of interest, to make calling them easier later.
restroom <- which(FJL.map$Room_Function == 'restroom')
restroom.map <- FJL.map[restroom, ]
restroom.table <- FJL.table[restroom, ]
kitchen <- which(FJL.map$Room_Function == 'kitchen')
kitchen.map <- FJL.map[kitchen, ]
kitchen.table <- FJL.table[kitchen, ]
toilet <- which(FJL.map$Place_1 == 'toilet')
fridge <- which(FJL.map$Place_1 == 'refrigerator')
How many OTUs are in my OTU table?
dim(FJL.table) # 17398 OTUs
## [1] 758 17341
How many singletons are in my OTU table?
# abundance curve
plot(rev(sort(colSums(FJL.table))))
length(which(colSums(FJL.table) == 1))/ncol(FJL.table) # 31% are singletons
## [1] 0.3137
Make a Canberra distance matrix.
FJL.can <- vegdist(FJL.table, 'canberra')
FJL.can.mat <- as.matrix(FJL.can)
Make an ordination with the canberra distance matrix.
FJL.can.nmds <- nmds(FJL.can)
## initial value 36.417895
## iter 5 value 24.526274
## iter 10 value 23.608134
## final value 23.354897
## converged
Plot ordination, colored by different variables.
levels(FJL.map$Place_1)
## [1] "cabinet" "counter" "cutting_board"
## [4] "dish_drying_pan" "disposal" "door"
## [7] "drawer" "faucet" "freezer"
## [10] "garbage can" "garbage_can" "left_sink"
## [13] "microwave" "oven" "pillowcase"
## [16] "refrigerator" "right_sink" "sink"
## [19] "soap_dispenser" "sponge" "stall"
## [22] "stove" "television" "toilet"
## [25] "wall" "water"
colvec.Place_1=c("green4","darkorange","darkolivegreen","darkgoldenrod1","cyan2","cornflowerblue","coral1","chocolate3","chartreuse","blueviolet","aquamarine","darkslateblue","cornsilk1","firebrick","indianred1","lightblue1","darkorchid3","darkgoldenrod4","darkcyan","chartreuse4","greenyellow","brown1","deepskyblue2","mediumspringgreen","lightslateblue","lightsalmon")
plot(FJL.can.nmds, pch=16, col=colvec.Place_1[FJL.map$Place_1])
legend('topleft', c("cabinet","counter","cutting_board","dish_drying_pan","disposal","door","drawer","faucet","freezer","garbage_can","microwave","oven","pillowcase","refrigerator","sink","soap_dispenser","sponge","stall","stove","television","toilet","wall","water"), cex=0.5, pt.cex=1, text.width=0.2, y.intersp=0.9, col=colvec.Place_1, pch=16)
title(main="Canberra NMDS colored by Place_1")
levels(FJL.map$Geolocation)
## [1] "Boulder_CO_USA" "RaleighDurham_NC_USA" "Seoul_Korea"
colvec.Geoloc=c("darkorchid","darkorange","cyan2")
plot(FJL.can.nmds, pch=16, col = colvec.Geoloc[FJL.map$Geolocation])
legend('topleft', c("Boulder_CO_USA","RaleighDurham_NC_USA","Seoul_Korea"),cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9, col=colvec.Geoloc, pch=16)
title(main='Canberra NMDS colored by Geolocation')
levels(FJL.map$phinchID)
## [1] "Flores_Kitchen" "Flores_restroom" "Jeon_korea" "Leff_homes"
colvec.pID=c("aquamarine","chartreuse4","firebrick","darkgoldenrod1")
plot(FJL.can.nmds, pch=16, col=colvec.pID[FJL.map$phinchID])
legend('topleft',c("Flores_Kitchen","Flores_restroom","Jeon_korea","Leff_homes"), cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9,col=colvec.pID, pch=16)
title(main='Canberra NMDS colored by study (phinchID)')
levels(FJL.map$Room_Function)
## [1] "home" "kitchen" "restroom"
colvec.RF=c("darkorchid","cornflowerblue","coral1")
plot(FJL.can.nmds,pch=16,col=colvec.RF[FJL.map$Room_Function])
legend('topleft',c("home","kitchen","restroom"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.RF,pch=16)
title(main='Canberra NMDS colored by Room_Function')
levels(FJL.map$Target_Region)
## [1] "V1_V2" "V1_V3" "V4"
colvec.TR=c("deeppink","chartreuse","goldenrod")
plot(FJL.can.nmds,pch=16,col=colvec.TR[FJL.map$Target_Region])
legend('topleft',c("V1_V2","V1_V3","V4"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.TR,pch=16)
title(main='Canberra NMDS colored by Target_Region')
levels(FJL.map$Sequencing_Technology)
## [1] "454_GS_Junior" "Illumina_HiSeq" "Illumina_HiSeq2000"
## [4] "Illumina_MiSeq"
colvec.ST=c("coral1","cornflowerblue","chartreuse","darkorchid")
plot(FJL.can.nmds,pch=16,col=colvec.ST[FJL.map$Sequencing_Technology])
legend('topleft',c("454_GS_Junior","Illumina_HiSeq","Illumina_HiSeq2000","Illumina_MiSeq"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.ST,pch=16)
title(main='Canberra NMDS colored by Sequencing_Technology')
levels(FJL.map$Building_Type2)
## [1] "condo_town" "family_home" "low_rise_apt"
## [4] "university_building"
colvec.BT2=c("darkorchid","aquamarine","deeppink","darkgoldenrod2")
plot(FJL.can.nmds,pch=16,col=colvec.BT2[FJL.map$Building_Type2])
legend('topleft',c("condo_town","family_home","low_rise_apt","university_building"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.BT2,pch=16)
title(main='Canberra NMDS colored by Building_Type2')
Could Sequencing technologies (Miseq v. HiSeq/HiSeq2000 be driving that split in the Leff homes data, resulting in the clusters that we see?)
Need to remove the one half of the square for the self-self comparisons.
Make distance matrices between locations of interest, while removing upper triangle for self-self comparisons.
restroom.restroom.can.dist <- as.dist(FJL.can.mat[restroom, restroom])
can.RR.tri <- restroom.restroom.can.dist[upper.tri(restroom.restroom.can.dist)]
restroom.restroom.can.dist <- can.RR.tri[-which(is.na(can.RR.tri))]
kitchen.kitchen.can.dist <- as.dist(FJL.can.mat[kitchen, kitchen])
can.KK.tri <- kitchen.kitchen.can.dist[upper.tri(kitchen.kitchen.can.dist)]
kitchen.kitchen.can.dist <- can.KK.tri[-which(is.na(can.KK.tri))]
toilet.toilet.can.dist <- as.dist(FJL.can.mat[toilet,toilet])
can.TT.tri <- toilet.toilet.can.dist[upper.tri(toilet.toilet.can.dist)]
toilet.toilet.can.dist <- can.TT.tri[-which(is.na(can.TT.tri))]
fridge.fridge.can.dist <- as.dist(FJL.can.mat[fridge,fridge])
can.FF.tri <- fridge.fridge.can.dist[upper.tri(fridge.fridge.can.dist)]
fridge.fridge.can.dist <- can.FF.tri[-which(is.na(can.FF.tri))]
restroom.kitchen.can.dist <- as.dist(FJL.can.mat[restroom, kitchen])
## Warning: non-square matrix
toilet.fridge.can.dist <- as.dist(FJL.can.mat[toilet, fridge])
## Warning: non-square matrix
Make vector of distances for the comparisons you want to make.
mat1 <- as.vector(restroom.restroom.can.dist)
mat2 <- as.vector(kitchen.kitchen.can.dist)
mat3 <- as.vector(toilet.toilet.can.dist)
mat4 <- as.vector(fridge.fridge.can.dist)
mat5 <- as.vector(restroom.kitchen.can.dist)
mat6 <- as.vector(toilet.fridge.can.dist)
Run t-tests on, and make boxplot, of distance comparisons.
t.test(mat1,mat5)
##
## Welch Two Sample t-test
##
## data: mat1 and mat5
## t = -34.93, df = 3089, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.02736 -0.02445
## sample estimates:
## mean of x mean of y
## 0.9622 0.9881
boxplot(mat1,mat5, main="Restroom/Restroom v. Restroom/Kitchen",names=c("Restroom/Restroom","Restroom/Kitchen"), ylab="Canberra Distance")
t.test(mat2,mat5)
##
## Welch Two Sample t-test
##
## data: mat2 and mat5
## t = -164.5, df = 29014, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.04516 -0.04410
## sample estimates:
## mean of x mean of y
## 0.9435 0.9881
boxplot(mat2,mat5, main="Kitchen/Kitchen v. Restroom/Kitchen",names=c("Kitchen/Kitchen","Restroom/Kitchen"), ylab="Canberra Distance")
The unweighted unifrac distance matrix was already imported, and re-ordered to match the map file.
Make an ordination with the unweighted unifrac distance matrix.
FJL.uni.nmds <- nmds(unifrac.dist.mat)
## initial value 32.583909
## iter 5 value 17.629171
## iter 10 value 16.602288
## iter 10 value 16.588331
## iter 10 value 16.587316
## final value 16.587316
## converged
Plot ordination, colored by different variables.
plot(FJL.uni.nmds, pch=16, col=colvec.Place_1[FJL.map$Place_1])
legend('topleft', c("cabinet","counter","cutting_board","dish_drying_pan","disposal","door","drawer","faucet","freezer","garbage_can","microwave","oven","pillowcase","refrigerator","sink","soap_dispenser","sponge","stall","stove","television","toilet","wall","water"), cex=0.5, pt.cex=1, text.width=0.2, y.intersp=0.9, col=colvec.Place_1, pch=16)
title(main="Unweighted Unifrac NMDS colored by Place_1")
plot(FJL.uni.nmds, pch=16, col=colvec.Geoloc[FJL.map$Geolocation])
legend('topleft', c("Boulder_CO_USA","RaleighDurham_NC_USA","Seoul_Korea"),cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9, col=colvec.Geoloc, pch=16)
title(main='Unweighted Unifrac NMDS colored by Geolocation')
plot(FJL.uni.nmds, pch=16, col=colvec.pID[FJL.map$phinchID])
legend('topleft',c("Flores_Kitchen","Flores_restroom","Jeon_korea","Leff_homes"), cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9,col=colvec.pID, pch=16)
title(main='Unweighted Unifrac NMDS colored by study (phinchID)')
plot(FJL.uni.nmds,pch=16,col=colvec.RF[FJL.map$Room_Function])
legend('topleft',c("home","kitchen","restroom"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.RF,pch=16)
title(main='Unweighted Unifrac NMDS colored by Room_Function')
plot(FJL.uni.nmds,pch=16,col=colvec.TR[FJL.map$Target_Region])
legend('topleft',c("V1_V2","V1_V3","V4"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.TR,pch=16)
title(main='Unweighted Unifrac NMDS colored by Target_Region')
plot(FJL.uni.nmds,pch=16,col=colvec.ST[FJL.map$Sequencing_Technology])
legend('topleft',c("454_GS_Junior","Illumina_HiSeq","Illumina_HiSeq2000","Illumina_MiSeq"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.ST,pch=16)
title(main='Unweighted Unifrac NMDS colored by Sequencing_Technology')
plot(FJL.uni.nmds,pch=16,col=colvec.BT2[FJL.map$Building_Type2])
legend('topleft',c("condo_town","family_home","low_rise_apt","university_building"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.BT2,pch=16)
title(main='Unweighted Unifrac colored by Building_Type2')
Make distance matrices between locations of interest, while removing upper triangle for self-self comparisons.
restroom.restroom.uni.dist <- as.dist(unifrac.dist.mat[restroom, restroom])
uni.RR.tri <- restroom.restroom.uni.dist[upper.tri(restroom.restroom.uni.dist)]
restroom.restroom.uni.dist <- uni.RR.tri[-which(is.na(uni.RR.tri))]
kitchen.kitchen.uni.dist <- as.dist(unifrac.dist.mat[kitchen, kitchen])
uni.KK.tri <- kitchen.kitchen.uni.dist[upper.tri(kitchen.kitchen.uni.dist)]
kitchen.kitchen.uni.dist <- uni.KK.tri[-which(is.na(uni.KK.tri))]
toilet.toilet.uni.dist <- as.dist(unifrac.dist.mat[toilet,toilet])
uni.TT.tri <- toilet.toilet.uni.dist[upper.tri(toilet.toilet.uni.dist)]
toilet.toilet.uni.dist <- uni.TT.tri[-which(is.na(uni.TT.tri))]
fridge.fridge.uni.dist <- as.dist(unifrac.dist.mat[fridge,fridge])
uni.FF.tri <- fridge.fridge.uni.dist[upper.tri(fridge.fridge.uni.dist)]
fridge.fridge.uni.dist <- uni.FF.tri[-which(is.na(uni.FF.tri))]
restroom.kitchen.uni.dist <- as.dist(unifrac.dist.mat[restroom, kitchen])
## Warning: non-square matrix
toilet.fridge.uni.dist <- as.dist(unifrac.dist.mat[toilet, fridge])
## Warning: non-square matrix
Make vector of distances for the comparisons you want to make.
mat7 <- as.vector(restroom.restroom.uni.dist)
mat8 <- as.vector(kitchen.kitchen.uni.dist)
mat9 <- as.vector(toilet.toilet.uni.dist)
mat10 <- as.vector(fridge.fridge.uni.dist)
mat11 <- as.vector(restroom.kitchen.uni.dist)
mat12 <- as.vector(toilet.fridge.uni.dist)
Run t-tests on, and make boxplot, of distance comparisons.
t.test(mat7,mat11)
##
## Welch Two Sample t-test
##
## data: mat7 and mat11
## t = -27.02, df = 3556, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.05020 -0.04341
## sample estimates:
## mean of x mean of y
## 0.7357 0.7825
boxplot(mat7,mat11, main= "Restroom/Restroom v. Restroom/Kitchen",names=c("Restroom/Restroom","Restroom/Kitchen"),ylab='Unweighted Unifrac Distance')
t.test(mat8,mat11)
##
## Welch Two Sample t-test
##
## data: mat8 and mat11
## t = -91.55, df = 29935, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.06981 -0.06688
## sample estimates:
## mean of x mean of y
## 0.7142 0.7825
boxplot(mat8,mat11, main="Kitchen/Kitchen v. Restroom/Kitchen",names=c('Kitchen/Kitchen','Restroom/Kitchen'),ylab='Unweighted Unifrac Distance')
Create a Jaccard distance matrix.
FJL.jac <- vegdist(FJL.table, 'jaccard')
FJL.jac.mat <- as.matrix(FJL.jac)
Create an ordination using jaccard.
FJL.jac.nmds <- nmds(FJL.jac)
## initial value 42.934861
## iter 5 value 28.977639
## iter 10 value 25.290675
## final value 25.022110
## converged
Plot the ordination, colored by different variables.
plot(FJL.jac.nmds, pch=16, col=colvec.Place_1[FJL.map$Place_1])
legend('topleft', c("cabinet","counter","cutting_board","dish_drying_pan","disposal","door","drawer","faucet","freezer","garbage_can","microwave","oven","pillowcase","refrigerator","sink","soap_dispenser","sponge","stall","stove","television","toilet","wall","water"), cex=0.5, pt.cex=1, text.width=0.2, y.intersp=0.9, col=colvec.Place_1, pch=16)
title(main="Jaccard NMDS colored by Place_1")
plot(FJL.jac.nmds, pch=16, col=colvec.Geoloc[FJL.map$Geolocation])
legend('topleft', c("Boulder_CO_USA","RaleighDurham_NC_USA","Seoul_Korea"),cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9, col=colvec.Geoloc, pch=16)
title(main='Jaccard NMDS colored by Geolocation')
plot(FJL.jac.nmds, pch=16, col=colvec.pID[FJL.map$phinchID])
legend('topleft',c("Flores_Kitchen","Flores_restroom","Jeon_korea","Leff_homes"), cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9,col=colvec.pID, pch=16)
title(main='Jaccard NMDS colored by study (phinchID)')
plot(FJL.jac.nmds,pch=16,col=colvec.RF[FJL.map$Room_Function])
legend('topleft',c("home","kitchen","restroom"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.RF,pch=16)
title(main='Jaccard NMDS colored by Room_Function')
plot(FJL.jac.nmds,pch=16,col=colvec.TR[FJL.map$Target_Region])
legend('topleft',c("V1_V2","V1_V3","V4"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.TR,pch=16)
title(main='Jaccard NMDS colored by Target_Region')
plot(FJL.jac.nmds,pch=16,col=colvec.ST[FJL.map$Sequencing_Technology])
legend('topleft',c("454_GS_Junior","Illumina_HiSeq","Illumina_HiSeq2000","Illumina_MiSeq"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.ST,pch=16)
title(main='Jaccard NMDS colored by Sequencing_Technology')
plot(FJL.jac.nmds,pch=16,col=colvec.BT2[FJL.map$Building_Type2])
legend('topleft',c("condo_town","family_home","low_rise_apt","university_building"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.BT2,pch=16)
title(main='Jaccard NMDS colored by Building_Type2')
Make distance matrices between locations of interest, while removing upper triangle for self-self comparisons.
restroom.restroom.jac.dist <- as.dist(FJL.jac.mat[restroom, restroom])
jac.RR.tri <- restroom.restroom.jac.dist[upper.tri(restroom.restroom.jac.dist)]
restroom.restroom.jac.dist <- jac.RR.tri[-which(is.na(jac.RR.tri))]
kitchen.kitchen.jac.dist <- as.dist(FJL.jac.mat[kitchen, kitchen])
jac.KK.tri <- kitchen.kitchen.jac.dist[upper.tri(kitchen.kitchen.jac.dist)]
kitchen.kitchen.jac.dist <- jac.KK.tri[-which(is.na(jac.KK.tri))]
toilet.toilet.jac.dist <- as.dist(FJL.jac.mat[toilet,toilet])
jac.TT.tri <- toilet.toilet.jac.dist[upper.tri(toilet.toilet.jac.dist)]
toilet.toilet.jac.dist <- jac.TT.tri[-which(is.na(jac.TT.tri))]
fridge.fridge.jac.dist <- as.dist(FJL.jac.mat[fridge,fridge])
jac.FF.tri <- fridge.fridge.jac.dist[upper.tri(fridge.fridge.jac.dist)]
fridge.fridge.jac.dist <- jac.FF.tri[-which(is.na(jac.FF.tri))]
restroom.kitchen.jac.dist <- as.dist(FJL.jac.mat[restroom, kitchen])
## Warning: non-square matrix
toilet.fridge.jac.dist <- as.dist(FJL.jac.mat[toilet, fridge])
## Warning: non-square matrix
Make vector of distances for the comparisons you want to make.
mat13 <- as.vector(restroom.restroom.jac.dist)
mat14 <- as.vector(kitchen.kitchen.jac.dist)
mat15 <- as.vector(toilet.toilet.jac.dist)
mat16 <- as.vector(fridge.fridge.jac.dist)
mat17 <- as.vector(restroom.kitchen.jac.dist)
mat18 <- as.vector(toilet.fridge.jac.dist)
Run t-tests on, and make boxplot, of distance comparisons.
t.test(mat13,mat17)
##
## Welch Two Sample t-test
##
## data: mat13 and mat17
## t = -33.98, df = 2998, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.04784 -0.04262
## sample estimates:
## mean of x mean of y
## 0.9427 0.9880
boxplot(mat13,mat17, main="Restroom/Restroom v. Restroom/Kitchen",names=c('Restroom/Restroom','Restroom/Kitchen'),ylab='Jaccard Distance')
t.test(mat14,mat17)
##
## Welch Two Sample t-test
##
## data: mat14 and mat17
## t = -139.8, df = 23386, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.08032 -0.07810
## sample estimates:
## mean of x mean of y
## 0.9088 0.9880
boxplot(mat14,mat17, main="Kitchen/Kitchen v. Restroom/Kitchen",names=c('Kitchen/Kitchen','Restroom/Kitchen'),ylab='Jaccard Distance')
sortKitTax <- sort(apply(kitchen.table, 2, sum)/sum(kitchen.table), decreasing=TRUE)[1:25]
kitAbun25 <- FJL.taxa[names(sortKitTax), ]
kitAbun25$abundance <- sortKitTax
# kitAbun25
kitAbun25$taxa.names <- gsub('\\;\\ s\\_\\_$', '', kitAbun25$taxa.names)
kitAbun25$taxa.names <- gsub('\\;\\ g\\_\\_$', '', kitAbun25$taxa.names)
kitAbun25$taxa.names <- gsub('\\;\\ f\\_\\_$', '', kitAbun25$taxa.names)
kitAbun25$taxa.names <- gsub('\\;\\ s\\_\\_', ' ', kitAbun25$taxa.names)
# do this 5 times~ what is this doing???
# kitAbun25$taxa.names <- gsub('^[[:alnum:]]{1,}\\;\\ \\w{1}\\_\\_', '', kitAbun25$taxa.names)
# kitAbun25$taxa.names <- gsub('^\\[[[:alnum:]]{1,}\\]\\;\\ \\w{1}\\_\\_', '', kitAbun25$taxa.names)
kitAbun25.mat <- as.matrix(kitAbun25)
par(las=1, mar=c(5, 25, 1, 1))
barplot(rev(kitAbun25$abundance), horiz=TRUE, names.arg=rev(kitAbun25$taxa.names), cex.names=0.5, main='All Kitchens: Top 25 Taxa')
sortRestTax <- sort(apply(restroom.table, 2, sum)/sum(restroom.table), decreasing=TRUE)[1:25]
RestAbun25 <- FJL.taxa[names(sortRestTax), ]
RestAbun25$abundance <- sortRestTax
# RestAbun25
RestAbun25$taxa.names <- gsub('\\;\\ s\\_\\_$', '', RestAbun25$taxa.names)
RestAbun25$taxa.names <- gsub('\\;\\ g\\_\\_$', '', RestAbun25$taxa.names)
RestAbun25$taxa.names <- gsub('\\;\\ f\\_\\_$', '', RestAbun25$taxa.names)
RestAbun25$taxa.names <- gsub('\\;\\ s\\_\\_', ' ', RestAbun25$taxa.names)
RestAbun25.mat <- as.matrix(RestAbun25)
par(las=1, mar=c(5, 25, 1, 1))
barplot(rev(RestAbun25$abundance), horiz=TRUE, names.arg=rev(RestAbun25$taxa.names), cex.names=0.5, main='All Restrooms: Top 25 Taxa')
kitchen.table[1:5,1:5]
## 4479944 741867 11544 623634 1126569
## H13Cb.729458 0 0 0 0 0
## H13Fr.729158 0 0 0 0 0
## H13Kc.729422 0 0 0 0 0
## H14Cb.729167 0 0 0 0 0
## H14Fr.729334 0 0 0 0 0
restroom.table[1:5,1:5]
## 4479944 741867 11544 623634 1126569
## B1.489537 0 0 0 0 0
## B2.489526 0 0 0 0 0
## B3.489528 0 0 0 0 0
## B5.489455 0 0 0 0 0
## B6.489449 0 0 0 0 0
leff.kitchen <- which(FJL.map$phinchID == 'Leff_homes' & FJL.map$Room_Function == 'kitchen')
leff.kitchen.table <- FJL.table[leff.kitchen, ]
flores.kitchen <- which(FJL.map$phinchID == 'Flores_Kitchen' & FJL.map$Room_Function == 'kitchen')
flores.kitchen.table <- FJL.table[flores.kitchen, ]
jeon.kitchen <- which(FJL.map$phinchID == 'Jeon_korea' & FJL.map$Room_Function == 'kitchen')
jeon.kitchen.table <- FJL.table[jeon.kitchen, ]
sortKitTax.leff <- sort(apply(leff.kitchen.table, 2, sum)/sum(leff.kitchen.table), decreasing=TRUE)[1:25]
sortKitTax.flores <- sort(apply(flores.kitchen.table, 2, sum)/sum(flores.kitchen.table), decreasing=TRUE)[1:25]
sortKitTax.jeon <- sort(apply(jeon.kitchen.table, 2, sum)/sum(jeon.kitchen.table), decreasing=TRUE)[1:25]
kitAbun25.leff <- FJL.taxa[names(sortKitTax.leff), ]
kitAbun25.leff$abundance <- sortKitTax.leff
# kitAbun25.leff
kitAbun25.flores <- FJL.taxa[names(sortKitTax.flores), ]
kitAbun25.flores$abundance <- sortKitTax.flores
# kitAbun25.flores
kitAbun25.jeon <- FJL.taxa[names(sortKitTax.jeon), ]
kitAbun25.jeon$abundance <- sortKitTax.jeon
# kitAbun25.jeon
kitAbun25.leff$taxa.names <- gsub('\\;\\ s\\_\\_$', '', kitAbun25.leff$taxa.names)
kitAbun25.leff$taxa.names <- gsub('\\;\\ g\\_\\_$', '', kitAbun25.leff$taxa.names)
kitAbun25.leff$taxa.names <- gsub('\\;\\ f\\_\\_$', '', kitAbun25.leff$taxa.names)
kitAbun25.leff$taxa.names <- gsub('\\;\\ s\\_\\_', ' ', kitAbun25.leff$taxa.names)
kitAbun25.flores$taxa.names <- gsub('\\;\\ s\\_\\_$', '', kitAbun25.flores$taxa.names)
kitAbun25.flores$taxa.names <- gsub('\\;\\ g\\_\\_$', '', kitAbun25.flores$taxa.names)
kitAbun25.flores$taxa.names <- gsub('\\;\\ f\\_\\_$', '', kitAbun25.flores$taxa.names)
kitAbun25.flores$taxa.names <- gsub('\\;\\ s\\_\\_', ' ', kitAbun25.flores$taxa.names)
kitAbun25.jeon$taxa.names <- gsub('\\;\\ s\\_\\_$', '', kitAbun25.jeon$taxa.names)
kitAbun25.jeon$taxa.names <- gsub('\\;\\ g\\_\\_$', '', kitAbun25.jeon$taxa.names)
kitAbun25.jeon$taxa.names <- gsub('\\;\\ f\\_\\_$', '', kitAbun25.jeon$taxa.names)
kitAbun25.jeon$taxa.names <- gsub('\\;\\ s\\_\\_', ' ', kitAbun25.jeon$taxa.names)
kitAbun25.leff.mat <- as.matrix(kitAbun25.leff)
par(las=1, mar=c(5, 25, 1, 1))
barplot(rev(kitAbun25.leff$abundance), horiz=TRUE, names.arg=rev(kitAbun25.leff$taxa.names), cex.names=0.5, main='Leff Kitchens: Top 25 Taxa')
kitAbun25.flores.mat <- as.matrix(kitAbun25.flores)
par(las=1, mar=c(5, 25, 1, 1))
barplot(rev(kitAbun25.flores$abundance), horiz=TRUE, names.arg=rev(kitAbun25.flores$taxa.names), cex.names=0.5, main='Flores Kitchens: Top 25 Taxa')
kitAbun25.jeon.mat <- as.matrix(kitAbun25.jeon)
par(las=1, mar=c(5, 25, 1, 1))
barplot(rev(kitAbun25.jeon$abundance), horiz=TRUE, names.arg=rev(kitAbun25.jeon$taxa.names), cex.names=0.5, main='Jeon Kitchens: Top 25 Taxa')
kitchen.table[1:5,1:5]
## 4479944 741867 11544 623634 1126569
## H13Cb.729458 0 0 0 0 0
## H13Fr.729158 0 0 0 0 0
## H13Kc.729422 0 0 0 0 0
## H14Cb.729167 0 0 0 0 0
## H14Fr.729334 0 0 0 0 0
restroom.table[1:5,1:5]
## 4479944 741867 11544 623634 1126569
## B1.489537 0 0 0 0 0
## B2.489526 0 0 0 0 0
## B3.489528 0 0 0 0 0
## B5.489455 0 0 0 0 0
## B6.489449 0 0 0 0 0
leff.restroom <- which(FJL.map$phinchID == 'Leff_homes' & FJL.map$Room_Function == 'restroom')
leff.restroom.table <- FJL.table[leff.restroom, ]
flores.restroom <- which(FJL.map$phinchID == 'Flores_restroom' & FJL.map$Room_Function == 'restroom')
flores.restroom.table <- FJL.table[flores.restroom, ]
jeon.restroom <- which(FJL.map$phinchID == 'Jeon_korea' & FJL.map$Room_Function == 'restroom')
jeon.restroom.table <- FJL.table[jeon.kitchen, ]
sortRestTax.leff <- sort(apply(leff.restroom.table, 2, sum)/sum(leff.restroom.table), decreasing=TRUE)[1:25]
sortRestTax.flores <- sort(apply(flores.restroom.table, 2, sum)/sum(flores.restroom.table), decreasing=TRUE)[1:25]
sortRestTax.jeon <- sort(apply(jeon.restroom.table, 2, sum)/sum(jeon.restroom.table), decreasing=TRUE)[1:25]
RestAbun25.leff <- FJL.taxa[names(sortRestTax.leff), ]
RestAbun25.leff$abundance <- sortRestTax.leff
# RestAbun25.leff
RestAbun25.flores <- FJL.taxa[names(sortRestTax.flores), ]
RestAbun25.flores$abundance <- sortRestTax.flores
# RestAbun25.flores
RestAbun25.jeon <- FJL.taxa[names(sortRestTax.jeon), ]
RestAbun25.jeon$abundance <- sortRestTax.jeon
# RestAbun25.jeon
RestAbun25.leff$taxa.names <- gsub('\\;\\ s\\_\\_$', '', RestAbun25.leff$taxa.names)
RestAbun25.leff$taxa.names <- gsub('\\;\\ g\\_\\_$', '', RestAbun25.leff$taxa.names)
RestAbun25.leff$taxa.names <- gsub('\\;\\ f\\_\\_$', '', RestAbun25.leff$taxa.names)
RestAbun25.leff$taxa.names <- gsub('\\;\\ s\\_\\_', ' ', RestAbun25.leff$taxa.names)
RestAbun25.flores$taxa.names <- gsub('\\;\\ s\\_\\_$', '', RestAbun25.flores$taxa.names)
RestAbun25.flores$taxa.names <- gsub('\\;\\ g\\_\\_$', '', RestAbun25.flores$taxa.names)
RestAbun25.flores$taxa.names <- gsub('\\;\\ f\\_\\_$', '', RestAbun25.flores$taxa.names)
RestAbun25.flores$taxa.names <- gsub('\\;\\ s\\_\\_', ' ', RestAbun25.flores$taxa.names)
RestAbun25.jeon$taxa.names <- gsub('\\;\\ s\\_\\_$', '', RestAbun25.jeon$taxa.names)
RestAbun25.jeon$taxa.names <- gsub('\\;\\ g\\_\\_$', '', RestAbun25.jeon$taxa.names)
RestAbun25.jeon$taxa.names <- gsub('\\;\\ f\\_\\_$', '', RestAbun25.jeon$taxa.names)
RestAbun25.jeon$taxa.names <- gsub('\\;\\ s\\_\\_', ' ', RestAbun25.jeon$taxa.names)
RestAbun25.leff.mat <- as.matrix(RestAbun25.leff)
par(las=1, mar=c(5, 25, 1, 1))
barplot(rev(RestAbun25.leff$abundance), horiz=TRUE, names.arg=rev(RestAbun25.leff$taxa.names), cex.names=0.5, main='Leff Restrooms: Top 25 Taxa')
RestAbun25.flores.mat <- as.matrix(RestAbun25.flores)
par(las=1, mar=c(5, 25, 1, 1))
barplot(rev(RestAbun25.flores$abundance), horiz=TRUE, names.arg=rev(RestAbun25.flores$taxa.names), cex.names=0.5, main='Flores Restrooms: Top 25 Taxa')
RestAbun25.jeon.mat <- as.matrix(RestAbun25.jeon)
par(las=1, mar=c(5, 25, 1, 1))
barplot(rev(RestAbun25.jeon$abundance), horiz=TRUE, names.arg=rev(RestAbun25.jeon$taxa.names), cex.names=0.5, main='Jeon Restrooms: Top 25 Taxa')
Read in OTU table using custom function.
FJL.list.noC <- QiimeIn('data/Ftable_noC.from_biom_w_taxonomy.txt')
Same mapping file as before.
Read in new unweighted unifrac distance matrix, generated with Qiime script. Preview table, and convert to matrix.
unifrac.dist.noC <- read.table('data/unweighted_unifrac_dm_noC.txt', header=T)
unifrac.dist.noC[1:5,1:5]
## KiB54.630662 KiC69.630743 KiB70.630661 KiB40.630629
## KiB54.630662 0.0000 0.7014 0.6787 0.6570
## KiC69.630743 0.7014 0.0000 0.6618 0.7145
## KiB70.630661 0.6787 0.6618 0.0000 0.6510
## KiB40.630629 0.6570 0.7145 0.6510 0.0000
## KiB36.630609 0.6941 0.6806 0.6743 0.6984
## KiB36.630609
## KiB54.630662 0.6941
## KiC69.630743 0.6806
## KiB70.630661 0.6743
## KiB40.630629 0.6984
## KiB36.630609 0.0000
unifrac.dist.noC.mat <- as.matrix(unifrac.dist.noC)
Checkpoints to make sure otu table looks correct and everything is lined up.
names(FJL.list.noC)
FJL.list.noC$Preview
FJL.list.noC$Dimensions
FJL.list.noC$Samples
FJL.list.noC$Taxa
Extract the OTU table and taxon table into two different objects.
FJL.table.noC <- FJL.list.noC$Table
FJL.taxa.noC <- FJL.list.noC$Taxa
Make sure that the OTU table and map line up. If they don’t, reorder using the next section of code.
identical(row.names(FJL.map), row.names(FJL.table.noC)) #False
cbind(row.names(FJL.map), row.names(FJL.table.noC)) #Not matching
all(row.names(FJL.map) %in% row.names(FJL.table.noC)) #True
all(row.names(FJL.table.noC) %in% row.names(FJL.map)) #True
Reorder both the map and the OTU table so that they have the same rows and columns.
FJL.map <- FJL.map[order(row.names(FJL.map)), ]
FJL.table.noC <- FJL.table.noC[row.names(FJL.map), ]
# an alternative way of doing it (without the two linked together):
# FJL.map <- FJL.map[order(row.names(FJL.map)), ]
# FJL.table <- FJL.table[order(row.names(FJL.table)), ]
Reorder the Unifrac distance matrix so that it has the same rows and columns as the map.
unifrac.dist.noC.mat <- unifrac.dist.noC.mat[row.names(FJL.map), row.names(FJL.map)]
Make groups of interest, to make calling them easier later. (Same as before)
How many OTUs are in my OTU table?
dim(FJL.table.noC) # 17219 OTUs
## [1] 758 17219
How many singletons are in my OTU table?
# abundance curve
plot(rev(sort(colSums(FJL.table.noC))))
length(which(colSums(FJL.table.noC) == 1))/ncol(FJL.table.noC) # 31% are singletons
## [1] 0.3141
Make a Canberra distance matrix.
FJL.can.noC <- vegdist(FJL.table.noC, 'canberra')
FJL.can.noC.mat <- as.matrix(FJL.can.noC)
Make an ordination with the canberra distance matrix.
FJL.can.nmds.noC <- nmds(FJL.can.noC)
## initial value 36.132489
## iter 5 value 24.283354
## iter 10 value 23.398636
## final value 23.222887
## converged
Plot ordination, colored by different variables.
levels(FJL.map$Place_1)
## [1] "cabinet" "counter" "cutting_board"
## [4] "dish_drying_pan" "disposal" "door"
## [7] "drawer" "faucet" "freezer"
## [10] "garbage can" "garbage_can" "left_sink"
## [13] "microwave" "oven" "pillowcase"
## [16] "refrigerator" "right_sink" "sink"
## [19] "soap_dispenser" "sponge" "stall"
## [22] "stove" "television" "toilet"
## [25] "wall" "water"
colvec.Place_1=c("green4","darkorange","darkolivegreen","darkgoldenrod1","cyan2","cornflowerblue","coral1","chocolate3","chartreuse","blueviolet","aquamarine","darkslateblue","cornsilk1","firebrick","indianred1","lightblue1","darkorchid3","darkgoldenrod4","darkcyan","chartreuse4","greenyellow","brown1","deepskyblue2","mediumspringgreen","lightslateblue","lightsalmon")
plot(FJL.can.nmds.noC, pch=16, col=colvec.Place_1[FJL.map$Place_1])
legend('topleft', c("cabinet","counter","cutting_board","dish_drying_pan","disposal","door","drawer","faucet","freezer","garbage_can","microwave","oven","pillowcase","refrigerator","sink","soap_dispenser","sponge","stall","stove","television","toilet","wall","water"), cex=0.5, pt.cex=1, text.width=0.2, y.intersp=0.9, col=colvec.Place_1, pch=16)
title(main="Canberra NMDS colored by Place_1 (no chloroplasts)")
levels(FJL.map$Geolocation)
## [1] "Boulder_CO_USA" "RaleighDurham_NC_USA" "Seoul_Korea"
colvec.Geoloc=c("darkorchid","darkorange","cyan2")
plot(FJL.can.nmds.noC, pch=16, col = colvec.Geoloc[FJL.map$Geolocation])
legend('topleft', c("Boulder_CO_USA","RaleighDurham_NC_USA","Seoul_Korea"),cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9, col=colvec.Geoloc, pch=16)
title(main='Canberra NMDS colored by Geolocation (no chloroplasts)')
levels(FJL.map$phinchID)
## [1] "Flores_Kitchen" "Flores_restroom" "Jeon_korea" "Leff_homes"
colvec.pID=c("aquamarine","chartreuse4","firebrick","darkgoldenrod1")
plot(FJL.can.nmds.noC, pch=16, col=colvec.pID[FJL.map$phinchID])
legend('topleft',c("Flores_Kitchen","Flores_restroom","Jeon_korea","Leff_homes"), cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9,col=colvec.pID, pch=16)
title(main='Canberra NMDS colored by study (phinchID) (no chloroplasts)')
levels(FJL.map$Room_Function)
## [1] "home" "kitchen" "restroom"
colvec.RF=c("darkorchid","cornflowerblue","coral1")
plot(FJL.can.nmds.noC,pch=16,col=colvec.RF[FJL.map$Room_Function])
legend('topleft',c("home","kitchen","restroom"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.RF,pch=16)
title(main='Canberra NMDS colored by Room_Function (no chloroplasts)')
levels(FJL.map$Target_Region)
## [1] "V1_V2" "V1_V3" "V4"
colvec.TR=c("deeppink","chartreuse","goldenrod")
plot(FJL.can.nmds.noC,pch=16,col=colvec.TR[FJL.map$Target_Region])
legend('topleft',c("V1_V2","V1_V3","V4"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.TR,pch=16)
title(main='Canberra NMDS colored by Target_Region (no chloroplasts)')
levels(FJL.map$Sequencing_Technology)
## [1] "454_GS_Junior" "Illumina_HiSeq" "Illumina_HiSeq2000"
## [4] "Illumina_MiSeq"
colvec.ST=c("coral1","cornflowerblue","chartreuse","darkorchid")
plot(FJL.can.nmds.noC,pch=16,col=colvec.ST[FJL.map$Sequencing_Technology])
legend('topleft',c("454_GS_Junior","Illumina_HiSeq","Illumina_HiSeq2000","Illumina_MiSeq"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.ST,pch=16)
title(main='Canberra NMDS colored by Sequencing_Technology (no chloroplasts)')
levels(FJL.map$Building_Type2)
## [1] "condo_town" "family_home" "low_rise_apt"
## [4] "university_building"
colvec.BT2=c("darkorchid","aquamarine","deeppink","darkgoldenrod2")
plot(FJL.can.nmds.noC,pch=16,col=colvec.BT2[FJL.map$Building_Type2])
legend('topleft',c("condo_town","family_home","low_rise_apt","university_building"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.BT2,pch=16)
title(main='Canberra NMDS colored by Building_Type2 (no chloroplasts)')
Could Sequencing technologies (Miseq v. HiSeq/HiSeq2000 be driving that split in the Leff homes data, resulting in the clusters that we see?)
Make distance matrices between locations of interest, while removing upper triangle for self-self comparisons.
restroom.restroom.can.dist.noC <- as.dist(FJL.can.noC.mat[restroom, restroom])
can.RR.tri.noC <- restroom.restroom.can.dist.noC[upper.tri(restroom.restroom.can.dist.noC)]
restroom.restroom.can.dist.noC <- can.RR.tri.noC[-which(is.na(can.RR.tri.noC))]
kitchen.kitchen.can.dist.noC <- as.dist(FJL.can.noC.mat[kitchen, kitchen])
can.KK.tri.noC <- kitchen.kitchen.can.dist.noC[upper.tri(kitchen.kitchen.can.dist.noC)]
kitchen.kitchen.can.dist.noC <- can.KK.tri.noC[-which(is.na(can.KK.tri.noC))]
toilet.toilet.can.dist.noC <- as.dist(FJL.can.noC.mat[toilet,toilet])
can.TT.tri.noC <- toilet.toilet.can.dist.noC[upper.tri(toilet.toilet.can.dist.noC)]
toilet.toilet.can.dist.noC <- can.TT.tri.noC[-which(is.na(can.TT.tri.noC))]
fridge.fridge.can.dist.noC <- as.dist(FJL.can.noC.mat[fridge,fridge])
can.FF.tri.noC <- fridge.fridge.can.dist.noC[upper.tri(fridge.fridge.can.dist.noC)]
fridge.fridge.can.dist.noC <- can.FF.tri.noC[-which(is.na(can.FF.tri.noC))]
restroom.kitchen.can.dist.noC <- as.dist(FJL.can.noC.mat[restroom, kitchen])
## Warning: non-square matrix
toilet.fridge.can.dist.noC <- as.dist(FJL.can.noC.mat[toilet, fridge])
## Warning: non-square matrix
Make vector of distances for the comparisons you want to make.
mat1.noC <- as.vector(restroom.restroom.can.dist.noC)
mat2.noC <- as.vector(kitchen.kitchen.can.dist.noC)
mat3.noC <- as.vector(toilet.toilet.can.dist.noC)
mat4.noC <- as.vector(fridge.fridge.can.dist.noC)
mat5.noC <- as.vector(restroom.kitchen.can.dist.noC)
mat6.noC <- as.vector(toilet.fridge.can.dist.noC)
Run t-tests on, and make boxplot, of distance comparisons.
t.test(mat1.noC,mat5.noC)
##
## Welch Two Sample t-test
##
## data: mat1.noC and mat5.noC
## t = -35.11, df = 3089, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.02754 -0.02462
## sample estimates:
## mean of x mean of y
## 0.9624 0.9885
boxplot(mat1.noC,mat5.noC, main="Restroom/Restroom v. Restroom/Kitchen (no chloroplasts)",names=c("Restroom/Restroom","Restroom/Kitchen"), ylab="Canberra Distance")
t.test(mat2.noC,mat5.noC)
##
## Welch Two Sample t-test
##
## data: mat2.noC and mat5.noC
## t = -160.5, df = 29472, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.04281 -0.04178
## sample estimates:
## mean of x mean of y
## 0.9462 0.9885
boxplot(mat2.noC,mat5.noC, main="Kitchen/Kitchen v. Restroom/Kitchen (no chloroplasts)",names=c("Kitchen/Kitchen","Restroom/Kitchen"), ylab="Canberra Distance")
The new unweighted unifrac distance matrix was already imported, and re-ordered to match the map file.
Make an ordination with the unweighted unifrac distance matrix.
FJL.uni.nmds.noC <- nmds(unifrac.dist.noC.mat)
## initial value 32.053727
## iter 5 value 17.618373
## iter 10 value 16.819742
## iter 10 value 16.813545
## iter 10 value 16.812329
## final value 16.812329
## converged
Plot ordination, colored by different variables.
plot(FJL.uni.nmds.noC, pch=16, col=colvec.Place_1[FJL.map$Place_1])
legend('topleft', c("cabinet","counter","cutting_board","dish_drying_pan","disposal","door","drawer","faucet","freezer","garbage_can","microwave","oven","pillowcase","refrigerator","sink","soap_dispenser","sponge","stall","stove","television","toilet","wall","water"), cex=0.5, pt.cex=1, text.width=0.2, y.intersp=0.9, col=colvec.Place_1, pch=16)
title(main="Unweighted Unifrac NMDS colored by Place_1 (no chloroplasts)")
plot(FJL.uni.nmds.noC, pch=16, col=colvec.Geoloc[FJL.map$Geolocation])
legend('topleft', c("Boulder_CO_USA","RaleighDurham_NC_USA","Seoul_Korea"),cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9, col=colvec.Geoloc, pch=16)
title(main='Unweighted Unifrac NMDS colored by Geolocation (no chloroplasts)')
plot(FJL.uni.nmds.noC, pch=16, col=colvec.pID[FJL.map$phinchID])
legend('topleft',c("Flores_Kitchen","Flores_restroom","Jeon_korea","Leff_homes"), cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9,col=colvec.pID, pch=16)
title(main='Unweighted Unifrac NMDS colored by study (phinchID) (no chloroplasts')
plot(FJL.uni.nmds.noC,pch=16,col=colvec.RF[FJL.map$Room_Function])
legend('topleft',c("home","kitchen","restroom"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.RF,pch=16)
title(main='Unweighted Unifrac NMDS colored by Room_Function (no chloroplasts)')
plot(FJL.uni.nmds.noC,pch=16,col=colvec.TR[FJL.map$Target_Region])
legend('topleft',c("V1_V2","V1_V3","V4"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.TR,pch=16)
title(main='Unweighted Unifrac NMDS colored by Target_Region (no chloroplasts)')
plot(FJL.uni.nmds.noC,pch=16,col=colvec.ST[FJL.map$Sequencing_Technology])
legend('topleft',c("454_GS_Junior","Illumina_HiSeq","Illumina_HiSeq2000","Illumina_MiSeq"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.ST,pch=16)
title(main='Unweighted Unifrac NMDS colored by Sequencing_Technology (no chloroplasts)')
plot(FJL.uni.nmds.noC,pch=16,col=colvec.BT2[FJL.map$Building_Type2])
legend('topleft',c("condo_town","family_home","low_rise_apt","university_building"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.BT2,pch=16)
title(main='Unweighted Unifrac colored by Building_Type2 (no chloroplasts)')
Make distance matrices between locations of interest, while removing upper triangle for self-self comparisons.
restroom.restroom.uni.dist.noC <- as.dist(unifrac.dist.noC.mat[restroom, restroom])
uni.RR.tri.noC <- restroom.restroom.uni.dist.noC[upper.tri(restroom.restroom.uni.dist.noC)]
restroom.restroom.uni.dist.noC <- uni.RR.tri.noC[-which(is.na(uni.RR.tri.noC))]
kitchen.kitchen.uni.dist.noC <- as.dist(unifrac.dist.noC.mat[kitchen, kitchen])
uni.KK.tri.noC <- kitchen.kitchen.uni.dist.noC[upper.tri(kitchen.kitchen.uni.dist.noC)]
kitchen.kitchen.uni.dist.noC <- uni.KK.tri.noC[-which(is.na(uni.KK.tri.noC))]
toilet.toilet.uni.dist.noC <- as.dist(unifrac.dist.noC.mat[toilet,toilet])
uni.TT.tri.noC <- toilet.toilet.uni.dist.noC[upper.tri(toilet.toilet.uni.dist.noC)]
toilet.toilet.uni.dist.noC <- uni.TT.tri.noC[-which(is.na(uni.TT.tri.noC))]
fridge.fridge.uni.dist.noC <- as.dist(unifrac.dist.noC.mat[fridge,fridge])
uni.FF.tri.noC <- fridge.fridge.uni.dist.noC[upper.tri(fridge.fridge.uni.dist.noC)]
fridge.fridge.uni.dist.noC <- uni.FF.tri.noC[-which(is.na(uni.FF.tri.noC))]
restroom.kitchen.uni.dist.noC <- as.dist(unifrac.dist.noC.mat[restroom, kitchen])
## Warning: non-square matrix
toilet.fridge.uni.dist.noC <- as.dist(unifrac.dist.noC.mat[toilet, fridge])
## Warning: non-square matrix
Make vector of distances for the comparisons you want to make.
mat7.noC <- as.vector(restroom.restroom.uni.dist.noC)
mat8.noC <- as.vector(kitchen.kitchen.uni.dist.noC)
mat9.noC <- as.vector(toilet.toilet.uni.dist.noC)
mat10.noC <- as.vector(fridge.fridge.uni.dist.noC)
mat11.noC <- as.vector(restroom.kitchen.uni.dist.noC)
mat12.noC <- as.vector(toilet.fridge.uni.dist.noC)
Run t-tests on, and make boxplot, of distance comparisons.
t.test(mat7.noC,mat11.noC)
##
## Welch Two Sample t-test
##
## data: mat7.noC and mat11.noC
## t = -28.76, df = 3554, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.05216 -0.04550
## sample estimates:
## mean of x mean of y
## 0.7409 0.7897
boxplot(mat7.noC,mat11.noC, main= "Restroom/Restroom v. Restroom/Kitchen (no chloroplasts)",names=c("Restroom/Restroom","Restroom/Kitchen"),ylab='Unweighted Unifrac Distance')
t.test(mat8.noC,mat11.noC)
##
## Welch Two Sample t-test
##
## data: mat8.noC and mat11.noC
## t = -91.15, df = 29923, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.06802 -0.06516
## sample estimates:
## mean of x mean of y
## 0.7231 0.7897
boxplot(mat8.noC,mat11.noC, main="Kitchen/Kitchen v. Restroom/Kitchen (no chloroplasts)",names=c('Kitchen/Kitchen','Restroom/Kitchen'),ylab='Unweighted Unifrac Distance')
Skip Jaccard for now.
Make groups of interest, to make calling them easier later.
restroom.table.noC <- FJL.table.noC[restroom, ]
kitchen.table.noC <- FJL.table.noC[kitchen, ]
sortKitTax.noC <- sort(apply(kitchen.table.noC, 2, sum)/sum(kitchen.table.noC), decreasing=TRUE)[1:25]
kitAbun25.noC <- FJL.taxa.noC[names(sortKitTax.noC), ]
kitAbun25.noC$abundance <- sortKitTax.noC
# kitAbun25.noC
kitAbun25.noC$taxa.names <- gsub('\\;\\ s\\_\\_$', '', kitAbun25.noC$taxa.names)
kitAbun25.noC$taxa.names <- gsub('\\;\\ g\\_\\_$', '', kitAbun25.noC$taxa.names)
kitAbun25.noC$taxa.names <- gsub('\\;\\ f\\_\\_$', '', kitAbun25.noC$taxa.names)
kitAbun25.noC$taxa.names <- gsub('\\;\\ s\\_\\_', ' ', kitAbun25.noC$taxa.names)
# do this 5 times~ what is this doing???
# kitAbun25$taxa.names <- gsub('^[[:alnum:]]{1,}\\;\\ \\w{1}\\_\\_', '', kitAbun25$taxa.names)
# kitAbun25$taxa.names <- gsub('^\\[[[:alnum:]]{1,}\\]\\;\\ \\w{1}\\_\\_', '', kitAbun25$taxa.names)
par(las=1, mar=c(5, 25, 1, 1))
barplot(rev(kitAbun25.noC$abundance), horiz=TRUE, names.arg=rev(kitAbun25.noC$taxa.names), cex.names=0.5, main='All Kitchens: Top 25 Taxa (noC)')
sortRestTax.noC <- sort(apply(restroom.table.noC, 2, sum)/sum(restroom.table.noC), decreasing=TRUE)[1:25]
RestAbun25.noC <- FJL.taxa.noC[names(sortRestTax.noC), ]
RestAbun25.noC$abundance <- sortRestTax.noC
# RestAbun25.noC
RestAbun25.noC$taxa.names <- gsub('\\;\\ s\\_\\_$', '', RestAbun25.noC$taxa.names)
RestAbun25.noC$taxa.names <- gsub('\\;\\ g\\_\\_$', '', RestAbun25.noC$taxa.names)
RestAbun25.noC$taxa.names <- gsub('\\;\\ f\\_\\_$', '', RestAbun25.noC$taxa.names)
RestAbun25.noC$taxa.names <- gsub('\\;\\ s\\_\\_', ' ', RestAbun25.noC$taxa.names)
par(las=1, mar=c(5, 25, 1, 1))
barplot(rev(RestAbun25.noC$abundance), horiz=TRUE, names.arg=rev(RestAbun25.noC$taxa.names), cex.names=0.5, main='All Restrooms: Top 25 Taxa (noC)')
Could continue with the top taxa,by study.
(could also do the following without chloroplasts)
Create maps and tables of refrigerator and toilet samples, by study.
length(which(FJL.map$Place_1 == 'refrigerator' & FJL.map$phinchID == 'Jeon_korea'))
length(which(FJL.map$Place_1 == 'toilet' & FJL.map$phinchID == 'Jeon_korea'))
length(which(FJL.map$Place_1 == 'refrigerator' & FJL.map$phinchID == 'Flores_Kitchen'))
length(which(FJL.map$Place_1 == 'toilet' & FJL.map$phinchID == 'Flores_Kitchen'))
length(which(FJL.map$Place_1 == 'refrigerator' & FJL.map$phinchID == 'Flores_restroom'))
length(which(FJL.map$Place_1 == 'toilet' & FJL.map$phinchID == 'Flores_restroom'))
length(which(FJL.map$Place_1 == 'refrigerator' & FJL.map$phinchID == 'Leff_homes'))
length(which(FJL.map$Place_1 == 'toilet' & FJL.map$phinchID == 'Leff_homes'))
fridge.Jeon <- which(FJL.map$Place_1 == 'refrigerator' & FJL.map$phinchID == 'Jeon_korea')
toilet.Jeon <- which(FJL.map$Place_1 == 'toilet' & FJL.map$phinchID == 'Jeon_korea')
fridge.Flores <- which(FJL.map$Place_1 == 'refrigerator' & FJL.map$phinchID == 'Flores_Kitchen')
toilet.Flores <- which(FJL.map$Place_1 == 'toilet' & FJL.map$phinchID == 'Flores_restroom')
fridge.Leff <- which(FJL.map$Place_1 == 'refrigerator' & FJL.map$phinchID == 'Leff_homes')
toilet.Leff <- which(FJL.map$Place_1 == 'toilet' & FJL.map$phinchID == 'Leff_homes')
fridge.Jeon.map <- FJL.map[fridge.Jeon, ]
fridge.Jeon.table <- FJL.table[fridge.Jeon, ]
toilet.Jeon.map <- FJL.map[toilet.Jeon, ]
toilet.Jeon.table <- FJL.table[toilet.Jeon, ]
fridge.Flores.map <- FJL.map[fridge.Flores, ]
fridge.Flores.table <- FJL.table[fridge.Flores, ]
toilet.Flores.map <- FJL.map[toilet.Flores, ]
toilet.Flores.table <- FJL.table[toilet.Flores, ]
fridge.Leff.map <- FJL.map[fridge.Leff, ]
fridge.Leff.table <- FJL.table[fridge.Leff, ]
toilet.Leff.map <- FJL.map[toilet.Leff, ]
toilet.Leff.table <- FJL.table[toilet.Leff, ]
# take a random sample of size 10 from a dataset mydata
# sample without replacement
# mysample <- mydata[sample(1:nrow(mydata), 10,
# replace=FALSE),]
# I want to subsample 12 from fridge OTU tables: fridge.Flores.table & fridge.Leff.table
# I want to subsample 10 from toilet OTU tables: toilet.Flores.table & toilet.Leff.table
# matching the maps up??????not sure if i need to do this yet
fridge.Flores.table.12 <- fridge.Flores.table[sample(1:nrow(fridge.Flores.table), 12, replace=FALSE),]
fridge.Leff.table.12 <- fridge.Leff.table[sample(1:nrow(fridge.Leff.table), 12, replace=FALSE),]
toilet.Flores.table.10 <- toilet.Flores.table[sample(1:nrow(toilet.Flores.table), 10, replace=FALSE),]
toilet.Leff.table.10 <- toilet.Leff.table[sample(1:nrow(toilet.Leff.table), 10, replace=FALSE),]
all.fridge.toilet.map <- rbind(fridge.Jeon.map, fridge.Flores.map, fridge.Leff.map, toilet.Jeon.map, toilet.Flores.map, toilet.Leff.map)
all.fridge.table.12 <- rbind(fridge.Flores.table.12, fridge.Leff.table.12, fridge.Jeon.table)
all.toilet.table.10 <- rbind(toilet.Flores.table.10, toilet.Leff.table.10, toilet.Jeon.table)
all.fridgetoilet.table.12.10 <- rbind(all.fridge.table.12, all.toilet.table.10)
Make sure that the OTU table and map line up. If they don’t, reorder using the next section of code.
identical(row.names(all.fridge.toilet.map), row.names(all.fridgetoilet.table.12.10)) #False
cbind(row.names(all.fridge.toilet.map), row.names(all.fridgetoilet.table.12.10)) #Not matching
## Warning: number of rows of result is not a multiple of vector length (arg
## 2)
all(row.names(all.fridge.toilet.map) %in% row.names(all.fridgetoilet.table.12.10)) #False
all(row.names(all.fridgetoilet.table.12.10) %in% row.names(all.fridge.toilet.map)) #True
Remove rows from map that aren’t included in the subsetted OTU table.
all.fridge.toilet.map.12.10 <- all.fridge.toilet.map[row.names(all.fridgetoilet.table.12.10),]
identical(row.names(all.fridge.toilet.map.12.10), row.names(all.fridgetoilet.table.12.10)) #True
## [1] TRUE
cbind(row.names(all.fridge.toilet.map.12.10), row.names(all.fridgetoilet.table.12.10)) #Match
## [,1] [,2]
## [1,] "KiC36.630570" "KiC36.630570"
## [2,] "KiC31.630483" "KiC31.630483"
## [3,] "KiD31.630581" "KiD31.630581"
## [4,] "KiC29.630745" "KiC29.630745"
## [5,] "KiB75.630564" "KiB75.630564"
## [6,] "KiD75.630628" "KiD75.630628"
## [7,] "KiD37.630587" "KiD37.630587"
## [8,] "KiB74.630598" "KiB74.630598"
## [9,] "KiB39.630496" "KiB39.630496"
## [10,] "KiC30.630488" "KiC30.630488"
## [11,] "KiD35.630583" "KiD35.630583"
## [12,] "KiA75.630480" "KiA75.630480"
## [13,] "H43Fr.736000" "H43Fr.736000"
## [14,] "H47Fr.735880" "H47Fr.735880"
## [15,] "H19Fr.735936" "H19Fr.735936"
## [16,] "H45Fr.735904" "H45Fr.735904"
## [17,] "H27Fr.729274" "H27Fr.729274"
## [18,] "H33Fr.729116" "H33Fr.729116"
## [19,] "H30Fr.729368" "H30Fr.729368"
## [20,] "H48Fr.729208" "H48Fr.729208"
## [21,] "H40Fr.729193" "H40Fr.729193"
## [22,] "H34Fr.736058" "H34Fr.736058"
## [23,] "H56Fr.736041" "H56Fr.736041"
## [24,] "H15Fr.729307" "H15Fr.729307"
## [25,] "Rcul.1.1078777" "Rcul.1.1078777"
## [26,] "Rcul.11.1078797" "Rcul.11.1078797"
## [27,] "Rcul.2.1078799" "Rcul.2.1078799"
## [28,] "Rcul.3.1078800" "Rcul.3.1078800"
## [29,] "Rcul.5.1078790" "Rcul.5.1078790"
## [30,] "Rcul.6.1078792" "Rcul.6.1078792"
## [31,] "Rcul.7.1078773" "Rcul.7.1078773"
## [32,] "Rcul.8.1078780" "Rcul.8.1078780"
## [33,] "Rgen.10.1078794" "Rgen.10.1078794"
## [34,] "Rgen.7.1078779" "Rgen.7.1078779"
## [35,] "Rgen.8.1078789" "Rgen.8.1078789"
## [36,] "Rgen.9.1078798" "Rgen.9.1078798"
## [37,] "EKCF7.489557" "EKCF7.489557"
## [38,] "EKAF5.489565" "EKAF5.489565"
## [39,] "PTBF5.489479" "PTBF5.489479"
## [40,] "PTCF5.489507" "PTCF5.489507"
## [41,] "EKAM6.489544" "EKAM6.489544"
## [42,] "PTCM5.489525" "PTCM5.489525"
## [43,] "EKCF5.489488" "EKCF5.489488"
## [44,] "PTCF6.489533" "PTCF6.489533"
## [45,] "EKBM5.489472" "EKBM5.489472"
## [46,] "EKBM6.489504" "EKBM6.489504"
## [47,] "H36Ts.729110" "H36Ts.729110"
## [48,] "H43Ts.735928" "H43Ts.735928"
## [49,] "H20Ts.729120" "H20Ts.729120"
## [50,] "H26Ts.729406" "H26Ts.729406"
## [51,] "H13Ts.729194" "H13Ts.729194"
## [52,] "H35Ts.735945" "H35Ts.735945"
## [53,] "H27Ts.729226" "H27Ts.729226"
## [54,] "H29Ts.729131" "H29Ts.729131"
## [55,] "H18Ts.736040" "H18Ts.736040"
## [56,] "H38Ts.729270" "H38Ts.729270"
## [57,] "Tcul.1.1078775" "Tcul.1.1078775"
## [58,] "Tcul.2.1078782" "Tcul.2.1078782"
## [59,] "Tcul.3.1078785" "Tcul.3.1078785"
## [60,] "Tcul.4.1078788" "Tcul.4.1078788"
## [61,] "Tcul.5.1078791" "Tcul.5.1078791"
## [62,] "Tcul.6.1078781" "Tcul.6.1078781"
## [63,] "Tgen.1.1078795" "Tgen.1.1078795"
## [64,] "Tgen.10.1078774" "Tgen.10.1078774"
## [65,] "Tgen.12.1078784" "Tgen.12.1078784"
## [66,] "Tgen.13.1078793" "Tgen.13.1078793"
all(row.names(all.fridge.toilet.map.12.10) %in% row.names(all.fridgetoilet.table.12.10)) #True
## [1] TRUE
all(row.names(all.fridgetoilet.table.12.10) %in% row.names(all.fridge.toilet.map.12.10)) #True
## [1] TRUE
write.table(all.fridgetoilet.table.12.10, "12.10.fridge.toilet.table.txt", sep="\t", col.names=TRUE, row.names=TRUE, quote=FALSE)
write.table(all.fridge.toilet.map.12.10, "12.10.fridge.toilet.map.txt", sep="\t", col.names=NA, row.names=TRUE)
Make a Canberra distance matrix.
all.12.10.can <- vegdist(all.fridgetoilet.table.12.10, 'canberra')
all.12.10.can.mat <- as.matrix(all.12.10.can)
Make an ordination with the canberra distance matrix.
all.12.10.can.nmds <- nmds(all.12.10.can)
## initial value 35.510400
## iter 5 value 23.723161
## iter 10 value 22.178427
## iter 15 value 21.462291
## iter 20 value 21.169825
## final value 21.082734
## converged
Plot ordination, colored by different variables.
levels(all.fridge.toilet.map.12.10$Place_1)
## [1] "cabinet" "counter" "cutting_board"
## [4] "dish_drying_pan" "disposal" "door"
## [7] "drawer" "faucet" "freezer"
## [10] "garbage can" "garbage_can" "left_sink"
## [13] "microwave" "oven" "pillowcase"
## [16] "refrigerator" "right_sink" "sink"
## [19] "soap_dispenser" "sponge" "stall"
## [22] "stove" "television" "toilet"
## [25] "wall" "water"
colvec.Place_1=c("green4","darkorange","darkolivegreen","darkgoldenrod1","cyan2","cornflowerblue","coral1","chocolate3","chartreuse","blueviolet","aquamarine","darkslateblue","cornsilk1","firebrick","indianred1","lightblue1","darkorchid3","darkgoldenrod4","darkcyan","chartreuse4","greenyellow","brown1","deepskyblue2","mediumspringgreen","lightslateblue","lightsalmon")
length(colvec.Place_1)
## [1] 26
plot(all.12.10.can.nmds, pch=16, col = colvec.Place_1[all.fridge.toilet.map.12.10$Place_1])
legend('topleft', c("cabinet","counter","cutting_board","dish_drying_pan","disposal","door","drawer","faucet","freezer","garbage_can","microwave","oven","pillowcase","refrigerator","sink","soap_dispenser","sponge","stall","stove","television","toilet","wall","water"), cex=0.5, pt.cex=1, text.width=0.2, y.intersp=0.9, col=colvec.Place_1, pch=16)
title(main="Canberra NMDS colored by Place_1 (subset)")
### colors are wrong!!!!!!can't figure out why
#lightblue1(16)=refrigerator=(14)
#mediumspringgreen(24)=toilet=(21)
levels(all.fridge.toilet.map.12.10$Geolocation)
## [1] "Boulder_CO_USA" "RaleighDurham_NC_USA" "Seoul_Korea"
colvec.Geoloc=c("darkorchid","darkorange","cyan2")
plot(all.12.10.can.nmds, pch=16, col = colvec.Geoloc[all.fridge.toilet.map.12.10$Geolocation])
legend('topleft', c("Boulder_CO_USA","RaleighDurham_NC_USA","Seoul_Korea"),cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9, col=colvec.Geoloc, pch=16)
title(main='Canberra NMDS colored by Geolocation (subset)')
levels(all.fridge.toilet.map.12.10$phinchID)
## [1] "Flores_Kitchen" "Flores_restroom" "Jeon_korea" "Leff_homes"
colvec.pID=c("aquamarine","chartreuse4","firebrick","darkgoldenrod1")
plot(all.12.10.can.nmds, pch=16, col=colvec.pID[all.fridge.toilet.map.12.10$phinchID])
legend('topleft',c("Flores_Kitchen","Flores_restroom","Jeon_korea","Leff_homes"), cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9,col=colvec.pID, pch=16)
title(main='Canberra NMDS colored by study (phinchID) (subset)')
levels(all.fridge.toilet.map.12.10$Room_Function)
## [1] "home" "kitchen" "restroom"
colvec.RF=c("darkorchid","cornflowerblue","coral1")
plot(all.12.10.can.nmds,pch=16,col=colvec.RF[all.fridge.toilet.map.12.10$Room_Function])
legend('topleft',c("home","kitchen","restroom"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.RF,pch=16)
title(main='Canberra NMDS colored by Room_Function (subset)')
levels(all.fridge.toilet.map.12.10$Target_Region)
## [1] "V1_V2" "V1_V3" "V4"
colvec.TR=c("deeppink","chartreuse","goldenrod")
plot(all.12.10.can.nmds,pch=16,col=colvec.TR[all.fridge.toilet.map.12.10$Target_Region])
legend('topleft',c("V1_V2","V1_V3","V4"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.TR,pch=16)
title(main='Canberra NMDS colored by Target_Region (Subset)')
levels(all.fridge.toilet.map.12.10$Sequencing_Technology)
## [1] "454_GS_Junior" "Illumina_HiSeq" "Illumina_HiSeq2000"
## [4] "Illumina_MiSeq"
colvec.ST=c("coral1","cornflowerblue","chartreuse","darkorchid")
plot(all.12.10.can.nmds,pch=16,col=colvec.ST[all.fridge.toilet.map.12.10$Sequencing_Technology])
legend('topleft',c("454_GS_Junior","Illumina_HiSeq","Illumina_HiSeq2000","Illumina_MiSeq"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.ST,pch=16)
title(main='Canberra NMDS colored by Sequencing_Technology (subset)')
levels(all.fridge.toilet.map.12.10$Building_Type2)
## [1] "condo_town" "family_home" "low_rise_apt"
## [4] "university_building"
colvec.BT2=c("darkorchid","aquamarine","deeppink","darkgoldenrod2")
plot(all.12.10.can.nmds,pch=16,col=colvec.BT2[all.fridge.toilet.map.12.10$Building_Type2])
legend('topleft',c("condo_town","family_home","low_rise_apt","university_building"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.BT2,pch=16)
title(main='Canberra NMDS colored by Building_Type2 (subset)')
Without chloroplasts, target region seems to drive clustering the best, between subsampled toilets and refrigerators among the 4 studies.
toilet.SS <- which(all.fridge.toilet.map.12.10$Place_1 == 'toilet')
toilet.SS.map <- all.fridge.toilet.map.12.10[toilet.SS, ]
toilet.SS.table <- all.fridgetoilet.table.12.10[toilet.SS, ]
fridge.SS <- which(all.fridge.toilet.map.12.10$Place_1 == 'refrigerator')
fridge.SS.map <- all.fridge.toilet.map.12.10[fridge.SS, ]
fridge.SS.table <- all.fridgetoilet.table.12.10[fridge.SS, ]
toilet.toilet.can.dist.SS <- as.dist(all.12.10.can.mat[toilet.SS, toilet.SS])
can.TT.tri.SS <- toilet.toilet.can.dist.SS[upper.tri(toilet.toilet.can.dist.SS)]
toilet.toilet.can.dist.SS <- can.TT.tri.SS[-which(is.na(can.TT.tri.SS))]
fridge.fridge.can.dist.SS <- as.dist(all.12.10.can.mat[fridge.SS, fridge.SS])
can.FF.tri.SS <- fridge.fridge.can.dist.SS[upper.tri(fridge.fridge.can.dist.SS)]
fridge.fridge.can.dist.SS <- can.FF.tri.SS[-which(is.na(can.FF.tri.SS))]
toilet.fridge.can.dist.SS <- as.dist(all.12.10.can.mat[toilet.SS, fridge.SS])
## Warning: non-square matrix
Make vector of distances for the comparisons you want to make.
mat1.SS <- as.vector(toilet.toilet.can.dist.SS)
mat2.SS <- as.vector(fridge.fridge.can.dist.SS)
mat3.SS <- as.vector(toilet.fridge.can.dist.SS)
Run t-tests on, and make boxplot, of distance comparisons.
t.test(mat1.SS, mat3.SS)
##
## Welch Two Sample t-test
##
## data: mat1.SS and mat3.SS
## t = -4.021, df = 109, p-value = 0.0001069
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.025684 -0.008725
## sample estimates:
## mean of x mean of y
## 0.9717 0.9889
boxplot(mat1.SS,mat3.SS, main="Toilet/Toilet v. Toilet/Fridge",names=c("Toilet/Toilet","Toilet/Fridge"), ylab="Canberra Distance")
t.test(mat2.SS, mat3.SS)
##
## Welch Two Sample t-test
##
## data: mat2.SS and mat3.SS
## t = -6.463, df = 173.7, p-value = 1e-09
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.02150 -0.01144
## sample estimates:
## mean of x mean of y
## 0.9724 0.9889
boxplot(mat2.SS ,mat3.SS, main="Fridge/Fridge v. Toilet/Fridge",names=c("Fridge/Fridge","Toilet/Fridge"), ylab="Canberra Distance")
Trying to do unifrac in R for this OTU table (having a hard time doing biom/txt file conversions).
FJL.tree <- read_tree_greengenes('/macqiime/greengenes/gg_13_8_otus/trees/97_otus.tree')
UF.SS <- unifrac(all.fridgetoilet.table.12.10, FJL.tree) # takes a really long time??????
Could re-try the biom conversion to do the Qiime unifrac. would be better/more consistent anyways.