FloresJeonLeff Subset Study - BEMA project

(Rachel Adams, Ashley Bateman, Holly Bik, James Meadow)

Question: Do rooms in a building look more similar the same kind of room in another building: Do restrooms look like restrooms, or do kitchens look like kitchens, no matter where you live? Or, do regional differences in microbial communities make kitchens and restrooms that are in the same location look more like each other?

set.seed(942)

library(vegan)

## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.0-10

library(ape)
library(labdsv)

## Loading required package: mgcv
## Loading required package: nlme
## This is mgcv 1.8-3. For overview type 'help("mgcv-package")'.
## Loading required package: MASS
## 
## Attaching package: 'labdsv'
## 
## The following object is masked from 'package:stats':
## 
##     density

library(phyloseq)

### These just for debugging, not evaluated. 
# save.image('~/Dropbox/FloresJeonLeff.RData')
# load('~/Dropbox/FloresJeonLeff.RData')
# setwd("~/Dropbox/PersonalBEMA/FloresJeonLeffStudy")

Below is a custom function (QiimeIn) written by James Meadow for reading in OTU tables.

QiimeIn <- function ( file='') {
  tmp.table <- read.table ( file, sep='\t', head=TRUE, row.names=1, comment.char='%', skip=1 )
  tax.numb <- dim(tmp.table)[2]  ## last col is taxon names
  taxa.names <- as.character(tmp.table[,tax.numb])  ## save taxon names
  tmp.to.t <- tmp.table[,1:tax.numb-1]  ## trim names from table
  name <- t(tmp.to.t)  ## transpose after trimming names
  
  preview <- name[1:5,1:5]
  samples <- row.names(name)
  taxa.tmp <- cbind(colnames(name),taxa.names)
  taxa <- data.frame(taxa.tmp)
  names(taxa)[1] <- 'qiime.id'
  row.names(taxa) <- taxa.tmp[, 1]
  dimensions <- dim(name)
  
  table.info <- list( preview, dimensions, samples, taxa, name )
  names(table.info)[[1]] <- 'Preview'
  names(table.info)[[2]] <- 'Dimensions'
  names(table.info)[[3]] <- 'Samples'
  names(table.info)[[4]] <- 'Taxa'
  names(table.info)[[5]] <- 'Table'
  
  invisible(table.info)
  
}

Read in OTU table, mapping file, and unifrac distance matrix generated with Qiime.

Read in OTU table using custom function.

FJL.list <- QiimeIn('data/Ftable.from_biom_w_taxonomy.txt')

Read in mapping file, after removing ‘#’ (needed for Qiime scripts) from the header.

FJL.map <- read.delim('data/mergedMapF_no_source_no_kit.txt', head=TRUE, row.names=1, sep='\t')

Read in unweighted unifrac distance matrix, generated with Qiime script. Preview table, and convert to matrix.

unifrac.dist <- read.table('data/unweighted_unifrac_dm.txt', header=T)
unifrac.dist[1:5,1:5]

##              KiB54.630662 KiC69.630743 KiB70.630661 KiB40.630629
## KiB54.630662       0.0000       0.6981       0.6727       0.6479
## KiC69.630743       0.6981       0.0000       0.6550       0.7115
## KiB70.630661       0.6727       0.6550       0.0000       0.6470
## KiB40.630629       0.6479       0.7115       0.6470       0.0000
## KiB36.630609       0.6819       0.6775       0.6682       0.6931
##              KiB36.630609
## KiB54.630662       0.6819
## KiC69.630743       0.6775
## KiB70.630661       0.6682
## KiB40.630629       0.6931
## KiB36.630609       0.0000

unifrac.dist.mat <- as.matrix(unifrac.dist)

Checkpoints to make sure otu table looks correct and everything is lined up.

names(FJL.list)
FJL.list$Preview
FJL.list$Dimensions
FJL.list$Samples
FJL.list$Taxa

Match up/Reorder OTU table, map, and unifrac distance matrix

Extract the OTU table and taxon table into two different objects.

FJL.table <- FJL.list$Table
FJL.taxa <- FJL.list$Taxa

Make sure that the OTU table and map line up. If they don’t, reorder using the next section of code.

identical(row.names(FJL.map), row.names(FJL.table)) #False
cbind(row.names(FJL.map), row.names(FJL.table))     #Not matching
all(row.names(FJL.map) %in% row.names(FJL.table))   #True
all(row.names(FJL.table) %in% row.names(FJL.map))   #True

Reorder both the map and the OTU table so that they have the same rows and columns.

FJL.map <- FJL.map[order(row.names(FJL.map)), ]
FJL.table <- FJL.table[row.names(FJL.map), ]
# an alternative way of doing it (without the two linked together):
# FJL.map <- FJL.map[order(row.names(FJL.map)), ]
# FJL.table <- FJL.table[order(row.names(FJL.table)), ]

Reorder the Unifrac distance matrix so that it has the same rows and columns as the map.

unifrac.dist.mat <- unifrac.dist.mat[row.names(FJL.map), row.names(FJL.map)]

Make groups of interest, to make calling them easier later.

restroom <- which(FJL.map$Room_Function == 'restroom')
restroom.map <- FJL.map[restroom, ]
restroom.table <- FJL.table[restroom, ]

kitchen <- which(FJL.map$Room_Function == 'kitchen')
kitchen.map <- FJL.map[kitchen, ]
kitchen.table <- FJL.table[kitchen, ]

toilet <- which(FJL.map$Place_1 == 'toilet')
fridge <- which(FJL.map$Place_1 == 'refrigerator')

How many OTUs are in my OTU table?

dim(FJL.table) # 17398 OTUs

## [1]   758 17341

How many singletons are in my OTU table?

# abundance curve
plot(rev(sort(colSums(FJL.table))))

plot of chunk plotSingletons

length(which(colSums(FJL.table) == 1))/ncol(FJL.table) # 31% are singletons

## [1] 0.3137

Canberra

Make a Canberra distance matrix.

FJL.can <- vegdist(FJL.table, 'canberra')
FJL.can.mat <- as.matrix(FJL.can)

Make an ordination with the canberra distance matrix.

FJL.can.nmds <- nmds(FJL.can)

## initial  value 36.417895 
## iter   5 value 24.526274
## iter  10 value 23.608134
## final  value 23.354897 
## converged

Plot ordination, colored by different variables.

levels(FJL.map$Place_1)

##  [1] "cabinet"         "counter"         "cutting_board"  
##  [4] "dish_drying_pan" "disposal"        "door"           
##  [7] "drawer"          "faucet"          "freezer"        
## [10] "garbage can"     "garbage_can"     "left_sink"      
## [13] "microwave"       "oven"            "pillowcase"     
## [16] "refrigerator"    "right_sink"      "sink"           
## [19] "soap_dispenser"  "sponge"          "stall"          
## [22] "stove"           "television"      "toilet"         
## [25] "wall"            "water"

colvec.Place_1=c("green4","darkorange","darkolivegreen","darkgoldenrod1","cyan2","cornflowerblue","coral1","chocolate3","chartreuse","blueviolet","aquamarine","darkslateblue","cornsilk1","firebrick","indianred1","lightblue1","darkorchid3","darkgoldenrod4","darkcyan","chartreuse4","greenyellow","brown1","deepskyblue2","mediumspringgreen","lightslateblue","lightsalmon")
plot(FJL.can.nmds, pch=16, col=colvec.Place_1[FJL.map$Place_1])
legend('topleft', c("cabinet","counter","cutting_board","dish_drying_pan","disposal","door","drawer","faucet","freezer","garbage_can","microwave","oven","pillowcase","refrigerator","sink","soap_dispenser","sponge","stall","stove","television","toilet","wall","water"), cex=0.5, pt.cex=1, text.width=0.2, y.intersp=0.9, col=colvec.Place_1, pch=16)
title(main="Canberra NMDS colored by Place_1")

plot of chunk plotNMDScan

levels(FJL.map$Geolocation)

## [1] "Boulder_CO_USA"       "RaleighDurham_NC_USA" "Seoul_Korea"

colvec.Geoloc=c("darkorchid","darkorange","cyan2")
plot(FJL.can.nmds, pch=16, col = colvec.Geoloc[FJL.map$Geolocation])
legend('topleft', c("Boulder_CO_USA","RaleighDurham_NC_USA","Seoul_Korea"),cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9, col=colvec.Geoloc, pch=16)
title(main='Canberra NMDS colored by Geolocation')

plot of chunk plotNMDScan

levels(FJL.map$phinchID)

## [1] "Flores_Kitchen"  "Flores_restroom" "Jeon_korea"      "Leff_homes"

colvec.pID=c("aquamarine","chartreuse4","firebrick","darkgoldenrod1")
plot(FJL.can.nmds, pch=16, col=colvec.pID[FJL.map$phinchID])
legend('topleft',c("Flores_Kitchen","Flores_restroom","Jeon_korea","Leff_homes"), cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9,col=colvec.pID, pch=16)
title(main='Canberra NMDS colored by study (phinchID)')

plot of chunk plotNMDScan

levels(FJL.map$Room_Function)

## [1] "home"     "kitchen"  "restroom"

colvec.RF=c("darkorchid","cornflowerblue","coral1")
plot(FJL.can.nmds,pch=16,col=colvec.RF[FJL.map$Room_Function])
legend('topleft',c("home","kitchen","restroom"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.RF,pch=16)
title(main='Canberra NMDS colored by Room_Function')

plot of chunk plotNMDScan

levels(FJL.map$Target_Region)

## [1] "V1_V2" "V1_V3" "V4"

colvec.TR=c("deeppink","chartreuse","goldenrod")
plot(FJL.can.nmds,pch=16,col=colvec.TR[FJL.map$Target_Region])
legend('topleft',c("V1_V2","V1_V3","V4"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.TR,pch=16)
title(main='Canberra NMDS colored by Target_Region')

plot of chunk plotNMDScan

levels(FJL.map$Sequencing_Technology)

## [1] "454_GS_Junior"      "Illumina_HiSeq"     "Illumina_HiSeq2000"
## [4] "Illumina_MiSeq"

colvec.ST=c("coral1","cornflowerblue","chartreuse","darkorchid")
plot(FJL.can.nmds,pch=16,col=colvec.ST[FJL.map$Sequencing_Technology])
legend('topleft',c("454_GS_Junior","Illumina_HiSeq","Illumina_HiSeq2000","Illumina_MiSeq"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.ST,pch=16)
title(main='Canberra NMDS colored by Sequencing_Technology')

plot of chunk plotNMDScan

levels(FJL.map$Building_Type2)

## [1] "condo_town"          "family_home"         "low_rise_apt"       
## [4] "university_building"

colvec.BT2=c("darkorchid","aquamarine","deeppink","darkgoldenrod2")
plot(FJL.can.nmds,pch=16,col=colvec.BT2[FJL.map$Building_Type2])
legend('topleft',c("condo_town","family_home","low_rise_apt","university_building"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.BT2,pch=16)
title(main='Canberra NMDS colored by Building_Type2')

plot of chunk plotNMDScan

Could Sequencing technologies (Miseq v. HiSeq/HiSeq2000 be driving that split in the Leff homes data, resulting in the clusters that we see?)

Need to remove the one half of the square for the self-self comparisons.

Make distance matrices between locations of interest, while removing upper triangle for self-self comparisons.

restroom.restroom.can.dist <- as.dist(FJL.can.mat[restroom, restroom])
can.RR.tri <- restroom.restroom.can.dist[upper.tri(restroom.restroom.can.dist)]
restroom.restroom.can.dist <- can.RR.tri[-which(is.na(can.RR.tri))]

kitchen.kitchen.can.dist <- as.dist(FJL.can.mat[kitchen, kitchen])
can.KK.tri <- kitchen.kitchen.can.dist[upper.tri(kitchen.kitchen.can.dist)]
kitchen.kitchen.can.dist <- can.KK.tri[-which(is.na(can.KK.tri))]

toilet.toilet.can.dist <- as.dist(FJL.can.mat[toilet,toilet])
can.TT.tri <- toilet.toilet.can.dist[upper.tri(toilet.toilet.can.dist)]
toilet.toilet.can.dist <- can.TT.tri[-which(is.na(can.TT.tri))]

fridge.fridge.can.dist <- as.dist(FJL.can.mat[fridge,fridge])
can.FF.tri <- fridge.fridge.can.dist[upper.tri(fridge.fridge.can.dist)]
fridge.fridge.can.dist <- can.FF.tri[-which(is.na(can.FF.tri))]

restroom.kitchen.can.dist <- as.dist(FJL.can.mat[restroom, kitchen])

## Warning: non-square matrix

toilet.fridge.can.dist <- as.dist(FJL.can.mat[toilet, fridge])

## Warning: non-square matrix

Make vector of distances for the comparisons you want to make.

mat1 <- as.vector(restroom.restroom.can.dist)
mat2 <- as.vector(kitchen.kitchen.can.dist)
mat3 <- as.vector(toilet.toilet.can.dist)
mat4 <- as.vector(fridge.fridge.can.dist)
mat5 <- as.vector(restroom.kitchen.can.dist)
mat6 <- as.vector(toilet.fridge.can.dist)

Run t-tests on, and make boxplot, of distance comparisons.

Restroom/Restroom v. Restroom/Kitchen

t.test(mat1,mat5)

## 
##  Welch Two Sample t-test
## 
## data:  mat1 and mat5
## t = -34.93, df = 3089, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.02736 -0.02445
## sample estimates:
## mean of x mean of y 
##    0.9622    0.9881

boxplot(mat1,mat5, main="Restroom/Restroom v. Restroom/Kitchen",names=c("Restroom/Restroom","Restroom/Kitchen"), ylab="Canberra Distance")

plot of chunk testDistsCan1

Kitchen/Kitchen v. Restroom/Kitchen

t.test(mat2,mat5)

## 
##  Welch Two Sample t-test
## 
## data:  mat2 and mat5
## t = -164.5, df = 29014, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.04516 -0.04410
## sample estimates:
## mean of x mean of y 
##    0.9435    0.9881

boxplot(mat2,mat5, main="Kitchen/Kitchen v. Restroom/Kitchen",names=c("Kitchen/Kitchen","Restroom/Kitchen"), ylab="Canberra Distance")

plot of chunk testDistsCan2

Unweighted Unifrac

The unweighted unifrac distance matrix was already imported, and re-ordered to match the map file.

Make an ordination with the unweighted unifrac distance matrix.

FJL.uni.nmds <- nmds(unifrac.dist.mat)

## initial  value 32.583909 
## iter   5 value 17.629171
## iter  10 value 16.602288
## iter  10 value 16.588331
## iter  10 value 16.587316
## final  value 16.587316 
## converged

Plot ordination, colored by different variables.

plot(FJL.uni.nmds, pch=16, col=colvec.Place_1[FJL.map$Place_1])
legend('topleft', c("cabinet","counter","cutting_board","dish_drying_pan","disposal","door","drawer","faucet","freezer","garbage_can","microwave","oven","pillowcase","refrigerator","sink","soap_dispenser","sponge","stall","stove","television","toilet","wall","water"), cex=0.5, pt.cex=1, text.width=0.2, y.intersp=0.9, col=colvec.Place_1, pch=16)
title(main="Unweighted Unifrac NMDS colored by Place_1")

plot of chunk plotNMDSuni

plot(FJL.uni.nmds, pch=16, col=colvec.Geoloc[FJL.map$Geolocation])
legend('topleft', c("Boulder_CO_USA","RaleighDurham_NC_USA","Seoul_Korea"),cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9, col=colvec.Geoloc, pch=16)
title(main='Unweighted Unifrac NMDS colored by Geolocation')

plot of chunk plotNMDSuni

plot(FJL.uni.nmds, pch=16, col=colvec.pID[FJL.map$phinchID])
legend('topleft',c("Flores_Kitchen","Flores_restroom","Jeon_korea","Leff_homes"), cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9,col=colvec.pID, pch=16)
title(main='Unweighted Unifrac NMDS colored by study (phinchID)')

plot of chunk plotNMDSuni

plot(FJL.uni.nmds,pch=16,col=colvec.RF[FJL.map$Room_Function])
legend('topleft',c("home","kitchen","restroom"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.RF,pch=16)
title(main='Unweighted Unifrac NMDS colored by Room_Function')

plot of chunk plotNMDSuni

plot(FJL.uni.nmds,pch=16,col=colvec.TR[FJL.map$Target_Region])
legend('topleft',c("V1_V2","V1_V3","V4"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.TR,pch=16)
title(main='Unweighted Unifrac NMDS colored by Target_Region')

plot of chunk plotNMDSuni

plot(FJL.uni.nmds,pch=16,col=colvec.ST[FJL.map$Sequencing_Technology])
legend('topleft',c("454_GS_Junior","Illumina_HiSeq","Illumina_HiSeq2000","Illumina_MiSeq"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.ST,pch=16)
title(main='Unweighted Unifrac NMDS colored by Sequencing_Technology')

plot of chunk plotNMDSuni

plot(FJL.uni.nmds,pch=16,col=colvec.BT2[FJL.map$Building_Type2])
legend('topleft',c("condo_town","family_home","low_rise_apt","university_building"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.BT2,pch=16)
title(main='Unweighted Unifrac colored by Building_Type2')

plot of chunk plotNMDSuni

Make distance matrices between locations of interest, while removing upper triangle for self-self comparisons.

restroom.restroom.uni.dist <- as.dist(unifrac.dist.mat[restroom, restroom])
uni.RR.tri <- restroom.restroom.uni.dist[upper.tri(restroom.restroom.uni.dist)]
restroom.restroom.uni.dist <- uni.RR.tri[-which(is.na(uni.RR.tri))]

kitchen.kitchen.uni.dist <- as.dist(unifrac.dist.mat[kitchen, kitchen])
uni.KK.tri <- kitchen.kitchen.uni.dist[upper.tri(kitchen.kitchen.uni.dist)]
kitchen.kitchen.uni.dist <- uni.KK.tri[-which(is.na(uni.KK.tri))]

toilet.toilet.uni.dist <- as.dist(unifrac.dist.mat[toilet,toilet])
uni.TT.tri <- toilet.toilet.uni.dist[upper.tri(toilet.toilet.uni.dist)]
toilet.toilet.uni.dist <- uni.TT.tri[-which(is.na(uni.TT.tri))]

fridge.fridge.uni.dist <- as.dist(unifrac.dist.mat[fridge,fridge])
uni.FF.tri <- fridge.fridge.uni.dist[upper.tri(fridge.fridge.uni.dist)]
fridge.fridge.uni.dist <- uni.FF.tri[-which(is.na(uni.FF.tri))]

restroom.kitchen.uni.dist <- as.dist(unifrac.dist.mat[restroom, kitchen])

## Warning: non-square matrix

toilet.fridge.uni.dist <- as.dist(unifrac.dist.mat[toilet, fridge])

## Warning: non-square matrix

Make vector of distances for the comparisons you want to make.

mat7 <- as.vector(restroom.restroom.uni.dist)
mat8 <- as.vector(kitchen.kitchen.uni.dist)
mat9 <- as.vector(toilet.toilet.uni.dist)
mat10 <- as.vector(fridge.fridge.uni.dist)
mat11 <- as.vector(restroom.kitchen.uni.dist)
mat12 <- as.vector(toilet.fridge.uni.dist)

Run t-tests on, and make boxplot, of distance comparisons.

Restroom/Restroom v. Restroom/Kitchen

t.test(mat7,mat11)

## 
##  Welch Two Sample t-test
## 
## data:  mat7 and mat11
## t = -27.02, df = 3556, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.05020 -0.04341
## sample estimates:
## mean of x mean of y 
##    0.7357    0.7825

boxplot(mat7,mat11, main= "Restroom/Restroom v. Restroom/Kitchen",names=c("Restroom/Restroom","Restroom/Kitchen"),ylab='Unweighted Unifrac Distance')

plot of chunk testDistsUni1

Kitchen/Kitchen v. Restroom/Kitchen

t.test(mat8,mat11)

## 
##  Welch Two Sample t-test
## 
## data:  mat8 and mat11
## t = -91.55, df = 29935, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.06981 -0.06688
## sample estimates:
## mean of x mean of y 
##    0.7142    0.7825

boxplot(mat8,mat11, main="Kitchen/Kitchen v. Restroom/Kitchen",names=c('Kitchen/Kitchen','Restroom/Kitchen'),ylab='Unweighted Unifrac Distance')

plot of chunk testDistsUni2

Jaccard

Create a Jaccard distance matrix.

FJL.jac <- vegdist(FJL.table, 'jaccard')
FJL.jac.mat <- as.matrix(FJL.jac)

Create an ordination using jaccard.

FJL.jac.nmds <- nmds(FJL.jac)

## initial  value 42.934861 
## iter   5 value 28.977639
## iter  10 value 25.290675
## final  value 25.022110 
## converged

Plot the ordination, colored by different variables.

plot(FJL.jac.nmds, pch=16, col=colvec.Place_1[FJL.map$Place_1])
legend('topleft', c("cabinet","counter","cutting_board","dish_drying_pan","disposal","door","drawer","faucet","freezer","garbage_can","microwave","oven","pillowcase","refrigerator","sink","soap_dispenser","sponge","stall","stove","television","toilet","wall","water"), cex=0.5, pt.cex=1, text.width=0.2, y.intersp=0.9, col=colvec.Place_1, pch=16)
title(main="Jaccard NMDS colored by Place_1")

plot of chunk plotNMDSJaccard

plot(FJL.jac.nmds, pch=16, col=colvec.Geoloc[FJL.map$Geolocation])
legend('topleft', c("Boulder_CO_USA","RaleighDurham_NC_USA","Seoul_Korea"),cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9, col=colvec.Geoloc, pch=16)
title(main='Jaccard NMDS colored by Geolocation')

plot of chunk plotNMDSJaccard

plot(FJL.jac.nmds, pch=16, col=colvec.pID[FJL.map$phinchID])
legend('topleft',c("Flores_Kitchen","Flores_restroom","Jeon_korea","Leff_homes"), cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9,col=colvec.pID, pch=16)
title(main='Jaccard NMDS colored by study (phinchID)')

plot of chunk plotNMDSJaccard

plot(FJL.jac.nmds,pch=16,col=colvec.RF[FJL.map$Room_Function])
legend('topleft',c("home","kitchen","restroom"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.RF,pch=16)
title(main='Jaccard NMDS colored by Room_Function')

plot of chunk plotNMDSJaccard

plot(FJL.jac.nmds,pch=16,col=colvec.TR[FJL.map$Target_Region])
legend('topleft',c("V1_V2","V1_V3","V4"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.TR,pch=16)
title(main='Jaccard NMDS colored by Target_Region')

plot of chunk plotNMDSJaccard

plot(FJL.jac.nmds,pch=16,col=colvec.ST[FJL.map$Sequencing_Technology])
legend('topleft',c("454_GS_Junior","Illumina_HiSeq","Illumina_HiSeq2000","Illumina_MiSeq"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.ST,pch=16)
title(main='Jaccard NMDS colored by Sequencing_Technology')

plot of chunk plotNMDSJaccard

plot(FJL.jac.nmds,pch=16,col=colvec.BT2[FJL.map$Building_Type2])
legend('topleft',c("condo_town","family_home","low_rise_apt","university_building"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.BT2,pch=16)
title(main='Jaccard NMDS colored by Building_Type2')

plot of chunk plotNMDSJaccard

Make distance matrices between locations of interest, while removing upper triangle for self-self comparisons.

restroom.restroom.jac.dist <- as.dist(FJL.jac.mat[restroom, restroom])
jac.RR.tri <- restroom.restroom.jac.dist[upper.tri(restroom.restroom.jac.dist)]
restroom.restroom.jac.dist <- jac.RR.tri[-which(is.na(jac.RR.tri))]

kitchen.kitchen.jac.dist <- as.dist(FJL.jac.mat[kitchen, kitchen])
jac.KK.tri <- kitchen.kitchen.jac.dist[upper.tri(kitchen.kitchen.jac.dist)]
kitchen.kitchen.jac.dist <- jac.KK.tri[-which(is.na(jac.KK.tri))]

toilet.toilet.jac.dist <- as.dist(FJL.jac.mat[toilet,toilet])
jac.TT.tri <- toilet.toilet.jac.dist[upper.tri(toilet.toilet.jac.dist)]
toilet.toilet.jac.dist <- jac.TT.tri[-which(is.na(jac.TT.tri))]

fridge.fridge.jac.dist <- as.dist(FJL.jac.mat[fridge,fridge])
jac.FF.tri <- fridge.fridge.jac.dist[upper.tri(fridge.fridge.jac.dist)]
fridge.fridge.jac.dist <- jac.FF.tri[-which(is.na(jac.FF.tri))]

restroom.kitchen.jac.dist <- as.dist(FJL.jac.mat[restroom, kitchen])

## Warning: non-square matrix

toilet.fridge.jac.dist <- as.dist(FJL.jac.mat[toilet, fridge])

## Warning: non-square matrix

Make vector of distances for the comparisons you want to make.

mat13 <- as.vector(restroom.restroom.jac.dist)
mat14 <- as.vector(kitchen.kitchen.jac.dist)
mat15 <- as.vector(toilet.toilet.jac.dist)
mat16 <- as.vector(fridge.fridge.jac.dist)
mat17 <- as.vector(restroom.kitchen.jac.dist)
mat18 <- as.vector(toilet.fridge.jac.dist)

Run t-tests on, and make boxplot, of distance comparisons.

Restroom/Restroom v. Restroom/Kitchen

t.test(mat13,mat17)

## 
##  Welch Two Sample t-test
## 
## data:  mat13 and mat17
## t = -33.98, df = 2998, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.04784 -0.04262
## sample estimates:
## mean of x mean of y 
##    0.9427    0.9880

boxplot(mat13,mat17, main="Restroom/Restroom v. Restroom/Kitchen",names=c('Restroom/Restroom','Restroom/Kitchen'),ylab='Jaccard Distance')

plot of chunk testDistsJac1

Kitchen/Kitchen v. Restroom/Kitchen

t.test(mat14,mat17)

## 
##  Welch Two Sample t-test
## 
## data:  mat14 and mat17
## t = -139.8, df = 23386, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.08032 -0.07810
## sample estimates:
## mean of x mean of y 
##    0.9088    0.9880

boxplot(mat14,mat17, main="Kitchen/Kitchen v. Restroom/Kitchen",names=c('Kitchen/Kitchen','Restroom/Kitchen'),ylab='Jaccard Distance')

plot of chunk testDistsJac2

Finding the taxa that are abundant in kitchens v. restrooms

sortKitTax <- sort(apply(kitchen.table, 2, sum)/sum(kitchen.table), decreasing=TRUE)[1:25]
kitAbun25 <- FJL.taxa[names(sortKitTax), ]
kitAbun25$abundance <- sortKitTax
# kitAbun25

kitAbun25$taxa.names <- gsub('\\;\\ s\\_\\_$', '', kitAbun25$taxa.names)
kitAbun25$taxa.names <- gsub('\\;\\ g\\_\\_$', '', kitAbun25$taxa.names)
kitAbun25$taxa.names <- gsub('\\;\\ f\\_\\_$', '', kitAbun25$taxa.names)
kitAbun25$taxa.names <- gsub('\\;\\ s\\_\\_', ' ', kitAbun25$taxa.names)

# do this 5 times~ what is this doing???
# kitAbun25$taxa.names <- gsub('^[[:alnum:]]{1,}\\;\\ \\w{1}\\_\\_', '', kitAbun25$taxa.names)
# kitAbun25$taxa.names <- gsub('^\\[[[:alnum:]]{1,}\\]\\;\\ \\w{1}\\_\\_', '', kitAbun25$taxa.names)


kitAbun25.mat <- as.matrix(kitAbun25)
par(las=1, mar=c(5, 25, 1, 1))
barplot(rev(kitAbun25$abundance), horiz=TRUE, names.arg=rev(kitAbun25$taxa.names), cex.names=0.5, main='All Kitchens: Top 25 Taxa')

plot of chunk findtop25OTU

sortRestTax <- sort(apply(restroom.table, 2, sum)/sum(restroom.table), decreasing=TRUE)[1:25]
RestAbun25 <- FJL.taxa[names(sortRestTax), ]
RestAbun25$abundance <- sortRestTax
# RestAbun25

RestAbun25$taxa.names <- gsub('\\;\\ s\\_\\_$', '', RestAbun25$taxa.names)
RestAbun25$taxa.names <- gsub('\\;\\ g\\_\\_$', '', RestAbun25$taxa.names)
RestAbun25$taxa.names <- gsub('\\;\\ f\\_\\_$', '', RestAbun25$taxa.names)
RestAbun25$taxa.names <- gsub('\\;\\ s\\_\\_', ' ', RestAbun25$taxa.names)

RestAbun25.mat <- as.matrix(RestAbun25)
par(las=1, mar=c(5, 25, 1, 1))
barplot(rev(RestAbun25$abundance), horiz=TRUE, names.arg=rev(RestAbun25$taxa.names), cex.names=0.5, main='All Restrooms: Top 25 Taxa')

plot of chunk findtop25OTU

Looking at subsetted taxa from each study: Kitchens

kitchen.table[1:5,1:5]

##              4479944 741867 11544 623634 1126569
## H13Cb.729458       0      0     0      0       0
## H13Fr.729158       0      0     0      0       0
## H13Kc.729422       0      0     0      0       0
## H14Cb.729167       0      0     0      0       0
## H14Fr.729334       0      0     0      0       0

restroom.table[1:5,1:5]

##           4479944 741867 11544 623634 1126569
## B1.489537       0      0     0      0       0
## B2.489526       0      0     0      0       0
## B3.489528       0      0     0      0       0
## B5.489455       0      0     0      0       0
## B6.489449       0      0     0      0       0

leff.kitchen <- which(FJL.map$phinchID == 'Leff_homes' & FJL.map$Room_Function == 'kitchen')
leff.kitchen.table <- FJL.table[leff.kitchen, ]

flores.kitchen <- which(FJL.map$phinchID == 'Flores_Kitchen' & FJL.map$Room_Function == 'kitchen')
flores.kitchen.table <- FJL.table[flores.kitchen, ]

jeon.kitchen <- which(FJL.map$phinchID == 'Jeon_korea' & FJL.map$Room_Function == 'kitchen')
jeon.kitchen.table <- FJL.table[jeon.kitchen, ]


sortKitTax.leff <- sort(apply(leff.kitchen.table, 2, sum)/sum(leff.kitchen.table), decreasing=TRUE)[1:25]
sortKitTax.flores <- sort(apply(flores.kitchen.table, 2, sum)/sum(flores.kitchen.table), decreasing=TRUE)[1:25]
sortKitTax.jeon <- sort(apply(jeon.kitchen.table, 2, sum)/sum(jeon.kitchen.table), decreasing=TRUE)[1:25]

kitAbun25.leff <- FJL.taxa[names(sortKitTax.leff), ]
kitAbun25.leff$abundance <- sortKitTax.leff
# kitAbun25.leff

kitAbun25.flores <- FJL.taxa[names(sortKitTax.flores), ]
kitAbun25.flores$abundance <- sortKitTax.flores
# kitAbun25.flores

kitAbun25.jeon <- FJL.taxa[names(sortKitTax.jeon), ]
kitAbun25.jeon$abundance <- sortKitTax.jeon
# kitAbun25.jeon

kitAbun25.leff$taxa.names <- gsub('\\;\\ s\\_\\_$', '', kitAbun25.leff$taxa.names)
kitAbun25.leff$taxa.names <- gsub('\\;\\ g\\_\\_$', '', kitAbun25.leff$taxa.names)
kitAbun25.leff$taxa.names <- gsub('\\;\\ f\\_\\_$', '', kitAbun25.leff$taxa.names)
kitAbun25.leff$taxa.names <- gsub('\\;\\ s\\_\\_', ' ', kitAbun25.leff$taxa.names)

kitAbun25.flores$taxa.names <- gsub('\\;\\ s\\_\\_$', '', kitAbun25.flores$taxa.names)
kitAbun25.flores$taxa.names <- gsub('\\;\\ g\\_\\_$', '', kitAbun25.flores$taxa.names)
kitAbun25.flores$taxa.names <- gsub('\\;\\ f\\_\\_$', '', kitAbun25.flores$taxa.names)
kitAbun25.flores$taxa.names <- gsub('\\;\\ s\\_\\_', ' ', kitAbun25.flores$taxa.names)

kitAbun25.jeon$taxa.names <- gsub('\\;\\ s\\_\\_$', '', kitAbun25.jeon$taxa.names)
kitAbun25.jeon$taxa.names <- gsub('\\;\\ g\\_\\_$', '', kitAbun25.jeon$taxa.names)
kitAbun25.jeon$taxa.names <- gsub('\\;\\ f\\_\\_$', '', kitAbun25.jeon$taxa.names)
kitAbun25.jeon$taxa.names <- gsub('\\;\\ s\\_\\_', ' ', kitAbun25.jeon$taxa.names)

kitAbun25.leff.mat <- as.matrix(kitAbun25.leff)
par(las=1, mar=c(5, 25, 1, 1))
barplot(rev(kitAbun25.leff$abundance), horiz=TRUE, names.arg=rev(kitAbun25.leff$taxa.names), cex.names=0.5, main='Leff Kitchens: Top 25 Taxa')

plot of chunk SubsetKitchens

kitAbun25.flores.mat <- as.matrix(kitAbun25.flores)
par(las=1, mar=c(5, 25, 1, 1))
barplot(rev(kitAbun25.flores$abundance), horiz=TRUE, names.arg=rev(kitAbun25.flores$taxa.names), cex.names=0.5, main='Flores Kitchens: Top 25 Taxa')

plot of chunk SubsetKitchens

kitAbun25.jeon.mat <- as.matrix(kitAbun25.jeon)
par(las=1, mar=c(5, 25, 1, 1))
barplot(rev(kitAbun25.jeon$abundance), horiz=TRUE, names.arg=rev(kitAbun25.jeon$taxa.names), cex.names=0.5, main='Jeon Kitchens: Top 25 Taxa')

plot of chunk SubsetKitchens

Looking at subsetted taxa from each study: Restrooms

kitchen.table[1:5,1:5]

##              4479944 741867 11544 623634 1126569
## H13Cb.729458       0      0     0      0       0
## H13Fr.729158       0      0     0      0       0
## H13Kc.729422       0      0     0      0       0
## H14Cb.729167       0      0     0      0       0
## H14Fr.729334       0      0     0      0       0

restroom.table[1:5,1:5]

##           4479944 741867 11544 623634 1126569
## B1.489537       0      0     0      0       0
## B2.489526       0      0     0      0       0
## B3.489528       0      0     0      0       0
## B5.489455       0      0     0      0       0
## B6.489449       0      0     0      0       0

leff.restroom <- which(FJL.map$phinchID == 'Leff_homes' & FJL.map$Room_Function == 'restroom')
leff.restroom.table <- FJL.table[leff.restroom, ]

flores.restroom <- which(FJL.map$phinchID == 'Flores_restroom' & FJL.map$Room_Function == 'restroom')
flores.restroom.table <- FJL.table[flores.restroom, ]

jeon.restroom <- which(FJL.map$phinchID == 'Jeon_korea' & FJL.map$Room_Function == 'restroom')
jeon.restroom.table <- FJL.table[jeon.kitchen, ]


sortRestTax.leff <- sort(apply(leff.restroom.table, 2, sum)/sum(leff.restroom.table), decreasing=TRUE)[1:25]
sortRestTax.flores <- sort(apply(flores.restroom.table, 2, sum)/sum(flores.restroom.table), decreasing=TRUE)[1:25]
sortRestTax.jeon <- sort(apply(jeon.restroom.table, 2, sum)/sum(jeon.restroom.table), decreasing=TRUE)[1:25]

RestAbun25.leff <- FJL.taxa[names(sortRestTax.leff), ]
RestAbun25.leff$abundance <- sortRestTax.leff
# RestAbun25.leff

RestAbun25.flores <- FJL.taxa[names(sortRestTax.flores), ]
RestAbun25.flores$abundance <- sortRestTax.flores
# RestAbun25.flores

RestAbun25.jeon <- FJL.taxa[names(sortRestTax.jeon), ]
RestAbun25.jeon$abundance <- sortRestTax.jeon
# RestAbun25.jeon

RestAbun25.leff$taxa.names <- gsub('\\;\\ s\\_\\_$', '', RestAbun25.leff$taxa.names)
RestAbun25.leff$taxa.names <- gsub('\\;\\ g\\_\\_$', '', RestAbun25.leff$taxa.names)
RestAbun25.leff$taxa.names <- gsub('\\;\\ f\\_\\_$', '', RestAbun25.leff$taxa.names)
RestAbun25.leff$taxa.names <- gsub('\\;\\ s\\_\\_', ' ', RestAbun25.leff$taxa.names)

RestAbun25.flores$taxa.names <- gsub('\\;\\ s\\_\\_$', '', RestAbun25.flores$taxa.names)
RestAbun25.flores$taxa.names <- gsub('\\;\\ g\\_\\_$', '', RestAbun25.flores$taxa.names)
RestAbun25.flores$taxa.names <- gsub('\\;\\ f\\_\\_$', '', RestAbun25.flores$taxa.names)
RestAbun25.flores$taxa.names <- gsub('\\;\\ s\\_\\_', ' ', RestAbun25.flores$taxa.names)

RestAbun25.jeon$taxa.names <- gsub('\\;\\ s\\_\\_$', '', RestAbun25.jeon$taxa.names)
RestAbun25.jeon$taxa.names <- gsub('\\;\\ g\\_\\_$', '', RestAbun25.jeon$taxa.names)
RestAbun25.jeon$taxa.names <- gsub('\\;\\ f\\_\\_$', '', RestAbun25.jeon$taxa.names)
RestAbun25.jeon$taxa.names <- gsub('\\;\\ s\\_\\_', ' ', RestAbun25.jeon$taxa.names)

RestAbun25.leff.mat <- as.matrix(RestAbun25.leff)
par(las=1, mar=c(5, 25, 1, 1))
barplot(rev(RestAbun25.leff$abundance), horiz=TRUE, names.arg=rev(RestAbun25.leff$taxa.names), cex.names=0.5, main='Leff Restrooms: Top 25 Taxa')

RestAbun25.flores.mat <- as.matrix(RestAbun25.flores)
par(las=1, mar=c(5, 25, 1, 1))
barplot(rev(RestAbun25.flores$abundance), horiz=TRUE, names.arg=rev(RestAbun25.flores$taxa.names), cex.names=0.5, main='Flores Restrooms: Top 25 Taxa')

plot of chunk SubsetRestrooms

RestAbun25.jeon.mat <- as.matrix(RestAbun25.jeon)
par(las=1, mar=c(5, 25, 1, 1))
barplot(rev(RestAbun25.jeon$abundance), horiz=TRUE, names.arg=rev(RestAbun25.jeon$taxa.names), cex.names=0.5, main='Jeon Restrooms: Top 25 Taxa')

plot of chunk SubsetRestrooms

Re-do analysis with chloroplasts removed

Read in OTU table, mapping file, and unifrac distance matrix generated with Qiime.

Read in OTU table using custom function.

FJL.list.noC <- QiimeIn('data/Ftable_noC.from_biom_w_taxonomy.txt')

Same mapping file as before.

Read in new unweighted unifrac distance matrix, generated with Qiime script. Preview table, and convert to matrix.

unifrac.dist.noC <- read.table('data/unweighted_unifrac_dm_noC.txt', header=T)
unifrac.dist.noC[1:5,1:5]

##              KiB54.630662 KiC69.630743 KiB70.630661 KiB40.630629
## KiB54.630662       0.0000       0.7014       0.6787       0.6570
## KiC69.630743       0.7014       0.0000       0.6618       0.7145
## KiB70.630661       0.6787       0.6618       0.0000       0.6510
## KiB40.630629       0.6570       0.7145       0.6510       0.0000
## KiB36.630609       0.6941       0.6806       0.6743       0.6984
##              KiB36.630609
## KiB54.630662       0.6941
## KiC69.630743       0.6806
## KiB70.630661       0.6743
## KiB40.630629       0.6984
## KiB36.630609       0.0000

unifrac.dist.noC.mat <- as.matrix(unifrac.dist.noC)

Checkpoints to make sure otu table looks correct and everything is lined up.

names(FJL.list.noC)
FJL.list.noC$Preview
FJL.list.noC$Dimensions
FJL.list.noC$Samples
FJL.list.noC$Taxa

Match up/Reorder OTU table, map, and unifrac distance matrix

Extract the OTU table and taxon table into two different objects.

FJL.table.noC <- FJL.list.noC$Table
FJL.taxa.noC <- FJL.list.noC$Taxa

Make sure that the OTU table and map line up. If they don’t, reorder using the next section of code.

identical(row.names(FJL.map), row.names(FJL.table.noC)) #False
cbind(row.names(FJL.map), row.names(FJL.table.noC))     #Not matching
all(row.names(FJL.map) %in% row.names(FJL.table.noC))   #True
all(row.names(FJL.table.noC) %in% row.names(FJL.map))   #True

Reorder both the map and the OTU table so that they have the same rows and columns.

FJL.map <- FJL.map[order(row.names(FJL.map)), ]
FJL.table.noC <- FJL.table.noC[row.names(FJL.map), ]
# an alternative way of doing it (without the two linked together):
# FJL.map <- FJL.map[order(row.names(FJL.map)), ]
# FJL.table <- FJL.table[order(row.names(FJL.table)), ]

Reorder the Unifrac distance matrix so that it has the same rows and columns as the map.

unifrac.dist.noC.mat <- unifrac.dist.noC.mat[row.names(FJL.map), row.names(FJL.map)]

Make groups of interest, to make calling them easier later. (Same as before)

How many OTUs are in my OTU table?

dim(FJL.table.noC) # 17219 OTUs

## [1]   758 17219

How many singletons are in my OTU table?

# abundance curve
plot(rev(sort(colSums(FJL.table.noC))))

plot of chunk plotSingletons.noC

length(which(colSums(FJL.table.noC) == 1))/ncol(FJL.table.noC) # 31% are singletons

## [1] 0.3141

Canberra

Make a Canberra distance matrix.

FJL.can.noC <- vegdist(FJL.table.noC, 'canberra')
FJL.can.noC.mat <- as.matrix(FJL.can.noC)

Make an ordination with the canberra distance matrix.

FJL.can.nmds.noC <- nmds(FJL.can.noC)

## initial  value 36.132489 
## iter   5 value 24.283354
## iter  10 value 23.398636
## final  value 23.222887 
## converged

Plot ordination, colored by different variables.

levels(FJL.map$Place_1)

##  [1] "cabinet"         "counter"         "cutting_board"  
##  [4] "dish_drying_pan" "disposal"        "door"           
##  [7] "drawer"          "faucet"          "freezer"        
## [10] "garbage can"     "garbage_can"     "left_sink"      
## [13] "microwave"       "oven"            "pillowcase"     
## [16] "refrigerator"    "right_sink"      "sink"           
## [19] "soap_dispenser"  "sponge"          "stall"          
## [22] "stove"           "television"      "toilet"         
## [25] "wall"            "water"

colvec.Place_1=c("green4","darkorange","darkolivegreen","darkgoldenrod1","cyan2","cornflowerblue","coral1","chocolate3","chartreuse","blueviolet","aquamarine","darkslateblue","cornsilk1","firebrick","indianred1","lightblue1","darkorchid3","darkgoldenrod4","darkcyan","chartreuse4","greenyellow","brown1","deepskyblue2","mediumspringgreen","lightslateblue","lightsalmon")
plot(FJL.can.nmds.noC, pch=16, col=colvec.Place_1[FJL.map$Place_1])
legend('topleft', c("cabinet","counter","cutting_board","dish_drying_pan","disposal","door","drawer","faucet","freezer","garbage_can","microwave","oven","pillowcase","refrigerator","sink","soap_dispenser","sponge","stall","stove","television","toilet","wall","water"), cex=0.5, pt.cex=1, text.width=0.2, y.intersp=0.9, col=colvec.Place_1, pch=16)
title(main="Canberra NMDS colored by Place_1 (no chloroplasts)")

plot of chunk plotNMDScan.noC

levels(FJL.map$Geolocation)

## [1] "Boulder_CO_USA"       "RaleighDurham_NC_USA" "Seoul_Korea"

colvec.Geoloc=c("darkorchid","darkorange","cyan2")
plot(FJL.can.nmds.noC, pch=16, col = colvec.Geoloc[FJL.map$Geolocation])
legend('topleft', c("Boulder_CO_USA","RaleighDurham_NC_USA","Seoul_Korea"),cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9, col=colvec.Geoloc, pch=16)
title(main='Canberra NMDS colored by Geolocation (no chloroplasts)')

plot of chunk plotNMDScan.noC

levels(FJL.map$phinchID)

## [1] "Flores_Kitchen"  "Flores_restroom" "Jeon_korea"      "Leff_homes"

colvec.pID=c("aquamarine","chartreuse4","firebrick","darkgoldenrod1")
plot(FJL.can.nmds.noC, pch=16, col=colvec.pID[FJL.map$phinchID])
legend('topleft',c("Flores_Kitchen","Flores_restroom","Jeon_korea","Leff_homes"), cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9,col=colvec.pID, pch=16)
title(main='Canberra NMDS colored by study (phinchID) (no chloroplasts)')

plot of chunk plotNMDScan.noC

levels(FJL.map$Room_Function)

## [1] "home"     "kitchen"  "restroom"

colvec.RF=c("darkorchid","cornflowerblue","coral1")
plot(FJL.can.nmds.noC,pch=16,col=colvec.RF[FJL.map$Room_Function])
legend('topleft',c("home","kitchen","restroom"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.RF,pch=16)
title(main='Canberra NMDS colored by Room_Function (no chloroplasts)')

plot of chunk plotNMDScan.noC

levels(FJL.map$Target_Region)

## [1] "V1_V2" "V1_V3" "V4"

colvec.TR=c("deeppink","chartreuse","goldenrod")
plot(FJL.can.nmds.noC,pch=16,col=colvec.TR[FJL.map$Target_Region])
legend('topleft',c("V1_V2","V1_V3","V4"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.TR,pch=16)
title(main='Canberra NMDS colored by Target_Region (no chloroplasts)')

plot of chunk plotNMDScan.noC

levels(FJL.map$Sequencing_Technology)

## [1] "454_GS_Junior"      "Illumina_HiSeq"     "Illumina_HiSeq2000"
## [4] "Illumina_MiSeq"

colvec.ST=c("coral1","cornflowerblue","chartreuse","darkorchid")
plot(FJL.can.nmds.noC,pch=16,col=colvec.ST[FJL.map$Sequencing_Technology])
legend('topleft',c("454_GS_Junior","Illumina_HiSeq","Illumina_HiSeq2000","Illumina_MiSeq"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.ST,pch=16)
title(main='Canberra NMDS colored by Sequencing_Technology (no chloroplasts)')

plot of chunk plotNMDScan.noC

levels(FJL.map$Building_Type2)

## [1] "condo_town"          "family_home"         "low_rise_apt"       
## [4] "university_building"

colvec.BT2=c("darkorchid","aquamarine","deeppink","darkgoldenrod2")
plot(FJL.can.nmds.noC,pch=16,col=colvec.BT2[FJL.map$Building_Type2])
legend('topleft',c("condo_town","family_home","low_rise_apt","university_building"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.BT2,pch=16)
title(main='Canberra NMDS colored by Building_Type2 (no chloroplasts)')

plot of chunk plotNMDScan.noC

Could Sequencing technologies (Miseq v. HiSeq/HiSeq2000 be driving that split in the Leff homes data, resulting in the clusters that we see?)

Make distance matrices between locations of interest, while removing upper triangle for self-self comparisons.

restroom.restroom.can.dist.noC <- as.dist(FJL.can.noC.mat[restroom, restroom])
can.RR.tri.noC <- restroom.restroom.can.dist.noC[upper.tri(restroom.restroom.can.dist.noC)]
restroom.restroom.can.dist.noC <- can.RR.tri.noC[-which(is.na(can.RR.tri.noC))]

kitchen.kitchen.can.dist.noC <- as.dist(FJL.can.noC.mat[kitchen, kitchen])
can.KK.tri.noC <- kitchen.kitchen.can.dist.noC[upper.tri(kitchen.kitchen.can.dist.noC)]
kitchen.kitchen.can.dist.noC <- can.KK.tri.noC[-which(is.na(can.KK.tri.noC))]

toilet.toilet.can.dist.noC <- as.dist(FJL.can.noC.mat[toilet,toilet])
can.TT.tri.noC <- toilet.toilet.can.dist.noC[upper.tri(toilet.toilet.can.dist.noC)]
toilet.toilet.can.dist.noC <- can.TT.tri.noC[-which(is.na(can.TT.tri.noC))]

fridge.fridge.can.dist.noC <- as.dist(FJL.can.noC.mat[fridge,fridge])
can.FF.tri.noC <- fridge.fridge.can.dist.noC[upper.tri(fridge.fridge.can.dist.noC)]
fridge.fridge.can.dist.noC <- can.FF.tri.noC[-which(is.na(can.FF.tri.noC))]

restroom.kitchen.can.dist.noC <- as.dist(FJL.can.noC.mat[restroom, kitchen])

## Warning: non-square matrix

toilet.fridge.can.dist.noC <- as.dist(FJL.can.noC.mat[toilet, fridge])

## Warning: non-square matrix

Make vector of distances for the comparisons you want to make.

mat1.noC <- as.vector(restroom.restroom.can.dist.noC)
mat2.noC <- as.vector(kitchen.kitchen.can.dist.noC)
mat3.noC <- as.vector(toilet.toilet.can.dist.noC)
mat4.noC <- as.vector(fridge.fridge.can.dist.noC)
mat5.noC <- as.vector(restroom.kitchen.can.dist.noC)
mat6.noC <- as.vector(toilet.fridge.can.dist.noC)

Run t-tests on, and make boxplot, of distance comparisons.

Restroom/Restroom v. Restroom/Kitchen

t.test(mat1.noC,mat5.noC)

## 
##  Welch Two Sample t-test
## 
## data:  mat1.noC and mat5.noC
## t = -35.11, df = 3089, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.02754 -0.02462
## sample estimates:
## mean of x mean of y 
##    0.9624    0.9885

boxplot(mat1.noC,mat5.noC, main="Restroom/Restroom v. Restroom/Kitchen (no chloroplasts)",names=c("Restroom/Restroom","Restroom/Kitchen"), ylab="Canberra Distance")

plot of chunk testDistsCan1.noC

Kitchen/Kitchen v. Restroom/Kitchen

t.test(mat2.noC,mat5.noC)

## 
##  Welch Two Sample t-test
## 
## data:  mat2.noC and mat5.noC
## t = -160.5, df = 29472, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.04281 -0.04178
## sample estimates:
## mean of x mean of y 
##    0.9462    0.9885

boxplot(mat2.noC,mat5.noC, main="Kitchen/Kitchen v. Restroom/Kitchen (no chloroplasts)",names=c("Kitchen/Kitchen","Restroom/Kitchen"), ylab="Canberra Distance")

plot of chunk testDistsCan2.noC

Unweighted Unifrac

The new unweighted unifrac distance matrix was already imported, and re-ordered to match the map file.

Make an ordination with the unweighted unifrac distance matrix.

FJL.uni.nmds.noC <- nmds(unifrac.dist.noC.mat)

## initial  value 32.053727 
## iter   5 value 17.618373
## iter  10 value 16.819742
## iter  10 value 16.813545
## iter  10 value 16.812329
## final  value 16.812329 
## converged

Plot ordination, colored by different variables.

plot(FJL.uni.nmds.noC, pch=16, col=colvec.Place_1[FJL.map$Place_1])
legend('topleft', c("cabinet","counter","cutting_board","dish_drying_pan","disposal","door","drawer","faucet","freezer","garbage_can","microwave","oven","pillowcase","refrigerator","sink","soap_dispenser","sponge","stall","stove","television","toilet","wall","water"), cex=0.5, pt.cex=1, text.width=0.2, y.intersp=0.9, col=colvec.Place_1, pch=16)
title(main="Unweighted Unifrac NMDS colored by Place_1 (no chloroplasts)")

plot of chunk plotNMDSuni.noC

plot(FJL.uni.nmds.noC, pch=16, col=colvec.Geoloc[FJL.map$Geolocation])
legend('topleft', c("Boulder_CO_USA","RaleighDurham_NC_USA","Seoul_Korea"),cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9, col=colvec.Geoloc, pch=16)
title(main='Unweighted Unifrac NMDS colored by Geolocation (no chloroplasts)')

plot(FJL.uni.nmds.noC, pch=16, col=colvec.pID[FJL.map$phinchID])
legend('topleft',c("Flores_Kitchen","Flores_restroom","Jeon_korea","Leff_homes"), cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9,col=colvec.pID, pch=16)
title(main='Unweighted Unifrac NMDS colored by study (phinchID) (no chloroplasts')

plot of chunk plotNMDSuni.noC

plot(FJL.uni.nmds.noC,pch=16,col=colvec.RF[FJL.map$Room_Function])
legend('topleft',c("home","kitchen","restroom"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.RF,pch=16)
title(main='Unweighted Unifrac NMDS colored by Room_Function (no chloroplasts)')

plot(FJL.uni.nmds.noC,pch=16,col=colvec.TR[FJL.map$Target_Region])
legend('topleft',c("V1_V2","V1_V3","V4"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.TR,pch=16)
title(main='Unweighted Unifrac NMDS colored by Target_Region (no chloroplasts)')

plot of chunk plotNMDSuni.noC

plot(FJL.uni.nmds.noC,pch=16,col=colvec.ST[FJL.map$Sequencing_Technology])
legend('topleft',c("454_GS_Junior","Illumina_HiSeq","Illumina_HiSeq2000","Illumina_MiSeq"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.ST,pch=16)
title(main='Unweighted Unifrac NMDS colored by Sequencing_Technology (no chloroplasts)')

plot of chunk plotNMDSuni.noC

plot(FJL.uni.nmds.noC,pch=16,col=colvec.BT2[FJL.map$Building_Type2])
legend('topleft',c("condo_town","family_home","low_rise_apt","university_building"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.BT2,pch=16)
title(main='Unweighted Unifrac colored by Building_Type2 (no chloroplasts)')

plot of chunk plotNMDSuni.noC

Make distance matrices between locations of interest, while removing upper triangle for self-self comparisons.

restroom.restroom.uni.dist.noC <- as.dist(unifrac.dist.noC.mat[restroom, restroom])
uni.RR.tri.noC <- restroom.restroom.uni.dist.noC[upper.tri(restroom.restroom.uni.dist.noC)]
restroom.restroom.uni.dist.noC <- uni.RR.tri.noC[-which(is.na(uni.RR.tri.noC))]

kitchen.kitchen.uni.dist.noC <- as.dist(unifrac.dist.noC.mat[kitchen, kitchen])
uni.KK.tri.noC <- kitchen.kitchen.uni.dist.noC[upper.tri(kitchen.kitchen.uni.dist.noC)]
kitchen.kitchen.uni.dist.noC <- uni.KK.tri.noC[-which(is.na(uni.KK.tri.noC))]

toilet.toilet.uni.dist.noC <- as.dist(unifrac.dist.noC.mat[toilet,toilet])
uni.TT.tri.noC <- toilet.toilet.uni.dist.noC[upper.tri(toilet.toilet.uni.dist.noC)]
toilet.toilet.uni.dist.noC <- uni.TT.tri.noC[-which(is.na(uni.TT.tri.noC))]

fridge.fridge.uni.dist.noC <- as.dist(unifrac.dist.noC.mat[fridge,fridge])
uni.FF.tri.noC <- fridge.fridge.uni.dist.noC[upper.tri(fridge.fridge.uni.dist.noC)]
fridge.fridge.uni.dist.noC <- uni.FF.tri.noC[-which(is.na(uni.FF.tri.noC))]

restroom.kitchen.uni.dist.noC <- as.dist(unifrac.dist.noC.mat[restroom, kitchen])

## Warning: non-square matrix

toilet.fridge.uni.dist.noC <- as.dist(unifrac.dist.noC.mat[toilet, fridge])

## Warning: non-square matrix

Make vector of distances for the comparisons you want to make.

mat7.noC <- as.vector(restroom.restroom.uni.dist.noC)
mat8.noC <- as.vector(kitchen.kitchen.uni.dist.noC)
mat9.noC <- as.vector(toilet.toilet.uni.dist.noC)
mat10.noC <- as.vector(fridge.fridge.uni.dist.noC)
mat11.noC <- as.vector(restroom.kitchen.uni.dist.noC)
mat12.noC <- as.vector(toilet.fridge.uni.dist.noC)

Run t-tests on, and make boxplot, of distance comparisons.

Restroom/Restroom v. Restroom/Kitchen

t.test(mat7.noC,mat11.noC)

## 
##  Welch Two Sample t-test
## 
## data:  mat7.noC and mat11.noC
## t = -28.76, df = 3554, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.05216 -0.04550
## sample estimates:
## mean of x mean of y 
##    0.7409    0.7897

boxplot(mat7.noC,mat11.noC, main= "Restroom/Restroom v. Restroom/Kitchen (no chloroplasts)",names=c("Restroom/Restroom","Restroom/Kitchen"),ylab='Unweighted Unifrac Distance')

plot of chunk testDistsUni1.noC

Kitchen/Kitchen v. Restroom/Kitchen

t.test(mat8.noC,mat11.noC)

## 
##  Welch Two Sample t-test
## 
## data:  mat8.noC and mat11.noC
## t = -91.15, df = 29923, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.06802 -0.06516
## sample estimates:
## mean of x mean of y 
##    0.7231    0.7897

boxplot(mat8.noC,mat11.noC, main="Kitchen/Kitchen v. Restroom/Kitchen (no chloroplasts)",names=c('Kitchen/Kitchen','Restroom/Kitchen'),ylab='Unweighted Unifrac Distance')

plot of chunk testDistsUni2.noC

Skip Jaccard for now.

Finding the taxa that are abundant in kitchens v. restrooms

Make groups of interest, to make calling them easier later.

restroom.table.noC <- FJL.table.noC[restroom, ]
kitchen.table.noC <- FJL.table.noC[kitchen, ]

sortKitTax.noC <- sort(apply(kitchen.table.noC, 2, sum)/sum(kitchen.table.noC), decreasing=TRUE)[1:25]
kitAbun25.noC <- FJL.taxa.noC[names(sortKitTax.noC), ]
kitAbun25.noC$abundance <- sortKitTax.noC
# kitAbun25.noC

kitAbun25.noC$taxa.names <- gsub('\\;\\ s\\_\\_$', '', kitAbun25.noC$taxa.names)
kitAbun25.noC$taxa.names <- gsub('\\;\\ g\\_\\_$', '', kitAbun25.noC$taxa.names)
kitAbun25.noC$taxa.names <- gsub('\\;\\ f\\_\\_$', '', kitAbun25.noC$taxa.names)
kitAbun25.noC$taxa.names <- gsub('\\;\\ s\\_\\_', ' ', kitAbun25.noC$taxa.names)

# do this 5 times~ what is this doing???
# kitAbun25$taxa.names <- gsub('^[[:alnum:]]{1,}\\;\\ \\w{1}\\_\\_', '', kitAbun25$taxa.names)
# kitAbun25$taxa.names <- gsub('^\\[[[:alnum:]]{1,}\\]\\;\\ \\w{1}\\_\\_', '', kitAbun25$taxa.names)


par(las=1, mar=c(5, 25, 1, 1))
barplot(rev(kitAbun25.noC$abundance), horiz=TRUE, names.arg=rev(kitAbun25.noC$taxa.names), cex.names=0.5, main='All Kitchens: Top 25 Taxa (noC)')

plot of chunk findtop25OTU.noC

sortRestTax.noC <- sort(apply(restroom.table.noC, 2, sum)/sum(restroom.table.noC), decreasing=TRUE)[1:25]
RestAbun25.noC <- FJL.taxa.noC[names(sortRestTax.noC), ]
RestAbun25.noC$abundance <- sortRestTax.noC
# RestAbun25.noC

RestAbun25.noC$taxa.names <- gsub('\\;\\ s\\_\\_$', '', RestAbun25.noC$taxa.names)
RestAbun25.noC$taxa.names <- gsub('\\;\\ g\\_\\_$', '', RestAbun25.noC$taxa.names)
RestAbun25.noC$taxa.names <- gsub('\\;\\ f\\_\\_$', '', RestAbun25.noC$taxa.names)
RestAbun25.noC$taxa.names <- gsub('\\;\\ s\\_\\_', ' ', RestAbun25.noC$taxa.names)

par(las=1, mar=c(5, 25, 1, 1))
barplot(rev(RestAbun25.noC$abundance), horiz=TRUE, names.arg=rev(RestAbun25.noC$taxa.names), cex.names=0.5, main='All Restrooms: Top 25 Taxa (noC)')

plot of chunk findtop25OTU.noC

Could continue with the top taxa,by study.

Subsample OTU tables (same # of samples per study)

(could also do the following without chloroplasts)

Create maps and tables of refrigerator and toilet samples, by study.

length(which(FJL.map$Place_1 == 'refrigerator' & FJL.map$phinchID == 'Jeon_korea'))
length(which(FJL.map$Place_1 == 'toilet' & FJL.map$phinchID == 'Jeon_korea'))
length(which(FJL.map$Place_1 == 'refrigerator' & FJL.map$phinchID == 'Flores_Kitchen'))
length(which(FJL.map$Place_1 == 'toilet' & FJL.map$phinchID == 'Flores_Kitchen'))
length(which(FJL.map$Place_1 == 'refrigerator' & FJL.map$phinchID == 'Flores_restroom'))
length(which(FJL.map$Place_1 == 'toilet' & FJL.map$phinchID == 'Flores_restroom'))
length(which(FJL.map$Place_1 == 'refrigerator' & FJL.map$phinchID == 'Leff_homes'))
length(which(FJL.map$Place_1 == 'toilet' & FJL.map$phinchID == 'Leff_homes'))

fridge.Jeon <- which(FJL.map$Place_1 == 'refrigerator' & FJL.map$phinchID == 'Jeon_korea')
toilet.Jeon <- which(FJL.map$Place_1 == 'toilet' & FJL.map$phinchID == 'Jeon_korea')
fridge.Flores <- which(FJL.map$Place_1 == 'refrigerator' & FJL.map$phinchID == 'Flores_Kitchen')
toilet.Flores <- which(FJL.map$Place_1 == 'toilet' & FJL.map$phinchID == 'Flores_restroom')
fridge.Leff <- which(FJL.map$Place_1 == 'refrigerator' & FJL.map$phinchID == 'Leff_homes')
toilet.Leff <- which(FJL.map$Place_1 == 'toilet' & FJL.map$phinchID == 'Leff_homes')

fridge.Jeon.map <- FJL.map[fridge.Jeon, ]
fridge.Jeon.table <- FJL.table[fridge.Jeon, ]

toilet.Jeon.map <- FJL.map[toilet.Jeon, ]
toilet.Jeon.table <- FJL.table[toilet.Jeon, ]

fridge.Flores.map <- FJL.map[fridge.Flores, ]
fridge.Flores.table <- FJL.table[fridge.Flores, ]

toilet.Flores.map <- FJL.map[toilet.Flores, ]
toilet.Flores.table <- FJL.table[toilet.Flores, ]

fridge.Leff.map <- FJL.map[fridge.Leff, ]
fridge.Leff.table <- FJL.table[fridge.Leff, ]

toilet.Leff.map <- FJL.map[toilet.Leff, ]
toilet.Leff.table <- FJL.table[toilet.Leff, ]

# take a random sample of size 10 from a dataset mydata 
# sample without replacement
# mysample <- mydata[sample(1:nrow(mydata), 10,
#      replace=FALSE),]


# I want to subsample 12 from fridge OTU tables: fridge.Flores.table & fridge.Leff.table
# I want to subsample 10 from toilet OTU tables: toilet.Flores.table & toilet.Leff.table

# matching the maps up??????not sure if i need to do this yet

fridge.Flores.table.12 <- fridge.Flores.table[sample(1:nrow(fridge.Flores.table), 12, replace=FALSE),]
fridge.Leff.table.12 <- fridge.Leff.table[sample(1:nrow(fridge.Leff.table), 12, replace=FALSE),]

toilet.Flores.table.10 <- toilet.Flores.table[sample(1:nrow(toilet.Flores.table), 10, replace=FALSE),]
toilet.Leff.table.10 <- toilet.Leff.table[sample(1:nrow(toilet.Leff.table), 10, replace=FALSE),]

all.fridge.toilet.map <- rbind(fridge.Jeon.map, fridge.Flores.map, fridge.Leff.map, toilet.Jeon.map, toilet.Flores.map, toilet.Leff.map)

all.fridge.table.12 <- rbind(fridge.Flores.table.12, fridge.Leff.table.12, fridge.Jeon.table)
all.toilet.table.10 <- rbind(toilet.Flores.table.10, toilet.Leff.table.10, toilet.Jeon.table)
all.fridgetoilet.table.12.10 <- rbind(all.fridge.table.12, all.toilet.table.10)

Make sure that the OTU table and map line up. If they don’t, reorder using the next section of code.

identical(row.names(all.fridge.toilet.map), row.names(all.fridgetoilet.table.12.10)) #False
cbind(row.names(all.fridge.toilet.map), row.names(all.fridgetoilet.table.12.10))     #Not matching

## Warning: number of rows of result is not a multiple of vector length (arg
## 2)

all(row.names(all.fridge.toilet.map) %in% row.names(all.fridgetoilet.table.12.10))   #False
all(row.names(all.fridgetoilet.table.12.10) %in% row.names(all.fridge.toilet.map))   #True

Remove rows from map that aren’t included in the subsetted OTU table.

all.fridge.toilet.map.12.10 <- all.fridge.toilet.map[row.names(all.fridgetoilet.table.12.10),]

identical(row.names(all.fridge.toilet.map.12.10), row.names(all.fridgetoilet.table.12.10)) #True

## [1] TRUE

cbind(row.names(all.fridge.toilet.map.12.10), row.names(all.fridgetoilet.table.12.10))     #Match

##       [,1]              [,2]             
##  [1,] "KiC36.630570"    "KiC36.630570"   
##  [2,] "KiC31.630483"    "KiC31.630483"   
##  [3,] "KiD31.630581"    "KiD31.630581"   
##  [4,] "KiC29.630745"    "KiC29.630745"   
##  [5,] "KiB75.630564"    "KiB75.630564"   
##  [6,] "KiD75.630628"    "KiD75.630628"   
##  [7,] "KiD37.630587"    "KiD37.630587"   
##  [8,] "KiB74.630598"    "KiB74.630598"   
##  [9,] "KiB39.630496"    "KiB39.630496"   
## [10,] "KiC30.630488"    "KiC30.630488"   
## [11,] "KiD35.630583"    "KiD35.630583"   
## [12,] "KiA75.630480"    "KiA75.630480"   
## [13,] "H43Fr.736000"    "H43Fr.736000"   
## [14,] "H47Fr.735880"    "H47Fr.735880"   
## [15,] "H19Fr.735936"    "H19Fr.735936"   
## [16,] "H45Fr.735904"    "H45Fr.735904"   
## [17,] "H27Fr.729274"    "H27Fr.729274"   
## [18,] "H33Fr.729116"    "H33Fr.729116"   
## [19,] "H30Fr.729368"    "H30Fr.729368"   
## [20,] "H48Fr.729208"    "H48Fr.729208"   
## [21,] "H40Fr.729193"    "H40Fr.729193"   
## [22,] "H34Fr.736058"    "H34Fr.736058"   
## [23,] "H56Fr.736041"    "H56Fr.736041"   
## [24,] "H15Fr.729307"    "H15Fr.729307"   
## [25,] "Rcul.1.1078777"  "Rcul.1.1078777" 
## [26,] "Rcul.11.1078797" "Rcul.11.1078797"
## [27,] "Rcul.2.1078799"  "Rcul.2.1078799" 
## [28,] "Rcul.3.1078800"  "Rcul.3.1078800" 
## [29,] "Rcul.5.1078790"  "Rcul.5.1078790" 
## [30,] "Rcul.6.1078792"  "Rcul.6.1078792" 
## [31,] "Rcul.7.1078773"  "Rcul.7.1078773" 
## [32,] "Rcul.8.1078780"  "Rcul.8.1078780" 
## [33,] "Rgen.10.1078794" "Rgen.10.1078794"
## [34,] "Rgen.7.1078779"  "Rgen.7.1078779" 
## [35,] "Rgen.8.1078789"  "Rgen.8.1078789" 
## [36,] "Rgen.9.1078798"  "Rgen.9.1078798" 
## [37,] "EKCF7.489557"    "EKCF7.489557"   
## [38,] "EKAF5.489565"    "EKAF5.489565"   
## [39,] "PTBF5.489479"    "PTBF5.489479"   
## [40,] "PTCF5.489507"    "PTCF5.489507"   
## [41,] "EKAM6.489544"    "EKAM6.489544"   
## [42,] "PTCM5.489525"    "PTCM5.489525"   
## [43,] "EKCF5.489488"    "EKCF5.489488"   
## [44,] "PTCF6.489533"    "PTCF6.489533"   
## [45,] "EKBM5.489472"    "EKBM5.489472"   
## [46,] "EKBM6.489504"    "EKBM6.489504"   
## [47,] "H36Ts.729110"    "H36Ts.729110"   
## [48,] "H43Ts.735928"    "H43Ts.735928"   
## [49,] "H20Ts.729120"    "H20Ts.729120"   
## [50,] "H26Ts.729406"    "H26Ts.729406"   
## [51,] "H13Ts.729194"    "H13Ts.729194"   
## [52,] "H35Ts.735945"    "H35Ts.735945"   
## [53,] "H27Ts.729226"    "H27Ts.729226"   
## [54,] "H29Ts.729131"    "H29Ts.729131"   
## [55,] "H18Ts.736040"    "H18Ts.736040"   
## [56,] "H38Ts.729270"    "H38Ts.729270"   
## [57,] "Tcul.1.1078775"  "Tcul.1.1078775" 
## [58,] "Tcul.2.1078782"  "Tcul.2.1078782" 
## [59,] "Tcul.3.1078785"  "Tcul.3.1078785" 
## [60,] "Tcul.4.1078788"  "Tcul.4.1078788" 
## [61,] "Tcul.5.1078791"  "Tcul.5.1078791" 
## [62,] "Tcul.6.1078781"  "Tcul.6.1078781" 
## [63,] "Tgen.1.1078795"  "Tgen.1.1078795" 
## [64,] "Tgen.10.1078774" "Tgen.10.1078774"
## [65,] "Tgen.12.1078784" "Tgen.12.1078784"
## [66,] "Tgen.13.1078793" "Tgen.13.1078793"

all(row.names(all.fridge.toilet.map.12.10) %in% row.names(all.fridgetoilet.table.12.10))   #True

## [1] TRUE

all(row.names(all.fridgetoilet.table.12.10) %in% row.names(all.fridge.toilet.map.12.10))   #True

## [1] TRUE

write.table(all.fridgetoilet.table.12.10, "12.10.fridge.toilet.table.txt", sep="\t", col.names=TRUE, row.names=TRUE, quote=FALSE)
write.table(all.fridge.toilet.map.12.10, "12.10.fridge.toilet.map.txt", sep="\t", col.names=NA, row.names=TRUE)

Make a Canberra distance matrix.

all.12.10.can <- vegdist(all.fridgetoilet.table.12.10, 'canberra')
all.12.10.can.mat <- as.matrix(all.12.10.can)

Make an ordination with the canberra distance matrix.

all.12.10.can.nmds <- nmds(all.12.10.can)

## initial  value 35.510400 
## iter   5 value 23.723161
## iter  10 value 22.178427
## iter  15 value 21.462291
## iter  20 value 21.169825
## final  value 21.082734 
## converged

Plot ordination, colored by different variables.

levels(all.fridge.toilet.map.12.10$Place_1)

##  [1] "cabinet"         "counter"         "cutting_board"  
##  [4] "dish_drying_pan" "disposal"        "door"           
##  [7] "drawer"          "faucet"          "freezer"        
## [10] "garbage can"     "garbage_can"     "left_sink"      
## [13] "microwave"       "oven"            "pillowcase"     
## [16] "refrigerator"    "right_sink"      "sink"           
## [19] "soap_dispenser"  "sponge"          "stall"          
## [22] "stove"           "television"      "toilet"         
## [25] "wall"            "water"

colvec.Place_1=c("green4","darkorange","darkolivegreen","darkgoldenrod1","cyan2","cornflowerblue","coral1","chocolate3","chartreuse","blueviolet","aquamarine","darkslateblue","cornsilk1","firebrick","indianred1","lightblue1","darkorchid3","darkgoldenrod4","darkcyan","chartreuse4","greenyellow","brown1","deepskyblue2","mediumspringgreen","lightslateblue","lightsalmon")
length(colvec.Place_1)

## [1] 26

plot(all.12.10.can.nmds, pch=16, col = colvec.Place_1[all.fridge.toilet.map.12.10$Place_1])
legend('topleft', c("cabinet","counter","cutting_board","dish_drying_pan","disposal","door","drawer","faucet","freezer","garbage_can","microwave","oven","pillowcase","refrigerator","sink","soap_dispenser","sponge","stall","stove","television","toilet","wall","water"), cex=0.5, pt.cex=1, text.width=0.2, y.intersp=0.9, col=colvec.Place_1, pch=16)
title(main="Canberra NMDS colored by Place_1 (subset)")

plot of chunk plotNMDScan.SS

### colors are wrong!!!!!!can't figure out why
#lightblue1(16)=refrigerator=(14)
#mediumspringgreen(24)=toilet=(21)

levels(all.fridge.toilet.map.12.10$Geolocation)

## [1] "Boulder_CO_USA"       "RaleighDurham_NC_USA" "Seoul_Korea"

colvec.Geoloc=c("darkorchid","darkorange","cyan2")
plot(all.12.10.can.nmds, pch=16, col = colvec.Geoloc[all.fridge.toilet.map.12.10$Geolocation])
legend('topleft', c("Boulder_CO_USA","RaleighDurham_NC_USA","Seoul_Korea"),cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9, col=colvec.Geoloc, pch=16)
title(main='Canberra NMDS colored by Geolocation (subset)')

plot of chunk plotNMDScan.SS

levels(all.fridge.toilet.map.12.10$phinchID)

## [1] "Flores_Kitchen"  "Flores_restroom" "Jeon_korea"      "Leff_homes"

colvec.pID=c("aquamarine","chartreuse4","firebrick","darkgoldenrod1")
plot(all.12.10.can.nmds, pch=16, col=colvec.pID[all.fridge.toilet.map.12.10$phinchID])
legend('topleft',c("Flores_Kitchen","Flores_restroom","Jeon_korea","Leff_homes"), cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9,col=colvec.pID, pch=16)
title(main='Canberra NMDS colored by study (phinchID) (subset)')

plot of chunk plotNMDScan.SS

levels(all.fridge.toilet.map.12.10$Room_Function)

## [1] "home"     "kitchen"  "restroom"

colvec.RF=c("darkorchid","cornflowerblue","coral1")
plot(all.12.10.can.nmds,pch=16,col=colvec.RF[all.fridge.toilet.map.12.10$Room_Function])
legend('topleft',c("home","kitchen","restroom"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.RF,pch=16)
title(main='Canberra NMDS colored by Room_Function (subset)')

plot of chunk plotNMDScan.SS

levels(all.fridge.toilet.map.12.10$Target_Region)

## [1] "V1_V2" "V1_V3" "V4"

colvec.TR=c("deeppink","chartreuse","goldenrod")
plot(all.12.10.can.nmds,pch=16,col=colvec.TR[all.fridge.toilet.map.12.10$Target_Region])
legend('topleft',c("V1_V2","V1_V3","V4"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.TR,pch=16)
title(main='Canberra NMDS colored by Target_Region (Subset)')

plot of chunk plotNMDScan.SS

levels(all.fridge.toilet.map.12.10$Sequencing_Technology)

## [1] "454_GS_Junior"      "Illumina_HiSeq"     "Illumina_HiSeq2000"
## [4] "Illumina_MiSeq"

colvec.ST=c("coral1","cornflowerblue","chartreuse","darkorchid")
plot(all.12.10.can.nmds,pch=16,col=colvec.ST[all.fridge.toilet.map.12.10$Sequencing_Technology])
legend('topleft',c("454_GS_Junior","Illumina_HiSeq","Illumina_HiSeq2000","Illumina_MiSeq"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.ST,pch=16)
title(main='Canberra NMDS colored by Sequencing_Technology (subset)')

plot of chunk plotNMDScan.SS

levels(all.fridge.toilet.map.12.10$Building_Type2)

## [1] "condo_town"          "family_home"         "low_rise_apt"       
## [4] "university_building"

colvec.BT2=c("darkorchid","aquamarine","deeppink","darkgoldenrod2")
plot(all.12.10.can.nmds,pch=16,col=colvec.BT2[all.fridge.toilet.map.12.10$Building_Type2])
legend('topleft',c("condo_town","family_home","low_rise_apt","university_building"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.BT2,pch=16)
title(main='Canberra NMDS colored by Building_Type2 (subset)')

plot of chunk plotNMDScan.SS

Without chloroplasts, target region seems to drive clustering the best, between subsampled toilets and refrigerators among the 4 studies.

toilet.SS <- which(all.fridge.toilet.map.12.10$Place_1 == 'toilet')
toilet.SS.map <- all.fridge.toilet.map.12.10[toilet.SS, ]
toilet.SS.table <- all.fridgetoilet.table.12.10[toilet.SS, ]

fridge.SS <- which(all.fridge.toilet.map.12.10$Place_1 == 'refrigerator')
fridge.SS.map <- all.fridge.toilet.map.12.10[fridge.SS, ]
fridge.SS.table <- all.fridgetoilet.table.12.10[fridge.SS, ]

toilet.toilet.can.dist.SS <- as.dist(all.12.10.can.mat[toilet.SS, toilet.SS])
can.TT.tri.SS <- toilet.toilet.can.dist.SS[upper.tri(toilet.toilet.can.dist.SS)]
toilet.toilet.can.dist.SS <- can.TT.tri.SS[-which(is.na(can.TT.tri.SS))]

fridge.fridge.can.dist.SS <- as.dist(all.12.10.can.mat[fridge.SS, fridge.SS])
can.FF.tri.SS <- fridge.fridge.can.dist.SS[upper.tri(fridge.fridge.can.dist.SS)]
fridge.fridge.can.dist.SS <- can.FF.tri.SS[-which(is.na(can.FF.tri.SS))]

toilet.fridge.can.dist.SS <- as.dist(all.12.10.can.mat[toilet.SS, fridge.SS])

## Warning: non-square matrix

Make vector of distances for the comparisons you want to make.

mat1.SS <- as.vector(toilet.toilet.can.dist.SS)
mat2.SS <- as.vector(fridge.fridge.can.dist.SS)
mat3.SS <- as.vector(toilet.fridge.can.dist.SS)

Run t-tests on, and make boxplot, of distance comparisons.

Toilet/Toilet v. Toilet/Fridge

t.test(mat1.SS, mat3.SS)

## 
##  Welch Two Sample t-test
## 
## data:  mat1.SS and mat3.SS
## t = -4.021, df = 109, p-value = 0.0001069
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.025684 -0.008725
## sample estimates:
## mean of x mean of y 
##    0.9717    0.9889

boxplot(mat1.SS,mat3.SS, main="Toilet/Toilet v. Toilet/Fridge",names=c("Toilet/Toilet","Toilet/Fridge"), ylab="Canberra Distance")

plot of chunk testDistsCan1.SS

Fridge/Fridge v. Toilet/Fridge

t.test(mat2.SS, mat3.SS)

## 
##  Welch Two Sample t-test
## 
## data:  mat2.SS and mat3.SS
## t = -6.463, df = 173.7, p-value = 1e-09
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.02150 -0.01144
## sample estimates:
## mean of x mean of y 
##    0.9724    0.9889

boxplot(mat2.SS ,mat3.SS, main="Fridge/Fridge v. Toilet/Fridge",names=c("Fridge/Fridge","Toilet/Fridge"), ylab="Canberra Distance")

plot of chunk testDistsCan2.SS

Unweighted Unifrac

Trying to do unifrac in R for this OTU table (having a hard time doing biom/txt file conversions).

FJL.tree <- read_tree_greengenes('/macqiime/greengenes/gg_13_8_otus/trees/97_otus.tree')
UF.SS <- unifrac(all.fridgetoilet.table.12.10, FJL.tree) # takes a really long time??????

Could re-try the biom conversion to do the Qiime unifrac. would be better/more consistent anyways.