FloresJeonLeff Subset Study - BEMA project

(Rachel Adams, Ashley Bateman, Holly Bik, James Meadow)

Question: Do rooms in a building look more similar the same kind of room in another building: Do restrooms look like restrooms, or do kitchens look like kitchens, no matter where you live? Or, do regional differences in microbial communities make kitchens and restrooms that are in the same location look more like each other?

library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.0-10
library(ape)
library(labdsv)
## Loading required package: mgcv
## Loading required package: nlme
## This is mgcv 1.8-2. For overview type 'help("mgcv-package")'.
## Loading required package: MASS
## 
## Attaching package: 'labdsv'
## 
## The following object is masked from 'package:stats':
## 
##     density
### These just for debugging, not evaluated. 
# save.image('~/Dropbox/FloresJeonLeff.RData')
# load('~/Dropbox/FloresJeonLeff.RData')

Below is a custom function (QiimeIn) written by James Meadow for reading in OTU tables.

QiimeIn <- function ( file='') {
  tmp.table <- read.table ( file, sep='\t', head=TRUE, row.names=1, comment.char='%', skip=1 )
  tax.numb <- dim(tmp.table)[2]  ## last col is taxon names
  taxa.names <- as.character(tmp.table[,tax.numb])  ## save taxon names
  tmp.to.t <- tmp.table[,1:tax.numb-1]  ## trim names from table
  name <- t(tmp.to.t)  ## transpose after trimming names
  
  preview <- name[1:5,1:5]
  samples <- row.names(name)
  taxa.tmp <- cbind(colnames(name),taxa.names)
  taxa <- data.frame(taxa.tmp)
  names(taxa)[1] <- 'qiime.id'
  row.names(taxa) <- taxa.tmp[, 1]
  dimensions <- dim(name)
  
  table.info <- list( preview, dimensions, samples, taxa, name )
  names(table.info)[[1]] <- 'Preview'
  names(table.info)[[2]] <- 'Dimensions'
  names(table.info)[[3]] <- 'Samples'
  names(table.info)[[4]] <- 'Taxa'
  names(table.info)[[5]] <- 'Table'
  
  invisible(table.info)
  
}

Read in OTU table, mapping file, and unifrac distance matrix generated with Qiime.

Read in OTU table using custom function.

FJL.list <- QiimeIn('data/Ftable.from_biom_w_taxonomy.txt')

Read in mapping file, after removing ‘#’ (needed for Qiime scripts) from the header.

FJL.map <- read.delim('data/mergedMapF_no_source_no_kit.txt', head=TRUE, row.names=1, sep='\t')

Read in unweighted unifrac distance matrix, generated with Qiime script. Preview table, and convert to matrix.

unifrac.dist <- read.table('data/unweighted_unifrac_dm.txt', header=T)
unifrac.dist[1:5,1:5]
##              KiB54.630662 KiC69.630743 KiB70.630661 KiB40.630629
## KiB54.630662       0.0000       0.6981       0.6727       0.6479
## KiC69.630743       0.6981       0.0000       0.6550       0.7115
## KiB70.630661       0.6727       0.6550       0.0000       0.6470
## KiB40.630629       0.6479       0.7115       0.6470       0.0000
## KiB36.630609       0.6819       0.6775       0.6682       0.6931
##              KiB36.630609
## KiB54.630662       0.6819
## KiC69.630743       0.6775
## KiB70.630661       0.6682
## KiB40.630629       0.6931
## KiB36.630609       0.0000
unifrac.dist.mat <- as.matrix(unifrac.dist)

Checkpoints to make sure otu table looks correct and everything is lined up.

names(FJL.list)
FJL.list$Preview
FJL.list$Dimensions
FJL.list$Samples
FJL.list$Taxa

Match up/Reorder OTU table, map, and unifrac distance matrix

Extract the OTU table and taxon table into two different objects.

FJL.table <- FJL.list$Table
FJL.taxa <- FJL.list$Taxa

Make sure that the OTU table and map line up. If they don’t, reorder using the next section of code.

identical(row.names(FJL.map), row.names(FJL.table)) #False
cbind(row.names(FJL.map), row.names(FJL.table))     #Not matching
all(row.names(FJL.map) %in% row.names(FJL.table))   #True
all(row.names(FJL.table) %in% row.names(FJL.map))   #True

Reorder both the map and the OTU table so that they have the same rows and columns.

FJL.map <- FJL.map[order(row.names(FJL.map)), ]
FJL.table <- FJL.table[row.names(FJL.map), ]
# an alternative way of doing it (without the two linked together):
# FJL.map <- FJL.map[order(row.names(FJL.map)), ]
# FJL.table <- FJL.table[order(row.names(FJL.table)), ]

Reorder the Unifrac distance matrix so that it has the same rows and columns as the map.

unifrac.dist.mat <- unifrac.dist.mat[row.names(FJL.map), row.names(FJL.map)]

Make groups of interest, to make calling them easier later.

restroom <- which(FJL.map$Room_Function == 'restroom')
restroom.map <- FJL.map[restroom, ]
restroom.table <- FJL.table[restroom, ]

kitchen <- which(FJL.map$Room_Function == 'kitchen')
kitchen.map <- FJL.map[kitchen, ]
kitchen.table <- FJL.table[kitchen, ]

toilet <- which(FJL.map$Place_1 == 'toilet')
fridge <- which(FJL.map$Place_1 == 'refrigerator')

Come back and look at this code later - right now I haven’t removed plants or unclassified from otu table, or map. Check on what has been taken out of unifrac distance matrix with QIIME scripts.

# identify plants by calling them by name
# plants <- grep('Chloroplast', FJL.taxa[,2])
# take out anything not classified as bacteria (i.e. unclassified)
#unclassified <- grep('Unclassified', FJL.taxa[,2])
# and remove these otus (columns) from table
# FJL.table.new <- FJL.table[, -plants]
# dim(FJL.table.new)
# 17281
# FJL.table <- FJL.table.new
#FJL.table.new <- FJL.table.new[, -unclassified]
# find out how many seqs were lost to plants
#cbind(rev(sort(rowSums(FJL.table))), rev(sort(rowSums(FJL.table.new))))

How many OTUs are in my OTU table?

dim(FJL.table) # 17398 OTUs
## [1]   758 17341

How many singletons are in my OTU table?

# abundance curve
plot(rev(sort(colSums(FJL.table))))

plot of chunk plotSingletons

length(which(colSums(FJL.table) == 1))/ncol(FJL.table) # 31% are singletons
## [1] 0.3137

Canberra

Make a Canberra distance matrix.

FJL.can <- vegdist(FJL.table, 'canberra')
FJL.can.mat <- as.matrix(FJL.can)

Make an ordination with the canberra distance matrix.

FJL.can.nmds <- nmds(FJL.can)
## initial  value 36.417895 
## iter   5 value 24.526274
## iter  10 value 23.608134
## final  value 23.354897 
## converged

Plot ordination, colored by different variables.

levels(FJL.map$Place_1)
##  [1] "cabinet"         "counter"         "cutting_board"  
##  [4] "dish_drying_pan" "disposal"        "door"           
##  [7] "drawer"          "faucet"          "freezer"        
## [10] "garbage can"     "garbage_can"     "left_sink"      
## [13] "microwave"       "oven"            "pillowcase"     
## [16] "refrigerator"    "right_sink"      "sink"           
## [19] "soap_dispenser"  "sponge"          "stall"          
## [22] "stove"           "television"      "toilet"         
## [25] "wall"            "water"
colvec.Place_1=c("green4","darkorange","darkolivegreen","darkgoldenrod1","cyan2","cornflowerblue","coral1","chocolate3","chartreuse","blueviolet","aquamarine","darkslateblue","cornsilk1","firebrick","indianred1","lightblue1","darkorchid3","darkgoldenrod4","darkcyan","chartreuse4","greenyellow","brown1","deepskyblue2")
plot(FJL.can.nmds, pch=16, col=colvec.Place_1[FJL.map$Place_1])
legend('topleft', c("cabinet","counter","cutting_board","dish_drying_pan","disposal","door","drawer","faucet","freezer","garbage_can","microwave","oven","pillowcase","refrigerator","sink","soap_dispenser","sponge","stall","stove","television","toilet","wall","water"), cex=0.5, pt.cex=1, text.width=0.2, y.intersp=0.9, col=colvec.Place_1, pch=16)
title(main="Canberra NMDS colored by Place_1")

plot of chunk plotNMDScan

levels(FJL.map$Geolocation)
## [1] "Boulder_CO_USA"       "RaleighDurham_NC_USA" "Seoul_Korea"
colvec.Geoloc=c("darkorchid","darkorange","cyan2")
plot(FJL.can.nmds, pch=16, col = colvec.Geoloc[FJL.map$Geolocation])
legend('topleft', c("Boulder_CO_USA","RaleighDurham_NC_USA","Seoul_Korea"),cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9, col=colvec.Geoloc, pch=16)
title(main='Canberra NMDS colored by Geolocation')

plot of chunk plotNMDScan

levels(FJL.map$phinchID)
## [1] "Flores_Kitchen"  "Flores_restroom" "Jeon_korea"      "Leff_homes"
colvec.pID=c("aquamarine","chartreuse4","firebrick","darkgoldenrod1")
plot(FJL.can.nmds, pch=16, col=colvec.pID[FJL.map$phinchID])
legend('topleft',c("Flores_Kitchen","Flores_restroom","Jeon_korea","Leff_homes"), cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9,col=colvec.pID, pch=16)
title(main='Canberra NMDS colored by study (phinchID)')

plot of chunk plotNMDScan

levels(FJL.map$Room_Function)
## [1] "home"     "kitchen"  "restroom"
colvec.RF=c("darkorchid","cornflowerblue","coral1")
plot(FJL.can.nmds,pch=16,col=colvec.RF[FJL.map$Room_Function])
legend('topleft',c("home","kitchen","restroom"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.RF,pch=16)
title(main='Canberra NMDS colored by Room_Function')

plot of chunk plotNMDScan

levels(FJL.map$Target_Region)
## [1] "V1_V2" "V1_V3" "V4"
colvec.TR=c("deeppink","chartreuse","goldenrod")
plot(FJL.can.nmds,pch=16,col=colvec.TR[FJL.map$Room_Function])
legend('topleft',c("V1_V2","V1_V3","V4"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.TR,pch=16)
title(main='Canberra NMDS colored by Target_Region')

plot of chunk plotNMDScan

levels(FJL.map$Sequencing_Technology)
## [1] "454_GS_Junior"      "Illumina_HiSeq"     "Illumina_HiSeq2000"
## [4] "Illumina_MiSeq"
colvec.ST=c("coral1","cornflowerblue","chartreuse","darkorchid")
plot(FJL.can.nmds,pch=16,col=colvec.ST[FJL.map$Sequencing_Technology])
legend('topleft',c("454_GS_Junior","Illumina_HiSeq","Illumina_HiSeq2000","Illumina_MiSeq"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.ST,pch=16)
title(main='Canberra NMDS colored by Sequencing_Technology')

plot of chunk plotNMDScan

levels(FJL.map$Building_Type2)
## [1] "condo_town"          "family_home"         "low_rise_apt"       
## [4] "university_building"
colvec.BT2=c("darkorchid","aquamarine","deeppink","darkgoldenrod2")
plot(FJL.can.nmds,pch=16,col=colvec.BT2[FJL.map$Building_Type2])
legend('topleft',c("condo_town","family_home","low_rise_apt","university_building"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.BT2,pch=16)
title(main='Canberra NMDS colored by Building_Type2')

plot of chunk plotNMDScan

Could Sequencing technologies (Miseq v. HiSeq/HiSeq2000 be driving that split in the Leff homes data, resulting in the clusters that we see?)

Need to remove the one half of the square for the self-self comparisons.

restroom.restroom.dist <- as.dist(FJL.can.mat[restroom, restroom])
test <- restroom.restroom.dist[upper.tri(restroom.restroom.dist)]
test <- test[-which(is.na(test))]

Make distance matrices between locations of interest, while removing upper triangle for self-self comparisons.

restroom.restroom.can.dist <- as.dist(FJL.can.mat[restroom, restroom])
can.RR.tri <- restroom.restroom.can.dist[upper.tri(restroom.restroom.can.dist)]
restroom.restroom.can.dist <- can.RR.tri[-which(is.na(can.RR.tri))]

kitchen.kitchen.can.dist <- as.dist(FJL.can.mat[kitchen, kitchen])
can.KK.tri <- kitchen.kitchen.can.dist[upper.tri(kitchen.kitchen.can.dist)]
kitchen.kitchen.can.dist <- can.KK.tri[-which(is.na(can.KK.tri))]

toilet.toilet.can.dist <- as.dist(FJL.can.mat[toilet,toilet])
can.TT.tri <- toilet.toilet.can.dist[upper.tri(toilet.toilet.can.dist)]
toilet.toilet.can.dist <- can.TT.tri[-which(is.na(can.TT.tri))]

fridge.fridge.can.dist <- as.dist(FJL.can.mat[fridge,fridge])
can.FF.tri <- fridge.fridge.can.dist[upper.tri(fridge.fridge.can.dist)]
fridge.fridge.can.dist <- can.FF.tri[-which(is.na(can.FF.tri))]

restroom.kitchen.can.dist <- as.dist(FJL.can.mat[restroom, kitchen])
## Warning: non-square matrix
toilet.fridge.can.dist <- as.dist(FJL.can.mat[toilet, fridge])
## Warning: non-square matrix

Make vector of distances for the comparisons you want to make.

mat1 <- as.vector(restroom.restroom.can.dist)
mat2 <- as.vector(kitchen.kitchen.can.dist)
mat3 <- as.vector(toilet.toilet.can.dist)
mat4 <- as.vector(fridge.fridge.can.dist)
mat5 <- as.vector(restroom.kitchen.can.dist)
mat6 <- as.vector(toilet.fridge.can.dist)

Run t-tests on, and make boxplot, of distance comparisons.

Restroom/Restroom v. Restroom/Kitchen

t.test(mat1,mat5)
## 
##  Welch Two Sample t-test
## 
## data:  mat1 and mat5
## t = -34.93, df = 3089, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.02736 -0.02445
## sample estimates:
## mean of x mean of y 
##    0.9622    0.9881
boxplot(mat1,mat5, main="Restroom/Restroom v. Restroom/Kitchen",names=c("Restroom/Restroom","Restroom/Kitchen"), ylab="Canberra Distance")

plot of chunk testDistsCan1

Kitchen/Kitchen v. Restroom/Kitchen

t.test(mat2,mat5)
## 
##  Welch Two Sample t-test
## 
## data:  mat2 and mat5
## t = -164.5, df = 29014, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.04516 -0.04410
## sample estimates:
## mean of x mean of y 
##    0.9435    0.9881
boxplot(mat2,mat5, main="Kitchen/Kitchen v. Restroom/Kitchen",names=c("Kitchen/Kitchen","Restroom/Kitchen"), ylab="Canberra Distance")

plot of chunk testDistsCan2

Unweighted Unifrac

The unweighted unifrac distance matrix was already imported, and re-ordered to match the map file.

Make an ordination with the unweighted unifrac distance matrix.

FJL.uni.nmds <- nmds(unifrac.dist.mat)
## initial  value 32.583909 
## iter   5 value 17.629171
## iter  10 value 16.602288
## iter  10 value 16.588331
## iter  10 value 16.587316
## final  value 16.587316 
## converged

Plot ordination, colored by different variables.

plot(FJL.uni.nmds, pch=16, col=colvec.Place_1[FJL.map$Place_1])
legend('topleft', c("cabinet","counter","cutting_board","dish_drying_pan","disposal","door","drawer","faucet","freezer","garbage_can","microwave","oven","pillowcase","refrigerator","sink","soap_dispenser","sponge","stall","stove","television","toilet","wall","water"), cex=0.5, pt.cex=1, text.width=0.2, y.intersp=0.9, col=colvec.Place_1, pch=16)
title(main="Unweighted Unifrac NMDS colored by Place_1")

plot of chunk plotNMDSuni

plot(FJL.uni.nmds, pch=16, col=colvec.Geoloc[FJL.map$Geolocation])
legend('topleft', c("Boulder_CO_USA","RaleighDurham_NC_USA","Seoul_Korea"),cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9, col=colvec.Geoloc, pch=16)
title(main='Unweighted Unifrac NMDS colored by Geolocation')

plot of chunk plotNMDSuni

plot(FJL.uni.nmds, pch=16, col=colvec.pID[FJL.map$phinchID])
legend('topleft',c("Flores_Kitchen","Flores_restroom","Jeon_korea","Leff_homes"), cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9,col=colvec.pID, pch=16)
title(main='Unweighted Unifrac NMDS colored by study (phinchID)')

plot of chunk plotNMDSuni

plot(FJL.uni.nmds,pch=16,col=colvec.RF[FJL.map$Room_Function])
legend('topleft',c("home","kitchen","restroom"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.RF,pch=16)
title(main='Unweighted Unifrac NMDS colored by Room_Function')

plot of chunk plotNMDSuni

plot(FJL.uni.nmds,pch=16,col=colvec.TR[FJL.map$Room_Function])
legend('topleft',c("V1_V2","V1_V3","V4"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.TR,pch=16)
title(main='Unweighted Unifrac NMDS colored by Target_Region')

plot of chunk plotNMDSuni

plot(FJL.uni.nmds,pch=16,col=colvec.ST[FJL.map$Sequencing_Technology])
legend('topleft',c("454_GS_Junior","Illumina_HiSeq","Illumina_HiSeq2000","Illumina_MiSeq"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.ST,pch=16)
title(main='Unweighted Unifrac NMDS colored by Sequencing_Technology')

plot of chunk plotNMDSuni

plot(FJL.uni.nmds,pch=16,col=colvec.BT2[FJL.map$Building_Type2])
legend('topleft',c("condo_town","family_home","low_rise_apt","university_building"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.BT2,pch=16)
title(main='Unweighted Unifrac colored by Building_Type2')

plot of chunk plotNMDSuni

Make distance matrices between locations of interest, while removing upper triangle for self-self comparisons.

restroom.restroom.uni.dist <- as.dist(unifrac.dist.mat[restroom, restroom])
uni.RR.tri <- restroom.restroom.uni.dist[upper.tri(restroom.restroom.uni.dist)]
restroom.restroom.uni.dist <- uni.RR.tri[-which(is.na(uni.RR.tri))]

kitchen.kitchen.uni.dist <- as.dist(unifrac.dist.mat[kitchen, kitchen])
uni.KK.tri <- kitchen.kitchen.uni.dist[upper.tri(kitchen.kitchen.uni.dist)]
kitchen.kitchen.uni.dist <- uni.KK.tri[-which(is.na(uni.KK.tri))]

toilet.toilet.uni.dist <- as.dist(unifrac.dist.mat[toilet,toilet])
uni.TT.tri <- toilet.toilet.uni.dist[upper.tri(toilet.toilet.uni.dist)]
toilet.toilet.uni.dist <- uni.TT.tri[-which(is.na(uni.TT.tri))]

fridge.fridge.uni.dist <- as.dist(unifrac.dist.mat[fridge,fridge])
uni.FF.tri <- fridge.fridge.uni.dist[upper.tri(fridge.fridge.uni.dist)]
fridge.fridge.uni.dist <- uni.FF.tri[-which(is.na(uni.FF.tri))]

restroom.kitchen.uni.dist <- as.dist(unifrac.dist.mat[restroom, kitchen])
## Warning: non-square matrix
toilet.fridge.uni.dist <- as.dist(unifrac.dist.mat[toilet, fridge])
## Warning: non-square matrix

Make vector of distances for the comparisons you want to make.

mat7 <- as.vector(restroom.restroom.uni.dist)
mat8 <- as.vector(kitchen.kitchen.uni.dist)
mat9 <- as.vector(toilet.toilet.uni.dist)
mat10 <- as.vector(fridge.fridge.uni.dist)
mat11 <- as.vector(restroom.kitchen.uni.dist)
mat12 <- as.vector(toilet.fridge.uni.dist)

Run t-tests on, and make boxplot, of distance comparisons.

Restroom/Restroom v. Restroom/Kitchen

t.test(mat7,mat11)
## 
##  Welch Two Sample t-test
## 
## data:  mat7 and mat11
## t = -27.02, df = 3556, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.05020 -0.04341
## sample estimates:
## mean of x mean of y 
##    0.7357    0.7825
boxplot(mat7,mat11, main= "Restroom/Restroom v. Restroom/Kitchen",names=c("Restroom/Restroom","Restroom/Kitchen"),ylab='Unweighted Unifrac Distance')

plot of chunk testDistsUni1

Kitchen/Kitchen v. Restroom/Kitchen

t.test(mat8,mat11)
## 
##  Welch Two Sample t-test
## 
## data:  mat8 and mat11
## t = -91.55, df = 29935, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.06981 -0.06688
## sample estimates:
## mean of x mean of y 
##    0.7142    0.7825
boxplot(mat8,mat11, main="Kitchen/Kitchen v. Restroom/Kitchen",names=c('Kitchen/Kitchen','Restroom/Kitchen'),ylab='Unweighted Unifrac Distance')

plot of chunk testDistsUni2

Jaccard

Create a Jaccard distance matrix.

FJL.jac <- vegdist(FJL.table, 'jaccard')
FJL.jac.mat <- as.matrix(FJL.jac)

Create an ordination using jaccard.

FJL.jac.nmds <- nmds(FJL.jac)
## initial  value 42.934861 
## iter   5 value 28.977639
## iter  10 value 25.290675
## final  value 25.022110 
## converged

Plot the ordination, colored by different variables.

plot(FJL.jac.nmds, pch=16, col=colvec.Place_1[FJL.map$Place_1])
legend('topleft', c("cabinet","counter","cutting_board","dish_drying_pan","disposal","door","drawer","faucet","freezer","garbage_can","microwave","oven","pillowcase","refrigerator","sink","soap_dispenser","sponge","stall","stove","television","toilet","wall","water"), cex=0.5, pt.cex=1, text.width=0.2, y.intersp=0.9, col=colvec.Place_1, pch=16)
title(main="Jaccard NMDS colored by Place_1")

plot of chunk plotNMDSJaccard

plot(FJL.jac.nmds, pch=16, col=colvec.Geoloc[FJL.map$Geolocation])
legend('topleft', c("Boulder_CO_USA","RaleighDurham_NC_USA","Seoul_Korea"),cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9, col=colvec.Geoloc, pch=16)
title(main='Jaccard NMDS colored by Geolocation')

plot of chunk plotNMDSJaccard

plot(FJL.jac.nmds, pch=16, col=colvec.pID[FJL.map$phinchID])
legend('topleft',c("Flores_Kitchen","Flores_restroom","Jeon_korea","Leff_homes"), cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9,col=colvec.pID, pch=16)
title(main='Jaccard NMDS colored by study (phinchID)')

plot of chunk plotNMDSJaccard

plot(FJL.jac.nmds,pch=16,col=colvec.RF[FJL.map$Room_Function])
legend('topleft',c("home","kitchen","restroom"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.RF,pch=16)
title(main='Jaccard NMDS colored by Room_Function')

plot of chunk plotNMDSJaccard

plot(FJL.jac.nmds,pch=16,col=colvec.TR[FJL.map$Room_Function])
legend('topleft',c("V1_V2","V1_V3","V4"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.TR,pch=16)
title(main='Jaccard NMDS colored by Target_Region')

plot of chunk plotNMDSJaccard

plot(FJL.jac.nmds,pch=16,col=colvec.ST[FJL.map$Sequencing_Technology])
legend('topleft',c("454_GS_Junior","Illumina_HiSeq","Illumina_HiSeq2000","Illumina_MiSeq"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.ST,pch=16)
title(main='Jaccard NMDS colored by Sequencing_Technology')

plot of chunk plotNMDSJaccard

plot(FJL.jac.nmds,pch=16,col=colvec.BT2[FJL.map$Building_Type2])
legend('topleft',c("condo_town","family_home","low_rise_apt","university_building"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.BT2,pch=16)
title(main='Jaccard NMDS colored by Building_Type2')

plot of chunk plotNMDSJaccard

Make distance matrices between locations of interest, while removing upper triangle for self-self comparisons.

restroom.restroom.jac.dist <- as.dist(FJL.jac.mat[restroom, restroom])
jac.RR.tri <- restroom.restroom.jac.dist[upper.tri(restroom.restroom.jac.dist)]
restroom.restroom.jac.dist <- jac.RR.tri[-which(is.na(jac.RR.tri))]

kitchen.kitchen.jac.dist <- as.dist(FJL.jac.mat[kitchen, kitchen])
jac.KK.tri <- kitchen.kitchen.jac.dist[upper.tri(kitchen.kitchen.jac.dist)]
kitchen.kitchen.jac.dist <- jac.KK.tri[-which(is.na(jac.KK.tri))]

toilet.toilet.jac.dist <- as.dist(FJL.jac.mat[toilet,toilet])
jac.TT.tri <- toilet.toilet.jac.dist[upper.tri(toilet.toilet.jac.dist)]
toilet.toilet.jac.dist <- jac.TT.tri[-which(is.na(jac.TT.tri))]

fridge.fridge.jac.dist <- as.dist(FJL.jac.mat[fridge,fridge])
jac.FF.tri <- fridge.fridge.jac.dist[upper.tri(fridge.fridge.jac.dist)]
fridge.fridge.jac.dist <- jac.FF.tri[-which(is.na(jac.FF.tri))]

restroom.kitchen.jac.dist <- as.dist(FJL.jac.mat[restroom, kitchen])
## Warning: non-square matrix
toilet.fridge.jac.dist <- as.dist(FJL.jac.mat[toilet, fridge])
## Warning: non-square matrix

Make vector of distances for the comparisons you want to make.

mat13 <- as.vector(restroom.restroom.jac.dist)
mat14 <- as.vector(kitchen.kitchen.jac.dist)
mat15 <- as.vector(toilet.toilet.jac.dist)
mat16 <- as.vector(fridge.fridge.jac.dist)
mat17 <- as.vector(restroom.kitchen.jac.dist)
mat18 <- as.vector(toilet.fridge.jac.dist)

Run t-tests on, and make boxplot, of distance comparisons.

Restroom/Restroom v. Restroom/Kitchen

t.test(mat13,mat17)
## 
##  Welch Two Sample t-test
## 
## data:  mat13 and mat17
## t = -33.98, df = 2998, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.04784 -0.04262
## sample estimates:
## mean of x mean of y 
##    0.9427    0.9880
boxplot(mat13,mat17, main="Restroom/Restroom v. Restroom/Kitchen",names=c('Restroom/Restroom','Restroom/Kitchen'),ylab='Jaccard Distance')

plot of chunk testDistsJac1

Kitchen/Kitchen v. Restroom/Kitchen

t.test(mat14,mat17)
## 
##  Welch Two Sample t-test
## 
## data:  mat14 and mat17
## t = -139.8, df = 23386, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.08032 -0.07810
## sample estimates:
## mean of x mean of y 
##    0.9088    0.9880
boxplot(mat14,mat17, main="Kitchen/Kitchen v. Restroom/Kitchen",names=c('Kitchen/Kitchen','Restroom/Kitchen'),ylab='Jaccard Distance')

plot of chunk testDistsJac2