Question: Do rooms in a building look more similar the same kind of room in another building: Do restrooms look like restrooms, or do kitchens look like kitchens, no matter where you live? Or, do regional differences in microbial communities make kitchens and restrooms that are in the same location look more like each other?
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.0-10
library(ape)
library(labdsv)
## Loading required package: mgcv
## Loading required package: nlme
## This is mgcv 1.8-2. For overview type 'help("mgcv-package")'.
## Loading required package: MASS
##
## Attaching package: 'labdsv'
##
## The following object is masked from 'package:stats':
##
## density
### These just for debugging, not evaluated.
# save.image('~/Dropbox/FloresJeonLeff.RData')
# load('~/Dropbox/FloresJeonLeff.RData')
Below is a custom function (QiimeIn) written by James Meadow for reading in OTU tables.
QiimeIn <- function ( file='') {
tmp.table <- read.table ( file, sep='\t', head=TRUE, row.names=1, comment.char='%', skip=1 )
tax.numb <- dim(tmp.table)[2] ## last col is taxon names
taxa.names <- as.character(tmp.table[,tax.numb]) ## save taxon names
tmp.to.t <- tmp.table[,1:tax.numb-1] ## trim names from table
name <- t(tmp.to.t) ## transpose after trimming names
preview <- name[1:5,1:5]
samples <- row.names(name)
taxa.tmp <- cbind(colnames(name),taxa.names)
taxa <- data.frame(taxa.tmp)
names(taxa)[1] <- 'qiime.id'
row.names(taxa) <- taxa.tmp[, 1]
dimensions <- dim(name)
table.info <- list( preview, dimensions, samples, taxa, name )
names(table.info)[[1]] <- 'Preview'
names(table.info)[[2]] <- 'Dimensions'
names(table.info)[[3]] <- 'Samples'
names(table.info)[[4]] <- 'Taxa'
names(table.info)[[5]] <- 'Table'
invisible(table.info)
}
Read in OTU table using custom function.
FJL.list <- QiimeIn('data/Ftable.from_biom_w_taxonomy.txt')
Read in mapping file, after removing ‘#’ (needed for Qiime scripts) from the header.
FJL.map <- read.delim('data/mergedMapF_no_source_no_kit.txt', head=TRUE, row.names=1, sep='\t')
Read in unweighted unifrac distance matrix, generated with Qiime script. Preview table, and convert to matrix.
unifrac.dist <- read.table('data/unweighted_unifrac_dm.txt', header=T)
unifrac.dist[1:5,1:5]
## KiB54.630662 KiC69.630743 KiB70.630661 KiB40.630629
## KiB54.630662 0.0000 0.6981 0.6727 0.6479
## KiC69.630743 0.6981 0.0000 0.6550 0.7115
## KiB70.630661 0.6727 0.6550 0.0000 0.6470
## KiB40.630629 0.6479 0.7115 0.6470 0.0000
## KiB36.630609 0.6819 0.6775 0.6682 0.6931
## KiB36.630609
## KiB54.630662 0.6819
## KiC69.630743 0.6775
## KiB70.630661 0.6682
## KiB40.630629 0.6931
## KiB36.630609 0.0000
unifrac.dist.mat <- as.matrix(unifrac.dist)
Checkpoints to make sure otu table looks correct and everything is lined up.
names(FJL.list)
FJL.list$Preview
FJL.list$Dimensions
FJL.list$Samples
FJL.list$Taxa
Extract the OTU table and taxon table into two different objects.
FJL.table <- FJL.list$Table
FJL.taxa <- FJL.list$Taxa
Make sure that the OTU table and map line up. If they don’t, reorder using the next section of code.
identical(row.names(FJL.map), row.names(FJL.table)) #False
cbind(row.names(FJL.map), row.names(FJL.table)) #Not matching
all(row.names(FJL.map) %in% row.names(FJL.table)) #True
all(row.names(FJL.table) %in% row.names(FJL.map)) #True
Reorder both the map and the OTU table so that they have the same rows and columns.
FJL.map <- FJL.map[order(row.names(FJL.map)), ]
FJL.table <- FJL.table[row.names(FJL.map), ]
# an alternative way of doing it (without the two linked together):
# FJL.map <- FJL.map[order(row.names(FJL.map)), ]
# FJL.table <- FJL.table[order(row.names(FJL.table)), ]
Reorder the Unifrac distance matrix so that it has the same rows and columns as the map.
unifrac.dist.mat <- unifrac.dist.mat[row.names(FJL.map), row.names(FJL.map)]
Make groups of interest, to make calling them easier later.
restroom <- which(FJL.map$Room_Function == 'restroom')
restroom.map <- FJL.map[restroom, ]
restroom.table <- FJL.table[restroom, ]
kitchen <- which(FJL.map$Room_Function == 'kitchen')
kitchen.map <- FJL.map[kitchen, ]
kitchen.table <- FJL.table[kitchen, ]
toilet <- which(FJL.map$Place_1 == 'toilet')
fridge <- which(FJL.map$Place_1 == 'refrigerator')
Come back and look at this code later - right now I haven’t removed plants or unclassified from otu table, or map. Check on what has been taken out of unifrac distance matrix with QIIME scripts.
# identify plants by calling them by name
# plants <- grep('Chloroplast', FJL.taxa[,2])
# take out anything not classified as bacteria (i.e. unclassified)
#unclassified <- grep('Unclassified', FJL.taxa[,2])
# and remove these otus (columns) from table
# FJL.table.new <- FJL.table[, -plants]
# dim(FJL.table.new)
# 17281
# FJL.table <- FJL.table.new
#FJL.table.new <- FJL.table.new[, -unclassified]
# find out how many seqs were lost to plants
#cbind(rev(sort(rowSums(FJL.table))), rev(sort(rowSums(FJL.table.new))))
How many OTUs are in my OTU table?
dim(FJL.table) # 17398 OTUs
## [1] 758 17341
How many singletons are in my OTU table?
# abundance curve
plot(rev(sort(colSums(FJL.table))))
length(which(colSums(FJL.table) == 1))/ncol(FJL.table) # 31% are singletons
## [1] 0.3137
Make a Canberra distance matrix.
FJL.can <- vegdist(FJL.table, 'canberra')
FJL.can.mat <- as.matrix(FJL.can)
Make an ordination with the canberra distance matrix.
FJL.can.nmds <- nmds(FJL.can)
## initial value 36.417895
## iter 5 value 24.526274
## iter 10 value 23.608134
## final value 23.354897
## converged
Plot ordination, colored by different variables.
levels(FJL.map$Place_1)
## [1] "cabinet" "counter" "cutting_board"
## [4] "dish_drying_pan" "disposal" "door"
## [7] "drawer" "faucet" "freezer"
## [10] "garbage can" "garbage_can" "left_sink"
## [13] "microwave" "oven" "pillowcase"
## [16] "refrigerator" "right_sink" "sink"
## [19] "soap_dispenser" "sponge" "stall"
## [22] "stove" "television" "toilet"
## [25] "wall" "water"
colvec.Place_1=c("green4","darkorange","darkolivegreen","darkgoldenrod1","cyan2","cornflowerblue","coral1","chocolate3","chartreuse","blueviolet","aquamarine","darkslateblue","cornsilk1","firebrick","indianred1","lightblue1","darkorchid3","darkgoldenrod4","darkcyan","chartreuse4","greenyellow","brown1","deepskyblue2")
plot(FJL.can.nmds, pch=16, col=colvec.Place_1[FJL.map$Place_1])
legend('topleft', c("cabinet","counter","cutting_board","dish_drying_pan","disposal","door","drawer","faucet","freezer","garbage_can","microwave","oven","pillowcase","refrigerator","sink","soap_dispenser","sponge","stall","stove","television","toilet","wall","water"), cex=0.5, pt.cex=1, text.width=0.2, y.intersp=0.9, col=colvec.Place_1, pch=16)
title(main="Canberra NMDS colored by Place_1")
levels(FJL.map$Geolocation)
## [1] "Boulder_CO_USA" "RaleighDurham_NC_USA" "Seoul_Korea"
colvec.Geoloc=c("darkorchid","darkorange","cyan2")
plot(FJL.can.nmds, pch=16, col = colvec.Geoloc[FJL.map$Geolocation])
legend('topleft', c("Boulder_CO_USA","RaleighDurham_NC_USA","Seoul_Korea"),cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9, col=colvec.Geoloc, pch=16)
title(main='Canberra NMDS colored by Geolocation')
levels(FJL.map$phinchID)
## [1] "Flores_Kitchen" "Flores_restroom" "Jeon_korea" "Leff_homes"
colvec.pID=c("aquamarine","chartreuse4","firebrick","darkgoldenrod1")
plot(FJL.can.nmds, pch=16, col=colvec.pID[FJL.map$phinchID])
legend('topleft',c("Flores_Kitchen","Flores_restroom","Jeon_korea","Leff_homes"), cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9,col=colvec.pID, pch=16)
title(main='Canberra NMDS colored by study (phinchID)')
levels(FJL.map$Room_Function)
## [1] "home" "kitchen" "restroom"
colvec.RF=c("darkorchid","cornflowerblue","coral1")
plot(FJL.can.nmds,pch=16,col=colvec.RF[FJL.map$Room_Function])
legend('topleft',c("home","kitchen","restroom"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.RF,pch=16)
title(main='Canberra NMDS colored by Room_Function')
levels(FJL.map$Target_Region)
## [1] "V1_V2" "V1_V3" "V4"
colvec.TR=c("deeppink","chartreuse","goldenrod")
plot(FJL.can.nmds,pch=16,col=colvec.TR[FJL.map$Room_Function])
legend('topleft',c("V1_V2","V1_V3","V4"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.TR,pch=16)
title(main='Canberra NMDS colored by Target_Region')
levels(FJL.map$Sequencing_Technology)
## [1] "454_GS_Junior" "Illumina_HiSeq" "Illumina_HiSeq2000"
## [4] "Illumina_MiSeq"
colvec.ST=c("coral1","cornflowerblue","chartreuse","darkorchid")
plot(FJL.can.nmds,pch=16,col=colvec.ST[FJL.map$Sequencing_Technology])
legend('topleft',c("454_GS_Junior","Illumina_HiSeq","Illumina_HiSeq2000","Illumina_MiSeq"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.ST,pch=16)
title(main='Canberra NMDS colored by Sequencing_Technology')
levels(FJL.map$Building_Type2)
## [1] "condo_town" "family_home" "low_rise_apt"
## [4] "university_building"
colvec.BT2=c("darkorchid","aquamarine","deeppink","darkgoldenrod2")
plot(FJL.can.nmds,pch=16,col=colvec.BT2[FJL.map$Building_Type2])
legend('topleft',c("condo_town","family_home","low_rise_apt","university_building"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.BT2,pch=16)
title(main='Canberra NMDS colored by Building_Type2')
Could Sequencing technologies (Miseq v. HiSeq/HiSeq2000 be driving that split in the Leff homes data, resulting in the clusters that we see?)
Need to remove the one half of the square for the self-self comparisons.
restroom.restroom.dist <- as.dist(FJL.can.mat[restroom, restroom])
test <- restroom.restroom.dist[upper.tri(restroom.restroom.dist)]
test <- test[-which(is.na(test))]
Make distance matrices between locations of interest, while removing upper triangle for self-self comparisons.
restroom.restroom.can.dist <- as.dist(FJL.can.mat[restroom, restroom])
can.RR.tri <- restroom.restroom.can.dist[upper.tri(restroom.restroom.can.dist)]
restroom.restroom.can.dist <- can.RR.tri[-which(is.na(can.RR.tri))]
kitchen.kitchen.can.dist <- as.dist(FJL.can.mat[kitchen, kitchen])
can.KK.tri <- kitchen.kitchen.can.dist[upper.tri(kitchen.kitchen.can.dist)]
kitchen.kitchen.can.dist <- can.KK.tri[-which(is.na(can.KK.tri))]
toilet.toilet.can.dist <- as.dist(FJL.can.mat[toilet,toilet])
can.TT.tri <- toilet.toilet.can.dist[upper.tri(toilet.toilet.can.dist)]
toilet.toilet.can.dist <- can.TT.tri[-which(is.na(can.TT.tri))]
fridge.fridge.can.dist <- as.dist(FJL.can.mat[fridge,fridge])
can.FF.tri <- fridge.fridge.can.dist[upper.tri(fridge.fridge.can.dist)]
fridge.fridge.can.dist <- can.FF.tri[-which(is.na(can.FF.tri))]
restroom.kitchen.can.dist <- as.dist(FJL.can.mat[restroom, kitchen])
## Warning: non-square matrix
toilet.fridge.can.dist <- as.dist(FJL.can.mat[toilet, fridge])
## Warning: non-square matrix
Make vector of distances for the comparisons you want to make.
mat1 <- as.vector(restroom.restroom.can.dist)
mat2 <- as.vector(kitchen.kitchen.can.dist)
mat3 <- as.vector(toilet.toilet.can.dist)
mat4 <- as.vector(fridge.fridge.can.dist)
mat5 <- as.vector(restroom.kitchen.can.dist)
mat6 <- as.vector(toilet.fridge.can.dist)
Run t-tests on, and make boxplot, of distance comparisons.
t.test(mat1,mat5)
##
## Welch Two Sample t-test
##
## data: mat1 and mat5
## t = -34.93, df = 3089, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.02736 -0.02445
## sample estimates:
## mean of x mean of y
## 0.9622 0.9881
boxplot(mat1,mat5, main="Restroom/Restroom v. Restroom/Kitchen",names=c("Restroom/Restroom","Restroom/Kitchen"), ylab="Canberra Distance")
t.test(mat2,mat5)
##
## Welch Two Sample t-test
##
## data: mat2 and mat5
## t = -164.5, df = 29014, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.04516 -0.04410
## sample estimates:
## mean of x mean of y
## 0.9435 0.9881
boxplot(mat2,mat5, main="Kitchen/Kitchen v. Restroom/Kitchen",names=c("Kitchen/Kitchen","Restroom/Kitchen"), ylab="Canberra Distance")
The unweighted unifrac distance matrix was already imported, and re-ordered to match the map file.
Make an ordination with the unweighted unifrac distance matrix.
FJL.uni.nmds <- nmds(unifrac.dist.mat)
## initial value 32.583909
## iter 5 value 17.629171
## iter 10 value 16.602288
## iter 10 value 16.588331
## iter 10 value 16.587316
## final value 16.587316
## converged
Plot ordination, colored by different variables.
plot(FJL.uni.nmds, pch=16, col=colvec.Place_1[FJL.map$Place_1])
legend('topleft', c("cabinet","counter","cutting_board","dish_drying_pan","disposal","door","drawer","faucet","freezer","garbage_can","microwave","oven","pillowcase","refrigerator","sink","soap_dispenser","sponge","stall","stove","television","toilet","wall","water"), cex=0.5, pt.cex=1, text.width=0.2, y.intersp=0.9, col=colvec.Place_1, pch=16)
title(main="Unweighted Unifrac NMDS colored by Place_1")
plot(FJL.uni.nmds, pch=16, col=colvec.Geoloc[FJL.map$Geolocation])
legend('topleft', c("Boulder_CO_USA","RaleighDurham_NC_USA","Seoul_Korea"),cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9, col=colvec.Geoloc, pch=16)
title(main='Unweighted Unifrac NMDS colored by Geolocation')
plot(FJL.uni.nmds, pch=16, col=colvec.pID[FJL.map$phinchID])
legend('topleft',c("Flores_Kitchen","Flores_restroom","Jeon_korea","Leff_homes"), cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9,col=colvec.pID, pch=16)
title(main='Unweighted Unifrac NMDS colored by study (phinchID)')
plot(FJL.uni.nmds,pch=16,col=colvec.RF[FJL.map$Room_Function])
legend('topleft',c("home","kitchen","restroom"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.RF,pch=16)
title(main='Unweighted Unifrac NMDS colored by Room_Function')
plot(FJL.uni.nmds,pch=16,col=colvec.TR[FJL.map$Room_Function])
legend('topleft',c("V1_V2","V1_V3","V4"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.TR,pch=16)
title(main='Unweighted Unifrac NMDS colored by Target_Region')
plot(FJL.uni.nmds,pch=16,col=colvec.ST[FJL.map$Sequencing_Technology])
legend('topleft',c("454_GS_Junior","Illumina_HiSeq","Illumina_HiSeq2000","Illumina_MiSeq"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.ST,pch=16)
title(main='Unweighted Unifrac NMDS colored by Sequencing_Technology')
plot(FJL.uni.nmds,pch=16,col=colvec.BT2[FJL.map$Building_Type2])
legend('topleft',c("condo_town","family_home","low_rise_apt","university_building"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.BT2,pch=16)
title(main='Unweighted Unifrac colored by Building_Type2')
Make distance matrices between locations of interest, while removing upper triangle for self-self comparisons.
restroom.restroom.uni.dist <- as.dist(unifrac.dist.mat[restroom, restroom])
uni.RR.tri <- restroom.restroom.uni.dist[upper.tri(restroom.restroom.uni.dist)]
restroom.restroom.uni.dist <- uni.RR.tri[-which(is.na(uni.RR.tri))]
kitchen.kitchen.uni.dist <- as.dist(unifrac.dist.mat[kitchen, kitchen])
uni.KK.tri <- kitchen.kitchen.uni.dist[upper.tri(kitchen.kitchen.uni.dist)]
kitchen.kitchen.uni.dist <- uni.KK.tri[-which(is.na(uni.KK.tri))]
toilet.toilet.uni.dist <- as.dist(unifrac.dist.mat[toilet,toilet])
uni.TT.tri <- toilet.toilet.uni.dist[upper.tri(toilet.toilet.uni.dist)]
toilet.toilet.uni.dist <- uni.TT.tri[-which(is.na(uni.TT.tri))]
fridge.fridge.uni.dist <- as.dist(unifrac.dist.mat[fridge,fridge])
uni.FF.tri <- fridge.fridge.uni.dist[upper.tri(fridge.fridge.uni.dist)]
fridge.fridge.uni.dist <- uni.FF.tri[-which(is.na(uni.FF.tri))]
restroom.kitchen.uni.dist <- as.dist(unifrac.dist.mat[restroom, kitchen])
## Warning: non-square matrix
toilet.fridge.uni.dist <- as.dist(unifrac.dist.mat[toilet, fridge])
## Warning: non-square matrix
Make vector of distances for the comparisons you want to make.
mat7 <- as.vector(restroom.restroom.uni.dist)
mat8 <- as.vector(kitchen.kitchen.uni.dist)
mat9 <- as.vector(toilet.toilet.uni.dist)
mat10 <- as.vector(fridge.fridge.uni.dist)
mat11 <- as.vector(restroom.kitchen.uni.dist)
mat12 <- as.vector(toilet.fridge.uni.dist)
Run t-tests on, and make boxplot, of distance comparisons.
t.test(mat7,mat11)
##
## Welch Two Sample t-test
##
## data: mat7 and mat11
## t = -27.02, df = 3556, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.05020 -0.04341
## sample estimates:
## mean of x mean of y
## 0.7357 0.7825
boxplot(mat7,mat11, main= "Restroom/Restroom v. Restroom/Kitchen",names=c("Restroom/Restroom","Restroom/Kitchen"),ylab='Unweighted Unifrac Distance')
t.test(mat8,mat11)
##
## Welch Two Sample t-test
##
## data: mat8 and mat11
## t = -91.55, df = 29935, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.06981 -0.06688
## sample estimates:
## mean of x mean of y
## 0.7142 0.7825
boxplot(mat8,mat11, main="Kitchen/Kitchen v. Restroom/Kitchen",names=c('Kitchen/Kitchen','Restroom/Kitchen'),ylab='Unweighted Unifrac Distance')
Create a Jaccard distance matrix.
FJL.jac <- vegdist(FJL.table, 'jaccard')
FJL.jac.mat <- as.matrix(FJL.jac)
Create an ordination using jaccard.
FJL.jac.nmds <- nmds(FJL.jac)
## initial value 42.934861
## iter 5 value 28.977639
## iter 10 value 25.290675
## final value 25.022110
## converged
Plot the ordination, colored by different variables.
plot(FJL.jac.nmds, pch=16, col=colvec.Place_1[FJL.map$Place_1])
legend('topleft', c("cabinet","counter","cutting_board","dish_drying_pan","disposal","door","drawer","faucet","freezer","garbage_can","microwave","oven","pillowcase","refrigerator","sink","soap_dispenser","sponge","stall","stove","television","toilet","wall","water"), cex=0.5, pt.cex=1, text.width=0.2, y.intersp=0.9, col=colvec.Place_1, pch=16)
title(main="Jaccard NMDS colored by Place_1")
plot(FJL.jac.nmds, pch=16, col=colvec.Geoloc[FJL.map$Geolocation])
legend('topleft', c("Boulder_CO_USA","RaleighDurham_NC_USA","Seoul_Korea"),cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9, col=colvec.Geoloc, pch=16)
title(main='Jaccard NMDS colored by Geolocation')
plot(FJL.jac.nmds, pch=16, col=colvec.pID[FJL.map$phinchID])
legend('topleft',c("Flores_Kitchen","Flores_restroom","Jeon_korea","Leff_homes"), cex=0.5, pt.cex=1, text.width=0.4,y.intersp=0.9,col=colvec.pID, pch=16)
title(main='Jaccard NMDS colored by study (phinchID)')
plot(FJL.jac.nmds,pch=16,col=colvec.RF[FJL.map$Room_Function])
legend('topleft',c("home","kitchen","restroom"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.RF,pch=16)
title(main='Jaccard NMDS colored by Room_Function')
plot(FJL.jac.nmds,pch=16,col=colvec.TR[FJL.map$Room_Function])
legend('topleft',c("V1_V2","V1_V3","V4"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.TR,pch=16)
title(main='Jaccard NMDS colored by Target_Region')
plot(FJL.jac.nmds,pch=16,col=colvec.ST[FJL.map$Sequencing_Technology])
legend('topleft',c("454_GS_Junior","Illumina_HiSeq","Illumina_HiSeq2000","Illumina_MiSeq"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.ST,pch=16)
title(main='Jaccard NMDS colored by Sequencing_Technology')
plot(FJL.jac.nmds,pch=16,col=colvec.BT2[FJL.map$Building_Type2])
legend('topleft',c("condo_town","family_home","low_rise_apt","university_building"),cex=0.5,pt.cex=1, text.width=0.4, y.intersp=0.9, col=colvec.BT2,pch=16)
title(main='Jaccard NMDS colored by Building_Type2')
Make distance matrices between locations of interest, while removing upper triangle for self-self comparisons.
restroom.restroom.jac.dist <- as.dist(FJL.jac.mat[restroom, restroom])
jac.RR.tri <- restroom.restroom.jac.dist[upper.tri(restroom.restroom.jac.dist)]
restroom.restroom.jac.dist <- jac.RR.tri[-which(is.na(jac.RR.tri))]
kitchen.kitchen.jac.dist <- as.dist(FJL.jac.mat[kitchen, kitchen])
jac.KK.tri <- kitchen.kitchen.jac.dist[upper.tri(kitchen.kitchen.jac.dist)]
kitchen.kitchen.jac.dist <- jac.KK.tri[-which(is.na(jac.KK.tri))]
toilet.toilet.jac.dist <- as.dist(FJL.jac.mat[toilet,toilet])
jac.TT.tri <- toilet.toilet.jac.dist[upper.tri(toilet.toilet.jac.dist)]
toilet.toilet.jac.dist <- jac.TT.tri[-which(is.na(jac.TT.tri))]
fridge.fridge.jac.dist <- as.dist(FJL.jac.mat[fridge,fridge])
jac.FF.tri <- fridge.fridge.jac.dist[upper.tri(fridge.fridge.jac.dist)]
fridge.fridge.jac.dist <- jac.FF.tri[-which(is.na(jac.FF.tri))]
restroom.kitchen.jac.dist <- as.dist(FJL.jac.mat[restroom, kitchen])
## Warning: non-square matrix
toilet.fridge.jac.dist <- as.dist(FJL.jac.mat[toilet, fridge])
## Warning: non-square matrix
Make vector of distances for the comparisons you want to make.
mat13 <- as.vector(restroom.restroom.jac.dist)
mat14 <- as.vector(kitchen.kitchen.jac.dist)
mat15 <- as.vector(toilet.toilet.jac.dist)
mat16 <- as.vector(fridge.fridge.jac.dist)
mat17 <- as.vector(restroom.kitchen.jac.dist)
mat18 <- as.vector(toilet.fridge.jac.dist)
Run t-tests on, and make boxplot, of distance comparisons.
t.test(mat13,mat17)
##
## Welch Two Sample t-test
##
## data: mat13 and mat17
## t = -33.98, df = 2998, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.04784 -0.04262
## sample estimates:
## mean of x mean of y
## 0.9427 0.9880
boxplot(mat13,mat17, main="Restroom/Restroom v. Restroom/Kitchen",names=c('Restroom/Restroom','Restroom/Kitchen'),ylab='Jaccard Distance')
t.test(mat14,mat17)
##
## Welch Two Sample t-test
##
## data: mat14 and mat17
## t = -139.8, df = 23386, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.08032 -0.07810
## sample estimates:
## mean of x mean of y
## 0.9088 0.9880
boxplot(mat14,mat17, main="Kitchen/Kitchen v. Restroom/Kitchen",names=c('Kitchen/Kitchen','Restroom/Kitchen'),ylab='Jaccard Distance')