cep %>%
str()
## Classes 'tbl_df', 'tbl' and 'data.frame': 290 obs. of 19 variables:
## $ municipality : chr "Ale" "Alingsås" "Alvesta" "Aneby" ...
## $ region : chr "Götaland" "Götaland" "Götaland" "Götaland" ...
## $ county : chr "Västra Götalands län" "Västra Götalands län" "Kronobergs län" "Jönköpings län" ...
## $ income : chr "Middle" "Middle" "Low" "Low" ...
## $ pop.size : num 30223 40390 20026 6776 13934 ...
## $ area : num 317 472 974 518 325 ...
## $ mean.age : num 39.5 42.1 41.8 42.4 44.2 47 45.3 44.6 46.2 43.9 ...
## $ mortality : num 0.84 1.005 0.939 0.945 1.184 ...
## $ natality : num 1.18 1.05 1.31 1.45 1.08 ...
## $ pop.change : num 2.23 0.854 0.879 2.553 0.222 ...
## $ immigration : num 7.41 4.88 6.82 8.09 5.96 ...
## $ emigration : num 5.52 4.12 6.31 6.02 5.76 ...
## $ tax.capacity : num 197627 199056 174595 181317 177804 ...
## $ tax.equal : num -462 -1025 2357 -837 -885 ...
## $ unemployment : num 3.9 5.1 8.5 5.5 9.4 4.8 7.6 6.6 5.2 11.8 ...
## $ foreign.origin : num 21.8 14.6 24.3 14.8 18.2 ...
## $ higher.edu : num 21 24.2 17.2 17.7 18.9 ...
## $ greenhouse.gases: num 103773 105709 119691 50356 62066 ...
## $ dioxin.mg : num 29.5 69.2 69.7 18.5 35 ...
cep <- na.omit(cep) #remove missing values
PCA.model<-princomp(~area+mean.age+mortality+natality+pop.change+
immigration+emigration+tax.capacity+unemployment+foreign.origin+
higher.edu+greenhouse.gases+dioxin.mg, cor=TRUE, data=cep, scores=TRUE)
#the number of PCs to show can be set with ncps.
pca.scatter <- screeplot(PCA.model, npcs = 13, main = "Importance of components")

cep_pc <<- within(cep, {
PC3 <- PCA.model$scores[,3]
PC2 <- PCA.model$scores[,2]
PC1 <- PCA.model$scores[,1] })
autoplot(PCA.model, data = cep, colour = 'income', addEllipses=TRUE, ellipse.level=0.95) +
theme_light() + scale_colour_discrete("income")

autoplot(PCA.model, data = cep, colour = 'region') +
theme_light() + scale_colour_discrete("region")

autoplot(PCA.model, data = cep, colour = 'county') +
theme_light() + scale_colour_discrete("county")

p3<-autoplot(PCA.model, data = cep, colour = 'income', loadings = TRUE,
loadings.colour = 'blue', loadings.label = TRUE, loadings.label.size = 3)
p3 + theme_light() +
scale_colour_discrete("income") +
geom_text(label=cep$municipality, vjust = -1, size=2)

p4<-ggplot(cep_pc) +
aes(x = mean.age, y = PC1, colour = income) +
geom_point(size = 2L) +
scale_color_hue() +
xlab('Mean Age')+
theme_light() +
theme(legend.position ="none")
p5<-ggplot(cep_pc) +
aes(x = emigration, y = PC2, colour = income) +
geom_point(size = 2L) +
scale_color_hue() +
xlab('Emigration')+
theme_light() +
theme(legend.position ="none")
grid.arrange(p4,p5, ncol=2)

df<-subset(cep, select = -c(county,income,region,municipality,pop.size,tax.equal))
PCA.model2<-PCA(df, graph=FALSE)
get_eig(PCA.model2)
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 4.5061448557 34.662652736 34.66265
## Dim.2 2.5582835242 19.679104032 54.34176
## Dim.3 1.8160902012 13.969924624 68.31168
## Dim.4 1.1538428253 8.875714041 77.18740
## Dim.5 0.8077968685 6.213822065 83.40122
## Dim.6 0.6016893918 4.628379937 88.02960
## Dim.7 0.4079067353 3.137744118 91.16734
## Dim.8 0.3603945414 2.772265703 93.93961
## Dim.9 0.2905023827 2.234633713 96.17424
## Dim.10 0.2409524099 1.853480076 98.02772
## Dim.11 0.1597052843 1.228502187 99.25622
## Dim.12 0.0962585005 0.740450003 99.99667
## Dim.13 0.0004324794 0.003326765 100.00000
fviz_screeplot(PCA.model2, addlabels = TRUE, main = 'Importance of principal components', xlab = 'Component')

fviz_contrib(PCA.model2, choice = "var", axes = 1)

fviz_contrib(PCA.model2, choice = "var", axes = 2)

fviz_pca_var(PCA.model2, col.var="contrib",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE
)

fviz_pca_biplot(PCA.model, label="var", habillage=cep$income, addEllipses=TRUE, ellipse.level=0.95)

otu_table(enter)[1:10,1:9]
## OTU Table: [10 taxa and 9 samples]
## taxa are rows
## AM.AD.1 AM.AD.2 DA.AD.1 DA.AD.2 DA.AD.3 DA.AD.4
## -1 0.4265035 0.33226194 0.533604 0.63470800 0.77980981 0.76409898
## Bacteria 0.0000000 0.00000000 0.000000 0.00001328 0.00000000 0.00000000
## Prosthecochloris 0.0000000 0.00000000 0.000000 0.00000000 0.00000000 0.00000000
## Chloroflexus 0.0000000 0.00000000 0.000000 0.00000000 0.00000000 0.00000000
## Dehalococcoides 0.0000000 0.00026835 0.000000 0.00002030 0.00000000 0.00008728
## Thermus 0.0000000 0.00000000 0.000000 0.00000000 0.00000000 0.00000000
## Listeria 0.0000000 0.00000000 0.000000 0.00000000 0.00000000 0.00000000
## Fusobacterium 0.0000000 0.00001508 0.000000 0.00000852 0.00000000 0.00000000
## Streptobacillus 0.0000000 0.00000000 0.000000 0.00000000 0.00005138 0.00000000
## Brevibacillus 0.0000000 0.00000000 0.000000 0.00000237 0.00000000 0.00000000
## ES.AD.1 ES.AD.2 ES.AD.3
## -1 0.25166833 0.4271269 0.3912698
## Bacteria 0.00000000 0.0000000 0.0000000
## Prosthecochloris 0.00000000 0.0000000 0.0000000
## Chloroflexus 0.00000000 0.0000000 0.0000000
## Dehalococcoides 0.00004637 0.0000000 0.0000000
## Thermus 0.00000000 0.0000000 0.0000000
## Listeria 0.00000000 0.0000000 0.0000000
## Fusobacterium 0.00000000 0.0000000 0.0000000
## Streptobacillus 0.00000000 0.0000000 0.0000000
## Brevibacillus 0.00000000 0.0000000 0.0000000
apply(otu_table(enter),2,sum)
## AM.AD.1 AM.AD.2 DA.AD.1 DA.AD.2 DA.AD.3 DA.AD.4 ES.AD.1 ES.AD.2
## 0.9999999 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
## ES.AD.3 ES.AD.4 FR.AD.1 FR.AD.2 FR.AD.3 FR.AD.4 FR.AD.5 FR.AD.6
## 1.0000000 1.0000000 1.0000000 1.0000000 0.9999999 1.0000000 1.0000000 1.0000000
## FR.AD.7 FR.AD.8 IT.AD.1 IT.AD.2 IT.AD.3 IT.AD.4 IT.AD.5 IT.AD.6
## 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000001 0.9999999
## JP.AD.1 JP.AD.2 JP.AD.3 JP.AD.4 JP.AD.5 JP.AD.6 JP.AD.7 JP.AD.8
## 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
## JP.AD.9
## 1.0000000
newsampledata=sample_data(sventer)
newsampledata$Enterotype=addNA(newsampledata$Enterotype)
sample_data(enter)=newsampledata
table(sample_data(enter)$Enterotype)
##
## 1 2 3 <NA>
## 8 6 17 2
levels(sample_data(enter)$Enterotype) =c("1","2","3","4")
table(sample_data(enter)$Enterotype)
##
## 1 2 3 4
## 8 6 17 2
data = as(otu_table(enter), "matrix")
data = data[-1, ] ####Take out the data missing an otu classification.
myTaxa <- names(sort(taxa_sums(enter), decreasing = TRUE)[1:1000])
GP3 <- prune_taxa(myTaxa, enter)
GP3
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 553 taxa and 33 samples ]
## sample_data() Sample Data: [ 33 samples by 9 sample variables ]
## tax_table() Taxonomy Table: [ 553 taxa by 1 taxonomic ranks ]
require(ggplot2)
#Bray-Curtis
nmds.bray <- ordinate(GP3, method="NMDS", distance="bray")
## Run 0 stress 0.1508618
## Run 1 stress 0.1735976
## Run 2 stress 0.1530483
## Run 3 stress 0.155411
## Run 4 stress 0.1530476
## Run 5 stress 0.1925618
## Run 6 stress 0.1508605
## ... New best solution
## ... Procrustes: rmse 0.0006311593 max resid 0.0024577
## ... Similar to previous best
## Run 7 stress 0.1504053
## ... New best solution
## ... Procrustes: rmse 0.03488855 max resid 0.1655918
## Run 8 stress 0.1584142
## Run 9 stress 0.1634679
## Run 10 stress 0.1859829
## Run 11 stress 0.1715057
## Run 12 stress 0.1514534
## Run 13 stress 0.1937191
## Run 14 stress 0.1810915
## Run 15 stress 0.1541248
## Run 16 stress 0.1579322
## Run 17 stress 0.1508605
## ... Procrustes: rmse 0.03492331 max resid 0.1663783
## Run 18 stress 0.1551209
## Run 19 stress 0.1530478
## Run 20 stress 0.1512936
## *** No convergence -- monoMDS stopping criteria:
## 20: stress ratio > sratmax
print(nmds.bray)
##
## Call:
## metaMDS(comm = veganifyOTU(physeq), distance = distance)
##
## global Multidimensional Scaling using monoMDS
##
## Data: veganifyOTU(physeq)
## Distance: bray
##
## Dimensions: 2
## Stress: 0.1504053
## Stress type 1, weak ties
## No convergent solutions - best solution after 20 tries
## Scaling: centring, PC rotation, halfchange scaling
## Species: expanded scores based on 'veganifyOTU(physeq)'
# Create plot, store as temp variable, p
p <- plot_ordination(GP3, nmds.bray, color = "Enterotype", shape = "Nationality")
# Add title to each plot
p <- p + ggtitle("Bray Curtis Distance, NonMetric MDS")
p + geom_point(size = 4)+ theme_bw () + scale_colour_brewer(type="qual", palette="Set1")

veganotu = function(physeq) {
require("vegan")
OTU = otu_table(physeq)
if (taxa_are_rows(OTU)) {
OTU = t(OTU)
}
return(as(OTU, "matrix"))
}
#export data from phyloseq to vegan-compatible object
GP3_vegan<-veganotu(GP3)
# make a data frame that can be used in vegan from the sample_data in the phyloseq object
sampledf <- data.frame(sample_data(GP3))
#create a dissimilarity matrix similar to the one used in the analysis in phyloseq specify the same transformation and standardisation as used earlier.
GP3_BC<-vegdist((GP3_vegan), method = "bray")
# testing for homogeneity of group dispersions
betadisp_GP3<-betadisper(GP3_BC, sampledf$Nationality)
betadisp_GP3
##
## Homogeneity of multivariate dispersions
##
## Call: betadisper(d = GP3_BC, group = sampledf$Nationality)
##
## No. of Positive Eigenvalues: 21
## No. of Negative Eigenvalues: 11
##
## Average distance to median:
## american danish french italian japanese spanish
## 0.1770 0.1596 0.1631 0.2161 0.2269 0.2249
##
## Eigenvalues for PCoA axes:
## (Showing 8 of 32 eigenvalues)
## PCoA1 PCoA2 PCoA3 PCoA4 PCoA5 PCoA6 PCoA7 PCoA8
## 0.94156 0.40317 0.33686 0.20205 0.14594 0.13778 0.08504 0.06803
boxplot(betadisp_GP3, xlab="", las=2, cex.axis=0.8)

anova(betadisp_GP3)
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 5 0.028893 0.0057786 0.7874 0.5679
## Residuals 27 0.198156 0.0073391
adonis(GP3_BC ~ Nationality, data = sampledf)
##
## Call:
## adonis(formula = GP3_BC ~ Nationality, data = sampledf)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Nationality 5 0.91239 0.182479 3.3139 0.3803 0.001 ***
## Residuals 27 1.48675 0.055065 0.6197
## Total 32 2.39914 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
GP3_mds <- metaMDS(venter, trace = FALSE)
GP3_mds
##
## Call:
## metaMDS(comm = venter, trace = FALSE)
##
## global Multidimensional Scaling using monoMDS
##
## Data: venter
## Distance: bray
##
## Dimensions: 2
## Stress: 0.1490281
## Stress type 1, weak ties
## Two convergent solutions found after 20 tries
## Scaling: centring, PC rotation, halfchange scaling
## Species: expanded scores based on 'venter'
# Build a color palette, 9 (number of sample types) colours coming from the Set1 color palette (which was used in the original NMDS above)
colpal <- brewer.pal(9, "Set2")
## Warning in brewer.pal(9, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
plot(GP3_mds,type="n", dis="sp") #ordiplot can also be used.
with(sampledf, points(GP3_mds, display = "species", col = colpal[Nationality],
pch = 16, cex=2))
text(sampledf, cex = 0.65, col = "black",pos=1)
with(sampledf, legend(2.4, 1.5, legend = levels(Nationality), bty = "n",
col = colpal, pch = 16, cex = 1))
text(2.4, -1.5, "Stress = 0.149", cex=1)
