#set up environment
setwd("~/Desktop")
library(ggplot2)
library(broom)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
df=read.table(text="Gut Shannon index Skin Shannon index Tongue Shannon index Age BMI height
2.738 2.339 2.923 34 26.071 163.9
2.008 2.414 2.753 41 21.327 162.0
1.995 2.519 2.382 48 22.068 162.1
2.189 2.546 2.011 44 23.410 156.0
2.209 2.592 2.278 40 21.403 164.6
2.162 2.717 2.602 42 24.496 149.8
2.010 2.752 2.617 54 19.644 179.1
2.476 2.756 2.316 57 21.857 164.3
1.776 2.780 2.642 24 21.712 164.8
1.702 2.790 2.171 44 20.947 162.0
2.307 2.812 2.556 36 22.113 159.1
2.053 2.832 2.236 58 25.020 158.7
1.963 2.839 2.481 51 23.414 160.9
2.076 2.840 2.176 36 25.066 157.3
2.399 2.846 2.699 54 25.238 150.6
2.192 2.869 2.549 53 23.389 169.3
2.013 2.896 2.567 47 20.818 155.0
1.833 2.906 2.337 41 22.495 160.6
1.731 2.937 2.152 34 22.758 161.0
1.848 2.938 2.087 47 21.540 165.5
2.037 2.961 2.625 43 23.774 150.7
1.950 2.968 2.534 43 24.323 158.4
2.376 2.993 2.596 41 20.124 163.8
1.687 3.000 2.288 41 20.183 166.6
1.568 3.020 2.563 54 25.932 152.1
2.370 3.031 2.836 52 26.563 155.2
2.073 3.074 2.169 25 21.702 156.3
1.822 3.085 2.824 49 22.061 171.6
1.918 3.110 2.673 53 24.391 160.7
2.224 3.115 2.747 50 21.381 161.8
1.551 3.158 2.335 49 21.636 158.0
2.266 3.162 1.947 65 25.414 151.1
1.711 3.169 2.518 36 23.406 161.4
2.283 3.183 2.664 43 23.068 162.6
2.034 3.188 2.706 63 19.895 173.7
1.850 3.189 2.381 37 22.052 153.6
2.229 3.190 2.440 53 24.018 160.1
2.526 3.214 2.603 53 27.695 152.8
2.834 3.284 2.415 45 21.875 165.6
2.246 3.287 2.447 46 22.189 160.3
1.555 3.289 2.201 47 22.344 149.6
1.756 3.295 2.477 52 22.762 165.0
2.272 3.323 2.310 61 26.946 156.5
2.265 3.336 2.502 62 25.025 163.6
2.360 3.438 2.639 61 25.560 166.4
2.231 3.459 2.410 38 21.361 159.0
2.147 3.673 2.459 56 24.304 158.4
2.481 3.752 2.710 60 26.596 159.9
2.428 3.821 2.745 65 26.260 157.3
2.768 3.870 2.696 57 29.624 150.1",sep="\t",header=T)
colnames(df)=c("gut_Shannon_index","skin_Shannon_index","tongue_Shannon_index","age","BMI","height")
df=as.data.frame(df)
df$gut_Shannon_index=as.numeric(df$gut_Shannon_index)
df$skin_Shannon_index=as.numeric(df$skin_Shannon_index)
df$tongue_Shannon_index=as.numeric(df$tongue_Shannon_index)
df$age=as.numeric(df$age)
df$BMI=as.numeric(df$BMI)
df$height=as.numeric(df$height)
df$participant=1:50
df$participant=as.character(df$participant)
#shapiro test
shapiro_gut=tidy(shapiro.test(df$gut_Shannon_index))
shapiro_skin=tidy(shapiro.test(df$skin_Shannon_index))
shapiro_tongue=tidy(shapiro.test(df$tongue_Shannon_index))
shapiro_Shannon=rbind(shapiro_gut,shapiro_skin,shapiro_tongue)
shapiro_Shannon$body_site=c("gut","skin","tongue")
#ANOVA by body site
body_site=data.frame(c(rep("gut",50),rep("skin",50),rep("tongue",50)))
body_site_df=cbind(body_site,stack(df[1:3]))
colnames(body_site_df)=c("body_site","Shannon_index","ID")
model=lm(Shannon_index~body_site,data=body_site_df)
one_way_anova=tidy(anova(model))
#Shannon_means and ANOVA
mean_gut=tibble(mean(df$gut_Shannon_index))
mean_skin=tibble(mean(df$skin_Shannon_index))
mean_tongue=tibble(mean(df$tongue_Shannon_index))
mean_Shannon=cbind(mean_gut,mean_skin,mean_tongue,one_way_anova$p.value)
colnames(mean_Shannon)=c("gut","skin","tongue","p.value.anova")
#Correlation to phenotypes
#age
gut_age_pearson=tidy(cor.test(df$gut_Shannon_index,df$age,method="pearson"))
skin_age_pearson=tidy(cor.test(df$skin_Shannon_index,df$age,method="pearson"))
tongue_age_pearson=tidy(cor.test(df$tongue_Shannon_index,df$age,method="pearson"))
age_pearson=rbind(gut_age_pearson,skin_age_pearson,tongue_age_pearson)
age_pearson$body_site=c("gut","skin","tongue")
age_pearson$phenotype=rep("age",3)
colnames(age_pearson)=c("R","statistic","p.value","parameter","conf.low","conf.high","method","alternative","body_site","phenotype")
plot(df$gut_Shannon_index~df$age,ylim=c(0,4),xlim=c(20,70))

dev.copy(png,"Q4_gut_age.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
plot(df$skin_Shannon_index~df$age,ylim=c(0,4),xlim=c(20,70))

dev.copy(png,"Q4_skin_age.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
plot(df$tongue_Shannon_index~df$age,ylim=c(0,4),xlim=c(20,70))

dev.copy(png,"Q4_tongue_age.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
#BMI
gut_BMI_pearson=tidy(cor.test(df$gut_Shannon_index,df$BMI,method="pearson"))
skin_BMI_pearson=tidy(cor.test(df$skin_Shannon_index,df$BMI,method="pearson"))
tongue_BMI_pearson=tidy(cor.test(df$tongue_Shannon_index,df$BMI,method="pearson"))
BMI_pearson=rbind(gut_BMI_pearson,skin_BMI_pearson,tongue_BMI_pearson)
BMI_pearson$body_site=c("gut","skin","tongue")
BMI_pearson$phenotype=rep("BMI",3)
colnames(BMI_pearson)=c("R","statistic","p.value","parameter","conf.low","conf.high","method","alternative","body_site","phenotype")
plot(df$gut_Shannon_index~df$BMI,ylim=c(0,4),xlim=c(15,35))

dev.copy(png,"Q4_gut_BMI.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
plot(df$skin_Shannon_index~df$BMI,ylim=c(0,4),xlim=c(15,35))

dev.copy(png,"Q4_skin_BMI.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
plot(df$tongue_Shannon_index~df$BMI,ylim=c(0,4),xlim=c(15,35))

dev.copy(png,"Q4_tongue_BMI.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
#height
gut_height_pearson=tidy(cor.test(df$gut_Shannon_index,df$height,method="pearson"))
skin_height_pearson=tidy(cor.test(df$skin_Shannon_index,df$height,method="pearson"))
tongue_height_pearson=tidy(cor.test(df$tongue_Shannon_index,df$height,method="pearson"))
height_pearson=rbind(gut_height_pearson,skin_height_pearson,tongue_height_pearson)
height_pearson$body_site=c("gut","skin","tongue")
height_pearson$phenotype=rep("height",3)
colnames(height_pearson)=c("R","statistic","p.value","parameter","conf.low","conf.high","method","alternative","body_site","phenotype")
plot(df$gut_Shannon_index~df$height,ylim=c(0,4),xlim=c(100,200))

dev.copy(png,"Q4_gut_height.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
plot(df$skin_Shannon_index~df$height,ylim=c(0,4),xlim=c(100,200))

dev.copy(png,"Q4_skin_height.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
plot(df$tongue_Shannon_index~df$height,ylim=c(0,4),xlim=c(100,200))

dev.copy(png,"Q4_tongue_height.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
#concatenate pearson
pearson_correlation=rbind(age_pearson,BMI_pearson,height_pearson)
#export restuls
Q4_shapiro.csv=write.csv(shapiro_Shannon,"~/Desktop/Q4.shapiro.shannon.csv")
Q4_pearson_correlation.csv=write.csv(pearson_correlation,"~/Desktop/Q4.pearson.correlation.csv")
Q4_shannon.csv=write.csv(mean_Shannon,"~/Desktop/Q4.shannon.csv")
#plot significant correlations
significant_correlation=subset(pearson_correlation,p.value <= 0.05)
print(significant_correlation[,9:10])
## # A tibble: 3 x 2
## body_site phenotype
## <chr> <chr>
## 1 skin age
## 2 gut BMI
## 3 skin BMI
#skin vs age
g=ggplot(df,aes(x=age,y=skin_Shannon_index,color=skin_Shannon_index))+geom_point(aes(color=skin_Shannon_index),size=3)+geom_point(shape=1,size=3,colour="black")+geom_smooth(method="lm",color="black")+labs(title="skin microbial diversity vs age",x="age",y="Shannon index",color="Shannon index")+theme(plot.title=element_text(hjust=0.5),text=element_text(size=18,face="bold"))
g+scale_color_gradientn(colors=rainbow(5),limits=c(1,4))
## `geom_smooth()` using formula 'y ~ x'

dev.copy(png,"Q4_skin_vs_age_significant.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
#gut vs BMI
g=ggplot(df,aes(x=BMI,y=gut_Shannon_index,color=gut_Shannon_index))+geom_point(aes(color=gut_Shannon_index),size=3)+geom_point(shape=1,size=3,colour="black")+geom_smooth(method="lm",color="black")+labs(title="gut microbial diversity vs BMI",x="BMI",y="Shannon index",color="Shannon index")+theme(plot.title=element_text(hjust=0.5),text=element_text(size=18,face="bold"))
g+scale_color_gradientn(colors=rainbow(5),limits=c(1,4))
## `geom_smooth()` using formula 'y ~ x'

dev.copy(png,"Q4_gut_vs_BMI_significant.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
#skin vs BMI
g=ggplot(df,aes(x=BMI,y=skin_Shannon_index,color=skin_Shannon_index))+geom_point(aes(color=skin_Shannon_index),size=3)+geom_point(shape=1,size=3,colour="black")+geom_smooth(method="lm",color="black")+labs(title="skin microbial diversity vs BMI",x="BMI",y="Shannon index",color="Shannon index")+theme(plot.title=element_text(hjust=0.5),text=element_text(size=18,face="bold"))
g+scale_color_gradientn(colors=rainbow(5),limits=c(1,4))
## `geom_smooth()` using formula 'y ~ x'

dev.copy(png,"Q4_skin_vs_BMI_significant.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
#plot overall Shannon
p=ggplot(body_site_df,aes(x=body_site,y=Shannon_index,color=Shannon_index))
p1 =geom_boxplot(aes(x=body_site,y=Shannon_index,fill=NULL),outlier.shape = NULL,outlier.color = NA)
p2 =geom_jitter(aes(color=Shannon_index),size=3,position=position_jitter(width=0.1,seed=1))
p3=geom_jitter(shape=1,size=3,colour="black",position=position_jitter(width=0.1,seed=1))
p+p1+p2+p3+scale_color_gradientn(colors=rainbow(5),limits=c(1.5,4))+labs(title="microbial diversity by body site",x="body site",y="Shannon index",color="Shannon index")+theme(plot.title=element_text(hjust=0.5),text=element_text(size=18,face="bold"))

dev.copy(png,"Q4_Shannon.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2