#set up environment
setwd("~/Desktop")
library(ggplot2)
library(broom)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
df=read.table(text="Gut Shannon index   Skin Shannon index  Tongue Shannon index    Age BMI height
2.738   2.339   2.923   34  26.071  163.9
2.008   2.414   2.753   41  21.327  162.0
1.995   2.519   2.382   48  22.068  162.1
2.189   2.546   2.011   44  23.410  156.0
2.209   2.592   2.278   40  21.403  164.6
2.162   2.717   2.602   42  24.496  149.8
2.010   2.752   2.617   54  19.644  179.1
2.476   2.756   2.316   57  21.857  164.3
1.776   2.780   2.642   24  21.712  164.8
1.702   2.790   2.171   44  20.947  162.0
2.307   2.812   2.556   36  22.113  159.1
2.053   2.832   2.236   58  25.020  158.7
1.963   2.839   2.481   51  23.414  160.9
2.076   2.840   2.176   36  25.066  157.3
2.399   2.846   2.699   54  25.238  150.6
2.192   2.869   2.549   53  23.389  169.3
2.013   2.896   2.567   47  20.818  155.0
1.833   2.906   2.337   41  22.495  160.6
1.731   2.937   2.152   34  22.758  161.0
1.848   2.938   2.087   47  21.540  165.5
2.037   2.961   2.625   43  23.774  150.7
1.950   2.968   2.534   43  24.323  158.4
2.376   2.993   2.596   41  20.124  163.8
1.687   3.000   2.288   41  20.183  166.6
1.568   3.020   2.563   54  25.932  152.1
2.370   3.031   2.836   52  26.563  155.2
2.073   3.074   2.169   25  21.702  156.3
1.822   3.085   2.824   49  22.061  171.6
1.918   3.110   2.673   53  24.391  160.7
2.224   3.115   2.747   50  21.381  161.8
1.551   3.158   2.335   49  21.636  158.0
2.266   3.162   1.947   65  25.414  151.1
1.711   3.169   2.518   36  23.406  161.4
2.283   3.183   2.664   43  23.068  162.6
2.034   3.188   2.706   63  19.895  173.7
1.850   3.189   2.381   37  22.052  153.6
2.229   3.190   2.440   53  24.018  160.1
2.526   3.214   2.603   53  27.695  152.8
2.834   3.284   2.415   45  21.875  165.6
2.246   3.287   2.447   46  22.189  160.3
1.555   3.289   2.201   47  22.344  149.6
1.756   3.295   2.477   52  22.762  165.0
2.272   3.323   2.310   61  26.946  156.5
2.265   3.336   2.502   62  25.025  163.6
2.360   3.438   2.639   61  25.560  166.4
2.231   3.459   2.410   38  21.361  159.0
2.147   3.673   2.459   56  24.304  158.4
2.481   3.752   2.710   60  26.596  159.9
2.428   3.821   2.745   65  26.260  157.3
2.768   3.870   2.696   57  29.624  150.1",sep="\t",header=T)
colnames(df)=c("gut_Shannon_index","skin_Shannon_index","tongue_Shannon_index","age","BMI","height")
df=as.data.frame(df)
df$gut_Shannon_index=as.numeric(df$gut_Shannon_index)
df$skin_Shannon_index=as.numeric(df$skin_Shannon_index)
df$tongue_Shannon_index=as.numeric(df$tongue_Shannon_index)
df$age=as.numeric(df$age)
df$BMI=as.numeric(df$BMI)
df$height=as.numeric(df$height)
df$participant=1:50
df$participant=as.character(df$participant)
#shapiro test
shapiro_gut=tidy(shapiro.test(df$gut_Shannon_index))
shapiro_skin=tidy(shapiro.test(df$skin_Shannon_index))
shapiro_tongue=tidy(shapiro.test(df$tongue_Shannon_index))
shapiro_Shannon=rbind(shapiro_gut,shapiro_skin,shapiro_tongue)
shapiro_Shannon$body_site=c("gut","skin","tongue")
#ANOVA by body site
body_site=data.frame(c(rep("gut",50),rep("skin",50),rep("tongue",50)))
body_site_df=cbind(body_site,stack(df[1:3]))
colnames(body_site_df)=c("body_site","Shannon_index","ID")
model=lm(Shannon_index~body_site,data=body_site_df)
one_way_anova=tidy(anova(model))
#Shannon_means and ANOVA
mean_gut=tibble(mean(df$gut_Shannon_index))
mean_skin=tibble(mean(df$skin_Shannon_index))
mean_tongue=tibble(mean(df$tongue_Shannon_index))
mean_Shannon=cbind(mean_gut,mean_skin,mean_tongue,one_way_anova$p.value)
colnames(mean_Shannon)=c("gut","skin","tongue","p.value.anova")
#Correlation to phenotypes
#age
gut_age_pearson=tidy(cor.test(df$gut_Shannon_index,df$age,method="pearson"))
skin_age_pearson=tidy(cor.test(df$skin_Shannon_index,df$age,method="pearson"))
tongue_age_pearson=tidy(cor.test(df$tongue_Shannon_index,df$age,method="pearson"))
age_pearson=rbind(gut_age_pearson,skin_age_pearson,tongue_age_pearson)
age_pearson$body_site=c("gut","skin","tongue")
age_pearson$phenotype=rep("age",3)
colnames(age_pearson)=c("R","statistic","p.value","parameter","conf.low","conf.high","method","alternative","body_site","phenotype")
plot(df$gut_Shannon_index~df$age,ylim=c(0,4),xlim=c(20,70))

dev.copy(png,"Q4_gut_age.png")
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2
plot(df$skin_Shannon_index~df$age,ylim=c(0,4),xlim=c(20,70))

dev.copy(png,"Q4_skin_age.png")
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2
plot(df$tongue_Shannon_index~df$age,ylim=c(0,4),xlim=c(20,70))

dev.copy(png,"Q4_tongue_age.png")
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2
#BMI
gut_BMI_pearson=tidy(cor.test(df$gut_Shannon_index,df$BMI,method="pearson"))
skin_BMI_pearson=tidy(cor.test(df$skin_Shannon_index,df$BMI,method="pearson"))
tongue_BMI_pearson=tidy(cor.test(df$tongue_Shannon_index,df$BMI,method="pearson"))
BMI_pearson=rbind(gut_BMI_pearson,skin_BMI_pearson,tongue_BMI_pearson)
BMI_pearson$body_site=c("gut","skin","tongue")
BMI_pearson$phenotype=rep("BMI",3)
colnames(BMI_pearson)=c("R","statistic","p.value","parameter","conf.low","conf.high","method","alternative","body_site","phenotype")
plot(df$gut_Shannon_index~df$BMI,ylim=c(0,4),xlim=c(15,35))

dev.copy(png,"Q4_gut_BMI.png")
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2
plot(df$skin_Shannon_index~df$BMI,ylim=c(0,4),xlim=c(15,35))

dev.copy(png,"Q4_skin_BMI.png")
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2
plot(df$tongue_Shannon_index~df$BMI,ylim=c(0,4),xlim=c(15,35))

dev.copy(png,"Q4_tongue_BMI.png")
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2
#height
gut_height_pearson=tidy(cor.test(df$gut_Shannon_index,df$height,method="pearson"))
skin_height_pearson=tidy(cor.test(df$skin_Shannon_index,df$height,method="pearson"))
tongue_height_pearson=tidy(cor.test(df$tongue_Shannon_index,df$height,method="pearson"))
height_pearson=rbind(gut_height_pearson,skin_height_pearson,tongue_height_pearson)
height_pearson$body_site=c("gut","skin","tongue")
height_pearson$phenotype=rep("height",3)
colnames(height_pearson)=c("R","statistic","p.value","parameter","conf.low","conf.high","method","alternative","body_site","phenotype")
plot(df$gut_Shannon_index~df$height,ylim=c(0,4),xlim=c(100,200))

dev.copy(png,"Q4_gut_height.png")
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2
plot(df$skin_Shannon_index~df$height,ylim=c(0,4),xlim=c(100,200))

dev.copy(png,"Q4_skin_height.png")
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2
plot(df$tongue_Shannon_index~df$height,ylim=c(0,4),xlim=c(100,200))

dev.copy(png,"Q4_tongue_height.png")
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2
#concatenate pearson
pearson_correlation=rbind(age_pearson,BMI_pearson,height_pearson)
#export restuls
Q4_shapiro.csv=write.csv(shapiro_Shannon,"~/Desktop/Q4.shapiro.shannon.csv")
Q4_pearson_correlation.csv=write.csv(pearson_correlation,"~/Desktop/Q4.pearson.correlation.csv")
Q4_shannon.csv=write.csv(mean_Shannon,"~/Desktop/Q4.shannon.csv")
#plot significant correlations
significant_correlation=subset(pearson_correlation,p.value <= 0.05)
print(significant_correlation[,9:10])
## # A tibble: 3 x 2
##   body_site phenotype
##   <chr>     <chr>    
## 1 skin      age      
## 2 gut       BMI      
## 3 skin      BMI
#skin vs age
g=ggplot(df,aes(x=age,y=skin_Shannon_index,color=skin_Shannon_index))+geom_point(aes(color=skin_Shannon_index),size=3)+geom_point(shape=1,size=3,colour="black")+geom_smooth(method="lm",color="black")+labs(title="skin microbial diversity vs age",x="age",y="Shannon index",color="Shannon index")+theme(plot.title=element_text(hjust=0.5),text=element_text(size=18,face="bold"))
g+scale_color_gradientn(colors=rainbow(5),limits=c(1,4))
## `geom_smooth()` using formula 'y ~ x'

dev.copy(png,"Q4_skin_vs_age_significant.png")
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2
#gut vs BMI
g=ggplot(df,aes(x=BMI,y=gut_Shannon_index,color=gut_Shannon_index))+geom_point(aes(color=gut_Shannon_index),size=3)+geom_point(shape=1,size=3,colour="black")+geom_smooth(method="lm",color="black")+labs(title="gut microbial diversity vs BMI",x="BMI",y="Shannon index",color="Shannon index")+theme(plot.title=element_text(hjust=0.5),text=element_text(size=18,face="bold"))
g+scale_color_gradientn(colors=rainbow(5),limits=c(1,4))
## `geom_smooth()` using formula 'y ~ x'

dev.copy(png,"Q4_gut_vs_BMI_significant.png")
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2
#skin vs BMI
g=ggplot(df,aes(x=BMI,y=skin_Shannon_index,color=skin_Shannon_index))+geom_point(aes(color=skin_Shannon_index),size=3)+geom_point(shape=1,size=3,colour="black")+geom_smooth(method="lm",color="black")+labs(title="skin microbial diversity vs BMI",x="BMI",y="Shannon index",color="Shannon index")+theme(plot.title=element_text(hjust=0.5),text=element_text(size=18,face="bold"))
g+scale_color_gradientn(colors=rainbow(5),limits=c(1,4))
## `geom_smooth()` using formula 'y ~ x'

dev.copy(png,"Q4_skin_vs_BMI_significant.png")
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2
#plot overall Shannon
p=ggplot(body_site_df,aes(x=body_site,y=Shannon_index,color=Shannon_index))
p1 =geom_boxplot(aes(x=body_site,y=Shannon_index,fill=NULL),outlier.shape = NULL,outlier.color = NA)
p2 =geom_jitter(aes(color=Shannon_index),size=3,position=position_jitter(width=0.1,seed=1))
p3=geom_jitter(shape=1,size=3,colour="black",position=position_jitter(width=0.1,seed=1))
p+p1+p2+p3+scale_color_gradientn(colors=rainbow(5),limits=c(1.5,4))+labs(title="microbial diversity by body site",x="body site",y="Shannon index",color="Shannon index")+theme(plot.title=element_text(hjust=0.5),text=element_text(size=18,face="bold"))

dev.copy(png,"Q4_Shannon.png")
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2