#set up environment
setwd("~/Desktop")
library(ggplot2)
library(broom)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble 3.1.1 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following object is masked from 'package:purrr':
##
## compact
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
#import data
df=read.table(text="Protein1 Protein2 Protein3 Protein4 Protein5
val sub-type val sub-type val sub-type val sub-type val sub-type
57.482 1 103.396 1 98.606 1 9.165 1 88.199 1
119.005 1 117.412 1 61.271 1 10.205 1 58.784 1
73.110 1 96.305 1 113.288 1 9.629 1 53.632 1
53.632 1 107.128 1 80.371 1 7.818 1 74.245 1
22.348 1 89.579 1 100.463 1 9.029 1 46.165 1
48.522 1 124.209 1 88.539 1 9.553 1 22.069 1
69.914 1 81.468 1 36.870 1 9.205 1 30.649 1
61.729 1 93.104 1 89.663 1 10.159 1 58.724 1
74.043 1 80.248 1 9.395 1 7.996 1 23.777 1
74.950 1 54.331 1 75.912 1 11.658 1 47.637 1
76.899 1 118.747 1 78.560 1 10.972 1 70.667 1
82.424 1 130.335 1 87.607 1 14.265 1 64.758 1
69.827 1 59.772 1 109.202 1 9.926 1 61.953 1
64.165 1 105.167 1 84.888 1 13.699 1 30.772 1
92.175 1 98.349 1 23.576 1 10.382 1 52.539 1
67.971 1 99.818 1 37.247 1 7.691 1 77.821 1
59.792 1 109.362 1 63.915 1 10.530 1 63.672 1
97.942 1 81.556 1 94.147 1 16.548 1 46.171 1
69.973 1 88.362 1 54.000 1 11.367 1 32.374 1
79.419 1 109.140 1 72.748 1 9.589 1 91.042 1
42.740 2 114.196 2 94.572 2 22.257 2 42.472 2
46.103 2 140.263 2 83.682 2 22.027 2 84.544 2
54.385 2 93.955 2 123.625 2 19.546 2 66.478 2
36.360 2 115.011 2 83.584 2 19.923 2 81.213 2
75.594 2 107.553 2 101.957 2 22.949 2 56.041 2
33.528 2 83.035 2 94.882 2 20.546 2 43.093 2
13.522 2 120.736 2 14.486 2 22.953 2 32.116 2
20.734 2 115.177 2 28.516 2 22.439 2 43.379 2
32.595 2 108.149 2 116.849 2 19.897 2 33.214 2
36.113 2 102.281 2 82.667 2 18.327 2 33.167 2
70.623 2 66.946 2 43.171 2 18.139 2 32.228 2
60.979 2 93.238 2 70.510 2 20.908 2 80.295 2
63.948 2 87.478 2 114.332 2 16.807 2 70.313 2
62.890 2 91.166 2 105.416 2 23.686 2 20.083 2
48.987 2 101.087 2 88.498 2 17.652 2 62.748 2
19.243 2 81.013 2 30.497 2 19.438 2 32.073 2
15.209 2 113.532 2 75.874 2 15.538 2 45.878 2
59.752 2 126.907 2 69.462 2 20.179 2 73.802 2
35.422 2 136.290 2 39.289 2 18.403 2 12.194 2
29.971 2 120.171 2 52.534 2 20.993 2 63.059 2
31.310 3 97.733 3 90.607 3 6.483 3 22.361 3
56.552 3 101.080 3 79.611 3 12.953 3 69.689 3
53.016 3 101.892 3 71.473 3 11.213 3 45.505 3
40.546 3 78.668 3 61.536 3 9.312 3 40.181 3
63.595 3 124.311 3 104.837 3 12.250 3 67.309 3
51.739 3 108.379 3 97.236 3 11.781 3 15.553 3
45.456 3 118.043 3 82.759 3 10.548 3 16.583 3
73.841 3 101.973 3 62.251 3 7.747 3 42.124 3
72.382 3 89.828 3 108.762 3 5.782 3 42.094 3
80.121 3 106.446 3 45.659 3 8.037 3 31.139 3
44.287 3 67.104 3 97.229 3 8.339 3 50.659 3
63.678 3 114.556 3 101.786 3 10.783 3 59.985 3
44.544 3 92.657 3 126.872 3 9.549 3 35.376 3
42.847 3 99.107 3 152.693 3 7.982 3 26.731 3
57.403 3 105.493 3 54.770 3 10.687 3 37.779 3
44.397 3 125.425 3 49.127 3 13.012 3 78.002 3
56.022 3 118.404 3 63.976 3 11.921 3 9.116 3
54.751 3 80.204 3 95.134 3 8.799 3 46.151 3
35.340 3 88.366 3 93.126 3 3.780 3 64.136 3
54.936 3 55.733 3 117.967 3 11.730 3 -4.208 3
32.013 4 137.997 4 94.111 4 10.860 4 32.176 4
63.505 4 84.952 4 87.008 4 18.185 4 26.800 4
41.451 4 87.939 4 122.593 4 16.325 4 48.754 4
8.830 4 93.065 4 72.344 4 15.690 4 59.431 4
68.498 4 103.434 4 100.177 4 12.342 4 77.391 4
57.758 4 79.330 4 87.225 4 16.567 4 66.144 4
45.386 4 93.576 4 92.125 4 12.354 4 62.574 4
53.205 4 104.744 4 80.963 4 14.537 4 46.923 4
94.197 4 94.589 4 55.133 4 14.433 4 34.027 4
66.161 4 87.152 4 26.925 4 15.587 4 44.072 4
63.686 4 79.144 4 114.885 4 14.681 4 19.267 4
50.878 4 79.244 4 103.499 4 12.420 4 32.223 4
45.229 4 97.442 4 78.414 4 15.809 4 47.447 4
72.437 4 88.776 4 55.989 4 15.683 4 50.347 4
77.430 4 102.754 4 106.791 4 14.463 4 49.541 4
38.758 4 95.116 4 62.918 4 10.983 4 64.498 4
106.814 4 74.458 4 95.855 4 13.074 4 39.637 4
38.711 4 73.450 4 117.527 4 15.526 4 22.198 4
96.244 4 99.387 4 96.686 4 18.528 4 33.466 4
96.146 4 71.388 4 120.994 4 18.053 4 69.743 4
42.866 5 88.404 5 127.918 5 7.804 5 34.766 5
49.870 5 77.253 5 107.431 5 6.241 5 84.440 5
26.730 5 114.006 5 134.467 5 7.735 5 91.934 5
49.705 5 91.424 5 39.276 5 7.852 5 41.571 5
56.080 5 81.120 5 57.258 5 9.614 5 94.850 5
52.667 5 93.082 5 78.138 5 5.741 5 98.539 5
70.950 5 127.168 5 88.649 5 10.920 5 37.388 5
36.821 5 100.108 5 104.889 5 6.702 5 82.757 5
63.473 5 81.358 5 110.676 5 7.260 5 49.537 5
25.918 5 109.095 5 9.980 5 7.643 5 62.151 5
85.354 5 85.267 5 97.822 5 9.952 5 64.836 5
64.296 5 70.873 5 102.359 5 5.434 5 61.909 5
21.158 5 121.322 5 116.285 5 10.002 5 81.795 5
61.795 5 61.971 5 124.205 5 11.164 5 68.530 5
40.939 5 84.277 5 69.256 5 10.237 5 29.830 5
45.120 5 128.741 5 67.229 5 7.204 5 44.236 5
29.650 5 83.356 5 81.531 5 7.629 5 39.805 5
43.583 5 86.665 5 72.811 5 6.206 5 56.481 5
29.717 5 107.723 5 74.727 5 8.074 5 51.547 5
62.826 5 83.149 5 78.899 5 7.992 5 31.280 5
90.500 6 76.343 6 94.408 6 3.710 6 6.517 6
46.342 6 117.109 6 83.523 6 11.215 6 68.087 6
40.371 6 104.756 6 74.694 6 9.110 6 76.413 6
50.977 6 90.253 6 57.413 6 8.801 6 70.545 6
58.921 6 86.378 6 41.618 6 7.280 6 34.571 6
13.778 6 111.100 6 100.632 6 10.023 6 38.817 6
15.701 6 85.970 6 34.582 6 11.002 6 51.015 6
69.050 6 72.936 6 151.661 6 10.154 6 37.320 6
63.114 6 116.740 6 84.917 6 10.788 6 33.823 6
34.920 6 76.669 6 42.835 6 6.584 6 60.945 6
45.247 6 109.673 6 19.256 6 9.630 6 70.346 6
28.257 6 114.745 6 52.427 6 8.442 6 51.865 6
74.728 6 92.221 6 75.262 6 8.467 6 62.526 6
44.482 6 133.283 6 87.740 6 9.702 6 67.314 6
29.212 6 139.387 6 71.645 6 9.460 6 37.245 6
29.540 6 95.011 6 118.027 6 10.859 6 48.643 6
88.591 6 100.822 6 72.472 6 6.877 6 57.884 6
91.041 6 125.986 6 78.184 6 5.504 6 47.971 6
40.524 6 102.352 6 44.857 6 7.356 6 38.458 6
82.128 6 120.258 6 99.662 6 9.148 6 56.880 6",header=F,sep="\t",stringsAsFactors=F)
#format data
df$protein1=rep("p1",122)
df$protein2=rep("p2",122)
df$protein3=rep("p3",122)
df$protein4=rep("p4",122)
df$protein5=rep("p5",122)
df=as.data.frame(df[3:122,])
df=cbind(df[,c(2,15,1,16,4,17,7,18,10,19,13)],row.names=NULL)
colnames(df)=c("sub_type","p1","p1_value","p2","p2_value","p3","p3_value","p4","p4_value","p5","p5_value")
df=as.data.frame(df)
df_values=data.frame(df[1],stack(df[c(3,5,7,9,11)]))
df_protein=data.frame(df[1],stack(df[c(2,4,6,8,10)]))
df=cbind(df_values$sub_type,df_protein$values,df_values$values,row.names=NULL)
colnames(df)=c("subtype","protein","value")
df=as.data.frame(df)
df$value=as.numeric(df$value)
df$subtype_protein=paste(df$subtype,df$protein,sep="_")
#shapiro test by protein and disease subtype
shapiro_test=df %>% group_by(subtype_protein) %>% do(tidy(shapiro.test(.$value)))
#one way anova for variance in protein abundance
#p1
model_one_way_anova=lm(value~subtype,data=subset(df, protein=="p1"))
one_way_anova1=tidy(anova(model_one_way_anova))
anova(model_one_way_anova)
## Analysis of Variance Table
##
## Response: value
## Df Sum Sq Mean Sq F value Pr(>F)
## subtype 5 9792 1958.4 4.8827 0.0004382 ***
## Residuals 114 45725 401.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
one_way_anova_summary1=tidy(summary(model_one_way_anova))
#p2
model_one_way_anova=lm(value~subtype,data=subset(df, protein=="p2"))
one_way_anova2=tidy(anova(model_one_way_anova))
one_way_anova_summary2=tidy(summary(model_one_way_anova))
#p3
model_one_way_anova=lm(value~subtype,data=subset(df, protein=="p3"))
one_way_anova3=tidy(anova(model_one_way_anova))
one_way_anova_summary3=tidy(summary(model_one_way_anova))
#p4
model_one_way_anova=lm(value~subtype,data=subset(df, protein=="p4"))
one_way_anova4=tidy(anova(model_one_way_anova))
one_way_anova_summary4=tidy(summary(model_one_way_anova))
#p5
model_one_way_anova=lm(value~subtype,data=subset(df, protein=="p5"))
one_way_anova5=tidy(anova(model_one_way_anova))
one_way_anova_summary5=tidy(summary(model_one_way_anova))
#concatenate ANOVA
one_way_anova=rbind(one_way_anova1,one_way_anova2,one_way_anova3,one_way_anova4,one_way_anova5)
one_way_anova$protein=c(rep("p1",2),rep("p2",2),rep("p3",2),rep("p4",2),rep("p5",2))
one_way_anova_summary=rbind(one_way_anova_summary1,one_way_anova_summary2,one_way_anova_summary3,one_way_anova_summary4,one_way_anova_summary5)
one_way_anova_summary$protein=c(rep("p1",6),rep("p2",6),rep("p3",6),rep("p4",6),rep("p5",6))
#pairwise t test
#p1
protein1=subset(df,protein=="p1")
pairwise.t.test1=tidy(pairwise.t.test(data=protein1,x=protein1$value,g=protein1$subtype,alternative="two.sided",paired=F,p.adj="bonferroni"))
pairwise.t.test(data=protein1,x=protein1$value,g=protein1$subtype,alternative="two.sided",paired=F,p.adj="bonferroni")
##
## Pairwise comparisons using t tests with pooled SD
##
## data: protein1$value and protein1$subtype
##
## 1 2 3 4 5
## 2 0.00038 - - - -
## 3 0.10345 1.00000 - - -
## 4 1.00000 0.08220 1.00000 - -
## 5 0.00712 1.00000 1.00000 0.66187 -
## 6 0.05230 1.00000 1.00000 1.00000 1.00000
##
## P value adjustment method: bonferroni
#p4
protein4=subset(df,protein=="p4")
pairwise.t.test4=tidy(pairwise.t.test(data=protein4,x=protein4$value,g=protein4$subtype,alternative="two.sided",paired=F,p.adj="bonferroni"))
#p5
protein5=subset(df,protein=="p5")
pairwise.t.test5=tidy(pairwise.t.test(data=protein5,x=protein5$value,g=protein5$subtype,alternative="two.sided",paired=F,p.adj="bonferroni"))
#concatenate pairwise t test
pairwise_t_test=rbind(pairwise.t.test1,pairwise.t.test4,pairwise.t.test5)
pairwise_t_test$protein=c(rep("p1",15),rep("p4",15),rep("p5",15))
#export data
Q1_shapiro_test.csv=write.csv(shapiro_test,"~/Desktop/Q1.shapiro.test.csv")
Q1_one_way_anova.csv=write.csv(one_way_anova,"~/Desktop/Q1.one.way.anova.csv")
Q1_one_way_anova_summary.csv=write.csv(one_way_anova_summary,"~/Desktop/Q1.one.way.anova.summary.csv")
Q1_pairwise_t_test_bonferroni.csv=write.csv(pairwise_t_test,"~/Desktop/Q1.pairwise.t.test.bonferroni.csv")
#plot
p1=subset(df,protein=="p1")
p2=subset(df,protein=="p2")
p3=subset(df,protein=="p3")
p4=subset(df,protein=="p4")
p5=subset(df,protein=="p5")
ggplot(p1, aes(x=subtype,y=value,fill=subtype))+geom_boxplot(outlier.shape = NULL,outlier.color = NA)+geom_jitter(color="black",shape=16,position=position_jitter(0.1),size=2)+labs(title="protein 1",x="asthma subtype",y="protein abundance",fill="asthma subtype")+theme(plot.title=element_text(hjust=0.5),text=element_text(size=18,face="bold"))

dev.copy(png,"Q1_p1.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
ggplot(p2, aes(x=subtype,y=value,fill=subtype))+geom_boxplot(outlier.shape = NULL,outlier.color = NA)+geom_jitter(color="black",shape=16,position=position_jitter(0.1),size=2)+labs(title="protein 2",x="asthma subtype",y="protein abundance",fill="asthma subtype")+theme(plot.title=element_text(hjust=0.5),text=element_text(size=18,face="bold"))

dev.copy(png,"Q1_p2.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
ggplot(p3, aes(x=subtype,y=value,fill=subtype))+geom_boxplot(outlier.shape = NULL,outlier.color = NA)+geom_jitter(color="black",shape=16,position=position_jitter(0.1),size=2)+labs(title="protein 3",x="asthma subtype",y="protein abundance",fill="asthma subtype")+theme(plot.title=element_text(hjust=0.5),text=element_text(size=18,face="bold"))

dev.copy(png,"Q1_p3.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
ggplot(p4, aes(x=subtype,y=value,fill=subtype))+geom_boxplot(outlier.shape = NULL,outlier.color = NA)+geom_jitter(color="black",shape=16,position=position_jitter(0.1),size=2)+labs(title="protein 4",x="asthma subtype",y="protein abundance",fill="asthma subtype")+theme(plot.title=element_text(hjust=0.5),text=element_text(size=18,face="bold"))

dev.copy(png,"Q1_p4.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
ggplot(p5, aes(x=subtype,y=value,fill=subtype))+geom_boxplot(outlier.shape = NULL,outlier.color = NA)+geom_jitter(color="black",shape=16,position=position_jitter(0.1),size=2)+labs(title="protein 5",x="asthma subtype",y="protein abundance",fill="asthma subtype")+theme(plot.title=element_text(hjust=0.5),text=element_text(size=18,face="bold"))

dev.copy(png,"Q1_p5.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2