#set up environment
setwd("~/Desktop")
library(ggplot2)
library(broom)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
#import data
R1=read.table(text="values group
1.277 ctl
4.536 ctl
15.132 ctl
18.331 ctl
936.037 ctl
5.458 ctl
832.274 ctl
71.431 ctl
772.714 ctl
61.509 ctl
2475.600 PDs
240.035 PDs
43.123 PDs
183.149 PDs
29.695 PDs
15.331 PDs
612.920 PDs
2537.024 PDs
57.311 PDs
334.873 PDs
380.653 PDf
95.227 PDf
288.645 PDf
1537.802 PDf
1073.821 PDf
23.464 PDf
1015.809 PDf
220.654 PDf
1549.093 PDf
731.665 PDf
115.51 ctl
2074.35 ctl
162.79 ctl
3151.89 ctl
126.18 ctl
109.02 ctl
193.12 ctl
15.02 ctl
1.26 ctl
53.11 ctl
98.36 PDs
1320.50 PDs
13429.86 PDs
13.25 PDs
419.16 PDs
9174.41 PDs
0.16 PDs
8.13 PDs
1804.04 PDs
174.35 PDs
94.91 PDf
426.43 PDf
235.35 PDf
1160.06 PDf
24635.33 PDf
142186.50 PDf
11.63 PDf
37.69 PDf
123186.10 PDf
9.84 PDf
14.129 ctl
4.923 ctl
7.236 ctl
15.809 ctl
4.155 ctl
7.158 ctl
21.857 ctl
8.619 ctl
16.283 ctl
3.076 ctl
6.232 PDs
13.612 PDs
15.024 PDs
5.212 PDs
5.257 PDs
6.784 PDs
8.160 PDs
7.960 PDs
4.096 PDs
8.464 PDs
6.207 PDf
7.606 PDf
9.971 PDf
7.371 PDf
19.997 PDf
25.867 PDf
13.159 PDf
12.610 PDf
6.101 PDf
15.186 PDf",sep="\t",header=T)
R2=read.table(text="values group
22.602 ctl
12.331 ctl
72.793 ctl
56.953 ctl
29.837 ctl
14.019 ctl
828.823 ctl
16.830 ctl
228.290 ctl
376.437 ctl
5.550 PDs
657.836 PDs
4.821 PDs
410.577 PDs
899.526 PDs
114.758 PDs
3.309 PDs
895.571 PDs
404.456 PDs
238.665 PDs
15533.938 PDf
11305.549 PDf
127.291 PDf
593.696 PDf
293.743 PDf
1369.173 PDf
68.118 PDf
42.660 PDf
197.310 PDf
4061.364 PDf
21.19 ctl
11239.82 ctl
9455.81 ctl
30.80 ctl
1173.61 ctl
1450.36 ctl
26.75 ctl
10243.85 ctl
1530.56 ctl
48.75 ctl
44.19 PDs
106.49 PDs
39917.93 PDs
9.44 PDs
17897.32 PDs
214861.40 PDs
1405.23 PDs
74402.63 PDs
92.03 PDs
64.93 PDs
1736.78 PDf
11472.04 PDf
1108.83 PDf
7.98 PDf
7264.93 PDf
4356.45 PDf
649.88 PDf
25.54 PDf
85659.37 PDf
205.90 PDf
19.049 ctl
6.519 ctl
11.031 ctl
13.923 ctl
12.572 ctl
7.491 ctl
26.997 ctl
35.093 ctl
9.770 ctl
15.539 ctl
11.356 PDs
14.027 PDs
18.072 PDs
29.006 PDs
13.070 PDs
12.642 PDs
7.784 PDs
9.568 PDs
17.543 PDs
9.867 PDs
12.004 PDf
12.467 PDf
15.990 PDf
12.899 PDf
14.553 PDf
18.418 PDf
9.058 PDf
12.920 PDf
10.430 PDf
14.445 PDf",sep="\t",header=T)
R3=read.table(text="values group
1802.991 ctl
4460.414 ctl
4.710 ctl
91.868 ctl
836.264 ctl
12.555 ctl
20.222 ctl
25062.158 ctl
11911.837 ctl
237.471 ctl
320.499 PDs
1658.661 PDs
41.171 PDs
36761.070 PDs
18.031 PDs
572.979 PDs
29.269 PDs
10.252 PDs
79950.062 PDs
23.113 PDs
3566.236 PDf
2324.984 PDf
9748.803 PDf
5545.472 PDf
261.158 PDf
420.699 PDf
335.080 PDf
30228.128 PDf
17060.386 PDf
13466.620 PDf
108.76 ctl
6.59 ctl
611.54 ctl
1.85 ctl
123.25 ctl
17846.78 ctl
1788.32 ctl
3085.47 ctl
11.50 ctl
23.45 ctl
74.36 PDs
51.92 PDs
10200.43 PDs
1743.02 PDs
130373.90 PDs
1516.35 PDs
2169.86 PDs
31.73 PDs
228.25 PDs
308.20 PDs
68.92 PDf
397439.90 PDf
1051753.00 PDf
85560.89 PDf
5137.36 PDf
2206.15 PDf
5298.07 PDf
13277.03 PDf
9693.61 PDf
155.92 PDf
6.742 ctl
23.496 ctl
25.736 ctl
10.871 ctl
16.778 ctl
23.763 ctl
8.220 ctl
9.268 ctl
17.027 ctl
23.624 ctl
23.932 PDs
37.983 PDs
21.798 PDs
34.582 PDs
27.638 PDs
15.840 PDs
21.936 PDs
20.020 PDs
27.722 PDs
28.302 PDs
14.222 PDf
23.401 PDf
15.133 PDf
13.444 PDf
10.710 PDf
34.973 PDf
47.328 PDf
4.302 PDf
54.408 PDf
17.107 PDf",sep="\t",header=T)
df=rbind(R1,R2,R3)
df$repository=c(rep("R1",90),rep("R2",90),rep("R3",90))
df$cytokine=rep(c(rep("IL1a",30),rep("TNFa",30),rep("C1Q",30)),3)
df=as.data.frame(df)
df$group_repository=paste(df$group,df$repository,sep="_")
df$group_cytokine=paste(df$group,df$cytokine,sep="_")
df$repository_cyokine=paste(df$repository,df$cytokine,sep="_")
df$group_repository_cytokine=paste(df$group,df$repository,df$cytokine,sep="_")
#shapiro test by group and treatment
shapiro_test=df %>% group_by(group_repository_cytokine) %>% do(tidy(shapiro.test(.$values)))
#Log2 transform weight and run Shapiro again
#all normal except PDs_R2_IL1a (p=0.01478135)
df$log2_values=log(df$values,base=exp(2))
log2_shapiro_test=df %>% group_by(group_repository_cytokine) %>% do(tidy(shapiro.test(.$log2_values)))
#subset data by cytokine for anova
IL1a=df[which(df$cytokine=='IL1a'),]
TNFa=df[which(df$cytokine=='TNFa'),]
C1Q=df[which(df$cytokine=='C1Q'),]
#two way anova for variance in cytokine measurement considering repository IL1a
model_two_way_anova=lm(log2_values~group*repository,data=IL1a)
IL1a_two_way_anova=tidy(anova(model_two_way_anova))
IL1a_two_way_anova_summary=tidy(summary(model_two_way_anova))
#two way anova for variance in cytokine measurement considering repository TNFa
model_two_way_anova=lm(log2_values~group*repository,data=TNFa)
TNFa_two_way_anova=tidy(anova(model_two_way_anova))
TNFa_two_way_anova_summary=tidy(summary(model_two_way_anova))
#two way anova for variance in cytokine measurement considering repository C1Q
model_two_way_anova=lm(log2_values~group*repository,data=C1Q)
C1Q_two_way_anova=tidy(anova(model_two_way_anova))
C1Q_two_way_anova_summary=tidy(summary(model_two_way_anova))
#concatenate and export data
two_way_anova=rbind(IL1a_two_way_anova,TNFa_two_way_anova,C1Q_two_way_anova)
two_way_anova$cytokine=c(rep("IL1a",4),rep("TNFa",4),rep("C1Q",4))
two_way_anova_summary=rbind(IL1a_two_way_anova_summary,TNFa_two_way_anova_summary,C1Q_two_way_anova_summary)
two_way_anova_summary$cytokine=c(rep("IL1a",9),rep("TNFa",9),rep("C1Q",9))
Q3_log2_shapiro_test.csv=write.csv(log2_shapiro_test,"~/Desktop/Q3.log2.shapiro.test.csv")
Q3_two_way_anova.csv=write.csv(two_way_anova,"~/Desktop/Q3.two.way.anova.csv")
Q3_two_way_anova_summary.csv=write.csv(two_way_anova_summary,"~/Desktop/Q3.two.way.anova.summary.csv")
#plot interaction
#IL1a
interaction.plot(IL1a$group,IL1a$repository,IL1a$log2_values,ylab="mean of log2 values",xlab="group")

dev.copy(png,"Q3_IL1a_interaction.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
#TNFa
interaction.plot(TNFa$group,TNFa$repository,TNFa$log2_values,ylab="mean of log2 values",xlab="group")

dev.copy(png,"Q3_TNFa_interaction.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
#C1Q
interaction.plot(C1Q$group,C1Q$repository,C1Q$log2_values,ylab="mean of log2 values",xlab="group")

dev.copy(png,"Q3_C1Q_interaction.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
#plot distribution colored by repository
#IL1a
p = ggplot(IL1a,aes(group,log2_values))
p1 = geom_boxplot(aes(group,log2_values),outlier.shape = NULL,outlier.color = NA)
p2= geom_point(aes(group,log2_values,color=repository),shape=16,size=3,position=position_dodge(width=0.5))
p+p1+p2+labs(title="IL1a",x="group",y="log2 value")+theme(plot.title=element_text(hjust=0.5),text=element_text(size=18,face="bold"))

dev.copy(png,"Q3_IL1a_boxplot.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
#TNFa
p = ggplot(TNFa,aes(group,log2_values))
p1 = geom_boxplot(aes(group,log2_values),outlier.shape = NULL,outlier.color = NA)
p2= geom_point(aes(group,log2_values,color=repository),shape=16,size=3,position=position_dodge(width=0.5))
p+p1+p2+labs(title="TNFa",x="group",y="log2 value")+theme(plot.title=element_text(hjust=0.5),text=element_text(size=18,face="bold"))

dev.copy(png,"Q3_TNFa_boxplot.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
#C1Q
p = ggplot(C1Q,aes(group,log2_values))
p1 = geom_boxplot(aes(group,log2_values),outlier.shape = NULL,outlier.color = NA)
p2= geom_point(aes(group,log2_values,color=repository),shape=16,size=3,position=position_dodge(width=0.5))
p+p1+p2+labs(title="C1Q",x="group",y="log2 value")+theme(plot.title=element_text(hjust=0.5),text=element_text(size=18,face="bold"))

dev.copy(png,"Q3_C1Q_boxplot.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2