#set up environment
setwd("~/Desktop")
library(ggplot2)
library(broom)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
#import data
R1=read.table(text="values  group
1.277   ctl
4.536   ctl
15.132  ctl
18.331  ctl
936.037 ctl
5.458   ctl
832.274 ctl
71.431  ctl
772.714 ctl
61.509  ctl
2475.600    PDs
240.035 PDs
43.123  PDs
183.149 PDs
29.695  PDs
15.331  PDs
612.920 PDs
2537.024    PDs
57.311  PDs
334.873 PDs
380.653 PDf
95.227  PDf
288.645 PDf
1537.802    PDf
1073.821    PDf
23.464  PDf
1015.809    PDf
220.654 PDf
1549.093    PDf
731.665 PDf
115.51  ctl
2074.35 ctl
162.79  ctl
3151.89 ctl
126.18  ctl
109.02  ctl
193.12  ctl
15.02   ctl
1.26    ctl
53.11   ctl
98.36   PDs
1320.50 PDs
13429.86    PDs
13.25   PDs
419.16  PDs
9174.41 PDs
0.16    PDs
8.13    PDs
1804.04 PDs
174.35  PDs
94.91   PDf
426.43  PDf
235.35  PDf
1160.06 PDf
24635.33    PDf
142186.50   PDf
11.63   PDf
37.69   PDf
123186.10   PDf
9.84    PDf
14.129  ctl
4.923   ctl
7.236   ctl
15.809  ctl
4.155   ctl
7.158   ctl
21.857  ctl
8.619   ctl
16.283  ctl
3.076   ctl
6.232   PDs
13.612  PDs
15.024  PDs
5.212   PDs
5.257   PDs
6.784   PDs
8.160   PDs
7.960   PDs
4.096   PDs
8.464   PDs
6.207   PDf
7.606   PDf
9.971   PDf
7.371   PDf
19.997  PDf
25.867  PDf
13.159  PDf
12.610  PDf
6.101   PDf
15.186  PDf",sep="\t",header=T)
R2=read.table(text="values  group
22.602  ctl
12.331  ctl
72.793  ctl
56.953  ctl
29.837  ctl
14.019  ctl
828.823 ctl
16.830  ctl
228.290 ctl
376.437 ctl
5.550   PDs
657.836 PDs
4.821   PDs
410.577 PDs
899.526 PDs
114.758 PDs
3.309   PDs
895.571 PDs
404.456 PDs
238.665 PDs
15533.938   PDf
11305.549   PDf
127.291 PDf
593.696 PDf
293.743 PDf
1369.173    PDf
68.118  PDf
42.660  PDf
197.310 PDf
4061.364    PDf
21.19   ctl
11239.82    ctl
9455.81 ctl
30.80   ctl
1173.61 ctl
1450.36 ctl
26.75   ctl
10243.85    ctl
1530.56 ctl
48.75   ctl
44.19   PDs
106.49  PDs
39917.93    PDs
9.44    PDs
17897.32    PDs
214861.40   PDs
1405.23 PDs
74402.63    PDs
92.03   PDs
64.93   PDs
1736.78 PDf
11472.04    PDf
1108.83 PDf
7.98    PDf
7264.93 PDf
4356.45 PDf
649.88  PDf
25.54   PDf
85659.37    PDf
205.90  PDf
19.049  ctl
6.519   ctl
11.031  ctl
13.923  ctl
12.572  ctl
7.491   ctl
26.997  ctl
35.093  ctl
9.770   ctl
15.539  ctl
11.356  PDs
14.027  PDs
18.072  PDs
29.006  PDs
13.070  PDs
12.642  PDs
7.784   PDs
9.568   PDs
17.543  PDs
9.867   PDs
12.004  PDf
12.467  PDf
15.990  PDf
12.899  PDf
14.553  PDf
18.418  PDf
9.058   PDf
12.920  PDf
10.430  PDf
14.445  PDf",sep="\t",header=T)
R3=read.table(text="values  group
1802.991    ctl
4460.414    ctl
4.710   ctl
91.868  ctl
836.264 ctl
12.555  ctl
20.222  ctl
25062.158   ctl
11911.837   ctl
237.471 ctl
320.499 PDs
1658.661    PDs
41.171  PDs
36761.070   PDs
18.031  PDs
572.979 PDs
29.269  PDs
10.252  PDs
79950.062   PDs
23.113  PDs
3566.236    PDf
2324.984    PDf
9748.803    PDf
5545.472    PDf
261.158 PDf
420.699 PDf
335.080 PDf
30228.128   PDf
17060.386   PDf
13466.620   PDf
108.76  ctl
6.59    ctl
611.54  ctl
1.85    ctl
123.25  ctl
17846.78    ctl
1788.32 ctl
3085.47 ctl
11.50   ctl
23.45   ctl
74.36   PDs
51.92   PDs
10200.43    PDs
1743.02 PDs
130373.90   PDs
1516.35 PDs
2169.86 PDs
31.73   PDs
228.25  PDs
308.20  PDs
68.92   PDf
397439.90   PDf
1051753.00  PDf
85560.89    PDf
5137.36 PDf
2206.15 PDf
5298.07 PDf
13277.03    PDf
9693.61 PDf
155.92  PDf
6.742   ctl
23.496  ctl
25.736  ctl
10.871  ctl
16.778  ctl
23.763  ctl
8.220   ctl
9.268   ctl
17.027  ctl
23.624  ctl
23.932  PDs
37.983  PDs
21.798  PDs
34.582  PDs
27.638  PDs
15.840  PDs
21.936  PDs
20.020  PDs
27.722  PDs
28.302  PDs
14.222  PDf
23.401  PDf
15.133  PDf
13.444  PDf
10.710  PDf
34.973  PDf
47.328  PDf
4.302   PDf
54.408  PDf
17.107  PDf",sep="\t",header=T)
df=rbind(R1,R2,R3)
df$repository=c(rep("R1",90),rep("R2",90),rep("R3",90))
df$cytokine=rep(c(rep("IL1a",30),rep("TNFa",30),rep("C1Q",30)),3)
df=as.data.frame(df)
df$group_repository=paste(df$group,df$repository,sep="_")
df$group_cytokine=paste(df$group,df$cytokine,sep="_")
df$repository_cyokine=paste(df$repository,df$cytokine,sep="_")
df$group_repository_cytokine=paste(df$group,df$repository,df$cytokine,sep="_")
#shapiro test by group and treatment
shapiro_test=df %>% group_by(group_repository_cytokine) %>% do(tidy(shapiro.test(.$values)))
#Log2 transform weight and run Shapiro again
#all normal except PDs_R2_IL1a (p=0.01478135)
df$log2_values=log(df$values,base=exp(2)) 
log2_shapiro_test=df %>% group_by(group_repository_cytokine) %>% do(tidy(shapiro.test(.$log2_values)))
#subset data by cytokine for anova
IL1a=df[which(df$cytokine=='IL1a'),]
TNFa=df[which(df$cytokine=='TNFa'),]
C1Q=df[which(df$cytokine=='C1Q'),]
#two way anova for variance in cytokine measurement considering repository IL1a
model_two_way_anova=lm(log2_values~group*repository,data=IL1a)
IL1a_two_way_anova=tidy(anova(model_two_way_anova))
IL1a_two_way_anova_summary=tidy(summary(model_two_way_anova))
#two way anova for variance in cytokine measurement considering repository TNFa
model_two_way_anova=lm(log2_values~group*repository,data=TNFa)
TNFa_two_way_anova=tidy(anova(model_two_way_anova))
TNFa_two_way_anova_summary=tidy(summary(model_two_way_anova))
#two way anova for variance in cytokine measurement considering repository C1Q
model_two_way_anova=lm(log2_values~group*repository,data=C1Q)
C1Q_two_way_anova=tidy(anova(model_two_way_anova))
C1Q_two_way_anova_summary=tidy(summary(model_two_way_anova))
#concatenate and export data
two_way_anova=rbind(IL1a_two_way_anova,TNFa_two_way_anova,C1Q_two_way_anova)
two_way_anova$cytokine=c(rep("IL1a",4),rep("TNFa",4),rep("C1Q",4))
two_way_anova_summary=rbind(IL1a_two_way_anova_summary,TNFa_two_way_anova_summary,C1Q_two_way_anova_summary)
two_way_anova_summary$cytokine=c(rep("IL1a",9),rep("TNFa",9),rep("C1Q",9))
Q3_log2_shapiro_test.csv=write.csv(log2_shapiro_test,"~/Desktop/Q3.log2.shapiro.test.csv")
Q3_two_way_anova.csv=write.csv(two_way_anova,"~/Desktop/Q3.two.way.anova.csv")
Q3_two_way_anova_summary.csv=write.csv(two_way_anova_summary,"~/Desktop/Q3.two.way.anova.summary.csv")
#plot interaction
#IL1a
interaction.plot(IL1a$group,IL1a$repository,IL1a$log2_values,ylab="mean of log2 values",xlab="group")

dev.copy(png,"Q3_IL1a_interaction.png")
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2
#TNFa
interaction.plot(TNFa$group,TNFa$repository,TNFa$log2_values,ylab="mean of log2 values",xlab="group")

dev.copy(png,"Q3_TNFa_interaction.png")
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2
#C1Q
interaction.plot(C1Q$group,C1Q$repository,C1Q$log2_values,ylab="mean of log2 values",xlab="group")

dev.copy(png,"Q3_C1Q_interaction.png")
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2
#plot distribution colored by repository
#IL1a
p = ggplot(IL1a,aes(group,log2_values))
p1 = geom_boxplot(aes(group,log2_values),outlier.shape = NULL,outlier.color = NA)
p2= geom_point(aes(group,log2_values,color=repository),shape=16,size=3,position=position_dodge(width=0.5))
p+p1+p2+labs(title="IL1a",x="group",y="log2 value")+theme(plot.title=element_text(hjust=0.5),text=element_text(size=18,face="bold"))

dev.copy(png,"Q3_IL1a_boxplot.png")
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2
#TNFa
p = ggplot(TNFa,aes(group,log2_values))
p1 = geom_boxplot(aes(group,log2_values),outlier.shape = NULL,outlier.color = NA)
p2= geom_point(aes(group,log2_values,color=repository),shape=16,size=3,position=position_dodge(width=0.5))
p+p1+p2+labs(title="TNFa",x="group",y="log2 value")+theme(plot.title=element_text(hjust=0.5),text=element_text(size=18,face="bold"))

dev.copy(png,"Q3_TNFa_boxplot.png")
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2
#C1Q
p = ggplot(C1Q,aes(group,log2_values))
p1 = geom_boxplot(aes(group,log2_values),outlier.shape = NULL,outlier.color = NA)
p2= geom_point(aes(group,log2_values,color=repository),shape=16,size=3,position=position_dodge(width=0.5))
p+p1+p2+labs(title="C1Q",x="group",y="log2 value")+theme(plot.title=element_text(hjust=0.5),text=element_text(size=18,face="bold"))

dev.copy(png,"Q3_C1Q_boxplot.png")
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2