#set up environment
setwd("~/Desktop")
library(ggplot2)
library(broom)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
#import data
HFD=read.table(text="weight group
26.97278    WT
36.19753    WT
35.49289    WT
40.15701    WT
35.60393    WT
63.81115    KO
49.61674    KO
52.67053    KO
51.81782    KO
60.87102    KO",header=T,sep="\t")
NC=read.table(text="weight  group
33.79063    WT
20.65086    WT
33.91779    WT
34.79092    WT
20.29709    WT
42.05878    KO
20.36919    KO
27.55098    KO
27.80189    KO
21.97933    KO",header=T,sep="\t")
df=rbind(NC,HFD)
df$treatment=c(rep("NC",10),rep("HFD",10))
df=as.data.frame(df)
df$group_treatment=paste(df$group,df$treatment,sep="_")
#shapiro test by group and treatment
shapiro_test=df %>% group_by(group_treatment) %>% do(tidy(shapiro.test(.$weight)))
#WT NC group not normally distributed
#Log2 transform weight and run Shapiro again
df$log2_weight=log(df$weight,base=exp(2))  
log2_shapiro_test=df %>% group_by(group_treatment) %>% do(tidy(shapiro.test(.$log2_weight)))
#well that made the problem even worse...
#check power and standard deviation by genotype (group) and diet (treatment)
means=df %>% group_by(group_treatment) %>% do(tibble(mean(.$weight)))
sd=df %>% group_by(group_treatment) %>% do(tibble(sd(.$weight)))
means
## # A tibble: 4 x 2
## # Groups:   group_treatment [4]
##   group_treatment `mean(.$weight)`
##   <chr>                      <dbl>
## 1 KO_HFD                      55.8
## 2 KO_NC                       28.0
## 3 WT_HFD                      34.9
## 4 WT_NC                       28.7
sd
## # A tibble: 4 x 2
## # Groups:   group_treatment [4]
##   group_treatment `sd(.$weight)`
##   <chr>                    <dbl>
## 1 KO_HFD                    6.20
## 2 KO_NC                     8.55
## 3 WT_HFD                    4.82
## 4 WT_NC                     7.51
t.test(sd$`sd(.$weight)`)
## 
##  One Sample t-test
## 
## data:  sd$`sd(.$weight)`
## t = 8.3798, df = 3, p-value = 0.003564
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  4.199217 9.341773
## sample estimates:
## mean of x 
##  6.770495
d_WT=(34.9*28.7)/(sqrt((4.82^2+7.51^2)/2))
d_KO=(55.8*28.0)/(sqrt((6.2^2+8.55^2)/2))
d_HFD=(34.9*55.8)/(sqrt((4.82^2+6.20^2)/2))
d_NC=(28.7*28.0)/(sqrt((7.51^2+8.55^2)/2))
power.t.test(n=10,sig.level = 0.05,power=NULL,delta=d_WT,sd=1)
## 
##      Two-sample t test power calculation 
## 
##               n = 10
##           delta = 158.7366
##              sd = 1
##       sig.level = 0.05
##           power = 1
##     alternative = two.sided
## 
## NOTE: n is number in *each* group
power.t.test(n=10,sig.level = 0.05,power=NULL,delta=d_KO,sd=1)
## 
##      Two-sample t test power calculation 
## 
##               n = 10
##           delta = 209.2122
##              sd = 1
##       sig.level = 0.05
##           power = 1
##     alternative = two.sided
## 
## NOTE: n is number in *each* group
power.t.test(n=10,sig.level = 0.05,power=NULL,delta=d_HFD,sd=1)
## 
##      Two-sample t test power calculation 
## 
##               n = 10
##           delta = 350.6947
##              sd = 1
##       sig.level = 0.05
##           power = 1
##     alternative = two.sided
## 
## NOTE: n is number in *each* group
power.t.test(n=10,sig.level = 0.05,power=NULL,delta=d_NC,sd=1)
## 
##      Two-sample t test power calculation 
## 
##               n = 10
##           delta = 99.86555
##              sd = 1
##       sig.level = 0.05
##           power = 1
##     alternative = two.sided
## 
## NOTE: n is number in *each* group
#moving on with the non-normalized data
#two way anova for variance in weight considering diet and genotype
model_two_way_anova=lm(weight~group*treatment,data=df)
two_way_anova=tidy(anova(model_two_way_anova))
two_way_anova_summary=tidy(summary(model_two_way_anova))
#export data
Q2_shapiro_test.csv=write.csv(shapiro_test,"~/Desktop/Q2.shapiro.test.csv")
Q2_two_way_anova.csv=write.csv(two_way_anova,"~/Desktop/Q2.two.way.anova.csv")
Q2_two_way_anova_summary.csv=write.csv(two_way_anova_summary,"~/Desktop/2.two.way.anova.summary.csv")
#plot
interaction.plot(df$treatment,df$group,df$weight, ylab="weight",xlab="diet", trace.label="genotype", xpd = par(mtext,0.5 ),col="red")

dev.copy(png,"Q2_interaction.png")
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2
NC_plot=df[which(df$treatment=='NC'),]
HFD_plot=df[which(df$treatment=='HFD'),]
ggplot(NC_plot, aes(x=group,y=weight,fill=group))+geom_boxplot(outlier.shape = NULL,outlier.color = NA)+geom_jitter(color="black",shape=16,position=position_jitter(0.1),size=2)+labs(title="normal chow",x=NULL,y="weight(g)",fill="group")+theme(plot.title=element_text(hjust=0.5),text=element_text(size=18,face="bold"))

dev.copy(png,"Q2_NC.png")
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2
ggplot(HFD_plot, aes(x=group,y=weight,fill=group))+geom_boxplot(outlier.shape = NULL,outlier.color = NA)+geom_jitter(color="black",shape=16,position=position_jitter(0.1),size=2)+labs(title="high fat diet",x=NULL,y="weight(g)",fill="group")+theme(plot.title=element_text(hjust=0.5),text=element_text(size=18,face="bold"))

dev.copy(png,"Q2_HFD.png")
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2