#set up environment
setwd("~/Desktop")
library(ggplot2)
library(broom)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
#import data
HFD=read.table(text="weight group
26.97278 WT
36.19753 WT
35.49289 WT
40.15701 WT
35.60393 WT
63.81115 KO
49.61674 KO
52.67053 KO
51.81782 KO
60.87102 KO",header=T,sep="\t")
NC=read.table(text="weight group
33.79063 WT
20.65086 WT
33.91779 WT
34.79092 WT
20.29709 WT
42.05878 KO
20.36919 KO
27.55098 KO
27.80189 KO
21.97933 KO",header=T,sep="\t")
df=rbind(NC,HFD)
df$treatment=c(rep("NC",10),rep("HFD",10))
df=as.data.frame(df)
df$group_treatment=paste(df$group,df$treatment,sep="_")
#shapiro test by group and treatment
shapiro_test=df %>% group_by(group_treatment) %>% do(tidy(shapiro.test(.$weight)))
#WT NC group not normally distributed
#Log2 transform weight and run Shapiro again
df$log2_weight=log(df$weight,base=exp(2))
log2_shapiro_test=df %>% group_by(group_treatment) %>% do(tidy(shapiro.test(.$log2_weight)))
#well that made the problem even worse...
#check power and standard deviation by genotype (group) and diet (treatment)
means=df %>% group_by(group_treatment) %>% do(tibble(mean(.$weight)))
sd=df %>% group_by(group_treatment) %>% do(tibble(sd(.$weight)))
means
## # A tibble: 4 x 2
## # Groups: group_treatment [4]
## group_treatment `mean(.$weight)`
## <chr> <dbl>
## 1 KO_HFD 55.8
## 2 KO_NC 28.0
## 3 WT_HFD 34.9
## 4 WT_NC 28.7
sd
## # A tibble: 4 x 2
## # Groups: group_treatment [4]
## group_treatment `sd(.$weight)`
## <chr> <dbl>
## 1 KO_HFD 6.20
## 2 KO_NC 8.55
## 3 WT_HFD 4.82
## 4 WT_NC 7.51
t.test(sd$`sd(.$weight)`)
##
## One Sample t-test
##
## data: sd$`sd(.$weight)`
## t = 8.3798, df = 3, p-value = 0.003564
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 4.199217 9.341773
## sample estimates:
## mean of x
## 6.770495
d_WT=(34.9*28.7)/(sqrt((4.82^2+7.51^2)/2))
d_KO=(55.8*28.0)/(sqrt((6.2^2+8.55^2)/2))
d_HFD=(34.9*55.8)/(sqrt((4.82^2+6.20^2)/2))
d_NC=(28.7*28.0)/(sqrt((7.51^2+8.55^2)/2))
power.t.test(n=10,sig.level = 0.05,power=NULL,delta=d_WT,sd=1)
##
## Two-sample t test power calculation
##
## n = 10
## delta = 158.7366
## sd = 1
## sig.level = 0.05
## power = 1
## alternative = two.sided
##
## NOTE: n is number in *each* group
power.t.test(n=10,sig.level = 0.05,power=NULL,delta=d_KO,sd=1)
##
## Two-sample t test power calculation
##
## n = 10
## delta = 209.2122
## sd = 1
## sig.level = 0.05
## power = 1
## alternative = two.sided
##
## NOTE: n is number in *each* group
power.t.test(n=10,sig.level = 0.05,power=NULL,delta=d_HFD,sd=1)
##
## Two-sample t test power calculation
##
## n = 10
## delta = 350.6947
## sd = 1
## sig.level = 0.05
## power = 1
## alternative = two.sided
##
## NOTE: n is number in *each* group
power.t.test(n=10,sig.level = 0.05,power=NULL,delta=d_NC,sd=1)
##
## Two-sample t test power calculation
##
## n = 10
## delta = 99.86555
## sd = 1
## sig.level = 0.05
## power = 1
## alternative = two.sided
##
## NOTE: n is number in *each* group
#moving on with the non-normalized data
#two way anova for variance in weight considering diet and genotype
model_two_way_anova=lm(weight~group*treatment,data=df)
two_way_anova=tidy(anova(model_two_way_anova))
two_way_anova_summary=tidy(summary(model_two_way_anova))
#export data
Q2_shapiro_test.csv=write.csv(shapiro_test,"~/Desktop/Q2.shapiro.test.csv")
Q2_two_way_anova.csv=write.csv(two_way_anova,"~/Desktop/Q2.two.way.anova.csv")
Q2_two_way_anova_summary.csv=write.csv(two_way_anova_summary,"~/Desktop/2.two.way.anova.summary.csv")
#plot
interaction.plot(df$treatment,df$group,df$weight, ylab="weight",xlab="diet", trace.label="genotype", xpd = par(mtext,0.5 ),col="red")

dev.copy(png,"Q2_interaction.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
NC_plot=df[which(df$treatment=='NC'),]
HFD_plot=df[which(df$treatment=='HFD'),]
ggplot(NC_plot, aes(x=group,y=weight,fill=group))+geom_boxplot(outlier.shape = NULL,outlier.color = NA)+geom_jitter(color="black",shape=16,position=position_jitter(0.1),size=2)+labs(title="normal chow",x=NULL,y="weight(g)",fill="group")+theme(plot.title=element_text(hjust=0.5),text=element_text(size=18,face="bold"))

dev.copy(png,"Q2_NC.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
ggplot(HFD_plot, aes(x=group,y=weight,fill=group))+geom_boxplot(outlier.shape = NULL,outlier.color = NA)+geom_jitter(color="black",shape=16,position=position_jitter(0.1),size=2)+labs(title="high fat diet",x=NULL,y="weight(g)",fill="group")+theme(plot.title=element_text(hjust=0.5),text=element_text(size=18,face="bold"))

dev.copy(png,"Q2_HFD.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2