# EEB 201, R bootcamp
# 9/14/21 afternoon exercises
# Joanna Wu's assignment

library(tidyverse)
library(ggplot2)

# Load the built-in ChickWeight data set under a shorter working name.
cw <- ChickWeight
# Preview the first rows to check the columns: weight, Time, Chick, Diet.
head(cw)
##   weight Time Chick Diet
## 1     42    0     1    1
## 2     51    2     1    1
## 3     59    4     1    1
## 4     64    6     1    1
## 5     76    8     1    1
## 6     93   10     1    1
# Inspect column types; note that Chick is an ordered factor and Diet a factor.
str(cw)
## Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame':   578 obs. of  4 variables:
##  $ weight: num  42 51 59 64 76 93 106 125 149 171 ...
##  $ Time  : num  0 2 4 6 8 10 12 14 16 18 ...
##  $ Chick : Ord.factor w/ 50 levels "18"<"16"<"15"<..: 15 15 15 15 15 15 15 15 15 15 ...
##  $ Diet  : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
##  - attr(*, "formula")=Class 'formula'  language weight ~ Time | Chick
##   .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv> 
##  - attr(*, "outer")=Class 'formula'  language ~Diet
##   .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv> 
##  - attr(*, "labels")=List of 2
##   ..$ x: chr "Time"
##   ..$ y: chr "Body weight"
##  - attr(*, "units")=List of 2
##   ..$ x: chr "(days)"
##   ..$ y: chr "(gm)"
# 1. Plot and examine if there is a statistically significant difference between the weights of chicks assigned to different diet treatments on day 0.
# Keep only the day-0 measurements (rows where Time is 0).
day0 <- cw %>%
  filter(Time == 0)

# Boxplot of day-0 weight for each diet, with boxes filled by diet.
p1 <- ggplot(day0, aes(x = Diet, y = weight, fill = Diet)) +
  geom_boxplot()
# Print the plot with axis labels, a title and the classic theme.
p1 +
  labs(x = "Diet types", y = "Weight (g)", title = "Chick weights on Day 0") +
  theme_classic()

# knitr figure caption from the rendered report; kept as a comment so the
# script still parses.
## plot of chunk Noa afternoon exercises ----------- ##

## One-way ANOVA: does mean chick weight on day 0 differ among the diets?
anova0 <- aov(formula = weight ~ Diet, data = day0)
summary(anova0)
##             Df Sum Sq Mean Sq F value Pr(>F)
## Diet         3   4.32   1.440   1.132  0.346
## Residuals   46  58.50   1.272
## P = 0.346, so there is no statistically significant difference in chick weight among the diet types on day 0.

# 2. Plot and examine if there is a statistically significant difference between the weights of chicks assigned to different diet treatments at the end of the study - on day 21.
# Keep only the day-21 measurements (rows where Time is 21).
day21 <- cw %>%
  filter(Time == 21)

# Boxplot of day-21 weight for each diet, with boxes filled by diet.
p2 <- ggplot(day21, aes(x = Diet, y = weight, fill = Diet)) +
  geom_boxplot()
# Print the plot with axis labels, a title and the classic theme.
p2 +
  labs(x = "Diet types", y = "Weight (g)", title = "Chick weights on Day 21") +
  theme_classic()

# knitr figure caption from the rendered report; kept as a comment so the
# script still parses.
## plot of chunk Noa afternoon exercises ----------- ##

## In the boxplots, diets 1 and 3 look the most different from each other.
## One-way ANOVA: does mean chick weight on day 21 differ among the diets?
anova21 <- aov(formula = weight ~ Diet, data = day21)
summary(anova21)
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## Diet         3  57164   19055   4.655 0.00686 **
## Residuals   41 167839    4094                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Yes: P = 0.00686 (P < 0.01), so mean weight on day 21 differs significantly among the diets.

## Follow up with Tukey's HSD test to see which pairs of diets differ.
TukeyHSD(anova21, conf.level = 0.95)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = weight ~ Diet, data = day21)
## 
## $Diet
##          diff        lwr       upr     p adj
## 2-1  36.95000  -32.11064 106.01064 0.4868095
## 3-1  92.55000   23.48936 161.61064 0.0046959
## 4-1  60.80556  -10.57710 132.18821 0.1192661
## 3-2  55.60000  -21.01591 132.21591 0.2263918
## 4-2  23.85556  -54.85981 102.57092 0.8486781
## 4-3 -31.74444 -110.45981  46.97092 0.7036249
## Diets 1 & 3 are significantly different from one another (P < 0.05), but the other groups are not.

# 3.Is there an effect of diet on chick growth? (HINT: first determine what ‘growth’ means).
## Growth here is defined by chick weight on day 21, given we determined there are no differences in weight on day 0.
## Yes, the analysis of variance (ANOVA) on day-21 weights gives P = 0.00686 (P < 0.01), meaning there are significant differences in weight as determined by diet.
## The caveat is that there could be other factors affecting weight

# 4. Plot the effects of diet on chick growth.
# Smooth over the variation among individual chicks with geom_smooth(), which
# draws one fitted curve per diet because color is mapped to Diet.
p3 <- ggplot(cw, aes(x = Time, y = weight, color = Diet)) +
  geom_smooth()
# Print the plot with axis labels, a title and the classic theme.
p3 +
  labs(x = "Time", y = "Weight (g)", title = "Chick weights over time") +
  theme_classic()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# knitr figure caption from the rendered report; kept as a comment so the
# script still parses.
## plot of chunk Noa afternoon exercises ----------- ##

# The output is a line plot with one smoothed curve for each of the 4 diet types.
# The x-axis is time, the y-axis is weight, and color identifies the diet.

# 5. Using a ‘for’ loop, plot the growth of each chick in a different color, all on the same plot.
# The ggplot version below needs no loop: mapping color to Chick draws one
# line per chick, each in its own color.
ggplot(cw, aes(x = Time, y = weight, color = Chick)) +
  geom_line() +
  theme_classic()

# knitr figure caption from the rendered report; kept as a comment so the
# script still parses.
## plot of chunk Noa afternoon exercises ----------- ##

# Base-graphics version: use a for loop to draw every chick's growth curve on
# one plot, each in its own color.

# Chick is an ordered factor whose level labels are the chick numbers, so
# recover the numbers and sort them to loop in numeric order.
chick_ids <- sort(as.integer(levels(cw$Chick)))

# One color per chick, however many chicks the data contain.
colors <- topo.colors(length(chick_ids))

# Start from an empty canvas (type = "n") whose axes span the whole data set.
# Taking the axis limits from a single chick would clip the curve of any chick
# that grew heavier than that one.
plot(x = range(cw$Time), y = range(cw$weight), type = "n",
     xlab = "Time",
     ylab = "Chick weight (g)")

for (i in seq_along(chick_ids)) {
  # Rows belonging to the i-th chick only.
  sub <- cw %>%
    filter(Chick == chick_ids[i])

  # lines() adds to the existing plot instead of starting a new one.
  lines(x = sub$Time, y = sub$weight, col = colors[i])
}

# knitr figure caption from the rendered report; kept as a comment so the
# script still parses.
## plot of chunk Noa afternoon exercises ----------- ##

# knit into rmd
# library(knitr)
# library(markdown)
# spin('r_bootcamp_pinter_wollman_exercise.R')