# EEB 201, R bootcamp
# 9/14/21 afternoon exercises
# Joanna Wu's assignment
library(tidyverse)
library(ggplot2)
# Load the built-in ChickWeight data set and take a first look at it:
# head() shows the first six rows, str() the column types and attributes.
cw <- ChickWeight
head(cw)
## weight Time Chick Diet
## 1 42 0 1 1
## 2 51 2 1 1
## 3 59 4 1 1
## 4 64 6 1 1
## 5 76 8 1 1
## 6 93 10 1 1
str(cw)
## Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame': 578 obs. of 4 variables:
## $ weight: num 42 51 59 64 76 93 106 125 149 171 ...
## $ Time : num 0 2 4 6 8 10 12 14 16 18 ...
## $ Chick : Ord.factor w/ 50 levels "18"<"16"<"15"<..: 15 15 15 15 15 15 15 15 15 15 ...
## $ Diet : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
## - attr(*, "formula")=Class 'formula' language weight ~ Time | Chick
## .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
## - attr(*, "outer")=Class 'formula' language ~Diet
## .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
## - attr(*, "labels")=List of 2
## ..$ x: chr "Time"
## ..$ y: chr "Body weight"
## - attr(*, "units")=List of 2
## ..$ x: chr "(days)"
## ..$ y: chr "(gm)"
# 1. Plot and examine if there is a statistically significant difference between the weights of chicks assigned to different diet treatments on day 0.
# Keep only the observations recorded on day 0 (Time == 0).
day0 <- filter(cw, Time == 0)
# Box plot of day-0 weight, one box per diet, filled by diet.
p1 <- ggplot(data = day0, mapping = aes(x = Diet, y = weight, fill = Diet)) +
  geom_boxplot()
# Print the plot with axis labels, a title and the classic theme;
# p1 itself is left unlabelled.
p1 +
  labs(x = "Diet types", y = "Weight (g)", title = "Chick weights on Day 0") +
  theme_classic()

## One-way ANOVA of day-0 weight as a function of diet.
anova0 <- aov(formula = weight ~ Diet, data = day0)
summary(anova0)
## Df Sum Sq Mean Sq F value Pr(>F)
## Diet 3 4.32 1.440 1.132 0.346
## Residuals 46 58.50 1.272
## As P = 0.346 (> 0.05), there is no statistically significant difference in chick weights among the diet types on day 0.
# 2. Plot and examine if there is a statistically significant difference between the weights of chicks assigned to different diet treatments at the end of the study - on day 21.
## Keep only the observations recorded on day 21 (Time == 21), the end of the study.
day21 <- filter(cw, Time == 21)
# Box plot of day-21 weight, one box per diet, filled by diet.
p2 <- ggplot(data = day21, mapping = aes(x = Diet, y = weight, fill = Diet)) +
  geom_boxplot()
# Print the plot with axis labels, a title and the classic theme;
# p2 itself is left unlabelled.
p2 +
  labs(x = "Diet types", y = "Weight (g)", title = "Chick weights on Day 21") +
  theme_classic()

## From visual inspection of the box plot, diets 1 & 3 appear to differ from each other.
## One-way ANOVA of day-21 weight as a function of diet.
anova21 <- aov(formula = weight ~ Diet, data = day21)
summary(anova21)
## Df Sum Sq Mean Sq F value Pr(>F)
## Diet 3 57164 19055 4.655 0.00686 **
## Residuals 41 167839 4094
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Yes, P = 0.00686 (P < 0.01), meaning there are significant differences in weight as determined by diet.
## Follow up the significant ANOVA with Tukey's HSD test to see which pairs
## of diets differ (95% family-wise confidence level, the function's default).
TukeyHSD(x = anova21, conf.level = 0.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = weight ~ Diet, data = day21)
##
## $Diet
## diff lwr upr p adj
## 2-1 36.95000 -32.11064 106.01064 0.4868095
## 3-1 92.55000 23.48936 161.61064 0.0046959
## 4-1 60.80556 -10.57710 132.18821 0.1192661
## 3-2 55.60000 -21.01591 132.21591 0.2263918
## 4-2 23.85556 -54.85981 102.57092 0.8486781
## 4-3 -31.74444 -110.45981 46.97092 0.7036249
## Diets 1 & 3 are significantly different from one another (P < 0.05), but the other groups are not.
# 3.Is there an effect of diet on chick growth? (HINT: first determine what ‘growth’ means).
## Growth here is defined by chick weight on day 21, given we determined there are no differences in weight on day 0.
## Yes, the analysis of variance (ANOVA) shows P = 0.00686 (P < 0.01), meaning there are significant differences in weight as determined by diet.
## The caveat is that there could be other factors affecting weight
# 4. Plot the effects of diet on chick growth.
# Smooth over the chick-to-chick variation: geom_smooth() draws one fitted
# curve per diet (the colour aesthetic), using ggplot2's default smoother.
p3 <- ggplot(data = cw, mapping = aes(x = Time, y = weight, colour = Diet)) +
  geom_smooth()
# Print the plot with axis labels, a title and the classic theme.
p3 +
  labs(x = "Time", y = "Weight (g)", title = "Chick weights over time") +
  theme_classic()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# the output plot is a line plot with one smoothed line for each of the 4 chick diet types.
# the x-axis is time, the y-axis is weight, and line colour is diet type
# 5. Using a ‘for’ loop, plot the growth of each chick in a different color, all on the same plot.
# ggplot2 version (works well, and more easily than a loop :)):
# mapping colour to Chick draws one growth line per chick in a single call.
ggplot(data = cw, mapping = aes(x = Time, y = weight, colour = Chick)) +
  geom_line() +
  theme_classic()

# Base-graphics version: plot every chick's growth curve with a `for` loop.
# The chick IDs come from the levels of the `Chick` factor and are put in
# numeric order so that chick k is drawn with the k-th colour of the palette
# (assumes the level labels are numeric, as they are in ChickWeight).
chick_ids <- levels(cw$Chick)
chick_ids <- chick_ids[order(as.integer(chick_ids))]
# to make each chick a different color, create a vector with one color per chick
colors <- topo.colors(length(chick_ids))
# Create an empty base plot (type = "n") whose axes span the whole data set.
# Sizing the axes from one chick's data would clip the curve of every chick
# that grows heavier than that chick.
plot(
  x = range(cw$Time, na.rm = TRUE),
  y = range(cw$weight, na.rm = TRUE),
  type = "n",
  xlab = "Time",
  ylab = "Chick weight (g)"
)
for (i in seq_along(chick_ids)) {
  # filter for each individual chick
  sub <- cw %>%
    filter(Chick == chick_ids[i])
  # use lines() to superimpose this chick's curve on the existing plot
  lines(x = sub$Time, y = sub$weight, col = colors[i])
}

# knit into rmd
# library(knitr)
# library(markdown)
# spin('r_bootcamp_pinter_wollman_exercise.R')