# Load necessary libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Bring the built-in ChickWeight data set into the workspace
data(ChickWeight)
# 1. Separate chicks with a full set of 12 weight measurements from the rest.
# Every row is tagged once with its chick's observation count (grouping by
# Chick is kept), and both subsets are derived from that tagged table.
obs_per_chick <- mutate(group_by(ChickWeight, Chick), n_obs = n())
# All rows of chicks that were weighed 12 times
complete <- filter(obs_per_chick, n_obs == 12)
# One row per chick that has fewer (or more) than 12 measurements
incomplete <- obs_per_chick %>%
  filter(n_obs != 12) %>%
  distinct(Chick, n_obs)
# Count the chicks that fall into each category
complete_chicks <- n_distinct(complete$Chick)
incomplete_chicks <- n_distinct(incomplete$Chick)
# Show the number of complete chicks and the table of incomplete ones
complete_chicks
## [1] 45
incomplete
## # A tibble: 5 × 2
## # Groups: Chick [5]
## Chick n_obs
## <ord> <int>
## 1 8 11
## 2 15 8
## 3 16 7
## 4 18 2
## 5 44 10
# 2. Add 'weightgain': each chick's weight minus its own weight at Time 0.
# The subtraction is done within each Chick group so the baseline is per chick.
complete <- mutate(
  group_by(complete, Chick),
  weightgain = weight - weight[Time == 0]
)
# 3. Side-by-side boxplots of day-21 weight gain, one box per Diet, with the
# diets arranged along the x-axis by increasing median weight gain.
day21_data <- filter(complete, Time == 21)
ggplot(
  data = day21_data,
  mapping = aes(
    x = reorder(Diet, weightgain, FUN = median),
    y = weightgain,
    fill = Diet
  )
) +
  geom_boxplot() +
  # The fill colour duplicates the x-axis, so the legend is hidden
  theme(legend.position = "none") +
  labs(title = "Weight Gain by Diet on Day 21", x = "Diet", y = "Weight Gain")

# 4. Growth curves: weight against Time, one panel per Diet, with one
# connected line (plus points) for every Chick.
ggplot(
  data = complete,
  mapping = aes(x = Time, y = weight, colour = Diet, group = Chick)
) +
  geom_point() +
  geom_line() +
  theme(legend.position = "bottom") +
  facet_wrap(vars(Diet))

# 5. Select the Chick with maximum weight at Time 21 for each Diet and plot
# its growth curve over all time points.
# First find which chick(s) are heaviest on day 21 within each Diet
# (ties, if any, are all kept).
heaviest_ids <- day21_data %>%
  group_by(Diet) %>%
  filter(weight == max(weight)) %>%
  pull(Chick)
# Then take every measurement of those chicks from `complete`. Keeping only
# the day-21 rows would leave a single observation per chick, so geom_line()
# would have nothing to connect and the plot would show isolated points.
max_weight_chicks <- complete %>%
  filter(Chick %in% heaviest_ids)
ggplot(max_weight_chicks, aes(x = Time, y = weight, color = Diet, group = Chick)) +
  geom_point() +
  geom_line() +
  theme(legend.position = "bottom")

# 6. Compute average daily weights under each Diet and plot
# One row per Diet/Time combination holding the mean weight of the complete
# chicks. `.groups = "drop"` returns an ungrouped table, so later steps do not
# inherit a leftover Diet grouping and summarise() prints no grouping message.
avg_weights <- complete %>%
  group_by(Diet, Time) %>%
  summarize(avg_weight = mean(weight), .groups = "drop")
# One line per Diet showing how the average weight develops over time
ggplot(avg_weights, aes(x = Time, y = avg_weight, color = Diet, group = Diet)) +
  geom_point() +
  geom_line() +
  theme(legend.position = "bottom")

# 7. Create the residPlot function to fit a linear regression and plot residuals vs fitted values

#' Plot residuals against fitted values for a linear regression
#'
#' Fits `lm(y ~ x)` using `dat` and draws a scatter plot of the model's
#' residuals against its fitted values, with a horizontal reference line at
#' zero.
#'
#' @param dat Data handed to `lm()` as its `data` argument.
#' @param x A single character string used as the right-hand side of the
#'   model formula (the predictor).
#' @param y A single character string used as the left-hand side of the
#'   model formula (the response).
#' @param col Colour of the horizontal zero line.
#' @return A ggplot object with fitted values on the x-axis and residuals on
#'   the y-axis.
residPlot <- function(dat, x, y, col) {
  # The formula is assembled from text, so each side must be exactly one
  # non-missing string; anything else would build several formulas or garbage.
  is_string <- function(v) is.character(v) && length(v) == 1L && !is.na(v)
  if (!is_string(x) || !is_string(y)) {
    stop("`x` and `y` must each be a single character string", call. = FALSE)
  }
  model <- lm(as.formula(paste(y, "~", x)), data = dat)
  # Collect what is plotted in its own data frame; the local names avoid
  # masking stats::residuals() inside the function.
  plot_data <- data.frame(Fitted = fitted(model), Residuals = resid(model))
  ggplot(plot_data, aes(x = Fitted, y = Residuals)) +
    geom_point() +
    geom_hline(yintercept = 0, color = col) +
    labs(x = "Fitted Value", y = "Residuals")
}
# Try residPlot on the built-in LifeCycleSavings data: ddpi is regressed on
# sr and the zero reference line is drawn in red.
data(LifeCycleSavings)
residPlot(LifeCycleSavings, "sr", "ddpi", "red")
