library(tidyverse)
df_raw <- read_csv("Penguin_Measures_wIDs copy.csv")
# Pick columns
outcome_col <- "body_mass_g"
predictor_col <- "flipper_length_mm"
group_col <- "species"
df <- df_raw %>%
select(all_of(c(outcome_col, predictor_col, group_col))) %>%
rename(
outcome = all_of(outcome_col),
predictor = all_of(predictor_col),
group = all_of(group_col)
) %>%
mutate(
group = as.factor(group),
outcome = as.numeric(outcome),
predictor = as.numeric(predictor)
) %>%
filter(!is.na(outcome), !is.na(predictor), !is.na(group))
# Quick checks
df %>% slice_head(n = 6)
## # A tibble: 6 × 3
## outcome predictor group
## <dbl> <dbl> <fct>
## 1 3400 174 Adelie
## 2 3800 189 Adelie
## 3 3800 187 Adelie
## 4 3200 187 Adelie
## 5 3150 172 Adelie
## 6 4500 211 Gentoo
df %>% count(group)
## # A tibble: 3 × 2
## group n
## <fct> <int>
## 1 Adelie 151
## 2 Chinstrap 68
## 3 Gentoo 123
# Model
lm_result <- lm(outcome ~ predictor + group, data = df)
summary(lm_result)
##
## Call:
## lm(formula = outcome ~ predictor + group, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -927.70 -254.82 -23.92 241.16 1191.68
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4031.477 584.151 -6.901 2.55e-11 ***
## predictor 40.705 3.071 13.255 < 2e-16 ***
## groupChinstrap -206.510 57.731 -3.577 0.000398 ***
## groupGentoo 266.810 95.264 2.801 0.005392 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 375.5 on 338 degrees of freedom
## Multiple R-squared: 0.7826, Adjusted R-squared: 0.7807
## F-statistic: 405.7 on 3 and 338 DF, p-value: < 2.2e-16
# Plot (this will appear in the knitted HTML)
ggplot(df, aes(x = predictor, y = outcome, color = group)) +
geom_point(alpha = 0.75) +
geom_smooth(method = "lm", se = FALSE) +
facet_wrap(~ group) +
labs(
title = "Body mass vs Flipper length by Species",
x = "Flipper length (mm)",
y = "Body mass (g)",
color = "Species"
) +
theme_minimal()
