library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# --------------------------------------------------
# Load the Dodgers game data from CSV into a tibble
# --------------------------------------------------
# Column types are guessed by readr; the path is relative to the working
# directory.
dodgers_data <- read_csv(file = "DodgersData.csv")
## Rows: 81 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): month, day_of_week, opponent, skies, day_night, cap, shirt, firewor...
## dbl (3): day, attend, temp
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# --------------------------------------------------
# Peek at the first six rows to sanity-check the import
# --------------------------------------------------
dodgers_data %>%
  head(n = 6L)
## # A tibble: 6 × 12
## month day attend day_of_week opponent temp skies day_night cap shirt
## <chr> <dbl> <dbl> <chr> <chr> <dbl> <chr> <chr> <chr> <chr>
## 1 APR 10 56000 Tuesday Pirates 67 Clear Day NO NO
## 2 APR 11 29729 Wednesday Pirates 58 Cloudy Night NO NO
## 3 APR 12 28328 Thursday Pirates 57 Cloudy Night NO NO
## 4 APR 13 31601 Friday Padres 54 Cloudy Night NO NO
## 5 APR 14 46549 Saturday Padres 57 Cloudy Night NO NO
## 6 APR 15 38359 Sunday Padres 65 Clear Day NO NO
## # ℹ 2 more variables: fireworks <chr>, bobblehead <chr>
# --------------------------------------------------
# Plot 1: Does Day of Week Impact Attendance?
# --------------------------------------------------
# day_of_week is read as a character column, so ggplot2 would order the boxes
# alphabetically (Friday, Monday, Saturday, ...). Convert it to a factor with
# explicit levels so the x-axis runs Monday through Sunday, the same approach
# used for the month column in Plot 3.
# NOTE(review): assumes day names are spelled out in full ("Monday", ...), as
# in the data preview; any other spelling would become NA -- confirm against
# the CSV.
dodgers_data %>%
  mutate(
    day_of_week = factor(
      day_of_week,
      levels = c(
        "Monday", "Tuesday", "Wednesday", "Thursday",
        "Friday", "Saturday", "Sunday"
      )
    )
  ) %>%
  ggplot(aes(x = day_of_week, y = attend, fill = day_of_week)) +
  # Fill only distinguishes the boxes visually; the x-axis already labels the
  # days, so the legend is suppressed.
  geom_boxplot(show.legend = FALSE) +
  labs(
    title = "Attendance by Day of Week",
    subtitle = "Box Plot of Dodgers Game Attendance",
    x = "Day of Week",
    y = "Attendance",
    caption = "Source: DodgersData.csv"
  ) +
  theme_minimal()

# --------------------------------------------------
# Plot 2: Does Temperature Impact Attendance?
# --------------------------------------------------
# Scatter of attendance against game-time temperature, overlaid with a
# linear-model trend line (geom_smooth with method = "lm").
ggplot(data = dodgers_data, mapping = aes(x = temp, y = attend)) +
  geom_point(color = "blue", alpha = 0.5) +
  geom_smooth(color = "red", method = "lm") +
  labs(
    x = "Temperature (°F)",
    y = "Attendance",
    title = "Attendance vs. Temperature",
    subtitle = "Scatter Plot with Linear Trend",
    caption = "Source: DodgersData.csv"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# --------------------------------------------------
# Plot 3: Monthly Attendance Trends
# --------------------------------------------------
# Sum attendance per month and draw it as a line with point markers.
# Month abbreviations (APR, MAY, ...) are converted to a factor with explicit
# levels so the x-axis is chronological rather than alphabetical. Values not
# listed in `levels` would become NA.
dodgers_data %>%
  mutate(
    month = factor(
      month,
      levels = c("APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT")
    )
  ) %>%
  group_by(month) %>%
  summarize(total_attendance = sum(attend), .groups = "drop") %>%
  # group = 1 joins all months into a single line; without it a discrete
  # x-axis puts each month in its own group and no line is drawn.
  ggplot(aes(x = month, y = total_attendance, group = 1)) +
  # `linewidth` replaces `size` for lines, which was deprecated in
  # ggplot2 3.4.0.
  geom_line(color = "lightpink", linewidth = 1) +
  geom_point(color = "lightblue", size = 3) +
  labs(
    title = "Monthly Total Attendance",
    subtitle = "Cumulative Dodgers Home Attendance by Month",
    x = "Month",
    y = "Total Attendance",
    caption = "Source: DodgersData.csv"
  ) +
  theme_minimal()
