library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# --------------------------------------------------
# Load the Dodgers game data
# --------------------------------------------------
# Parse the CSV (path is relative to the working directory) into a tibble.
# readr guesses the column types and reports the specification it settled on.
dodgers_data <- readr::read_csv(file = "DodgersData.csv")
## Rows: 81 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): month, day_of_week, opponent, skies, day_night, cap, shirt, firewor...
## dbl (3): day, attend, temp
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# --------------------------------------------------
# Peek at the first rows of the data
# --------------------------------------------------
# Auto-prints the first six rows as a quick sanity check on the import.
dodgers_data %>%
  head(n = 6L)
## # A tibble: 6 × 12
##   month   day attend day_of_week opponent  temp skies  day_night cap   shirt
##   <chr> <dbl>  <dbl> <chr>       <chr>    <dbl> <chr>  <chr>     <chr> <chr>
## 1 APR      10  56000 Tuesday     Pirates     67 Clear  Day       NO    NO   
## 2 APR      11  29729 Wednesday   Pirates     58 Cloudy Night     NO    NO   
## 3 APR      12  28328 Thursday    Pirates     57 Cloudy Night     NO    NO   
## 4 APR      13  31601 Friday      Padres      54 Cloudy Night     NO    NO   
## 5 APR      14  46549 Saturday    Padres      57 Cloudy Night     NO    NO   
## 6 APR      15  38359 Sunday      Padres      65 Clear  Day       NO    NO   
## # ℹ 2 more variables: fireworks <chr>, bobblehead <chr>
# --------------------------------------------------
# Plot 1: Does Day of Week Impact Attendance?
# --------------------------------------------------
# day_of_week is read as character, so ggplot2 would order the boxes
# alphabetically (Friday, Monday, Saturday, ...). Convert it to a factor with
# explicit levels so the x-axis runs in calendar order, mirroring how the
# monthly plot orders its months.
dodgers_data %>%
  mutate(
    day_of_week = factor(
      day_of_week,
      levels = c(
        "Monday", "Tuesday", "Wednesday", "Thursday",
        "Friday", "Saturday", "Sunday"
      )
    )
  ) %>%
  ggplot(aes(x = day_of_week, y = attend, fill = day_of_week)) +
  # The fill only colour-codes the boxes; the x-axis already names each day,
  # so the legend is suppressed.
  geom_boxplot(show.legend = FALSE) +
  labs(
    title    = "Attendance by Day of Week",
    subtitle = "Box Plot of Dodgers Game Attendance",
    x        = "Day of Week",
    y        = "Attendance",
    caption  = "Source: DodgersData.csv"
  ) +
  theme_minimal()

# --------------------------------------------------
# Plot 2: Does Temperature Impact Attendance?
# --------------------------------------------------
# Scatter of attendance against game temperature with a fitted straight line.
dodgers_data %>%
  ggplot(aes(x = temp, y = attend)) +
  geom_point(alpha = 0.5, color = "blue") +
  # State the model formula explicitly: it is the same y ~ x fit ggplot2 would
  # choose on its own, but spelling it out documents the model and avoids the
  # "using formula" message on every render.
  geom_smooth(method = "lm", formula = y ~ x, color = "red") +
  labs(
    title    = "Attendance vs. Temperature",
    subtitle = "Scatter Plot with Linear Trend",
    x        = "Temperature (°F)",
    y        = "Attendance",
    caption  = "Source: DodgersData.csv"
  ) +
  theme_minimal()

# --------------------------------------------------
# Plot 3: Monthly Attendance Trends
# --------------------------------------------------
# Convert month names (APR, MAY, etc.) to a factor so they are in
# chronological order rather than alphabetical order, then sum attendance
# per month and draw the totals as a connected line with point markers.
dodgers_data %>%
  mutate(
    month = factor(
      month,
      levels = c("APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT")
    )
  ) %>%
  group_by(month) %>%
  summarize(total_attendance = sum(attend), .groups = "drop") %>%
  # group = 1 puts every month in a single series; without it a discrete
  # x-axis would give each month its own group and no line would be drawn.
  ggplot(aes(x = month, y = total_attendance, group = 1)) +
  # Line thickness is set with `linewidth`; using `size` for lines has been
  # deprecated since ggplot2 3.4.0 and triggers a lifecycle warning.
  geom_line(color = "lightpink", linewidth = 1) +
  geom_point(color = "lightblue", size = 3) +
  labs(
    title    = "Monthly Total Attendance",
    subtitle = "Cumulative Dodgers Home Attendance by Month",
    x        = "Month",
    y        = "Total Attendance",
    caption  = "Source: DodgersData.csv"
  ) +
  theme_minimal()