library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(corrplot)
## corrplot 0.95 loaded
library(ggplot2)
library(knitr)

setwd("C:/Users/nika/Downloads/R/Other/csv")
Players <- read_excel("Player Session Data.xlsx")

##Variables used: Session Date Day of Week Hour of Play: Morning (6AM to 12PM), Daytime (12PM to 5PM), Evening (5PM to 10PM) Average over Week Average over Month

#Clean the data
clean_player_data <- Players |>
  mutate(`Session Start Time` = format(`Session Start Time`, "%H:%M")) |>
  mutate('Hour of Play' = case_when (
    `Session Start Time` >= "06:00" & `Session Start Time` < "12:00" ~ "Morning",
    `Session Start Time` >= "12:00" & `Session Start Time` < "17:00" ~ "Daytime",
    `Session Start Time` >= "17:00" & `Session Start Time` < "23:00" ~ "Evening",
    TRUE ~ NA_character_)) |>
  mutate(Date = as.Date(`Session Date`), Year = year(Date), Month = month(Date, label = TRUE), Week = week(Date), YearMonth = format(Date, "%Y-%m")) |>
    select('Session Start Time', 'Session Type' , 'Coach', 'Day of Week', 'Hour of Play', 'Session Date', 'Date', 'Year', 'Month', 'Week', 'YearMonth')

Tabels and Graphs:

summary(clean_player_data)
##  Session Start Time Session Type          Coach           Day of Week       
##  Length:24711       Length:24711       Length:24711       Length:24711      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  Hour of Play        Session Date                      Date           
##  Length:24711       Min.   :2024-01-01 10:30:00   Min.   :2024-01-01  
##  Class :character   1st Qu.:2024-05-30 18:15:00   1st Qu.:2024-05-30  
##  Mode  :character   Median :2024-11-30 14:00:00   Median :2024-11-30  
##                     Mean   :2024-11-30 13:59:52   Mean   :2024-11-29  
##                     3rd Qu.:2025-05-23 13:00:00   3rd Qu.:2025-05-23  
##                     Max.   :2025-12-30 09:45:00   Max.   :2025-12-30  
##                     NA's   :2                     NA's   :2           
##       Year          Month            Week        YearMonth        
##  Min.   :2024   Mar    : 2540   Min.   : 1.00   Length:24711      
##  1st Qu.:2024   Jan    : 2539   1st Qu.:11.00   Class :character  
##  Median :2024   Feb    : 2290   Median :23.00   Mode  :character  
##  Mean   :2024   Oct    : 2262   Mean   :24.13                     
##  3rd Qu.:2025   Apr    : 2212   3rd Qu.:37.00                     
##  Max.   :2025   (Other):12866   Max.   :53.00                     
##  NA's   :2      NA's   :    2   NA's   :2
# 1. Weekly sessions by time of day - Year over Year
weekly_sessions <- clean_player_data |>
  filter(!is.na(`Hour of Play`)) |>
  group_by(Year, Week, `Hour of Play`) |>
  summarise(Sessions = n(), .groups = "drop")

ggplot(weekly_sessions, aes(x = Week, y = Sessions, color = `Hour of Play`)) +
  geom_line(linewidth = 1) +
  facet_wrap(~Year, ncol = 1) +
  labs(title = "Weekly Play Sessions by Time of Day",
       subtitle = "Year over Year Comparison",
       x = "Week of Year",
       y = "Number of Sessions",
       color = "Time of Day") +
  theme_minimal() +
  scale_color_manual(values = c("Morning" = "#FDB462", 
                                 "Daytime" = "#80B1D3", 
                                 "Evening" = "#8DD3C7"))

# 2. Total sessions per month by time of day
monthly_sessions <- clean_player_data |>
  group_by(YearMonth, `Hour of Play`) |>
  summarise(Sessions = n(), .groups = "drop") |>
  mutate(Date = as.Date(paste0(YearMonth, "-01")))

ggplot(monthly_sessions, aes(x = Date, y = Sessions, fill = `Hour of Play`)) +
  geom_col(position = "stack") +
  labs(title = "Monthly Play Sessions by Time of Day",
       subtitle = "Stacked view showing usage patterns",
       x = "Month",
       y = "Number of Sessions",
       fill = "Time of Day") +
  theme_minimal() +
  scale_fill_manual(values = c("Morning" = "#FDB462", 
                                "Daytime" = "#80B1D3", 
                                "Evening" = "#8DD3C7")) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b")
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_col()`).

# 3. Day of week patterns - Average by month
day_patterns <- clean_player_data |>
  filter(!is.na(`Hour of Play`)) |>
  group_by(YearMonth, `Day of Week`, `Hour of Play`) |>
  summarise(Sessions = n(), .groups = "drop") |>
  separate(YearMonth, into = c("Year", "Month"), sep = "-", remove = FALSE) |>
  group_by(Year, Month, `Day of Week`, `Hour of Play`) |>
  summarise(Avg_Sessions = mean(Sessions), .groups = "drop")

# Order days of week properly
day_patterns$`Day of Week` <- factor(day_patterns$`Day of Week`, 
                                      levels = c("Monday", "Tuesday", "Wednesday", 
                                                "Thursday", "Friday", "Saturday", "Sunday"))

ggplot(day_patterns, aes(x = `Day of Week`, y = Avg_Sessions, fill = `Hour of Play`)) +
  geom_col(position = "dodge") +
  facet_grid(Month ~ Year) +
  labs(title = "Average Sessions by Day of Week (Monthly View)",
       subtitle = "Comparing patterns across months",
       x = "Day of Week",
       y = "Average Sessions",
       fill = "Time of Day") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        axis.text.y = element_text(size = 8),  # Adjust y-axis text size
    strip.text.y = element_text(angle = 0, hjust = 0)) +
  scale_fill_manual(values = c("Morning" = "#FDB462", 
                                "Daytime" = "#80B1D3", 
                                "Evening" = "#8DD3C7"))

# 4. November 2024 vs November 2025 - Monday comparison
nov_comparison <- clean_player_data |>
  filter(Month == "Nov", `Day of Week` == "Mon") |>
  group_by(Year, `Hour of Play`) |>
  summarise(Avg_Sessions = n()/4, .groups = "drop") # Assuming ~4 Mondays per month

ggplot(nov_comparison, aes(x = factor(Year), y = Avg_Sessions, fill = `Hour of Play`)) +
  geom_col(position = "dodge") +
  labs(title = "Average Monday Sessions in November",
       subtitle = "2024 vs 2025 Comparison",
       x = "Year",
       y = "Average Sessions per Monday",
       fill = "Time of Day") +
  theme_minimal() +
  scale_fill_manual(values = c("Morning" = "#FDB462", 
                                "Daytime" = "#80B1D3", 
                                "Evening" = "#8DD3C7"))

# 5. Overall trend - Total sessions over time
total_trend <- clean_player_data |>
  group_by(YearMonth) |>
  summarise(Total_Sessions = n(), .groups = "drop") |>
  mutate(Date = as.Date(paste0(YearMonth, "-01")))

ggplot(total_trend, aes(x = Date, y = Total_Sessions)) +
  geom_line(color = "#4575B4", linewidth = 1) +
  geom_point(color = "#4575B4", size = 2) +
  geom_smooth(method = "loess", se = TRUE, color = "#D73027") +
  labs(title = "Overall Play Session Trend",
       subtitle = "Total sessions per month with trend line",
       x = "Month",
       y = "Total Sessions") +
  theme_minimal() +
  scale_x_date(date_breaks = "1 month", date_labels = "%b")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

# 6. Monthly trend table
monthly_trend_table <- clean_player_data |>
  filter(!is.na(`Hour of Play`)) |>
  group_by(YearMonth, `Hour of Play`) |>
  summarise(Sessions = n(), .groups = "drop") |>
  pivot_wider(names_from = `Hour of Play`, values_from = Sessions, values_fill = 0) |>
  mutate(Total = Morning + Daytime + Evening) |>
  arrange(YearMonth)

kable(monthly_trend_table, 
      caption = "Monthly Play Sessions by Time of Day",
      col.names = c("Month", "Morning", "Daytime", "Evening", "Total"))
Monthly Play Sessions by Time of Day
Month Morning Daytime Evening Total
2024-01 466 460 448 1374
2024-02 423 425 456 1304
2024-03 468 459 417 1344
2024-04 363 393 355 1111
2024-05 423 361 315 1099
2024-06 362 291 359 1012
2024-07 324 395 351 1070
2024-08 330 315 286 931
2024-09 337 298 318 953
2024-10 380 382 368 1130
2024-11 383 284 362 1029
2024-12 354 214 371 939
2025-01 401 366 398 1165
2025-02 322 344 320 986
2025-03 403 422 371 1196
2025-04 370 382 349 1101
2025-05 337 431 322 1090
2025-06 410 332 324 1066
2025-07 287 370 317 974
2025-08 303 371 305 979
2025-09 332 367 333 1032
2025-10 358 407 367 1132
2025-11 208 198 216 622
2025-12 37 16 17 70
# 7. Trend table Monday to Friday
Mon_to_Fri_trend_table <- clean_player_data |>
  filter(!is.na(`Hour of Play`), !`Day of Week` %in% c("Sat", "Sun")) |>
  group_by(YearMonth, `Hour of Play`) |>
  summarise(Sessions = n(), .groups = "drop") |>
  pivot_wider(names_from = `Hour of Play`, values_from = Sessions, values_fill = 0) |>
  mutate(Total = Morning + Daytime + Evening) |>
  arrange(YearMonth)

kable(Mon_to_Fri_trend_table, 
      caption = "Weekday Play Sessions by Time of Day (Monday-Friday)",
      col.names = c("Month", "Morning", "Daytime", "Evening", "Total"))
Weekday Play Sessions by Time of Day (Monday-Friday)
Month Morning Daytime Evening Total
2024-01 270 386 268 924
2024-02 249 346 305 900
2024-03 236 382 231 849
2024-04 216 351 240 807
2024-05 318 333 219 870
2024-06 211 265 204 680
2024-07 220 350 229 799
2024-08 223 281 167 671
2024-09 201 266 195 662
2024-10 286 346 264 896
2024-11 233 240 217 690
2024-12 208 183 215 606
2025-01 232 298 243 773
2025-02 220 290 197 707
2025-03 204 351 206 761
2025-04 230 332 237 799
2025-05 243 363 210 816
2025-06 250 277 191 718
2025-07 205 318 198 721
2025-08 199 300 186 685
2025-09 232 323 220 775
2025-10 266 354 264 884
2025-11 135 160 128 423
2025-12 36 16 12 64
# 8. Trend table Saturday and Sunday
Sat_and_Sun_trend_table <- clean_player_data |>
  filter(!is.na(`Hour of Play`), `Day of Week` %in% c("Sat", "Sun")) |>
  group_by(YearMonth, `Hour of Play`) |>
  summarise(Sessions = n(), .groups = "drop") |>
  pivot_wider(names_from = `Hour of Play`, values_from = Sessions, values_fill = 0) |>
  mutate(Total = Morning + Daytime + Evening) |>
  arrange(YearMonth)

kable(Sat_and_Sun_trend_table, 
      caption = "Weekend Play Sessions by Time of Day (Saturday and Sunday)",
      col.names = c("Month", "Morning", "Daytime", "Evening", "Total"))
Weekend Play Sessions by Time of Day (Saturday and Sunday)
Month Morning Daytime Evening Total
2024-01 196 74 180 450
2024-02 174 79 151 404
2024-03 232 77 186 495
2024-04 147 42 115 304
2024-05 105 28 96 229
2024-06 151 26 155 332
2024-07 104 45 122 271
2024-08 107 34 119 260
2024-09 136 32 123 291
2024-10 94 36 104 234
2024-11 150 44 145 339
2024-12 146 31 156 333
2025-01 169 68 155 392
2025-02 102 54 123 279
2025-03 199 71 165 435
2025-04 140 50 112 302
2025-05 94 68 112 274
2025-06 160 55 133 348
2025-07 82 52 119 253
2025-08 104 71 119 294
2025-09 100 44 113 257
2025-10 92 53 103 248
2025-11 73 38 88 199
2025-12 1 0 5 6