# Load required libraries
library(tidyverse)
library(janitor)
library(lubridate)
library(kableExtra)
library(viridis)
library(DT)
library(purrr)
# Load and prepare the data
#
# Reads the event export, converts start times from UTC to Central Time,
# derives calendar fields from the local timestamp, flags events that start
# within library opening hours, assigns one primary subject per event, and
# keeps only valid events that start during opening hours.
event_data <- read_csv(
  "event_data_with_analysis.csv",
  col_types = cols(),
  na = c("", "NA", "N/A")
) %>%
  clean_names() %>%
  mutate(
    # Parse event dates and times in UTC
    event_start_time = ymd_hms(event_start_time, tz = "UTC"),
    # Convert to local time (Central Time)
    local_start_time = with_tz(event_start_time, tzone = "America/Chicago"),
    # Extract local time components. as_date() keeps the calendar date of the
    # timestamp's own time zone; base as.Date() defaults to UTC for POSIXct
    # input, which would push evening Central Time events onto the next day.
    event_date = as_date(local_start_time),
    event_year = year(event_date),
    event_month = month(event_date),
    event_day_of_week = wday(local_start_time, label = TRUE),
    event_hour_decimal = hour(local_start_time) + minute(local_start_time) / 60,
    # Library hours filter: Mon-Fri 09:00-21:00, Sat 09:00-17:00,
    # Sun 12:00-18:00 (end hour exclusive). Weekdays are compared as numbers
    # (week_start = 1, so Mon = 1 ... Sun = 7) because the text labels produced
    # by wday(label = TRUE) depend on the session locale.
    is_during_library_hours = case_when(
      wday(local_start_time, week_start = 1) %in% 1:5 &
        hour(local_start_time) >= 9 & hour(local_start_time) < 21 ~ TRUE,
      wday(local_start_time, week_start = 1) == 6 &
        hour(local_start_time) >= 9 & hour(local_start_time) < 17 ~ TRUE,
      wday(local_start_time, week_start = 1) == 7 &
        hour(local_start_time) >= 12 & hour(local_start_time) < 18 ~ TRUE,
      # Anything else, including unparseable start times, is outside hours
      TRUE ~ FALSE
    ),
    # Create primary subject categorization: the first priority subject found
    # among the event's tags, otherwise the first remaining tag, otherwise
    # "Uncategorized".
    primary_subject = map_chr(as.character(event_subjects), function(subjects) {
      if (is.na(subjects)) {
        return("Uncategorized")
      }
      # Listed in priority order; the first match wins
      priority_subjects <- c(
        "Books & Authors", "ESL", "Technology Classes",
        "Business & Nonprofit", "Makerplace", "Senior Center",
        "Genealogy", "Health & Wellness", "Arts & Culture"
      )
      tags <- str_trim(str_split(subjects, ";")[[1]])
      # Drop purely numeric tags and blanks left behind by stray separators
      tags <- tags[tags != "" & !str_detect(tags, "^\\d+$")]
      # Indexing an empty vector yields NA (not NULL), so guard explicitly
      if (length(tags) == 0) {
        return("Uncategorized")
      }
      for (subject in priority_subjects) {
        if (any(str_detect(tags, fixed(subject)))) {
          return(subject)
        }
      }
      tags[[1]]
    }),
    # Filter out children/youth events and cancelled events, and require a
    # known date and at least one registration. A missing title is treated as
    # an empty string so it cannot match a keyword and does not turn the whole
    # flag into NA (which filter() would silently drop).
    is_valid_event =
      !str_detect(
        tolower(coalesce(as.character(event_title), "")),
        "child|kid|family|youth"
      ) &
      !str_detect(tolower(primary_subject), "child|kid|family|youth") &
      !str_detect(
        tolower(coalesce(as.character(event_title), "")),
        "cancel"
      ) &
      !is.na(event_date) &
      !is.na(actual_registrations) &
      actual_registrations > 0
  ) %>%
  filter(is_valid_event & is_during_library_hours)
# Report how many events remain in event_data after filtering
cat(paste("Total events after filtering:", nrow(event_data), "\n"))