Library Event Attendance and Performance Analysis

Author

Andrew Ledet

Published

September 19, 2025

Welcome to the Library Event Attendance and Performance Analysis. This report provides a comprehensive, data-driven look into our library’s adult event programming, helping us understand what drives event success and inform future planning.

We began by taking our raw event data and refining it. To focus on meaningful adult programs, we first cleaned the data by filtering out any events intended for children or youth, as well as cancelled events and those with zero registrations. We then applied a precise filter to include only events that occurred during our standard library operating hours for each day of the week. This process ensured our analysis was based on a realistic and relevant dataset.

A key step in our preparation was the creation of a new primary subject categorization. We developed a custom logic to group events by their main topic, such as “Technology Classes” or “Arts & Culture,” which allows for a more focused analysis of popular themes.

Analysis and Insights

Data Preparation and Processing

Show code
# Load required libraries
library(tidyverse)
library(janitor)
library(lubridate)
library(kableExtra)
library(viridis)
library(DT)
library(purrr)

# Load and prepare the data
event_data <- read_csv("event_data_with_analysis.csv", 
                       col_types = cols(), 
                       na = c("", "NA", "N/A")) %>%
  clean_names() %>%
  mutate(
    # Parse event dates and times in UTC
    event_start_time = ymd_hms(event_start_time, tz = "UTC"),
    
    # Convert to local time (Central Time)
    local_start_time = with_tz(event_start_time, tzone = "America/Chicago"),
    
    # Extract local time components
    event_date = as.Date(local_start_time),
    event_year = year(event_date),
    event_month = month(event_date),
    event_day_of_week = wday(local_start_time, label = TRUE),
    event_hour_decimal = hour(local_start_time) + minute(local_start_time) / 60,
    
    # Library hours filter
    is_during_library_hours = case_when(
      event_day_of_week %in% c("Mon", "Tue", "Wed", "Thu", "Fri") & 
        hour(local_start_time) >= 9 & hour(local_start_time) < 21 ~ TRUE,
      event_day_of_week == "Sat" & 
        hour(local_start_time) >= 9 & hour(local_start_time) < 17 ~ TRUE,
      event_day_of_week == "Sun" & 
        hour(local_start_time) >= 12 & hour(local_start_time) < 18 ~ TRUE,
      TRUE ~ FALSE
    ),
    
    # Create primary subject categorization
    primary_subject = map_chr(as.character(event_subjects), function(subjects) {
      if (is.na(subjects)) return("Uncategorized")
      
      priority_subjects <- c(
        "Books & Authors", "ESL", "Technology Classes", 
        "Business & Nonprofit", "Makerplace", "Senior Center", 
        "Genealogy", "Health & Wellness", "Arts & Culture"
      )
      
      tags <- str_split(subjects, ";")[[1]] %>% 
        str_trim() %>%
        .[!str_detect(., "^\\d+$")]
      
      for (subject in priority_subjects) {
        if (any(str_detect(tags, fixed(subject)))) {
          return(subject)
        }
      }
      
      return(tags[1] %||% "Uncategorized")
    }),
    
    # Filter out children/youth events and cancelled events
    is_valid_event = 
      !str_detect(tolower(event_title %||% ""), "child|kid|family|youth") &
      !str_detect(tolower(primary_subject), "child|kid|family|youth") &
      !str_detect(tolower(event_title %||% ""), "cancel") &
      !is.na(event_date) &
      !is.na(actual_registrations) & 
      actual_registrations > 0
  ) %>%
  filter(is_valid_event & is_during_library_hours)

# Print summary of processed data
cat("Total events after filtering:", nrow(event_data), "\n")
Total events after filtering: 5726 
Show code
cat("Unique subjects:", length(unique(event_data$primary_subject)), "\n")
Unique subjects: 24 

Focused Event Analysis

Show code
# Filter for specific events of interest
focused_events <- event_data %>%
  filter(
    primary_subject %in% c("Technology Classes", "Makerplace") |
    event_room_name %in% c("Makerplace - Kitchen Room", "Training Center Room")
  )

# Calculate event counts for focused areas
focused_event_counts <- focused_events %>%
  group_by(primary_subject, event_room_name) %>%
  summarise(
    total_events = n(),
    total_registrations = sum(actual_registrations, na.rm = TRUE),
    .groups = "drop"
  )

# Print focused event summary
print(focused_event_counts)
# A tibble: 17 × 4
   primary_subject    event_room_name           total_events total_registrations
   <chr>              <chr>                            <int>               <dbl>
 1 Makerplace         Makerplace - Creative Ar…          164                3502
 2 Makerplace         Makerplace - Flex Space 1           66                 872
 3 Makerplace         Makerplace - Flex Space 2          151                2341
 4 Makerplace         Makerplace - Kitchen               271               13372
 5 Makerplace         Makerplace - Sewing and …           41                 568
 6 Makerplace         Makerplace Building                  9                 277
 7 Makerplace         See Description for Loca…            3                 128
 8 Makerplace         Sewing Machine #3                    9                 346
 9 Makerplace         Sewing Machine #9                    6                2088
10 Makerplace         Zoom Event                          13                 276
11 Technology Classes Cardinal Room                       13                 146
12 Technology Classes Hendrickson South                    2                 336
13 Technology Classes Makerplace - Flex Space 2            2                  64
14 Technology Classes Senior Center                       25                 238
15 Technology Classes Sr Center Computer Room            341                3975
16 Technology Classes Training Center                    664                4358
17 Technology Classes Zoom Event                          99                2276

Library Card Registration Analysis

Show code
# Determine library card requirement
library_card_registrations <- focused_events %>%
  mutate(
    library_card_status = case_when(
      # Check text status column first
      !is.na(library_card_requirement_status) ~ 
        ifelse(tolower(library_card_requirement_status) %in% c("required", "yes"), 
               "Requires Library Card", 
               "Does Not Require Library Card"),
      
      # Then check numeric column
      !is.na(library_card_requirement_raw) ~ 
        ifelse(library_card_requirement_raw > 0, 
               "Requires Library Card", 
               "Does Not Require Library Card"),
      
      # Default if no information
      TRUE ~ "Unknown Library Card Requirement"
    )
  ) %>%
  group_by(library_card_status) %>%
  summarise(
    total_events = n(),
    total_registrations = sum(actual_registrations, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Sort descending by total events
  arrange(desc(total_events))

# Print library card registration summary
print(library_card_registrations)
# A tibble: 1 × 3
  library_card_status           total_events total_registrations
  <chr>                                <int>               <dbl>
1 Does Not Require Library Card         1879               35163

Detailed Heatmaps for Focused Events

Show code
# Function to create heatmaps for specific categories
create_detailed_heatmaps <- function(data, category_column, category_value, title_prefix) {
  # Filter data for specific category
  filtered_data <- data %>%
    filter(!!sym(category_column) == category_value)
  
  # Prepare heatmap data for average registrations
  avg_registrations_data <- filtered_data %>%
    group_by(event_day_of_week, event_hour_decimal = floor(event_hour_decimal)) %>%
    summarise(
      avg_registrations = mean(actual_registrations, na.rm = TRUE),
      .groups = "drop"
    )
  
  # Prepare heatmap data for event counts
  event_counts_data <- filtered_data %>%
    group_by(event_day_of_week, event_hour_decimal = floor(event_hour_decimal)) %>%
    summarise(
      event_count = n(),
      .groups = "drop"
    )
  
  # Create average registrations heatmap
  p1 <- ggplot(avg_registrations_data, 
         aes(x = event_day_of_week, 
             y = event_hour_decimal, 
             fill = avg_registrations)) +
    geom_tile(color = "white") +
    scale_fill_gradient(
      name = "Avg Registrations", 
      low = "lightblue", 
      high = "darkblue"
    ) +
    labs(title = paste(title_prefix, "- Average Registrations"),
         x = "Day of Week", 
         y = "Hour of Day") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  
  # Create event counts heatmap
  p2 <- ggplot(event_counts_data, 
         aes(x = event_day_of_week, 
             y = event_hour_decimal, 
             fill = event_count)) +
    geom_tile(color = "white") +
    scale_fill_gradient(
      name = "Number of Events", 
      low = "lightgreen", 
      high = "darkgreen"
    ) +
    labs(title = paste(title_prefix, "- Number of Events"),
         x = "Day of Week", 
         y = "Hour of Day") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  
  # Print both plots
  cat("\n\n###", title_prefix, "\n\n")
  print(p1)
  print(p2)
}

# Define the specific categories to analyze
categories_to_analyze <- list(
  list(column = "primary_subject", value = "Technology Classes", prefix = "Technology Classes Events"),
  list(column = "primary_subject", value = "Makerplace", prefix = "Makerplace Events"),
  list(column = "event_room_name", value = "Makerplace - Kitchen Room", prefix = "Makerplace - Kitchen Room"),
  list(column = "event_room_name", value = "Training Center Room", prefix = "Training Center Room")
)

# Generate heatmaps for each category
for (cat in categories_to_analyze) {
  create_detailed_heatmaps(
    focused_events, 
    cat$column, 
    cat$value, 
    cat$prefix
  )
}

Technology Classes Events

Makerplace Events

Makerplace - Kitchen Room

Training Center Room