Data visualizations

Below are some ways to visualize the parking violation data. All visualizations are interactive. Mouse over visualization elements to see pop-up details. Zoom in by dragging to highlight areas you want to inspect more closely. The “Autoscale” button on each graphic will restore the graphic’s default view. Each graphic’s “Download plot as a png” button will download the plot as an image file.


Code

This R code, produced with substantial help from artificial intelligence, extracted the data from the source PDF, filled in the dates and hours that had no violations, then produced the visualizations shown.

# ============================================================
# 1. INSTALL AND LOAD REQUIRED PACKAGES
# ============================================================

if (!require("pdftools")) install.packages("pdftools")
if (!require("dplyr")) install.packages("dplyr")
if (!require("stringr")) install.packages("stringr")

library(pdftools)
library(dplyr)
library(stringr)

# ============================================================
# 2. EXTRACT TEXT FROM PDF
# ============================================================

# Path of the source report, resolved against the current working directory.
pdf_file <- "Citation Report with Time and Location.pdf"

# Read the report; pdf_text() yields a character vector with one string per page.
pdf_pages <- pdf_text(pdf_file)

# Page count, shown as a sanity check (the author expected 404).
length(pdf_pages)

# ============================================================
# 3. SPLIT PAGES INTO LINES
# ============================================================

# Break every page at its newlines and pool the pieces into one flat vector.
all_lines <- unlist(strsplit(pdf_pages, "\n"))

# Keep only the lines that contain at least one character.
all_lines <- all_lines[nzchar(all_lines)]

# ============================================================
# 4. EXTRACT FIELDS USING REGEX (24-HOUR FORMAT)
# ============================================================

# Expected shape of a citation row. Fields are separated by whitespace, and
# the violation text must be preceded by at least TWO whitespace characters:
#   "123456  1/22/2025 13:30  123 Main St    Parking in No Parking Zone"
#
# Capture groups:
#   1. citation number: digits, optionally followed by one letter
#   2. issue date and time: M/D/YYYY H:MM (24-hour clock)
#   3. location: shortest text up to the first run of 2+ whitespace characters
#   4. violation description: the remainder of the line
pattern <- "^\\s*(\\d+[A-Za-z]?)\\s+(\\d{1,2}/\\d{1,2}/\\d{4}\\s+\\d{1,2}:\\d{2})\\s+(.*?)\\s{2,}(.*)$"

# One row per input line: column 1 is the full match, columns 2-5 are the
# capture groups. Lines that do not match the pattern yield a row of NA.
matches <- str_match(all_lines, pattern)

# Report how many lines are about to be discarded so that a layout change in
# the PDF (which would silently drop citations) is noticed.
message(
  sum(is.na(matches[, 1])), " of ", nrow(matches),
  " lines did not match the citation pattern and will be dropped"
)

# Build data frame. The text fields are trimmed so that trailing padding from
# the PDF layout cannot make one violation appear under several spellings.
df_parking <- data.frame(
  CitationNumber = matches[, 2],
  IssueDateTime = matches[, 3],
  Location = str_trim(matches[, 4]),
  ViolationDescription = str_trim(matches[, 5]),
  stringsAsFactors = FALSE
)

# Remove rows where regex didn't match (lines that are not citation rows)
df_parking <- df_parking[!is.na(df_parking$CitationNumber), ]

# ============================================================
# 5. CONVERT IssueDateTime TO POSIXct
# ============================================================

# Parse the "M/D/YYYY H:MM" text captured from the PDF into POSIXct values in
# the America/New_York time zone. Strings that do not fit the format become NA.
df_parking$IssueDateTime <- as.POSIXct(
  df_parking$IssueDateTime,
  format = "%m/%d/%Y %H:%M",
  tz = "America/New_York"
)

# ============================================================
# 6. INSPECT DATA
# ============================================================

head(df_parking)
str(df_parking)

# ============================================================
# 7. SAVE TO CSV
# ============================================================

# Export the timestamp as explicit "YYYY-MM-DD HH:MM:SS" text instead of
# letting write.csv() convert the POSIXct column itself. write.csv() uses
# as.character(), which from R 4.3.0 omits the time part for values that fall
# exactly on midnight; such rows would not parse when the file is read back
# with a "%Y-%m-%d %H:%M:%S" format. The in-memory df_parking keeps POSIXct.
df_export <- df_parking
df_export$IssueDateTime <- format(df_export$IssueDateTime, "%Y-%m-%d %H:%M:%S")

write.csv(df_export, "parking_violations_clean.csv", row.names = FALSE)

# ============================================================
# 1. INSTALL AND LOAD REQUIRED PACKAGES
# ============================================================

if (!require("dplyr")) install.packages("dplyr")
library(dplyr)

# ============================================================
# 2. IMPORT CSV
# ============================================================

# Read the cleaned citations. CitationNumber is forced to character so that
# read.csv() does not guess a numeric type and strip leading zeros; the other
# columns keep read.csv()'s default type detection.
df_parking <- read.csv(
  "parking_violations_clean.csv",
  stringsAsFactors = FALSE,
  colClasses = c(CitationNumber = "character")
)

# ============================================================
# 3. CONVERT IssueDateTime TO POSIXct
# ============================================================

df_parking$IssueDateTime <- as.POSIXct(
  df_parking$IssueDateTime,
  format = "%Y-%m-%d %H:%M:%S",  # timestamp layout stored in the CSV
  tz = "America/New_York"
)

# ============================================================
# 4. SPLIT INTO DATE AND TIME
# ============================================================

# as.Date() on a POSIXct defaults to tz = "UTC", which would assign evening
# citations in New York to the following calendar day. Passing the local zone
# keeps IssueDate on the same day as the local timestamp.
df_parking <- df_parking %>%
  mutate(
    IssueDate = as.Date(IssueDateTime, tz = "America/New_York"),
    IssueTime = format(IssueDateTime, "%H:%M")  # character "HH:MM", local time
  )

# ============================================================
# 5. INSPECT RESULT
# ============================================================

head(df_parking)
str(df_parking)

# ============================================================
# 1. INSTALL AND LOAD REQUIRED PACKAGE
# ============================================================

if (!require("openxlsx")) install.packages("openxlsx")
library(openxlsx)

# ============================================================
# 2. ASSUME df_parking EXISTS
# ============================================================

# For example, df_parking has:
# CitationNumber, IssueDateTime, Location, ViolationDescription, IssueDate, IssueTime

# ============================================================
# 3. CREATE A NEW WORKBOOK
# ============================================================

# Start an empty workbook with a single sheet for the citation data.
wb <- createWorkbook()

addWorksheet(wb, "Parking Violations")

# ============================================================
# 4. WRITE DATA TO SHEET
# ============================================================

writeData(wb, sheet = "Parking Violations", df_parking)

# ============================================================
# 5. APPLY FORMATTING
# ============================================================

# Sheet rows that hold data: row 1 carries the column names, so data occupies
# rows 2..(n + 1). seq_len() yields an empty vector for an empty data frame,
# whereas 2:(n + 1) would give c(2, 1) and style the header row.
data_rows <- seq_len(nrow(df_parking)) + 1L

# Date column (IssueDate)
dateStyle <- createStyle(numFmt = "yyyy-mm-dd")

# Time column (IssueTime)
# NOTE(review): IssueTime is character text when it comes from format(); a
# number format may not change how text cells display. Confirm this is intended.
timeStyle <- createStyle(numFmt = "HH:MM")

# Only style when there is at least one data row.
if (length(data_rows) > 0) {
  addStyle(
    wb,
    sheet = "Parking Violations",
    style = dateStyle,
    cols = which(names(df_parking) == "IssueDate"),
    rows = data_rows,
    gridExpand = TRUE
  )
  addStyle(
    wb,
    sheet = "Parking Violations",
    style = timeStyle,
    cols = which(names(df_parking) == "IssueTime"),
    rows = data_rows,
    gridExpand = TRUE
  )
}

# ============================================================
# 6. SAVE WORKBOOK
# ============================================================

# overwrite = TRUE replaces any existing file of the same name.
saveWorkbook(wb, "parking_violations.xlsx", overwrite = TRUE)

# ============================================================
# 1. LOAD REQUIRED PACKAGES
# ============================================================

if (!require("dplyr")) install.packages("dplyr")
if (!require("plotly")) install.packages("plotly")
if (!require("lubridate")) install.packages("lubridate")

library(dplyr)
library(plotly)
library(lubridate)

# ============================================================
# 2. PREPARE DATA
# ============================================================

# 2.1 Coerce the timestamp column to POSIXct (America/New_York)
df_parking$IssueDateTime <- as.POSIXct(df_parking$IssueDateTime, tz = "America/New_York")

# 2.2 Add the hour of day of each citation for the time-of-day summaries
df_parking <- mutate(df_parking, Hour = hour(IssueDateTime))

# ============================================================
# 3. SUMMARY TABLES
# ============================================================

# 3.1 Citations per calendar day, in date order
table_daily <- arrange(count(df_parking, IssueDate), IssueDate)

# 3.2 Citations per hour of day, in hour order
table_hour <- arrange(count(df_parking, Hour), Hour)

# 3.3 The ten locations with the most citations (largest first)
table_top_locations <- slice_head(
  count(df_parking, Location, sort = TRUE),
  n = 10
)

# 3.4 The ten most frequent violation descriptions (largest first)
table_top_violations <- slice_head(
  count(df_parking, ViolationDescription, sort = TRUE),
  n = 10
)

# 3.5 Citations per location/hour pair, used by the heatmap
table_location_hour <- count(df_parking, Location, Hour)

# ============================================================
# 4. PLOTLY GRAPHS (CONSISTENT COLOR SCHEME)
# ============================================================

# One shared colour for every bar, line and marker in the charts
color_main <- "steelblue"

# 4.1 Daily citation counts as a line-and-marker chart
plotly_daily <- plot_ly(
  table_daily,
  x = ~IssueDate,
  y = ~n,
  type = "scatter",
  mode = "lines+markers",
  line = list(color = color_main),
  marker = list(color = color_main),
  # Custom hover label; hoverinfo = "text" shows only this label
  text = ~paste("Date:", IssueDate, "<br>Count:", n),
  hoverinfo = "text"
)
plotly_daily <- layout(
  plotly_daily,
  title = "Parking Violations Over Time",
  xaxis = list(title = "Date"),
  yaxis = list(title = "Count")
)

# 4.2 Citation counts per hour of day as a bar chart
plotly_hour <- plot_ly(
  table_hour,
  x = ~Hour,
  y = ~n,
  type = "bar",
  text = ~paste("Hour:", Hour, "<br>Count:", n),
  hoverinfo = "text",
  marker = list(color = color_main)
)
plotly_hour <- layout(
  plotly_hour,
  title = "Parking Violations by Hour",
  xaxis = list(title = "Hour of Day"),
  yaxis = list(title = "Count")
)

# 4.3 Bar chart of the ten locations with the most citations.
# reorder() orders the location factor levels by their count.
plotly_top_locations <- plot_ly(
  table_top_locations,
  x = ~reorder(Location, n),
  y = ~n,
  type = "bar",
  text = ~paste("Location:", Location, "<br>Count:", n),
  hoverinfo = "text",
  marker = list(color = color_main)
)
plotly_top_locations <- layout(
  plotly_top_locations,
  title = "Top 10 Parking Violation Locations",
  xaxis = list(title = ""),
  yaxis = list(title = "Count")
)

# 4.4 Bar chart of the ten most frequent violation descriptions,
# with the description factor levels ordered by count in the same way.
plotly_top_violations <- plot_ly(
  table_top_violations,
  x = ~reorder(ViolationDescription, n),
  y = ~n,
  type = "bar",
  text = ~paste("Violation:", ViolationDescription, "<br>Count:", n),
  hoverinfo = "text",
  marker = list(color = color_main)
)
plotly_top_violations <- layout(
  plotly_top_violations,
  title = "Top 10 Parking Violations",
  xaxis = list(title = ""),
  yaxis = list(title = "Count")
)

# 4.5 Heatmap with hour on the x axis, location on the y axis and the
# citation count as the cell value; the colour scale uses white and color_main.
plotly_heatmap <- plot_ly(
  table_location_hour,
  x = ~Hour,
  y = ~Location,
  z = ~n,
  type = "heatmap",
  colorscale = list(c(0, 1), c("white", color_main)),
  text = ~paste("Location:", Location, "<br>Hour:", Hour, "<br>Count:", n),
  hoverinfo = "text"
)
plotly_heatmap <- layout(
  plotly_heatmap,
  title = "Heatmap of Violations by Location and Hour",
  xaxis = list(title = "Hour"),
  yaxis = list(title = "Location")
)

# ============================================================
# 5. COMPLETE DAILY TIME SERIES
# ============================================================

# 5.1 Generate complete sequence of dates
# Citations whose timestamp failed to parse carry an NA IssueDate, and
# count() keeps them as an NA row. Drop those before taking the range:
# min()/max() would otherwise return NA and seq() would stop with an error.
observed_dates <- table_daily$IssueDate[!is.na(table_daily$IssueDate)]
if (length(observed_dates) == 0) {
  stop("table_daily contains no valid IssueDate values", call. = FALSE)
}

full_dates <- data.frame(
  IssueDate = seq(min(observed_dates),
                  max(observed_dates),
                  by = "day")
)

# 5.2 Left join to fill missing dates with 0
# Dates absent from table_daily come back from the join with n = NA.
table_daily_complete <- full_dates %>%
  left_join(table_daily, by = "IssueDate") %>%
  mutate(n = ifelse(is.na(n), 0, n))

# 5.3 Updated daily histogram-style bar chart
# One bar per calendar day, including days with a count of zero.
plotly_daily_complete <- plot_ly(table_daily_complete,
                                 x = ~IssueDate,
                                 y = ~n,
                                 type = 'bar',
                                 text = ~paste("Date:", IssueDate, "<br>Count:", n),
                                 hoverinfo = "text",
                                 marker = list(color = color_main)) %>%
  layout(title = "Parking Violations Over Time (Complete Dates)",
         xaxis = list(title = "Date"),
         yaxis = list(title = "Count"))

# ============================================================
# 6. COMPLETE HOUR TIME SERIES
# ============================================================

# 6.1 Every hour of the day, 0 through 23
full_hours <- data.frame(Hour = seq_len(24L) - 1L)

# 6.2 Attach the observed counts; hours without citations come back as NA
#     from the join and are set to 0
table_hour_complete <- left_join(full_hours, table_hour, by = "Hour")
table_hour_complete <- mutate(table_hour_complete, n = ifelse(is.na(n), 0, n))

# 6.3 Bar chart with one bar per hour; dtick = 1 puts a tick on every hour
plotly_hour_complete <- plot_ly(
  table_hour_complete,
  x = ~Hour,
  y = ~n,
  type = "bar",
  text = ~paste("Hour:", Hour, "<br>Count:", n),
  hoverinfo = "text",
  marker = list(color = color_main)
)
plotly_hour_complete <- layout(
  plotly_hour_complete,
  title = "Parking Violations by Hour (Complete Hours)",
  xaxis = list(title = "Hour of Day", dtick = 1),
  yaxis = list(title = "Count")
)

# ============================================================
# 7. OBJECTS FOR R MARKDOWN
# ============================================================

# Tables (all disabled; uncomment a name to print that table in the output)
# table_daily
# table_daily_complete
# table_hour
# table_hour_complete
# table_top_locations
# table_top_violations
# table_location_hour

# Plots
# A bare object name at top level is auto-printed, which is what displays each
# chart. The two commented-out plots are the versions without zero-filled
# dates/hours; their "complete" counterparts are shown instead.
# plotly_daily
plotly_daily_complete
# plotly_hour
plotly_hour_complete
plotly_top_locations
plotly_top_violations
plotly_heatmap