Below are some ways to visualize the parking violation data. All visualizations are interactive. Mouse over visualization elements to see pop-up details. Zoom in by dragging to highlight areas you want to inspect more closely. The “Autoscale” button on each graphic will restore the graphic’s default view. Each graphic’s “Download plot as a png” button will download the plot as an image file.
Code
Produced with a heavy assist from artificial intelligence, this R code extracted the data from the source .pdf, filled in dates with no violations, then produced the visualizations shown.
# ============================================================
# 1. INSTALL AND LOAD REQUIRED PACKAGES
# ============================================================
# For each package: install it only when it is not already available, then
# attach it. requireNamespace() tests availability without attaching anything
# and without the warning that require() emits for a missing package;
# library() then attaches the package and stops with an error if it still
# cannot be loaded.
for (pkg in c("pdftools", "dplyr", "stringr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}
# ============================================================
# 2. EXTRACT TEXT FROM PDF
# ============================================================
pdf_file <- "Citation Report with Time and Location.pdf"
# pdf_text() yields a character vector with one element per page
pdf_pages <- pdf_text(pdf_file)
length(pdf_pages) # should be 404
# ============================================================
# 3. SPLIT PAGES INTO LINES
# ============================================================
# Break every page at its newlines and pool the pieces into one vector
all_lines <- unlist(strsplit(pdf_pages, "\n"))
# Keep only the lines that contain at least one character
all_lines <- all_lines[nzchar(all_lines)]
# ============================================================
# 4. EXTRACT FIELDS USING REGEX (24-HOUR FORMAT)
# ============================================================
# Expected line layout. The location and the violation description must be
# separated by at least two whitespace characters for the pattern to match:
#   "123456 1/22/2025 13:30 123 Main St   Parking in No Parking Zone"
# Capture groups:
#   1. citation number (digits with an optional trailing letter)
#   2. issue date and 24-hour time
#   3. location
#   4. violation description
pattern <- "^\\s*(\\d+[A-Za-z]?)\\s+(\\d{1,2}/\\d{1,2}/\\d{4}\\s+\\d{1,2}:\\d{2})\\s+(.*?)\\s{2,}(.*)$"
matches <- str_match(all_lines, pattern)
# Columns 2 to 5 of the match matrix hold the four capture groups
df_parking <- as.data.frame(
  matches[, 2:5, drop = FALSE],
  stringsAsFactors = FALSE
)
names(df_parking) <- c(
  "CitationNumber",
  "IssueDateTime",
  "Location",
  "ViolationDescription"
)
# Lines that did not match the pattern produce NA in every group; drop them
matched_rows <- !is.na(df_parking$CitationNumber)
df_parking <- df_parking[matched_rows, ]
# ============================================================
# 5. CONVERT IssueDateTime TO POSIXct
# ============================================================
# Parse the "month/day/year hour:minute" text into date-times in the
# America/New_York time zone; text that cannot be parsed becomes NA.
df_parking[["IssueDateTime"]] <- as.POSIXct(
  df_parking[["IssueDateTime"]],
  tz = "America/New_York",
  format = "%m/%d/%Y %H:%M"
)
# ============================================================
# 6. INSPECT DATA
# ============================================================
head(df_parking) # first few rows
str(df_parking) # column names and types
# ============================================================
# 7. SAVE TO CSV
# ============================================================
# Write the timestamp as text in one explicit layout. If the column is left as
# POSIXct, write.csv() converts it with as.character(), whose output depends
# on the R version (R >= 4.3 drops the time part for midnight values). A
# later fixed-format parse of the file would then turn those rows into NA.
# format() uses the time zone stored on the column, so the text is local time.
df_export <- df_parking
df_export$IssueDateTime <- format(
  df_export$IssueDateTime,
  "%Y-%m-%d %H:%M:%S"
)
write.csv(df_export, "parking_violations_clean.csv", row.names = FALSE)
# ============================================================
# 1. INSTALL AND LOAD REQUIRED PACKAGES
# ============================================================
# Install dplyr only when it is not already available. requireNamespace()
# tests availability without attaching; library() does the attaching and
# stops with an error if the package cannot be loaded.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
# ============================================================
# 2. IMPORT CSV
# ============================================================
df_parking <- read.csv("parking_violations_clean.csv", stringsAsFactors = FALSE)
# ============================================================
# 3. CONVERT IssueDateTime TO POSIXct
# ============================================================
# read.csv() returns the timestamps as text, so parse them back into
# date-times using the layout they were saved in.
stamp_text <- df_parking$IssueDateTime
stamp_parsed <- as.POSIXct(
  stamp_text,
  format = "%Y-%m-%d %H:%M:%S",
  tz = "America/New_York"
)
# A POSIXct column written by write.csv() under R >= 4.3 stores midnight
# values as a bare date ("2025-01-22"), which the full format above cannot
# parse. Read those values as midnight instead of leaving them NA. The strict
# pattern keeps genuinely malformed timestamps as NA.
date_only <- is.na(stamp_parsed) &
  grepl("^[0-9]{4}-[0-9]{2}-[0-9]{2}$", stamp_text)
stamp_parsed[date_only] <- as.POSIXct(
  stamp_text[date_only],
  format = "%Y-%m-%d",
  tz = "America/New_York"
)
df_parking$IssueDateTime <- stamp_parsed
# ============================================================
# 4. SPLIT INTO DATE AND TIME
# ============================================================
# as.Date() on a POSIXct value converts in UTC unless told otherwise, which
# would move evening New York citations onto the next calendar day. Passing
# the time zone keeps IssueDate on the same local clock as IssueTime, since
# format() already uses the zone stored on the column.
df_parking <- df_parking %>%
  mutate(
    IssueDate = as.Date(IssueDateTime, tz = "America/New_York"),
    IssueTime = format(IssueDateTime, "%H:%M")
  )
# ============================================================
# 5. INSPECT RESULT
# ============================================================
head(df_parking)
str(df_parking)
# ============================================================
# 1. INSTALL AND LOAD REQUIRED PACKAGE
# ============================================================
# Install openxlsx only when it is not already available. requireNamespace()
# tests availability without attaching; library() does the attaching and
# stops with an error if the package cannot be loaded.
if (!requireNamespace("openxlsx", quietly = TRUE)) {
  install.packages("openxlsx")
}
library(openxlsx)
# ============================================================
# 2. ASSUME df_parking EXISTS
# ============================================================
# Columns expected at this point:
# CitationNumber, IssueDateTime, Location, ViolationDescription, IssueDate, IssueTime
# ============================================================
# 3. CREATE A NEW WORKBOOK
# ============================================================
sheet_name <- "Parking Violations"
wb <- createWorkbook()
addWorksheet(wb, sheet_name)
# ============================================================
# 4. WRITE DATA TO SHEET
# ============================================================
writeData(wb, sheet = sheet_name, df_parking)
# ============================================================
# 5. APPLY FORMATTING
# ============================================================
# The styles skip the first sheet row (the header written by writeData())
# and cover one sheet row per data row.
data_rows <- 2:(nrow(df_parking) + 1)
# IssueDate column: ISO-style date display
dateStyle <- createStyle(numFmt = "yyyy-mm-dd")
addStyle(
  wb,
  sheet = sheet_name,
  style = dateStyle,
  cols = which(names(df_parking) == "IssueDate"),
  rows = data_rows,
  gridExpand = TRUE
)
# IssueTime column: hour:minute display
# NOTE(review): IssueTime is produced with format() and is therefore text, so
# this number format probably has no visible effect on it - confirm.
timeStyle <- createStyle(numFmt = "HH:MM")
addStyle(
  wb,
  sheet = sheet_name,
  style = timeStyle,
  cols = which(names(df_parking) == "IssueTime"),
  rows = data_rows,
  gridExpand = TRUE
)
# ============================================================
# 6. SAVE WORKBOOK
# ============================================================
# overwrite = TRUE replaces an existing file of the same name
saveWorkbook(wb, "parking_violations.xlsx", overwrite = TRUE)
# ============================================================
# 1. LOAD REQUIRED PACKAGES
# ============================================================
# For each package: install it only when it is not already available, then
# attach it. requireNamespace() tests availability without attaching anything
# and without the warning that require() emits for a missing package;
# library() then attaches the package and stops with an error if it still
# cannot be loaded.
for (pkg in c("dplyr", "plotly", "lubridate")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}
# ============================================================
# 2. PREPARE DATA
# ============================================================
# 2.1 Make sure the timestamp column is a POSIXct date-time
df_parking[["IssueDateTime"]] <- as.POSIXct(
  df_parking[["IssueDateTime"]],
  tz = "America/New_York"
)
# 2.2 Add the hour of day of each citation for the time-of-day analysis
df_parking <- mutate(df_parking, Hour = hour(IssueDateTime))
# ============================================================
# 3. SUMMARY TABLES
# ============================================================
# Each table has one row per group and a count column named n.
# 3.1 Violations per date, ordered by date
table_daily <- arrange(count(df_parking, IssueDate), IssueDate)
# 3.2 Violations per hour of day, ordered by hour
table_hour <- arrange(count(df_parking, Hour), Hour)
# 3.3 Locations sorted by descending count, first ten rows kept
location_counts <- count(df_parking, Location, sort = TRUE)
table_top_locations <- slice_head(location_counts, n = 10)
# 3.4 Violation descriptions sorted by descending count, first ten rows kept
violation_counts <- count(df_parking, ViolationDescription, sort = TRUE)
table_top_violations <- slice_head(violation_counts, n = 10)
# 3.5 Counts per location and hour combination, used by the heatmap
table_location_hour <- count(df_parking, Location, Hour)
# ============================================================
# 4. PLOTLY GRAPHS (CONSISTENT COLOR SCHEME)
# ============================================================
# One shared color, reused by the bars, lines, and markers of every chart
color_main <- "steelblue"
# 4.1 Daily violation counts as a connected line with point markers.
# hoverinfo = "text" limits the pop-up to the custom text built here.
plotly_daily <- plot_ly(
  data = table_daily,
  x = ~IssueDate,
  y = ~n,
  type = "scatter",
  mode = "lines+markers",
  line = list(color = color_main),
  marker = list(color = color_main),
  text = ~paste("Date:", IssueDate, "<br>Count:", n),
  hoverinfo = "text"
)
plotly_daily <- layout(
  plotly_daily,
  title = "Parking Violations Over Time",
  xaxis = list(title = "Date"),
  yaxis = list(title = "Count")
)
# 4.2 Bar chart of violation counts for each hour present in the data
plotly_hour <- plot_ly(
  data = table_hour,
  x = ~Hour,
  y = ~n,
  type = "bar",
  text = ~paste("Hour:", Hour, "<br>Count:", n),
  hoverinfo = "text",
  marker = list(color = color_main)
)
plotly_hour <- layout(
  plotly_hour,
  title = "Parking Violations by Hour",
  xaxis = list(title = "Hour of Day"),
  yaxis = list(title = "Count")
)
# 4.3 Bar chart of the top-ten locations table.
# reorder() orders the location categories by their count.
plotly_top_locations <- plot_ly(
  data = table_top_locations,
  x = ~reorder(Location, n),
  y = ~n,
  type = "bar",
  text = ~paste("Location:", Location, "<br>Count:", n),
  hoverinfo = "text",
  marker = list(color = color_main)
)
plotly_top_locations <- layout(
  plotly_top_locations,
  title = "Top 10 Parking Violation Locations",
  xaxis = list(title = ""),
  yaxis = list(title = "Count")
)
# 4.4 Bar chart of the top-ten violation descriptions table.
# reorder() orders the description categories by their count.
plotly_top_violations <- plot_ly(
  data = table_top_violations,
  x = ~reorder(ViolationDescription, n),
  y = ~n,
  type = "bar",
  text = ~paste("Violation:", ViolationDescription, "<br>Count:", n),
  hoverinfo = "text",
  marker = list(color = color_main)
)
plotly_top_violations <- layout(
  plotly_top_violations,
  title = "Top 10 Parking Violations",
  xaxis = list(title = ""),
  yaxis = list(title = "Count")
)
# 4.5 Heatmap with hour on the x axis, location on the y axis, and the count
# as the cell value. The color scale runs from white at the low end to the
# shared main color at the high end.
plotly_heatmap <- plot_ly(
  data = table_location_hour,
  x = ~Hour,
  y = ~Location,
  z = ~n,
  type = "heatmap",
  colorscale = list(c(0, 1), c("white", color_main)),
  text = ~paste("Location:", Location, "<br>Hour:", Hour, "<br>Count:", n),
  hoverinfo = "text"
)
plotly_heatmap <- layout(
  plotly_heatmap,
  title = "Heatmap of Violations by Location and Hour",
  xaxis = list(title = "Hour"),
  yaxis = list(title = "Location")
)
# ============================================================
# 5. COMPLETE DAILY TIME SERIES
# ============================================================
# 5.1 Generate complete sequence of dates
# Timestamps that could not be parsed leave an NA date in table_daily. Ignore
# those when finding the date range: min()/max() would otherwise return NA
# and seq() cannot build a sequence from NA endpoints.
if (all(is.na(table_daily$IssueDate))) {
  stop("table_daily contains no valid IssueDate values", call. = FALSE)
}
full_dates <- data.frame(
  IssueDate = seq(
    min(table_daily$IssueDate, na.rm = TRUE),
    max(table_daily$IssueDate, na.rm = TRUE),
    by = "day"
  )
)
# 5.2 Left join to fill missing dates with 0
# Dates without any violation have no match in table_daily, so their count is
# NA after the join and is replaced by 0.
table_daily_complete <- full_dates %>%
  left_join(table_daily, by = "IssueDate") %>%
  mutate(n = ifelse(is.na(n), 0, n))
# 5.3 Bar chart of daily counts over the gap-free date range, so days without
# violations appear as zero-height bars
plotly_daily_complete <- plot_ly(
  data = table_daily_complete,
  x = ~IssueDate,
  y = ~n,
  type = "bar",
  text = ~paste("Date:", IssueDate, "<br>Count:", n),
  hoverinfo = "text",
  marker = list(color = color_main)
)
plotly_daily_complete <- layout(
  plotly_daily_complete,
  title = "Parking Violations Over Time (Complete Dates)",
  xaxis = list(title = "Date"),
  yaxis = list(title = "Count")
)
# ============================================================
# 6. COMPLETE HOUR TIME SERIES
# ============================================================
# 6.1 One row for every hour of the day, 0 through 23
full_hours <- data.frame(Hour = 0:23)
# 6.2 Attach the observed counts; hours with no violations have no match and
# get a count of 0 instead of NA
table_hour_complete <- left_join(full_hours, table_hour, by = "Hour")
table_hour_complete <- mutate(
  table_hour_complete,
  n = ifelse(is.na(n), 0, n)
)
# 6.3 Bar chart of counts for all 24 hours; dtick = 1 puts an axis tick on
# every hour
plotly_hour_complete <- plot_ly(
  data = table_hour_complete,
  x = ~Hour,
  y = ~n,
  type = "bar",
  text = ~paste("Hour:", Hour, "<br>Count:", n),
  hoverinfo = "text",
  marker = list(color = color_main)
)
plotly_hour_complete <- layout(
  plotly_hour_complete,
  title = "Parking Violations by Hour (Complete Hours)",
  xaxis = list(title = "Hour of Day", dtick = 1),
  yaxis = list(title = "Count")
)
# ============================================================
# 7. OBJECTS FOR R MARKDOWN
# ============================================================
# Summary tables built above. They are left commented out, so none is printed.
# table_daily
# table_daily_complete
# table_hour
# table_hour_complete
# table_top_locations
# table_top_violations
# table_location_hour
# Plots. A bare object name at top level prints the object, which displays
# the chart. The two commented-out plots are the versions built from the
# tables without gap filling.
# plotly_daily
plotly_daily_complete
# plotly_hour
plotly_hour_complete
plotly_top_locations
plotly_top_violations
plotly_heatmap