library(tidyverse)
## Warning: package 'purrr' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the two source tables; the relative paths resolve against the current
# working directory.
offenses <- read.csv(file = "Offenses.csv", encoding = "UTF-8")
outcomes <- read.csv(file = "Outcomes.csv", encoding = "UTF-8")

# Preview the first six rows of each table.
head(offenses, n = 6L)
## Fiscal.Year Offense.Category
## 1 2024 Traffic
## 2 2024 Traffic
## 3 2024 Traffic
## 4 2024 Traffic
## 5 2024 Traffic
## 6 2024 Traffic
## Offense.Name Offense.Count
## 1 8-16 YEARS OF AGE-IMPROPER USE OR FAILURE TO USE SAFETY BELT 1
## 2 DISPLAY UNCLEAN/OBSCURED LICENSE PLATES 4
## 3 DISPLAY WRONG LICENSE PLATE OR REGISTRATION INSIGNIA 4
## 4 DISREGARD OTHER TRAFFIC SIGNS & SIGNALS 2
## 5 DISREGARD RED LIGHT 2
## 6 DISREGARD STOP SIGN 14
head(outcomes, n = 6L)
## Fiscal.Year Outcome.Type Measure Count
## 1 2023 Teen Court Cases Referred 20
## 2 2023 Teen Court Dismissals 16
## 3 2023 Court Wide Juvenile Cases Filed 1311
## 4 2023 Court Wide Closed Cases 1083
## 5 2023 Court Wide Number of Convictions 281
## 6 2024 Teen Court Cases Referred 63

# Column names exactly as imported, before any renaming.
colnames(offenses)
## [1] "Fiscal.Year" "Offense.Category" "Offense.Name" "Offense.Count"
colnames(outcomes)
## [1] "Fiscal.Year" "Outcome.Type" "Measure" "Count"
# Give the year column a dot-free name; the other offense columns keep their
# imported names.
offenses <- dplyr::rename(offenses, FiscalYear = Fiscal.Year)
colnames(offenses)
## [1] "FiscalYear" "Offense.Category" "Offense.Name" "Offense.Count"

# Structure of both tables (outcomes has not been renamed yet at this point).
str(offenses)
## 'data.frame': 180 obs. of 4 variables:
## $ FiscalYear : int 2024 2024 2024 2024 2024 2024 2024 2024 2024 2024 ...
## $ Offense.Category: chr "Traffic" "Traffic" "Traffic" "Traffic" ...
## $ Offense.Name : chr "8-16 YEARS OF AGE-IMPROPER USE OR FAILURE TO USE SAFETY BELT" "DISPLAY UNCLEAN/OBSCURED LICENSE PLATES" "DISPLAY WRONG LICENSE PLATE OR REGISTRATION INSIGNIA" "DISREGARD OTHER TRAFFIC SIGNS & SIGNALS" ...
## $ Offense.Count : int 1 4 4 2 2 14 5 6 2 1 ...
str(outcomes)
## 'data.frame': 10 obs. of 4 variables:
## $ Fiscal.Year : int 2023 2023 2023 2023 2023 2024 2024 2024 2024 2024
## $ Outcome.Type: chr "Teen Court" "Teen Court" "Court Wide" "Court Wide" ...
## $ Measure : chr "Cases Referred" "Dismissals" "Juvenile Cases Filed" "Closed Cases" ...
## $ Count : int 20 16 1311 1083 281 63 55 1345 1056 194
# Total offense count for every fiscal year / offense category pair.
#
# `.groups = "drop_last"` is what summarise() was already doing implicitly
# (the result stays grouped by FiscalYear). Stating it makes the grouping of
# the result a deliberate choice and stops summarise() from emitting its
# "has grouped output" message on every run.
offense_categories <- offenses |>
  dplyr::group_by(FiscalYear, Offense.Category) |>
  dplyr::summarise(Total = sum(Offense.Count), .groups = "drop_last")
offense_categories
## # A tibble: 4 × 3
## # Groups: FiscalYear [2]
## FiscalYear Offense.Category Total
## <int> <chr> <int>
## 1 2023 Nontraffic 494
## 2 2023 Traffic 817
## 3 2024 Nontraffic 298
## 4 2024 Traffic 1047
library(dplyr)
# One row per fiscal year holding the sum of Offense.Count across all offenses.
total_offenses <- summarise(
  group_by(offenses, FiscalYear),
  TotalOffenses = sum(Offense.Count)
)
library(ggplot2)
# Bar chart of the yearly totals. FiscalYear is wrapped in factor() so the
# x axis is discrete; bar heights come straight from TotalOffenses.
ggplot(total_offenses, aes(x = factor(FiscalYear), y = TotalOffenses)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Total Juvenile Offenses by Fiscal Year",
    x = "Fiscal Year",
    y = "Total Offenses"
  )

# Side-by-side bars per fiscal year, one bar per offense category; heights are
# the precomputed Total values.
ggplot(
  offense_categories,
  aes(x = factor(FiscalYear), y = Total, fill = Offense.Category)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Traffic vs Non-Traffic Offenses by Fiscal Year",
    x = "Fiscal Year",
    y = "Total Offenses"
  )

# Column names before renaming.
colnames(outcomes)
## [1] "Fiscal.Year" "Outcome.Type" "Measure" "Count"

# Drop the dots from the two dotted column names; FiscalYear now matches the
# name used in `offenses`.
outcomes <- dplyr::rename(
  outcomes,
  FiscalYear = Fiscal.Year,
  OutcomeType = Outcome.Type
)
colnames(outcomes)
## [1] "FiscalYear" "OutcomeType" "Measure" "Count"
# Keep only the Teen Court rows, in long form: one row per year and measure.
teen_court <- outcomes |>
  dplyr::filter(OutcomeType == "Teen Court") |>
  dplyr::select(dplyr::all_of(c("FiscalYear", "Measure", "Count")))
teen_court
## FiscalYear Measure Count
## 1 2023 Cases Referred 20
## 2 2023 Dismissals 16
## 3 2024 Cases Referred 63
## 4 2024 Dismissals 55
library(tidyr)
# Reshape Teen Court figures to one row per fiscal year, with one column per
# measure.
teen_court_summary <- pivot_wider(
  teen_court,
  id_cols = FiscalYear,
  names_from = Measure,
  values_from = Count
)
teen_court_summary
## # A tibble: 2 × 3
## FiscalYear `Cases Referred` Dismissals
## <int> <int> <int>
## 1 2023 20 16
## 2 2024 63 55
# Court-wide figures, reshaped to one row per fiscal year with one column per
# measure.
courtwide <- outcomes |>
  dplyr::filter(OutcomeType == "Court Wide") |>
  dplyr::select(FiscalYear, Measure, Count) |>
  tidyr::pivot_wider(
    id_cols = FiscalYear,
    names_from = Measure,
    values_from = Count
  )
courtwide
## # A tibble: 2 × 4
## FiscalYear `Juvenile Cases Filed` `Closed Cases` `Number of Convictions`
## <int> <int> <int> <int>
## 1 2023 1311 1083 281
## 2 2024 1345 1056 194

# Attach the court-wide measures to the yearly offense totals.
combined <- total_offenses |>
  dplyr::left_join(courtwide, by = "FiscalYear")
# Year-by-category table of offense totals: one row per fiscal year, one
# column per offense category.
#
# offense_categories is still grouped by FiscalYear (see the `# Groups:` line
# when it is printed), and pivot_wider() would carry that grouping into the
# result. The grouping has no purpose in a table that is only turned into a
# matrix afterwards, so it is removed first.
traffic_tab <- offense_categories |>
  dplyr::ungroup() |>
  tidyr::pivot_wider(
    id_cols = FiscalYear,
    names_from = Offense.Category,
    values_from = Total
  )
traffic_tab
## # A tibble: 2 × 3
## FiscalYear Nontraffic Traffic
## <int> <int> <int>
## 1 2023 494 817
## 2 2024 298 1047
# Build the 2 x 2 contingency matrix: every column except the first
# (FiscalYear) becomes a count column, and the years become the row names.
tab <- traffic_tab[, -1] |>
  as.matrix()
rownames(tab) <- traffic_tab[["FiscalYear"]]
tab
## Nontraffic Traffic
## 2023 494 817
## 2024 298 1047

# Chi-squared test of whether the traffic / non-traffic split is independent
# of fiscal year.
chisq_traffic <- chisq.test(tab)
chisq_traffic
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: tab
## X-squared = 75.722, df = 1, p-value < 2.2e-16
# Teen Court dismissals and referrals as plain vectors, both ordered by
# fiscal year so that element i of each vector refers to the same year.
dismissals <- teen_court |>
  dplyr::arrange(FiscalYear) |>
  dplyr::filter(Measure == "Dismissals") |>
  dplyr::pull(Count)
referred <- teen_court |>
  dplyr::arrange(FiscalYear) |>
  dplyr::filter(Measure == "Cases Referred") |>
  dplyr::pull(Count)
dismissals
## [1] 16 55
referred
## [1] 20 63

# Two-sample test: is the dismissal share of referred cases the same in both
# years?
# NOTE(review): prop.test() warns below that the chi-squared approximation may
# be incorrect, presumably because of the small 2023 counts. An exact test
# (e.g. fisher.test) may be more appropriate here; confirm before reporting.
teen_court_prop <- prop.test(dismissals, referred)
## Warning in prop.test(dismissals, referred): Chi-squared approximation may be
## incorrect
teen_court_prop
##
## 2-sample test for equality of proportions with continuity correction
##
## data: dismissals out of referred
## X-squared = 0.19717, df = 1, p-value = 0.657
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.2995793 0.1535475
## sample estimates:
## prop 1 prop 2
## 0.8000000 0.8730159
# Court-wide measures in wide form: one row per fiscal year, one column per
# measure.
# NOTE(review): this repeats the pipeline that already built `courtwide`
# earlier in the script and yields the same table; kept so this section can be
# run on its own.
courtwide <- tidyr::pivot_wider(
  outcomes |>
    dplyr::filter(OutcomeType == "Court Wide") |>
    dplyr::select(FiscalYear, Measure, Count),
  id_cols = FiscalYear,
  names_from = Measure,
  values_from = Count
)
courtwide
## # A tibble: 2 × 4
## FiscalYear `Juvenile Cases Filed` `Closed Cases` `Number of Convictions`
## <int> <int> <int> <int>
## 1 2023 1311 1083 281
## 2 2024 1345 1056 194
colnames(courtwide)
## [1] "FiscalYear" "Juvenile Cases Filed" "Closed Cases"
## [4] "Number of Convictions"
# Convictions and filings as plain vectors, both ordered by fiscal year so
# that element i of each vector refers to the same year.
#
# arrange() must be given the bare column name. A quoted "FiscalYear" is
# treated as a constant string, so every row ties and nothing is sorted; the
# vectors then only line up if `courtwide` already happens to be in year order.
convictions <- courtwide |>
  dplyr::arrange(FiscalYear) |>
  dplyr::pull(`Number of Convictions`)
filings <- courtwide |>
  dplyr::arrange(FiscalYear) |>
  dplyr::pull(`Juvenile Cases Filed`)
convictions
## [1] 281 194
filings
## [1] 1311 1345

# Two-sample test: is the share of filed cases ending in a conviction the same
# in both years?
conviction_prop <- prop.test(convictions, filings)
conviction_prop
##
## 2-sample test for equality of proportions with continuity correction
##
## data: convictions out of filings
## X-squared = 21.741, df = 1, p-value = 3.12e-06
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.04026349 0.09994107
## sample estimates:
## prop 1 prop 2
## 0.2143402 0.1442379
# Yearly offense totals with the court-wide measures attached, matched on
# FiscalYear.
# NOTE(review): in the printed result TotalOffenses equals
# `Juvenile Cases Filed` in both years.
combined <- total_offenses |>
  dplyr::left_join(courtwide, by = "FiscalYear")
combined
## # A tibble: 2 × 5
## FiscalYear TotalOffenses `Juvenile Cases Filed` `Closed Cases`
## <int> <int> <int> <int>
## 1 2023 1311 1311 1083
## 2 2024 1345 1345 1056
## # ℹ 1 more variable: `Number of Convictions` <int>
colnames(combined)
## [1] "FiscalYear" "TotalOffenses" "Juvenile Cases Filed"
## [4] "Closed Cases" "Number of Convictions"
library(dplyr)
# Print the joined table once more as a sanity check before using it
combined
## # A tibble: 2 × 5
## FiscalYear TotalOffenses `Juvenile Cases Filed` `Closed Cases`
## <int> <int> <int> <int>
## 1 2023 1311 1311 1083
## 2 2024 1345 1345 1056
## # ℹ 1 more variable: `Number of Convictions` <int>
# Sort by fiscal year, then take each column out as a plain vector so the two
# vectors line up year by year
convictions <- arrange(combined, FiscalYear)[["Number of Convictions"]]
filings <- arrange(combined, FiscalYear)[["Juvenile Cases Filed"]]
convictions
## [1] 281 194
filings
## [1] 1311 1345
# Two-sample proportion test: does the conviction rate (convictions out of
# filings) differ between the two fiscal years?
conviction_prop <- prop.test(convictions, filings)
conviction_prop
##
## 2-sample test for equality of proportions with continuity correction
##
## data: convictions out of filings
## X-squared = 21.741, df = 1, p-value = 3.12e-06
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.04026349 0.09994107
## sample estimates:
## prop 1 prop 2
## 0.2143402 0.1442379