library(tidyverse)
## Warning: package 'purrr' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the two raw CSV extracts from the working directory.
# `encoding = "UTF-8"` tells R to treat the character data it reads as UTF-8.
offenses <- read.csv(file = "Offenses.csv", encoding = "UTF-8")
outcomes <- read.csv(file = "Outcomes.csv", encoding = "UTF-8")
head(offenses)
##   Fiscal.Year Offense.Category
## 1        2024          Traffic
## 2        2024          Traffic
## 3        2024          Traffic
## 4        2024          Traffic
## 5        2024          Traffic
## 6        2024          Traffic
##                                                   Offense.Name Offense.Count
## 1 8-16 YEARS OF AGE-IMPROPER USE OR FAILURE TO USE SAFETY BELT             1
## 2                      DISPLAY UNCLEAN/OBSCURED LICENSE PLATES             4
## 3         DISPLAY WRONG LICENSE PLATE OR REGISTRATION INSIGNIA             4
## 4                      DISREGARD OTHER TRAFFIC SIGNS & SIGNALS             2
## 5                                          DISREGARD RED LIGHT             2
## 6                                          DISREGARD STOP SIGN            14
head(outcomes)
##   Fiscal.Year Outcome.Type               Measure Count
## 1        2023   Teen Court        Cases Referred    20
## 2        2023   Teen Court            Dismissals    16
## 3        2023   Court Wide  Juvenile Cases Filed  1311
## 4        2023   Court Wide          Closed Cases  1083
## 5        2023   Court Wide Number of Convictions   281
## 6        2024   Teen Court        Cases Referred    63
names(offenses)
## [1] "Fiscal.Year"      "Offense.Category" "Offense.Name"     "Offense.Count"
names(outcomes)
## [1] "Fiscal.Year"  "Outcome.Type" "Measure"      "Count"
# Give the year column a dot-free name; the other columns are left as read.
offenses <- dplyr::rename(offenses, FiscalYear = Fiscal.Year)
names(offenses)
## [1] "FiscalYear"       "Offense.Category" "Offense.Name"     "Offense.Count"
str(offenses)
## 'data.frame':    180 obs. of  4 variables:
##  $ FiscalYear      : int  2024 2024 2024 2024 2024 2024 2024 2024 2024 2024 ...
##  $ Offense.Category: chr  "Traffic" "Traffic" "Traffic" "Traffic" ...
##  $ Offense.Name    : chr  "8-16 YEARS OF AGE-IMPROPER USE OR FAILURE TO USE SAFETY BELT" "DISPLAY UNCLEAN/OBSCURED LICENSE PLATES" "DISPLAY WRONG LICENSE PLATE OR REGISTRATION INSIGNIA" "DISREGARD OTHER TRAFFIC SIGNS & SIGNALS" ...
##  $ Offense.Count   : int  1 4 4 2 2 14 5 6 2 1 ...
str(outcomes)
## 'data.frame':    10 obs. of  4 variables:
##  $ Fiscal.Year : int  2023 2023 2023 2023 2023 2024 2024 2024 2024 2024
##  $ Outcome.Type: chr  "Teen Court" "Teen Court" "Court Wide" "Court Wide" ...
##  $ Measure     : chr  "Cases Referred" "Dismissals" "Juvenile Cases Filed" "Closed Cases" ...
##  $ Count       : int  20 16 1311 1083 281 63 55 1345 1056 194
# Offense counts summed per fiscal year and offense category.
# `.groups = "drop_last"` spells out what summarise() does by default here
# (the result stays grouped by FiscalYear). Stating it explicitly documents the
# retained grouping and stops dplyr from printing its "grouped output" message.
offense_categories <- offenses %>%
  group_by(FiscalYear, Offense.Category) %>%
  summarise(Total = sum(Offense.Count), .groups = "drop_last")
offense_categories
## # A tibble: 4 × 3
## # Groups:   FiscalYear [2]
##   FiscalYear Offense.Category Total
##        <int> <chr>            <int>
## 1       2023 Nontraffic         494
## 2       2023 Traffic            817
## 3       2024 Nontraffic         298
## 4       2024 Traffic           1047
library(dplyr)

# One row per fiscal year holding the sum of all offense counts for that year.
total_offenses <- offenses |>
  dplyr::group_by(FiscalYear) |>
  dplyr::summarise(TotalOffenses = sum(Offense.Count))
library(ggplot2)

# Bar chart of total offense counts, one bar per fiscal year.
# FiscalYear is wrapped in factor() so the x axis is discrete (one tick per
# year). geom_col() draws bars whose heights are the y values as given, which
# is what geom_bar(stat = "identity") did.
ggplot(total_offenses, aes(x = factor(FiscalYear), y = TotalOffenses)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Total Juvenile Offenses by Fiscal Year",
    x = "Fiscal Year",
    y = "Total Offenses"
  )

# Side-by-side bars comparing offense categories within each fiscal year.
# geom_col() plots the pre-computed totals directly; position = "dodge" places
# the categories next to each other instead of stacking them.
ggplot(
  offense_categories,
  aes(x = factor(FiscalYear), y = Total, fill = Offense.Category)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Traffic vs Non-Traffic Offenses by Fiscal Year",
    x = "Fiscal Year",
    y = "Total Offenses"
  )

names(outcomes)
## [1] "Fiscal.Year"  "Outcome.Type" "Measure"      "Count"
# Drop the dots from the two column names used as keys/filters below.
outcomes <- dplyr::rename(
  outcomes,
  FiscalYear = Fiscal.Year,
  OutcomeType = Outcome.Type
)

names(outcomes)
## [1] "FiscalYear"  "OutcomeType" "Measure"     "Count"
# Teen Court rows only, keeping year, measure name and count.
teen_court <- outcomes |>
  dplyr::filter(OutcomeType == "Teen Court") |>
  dplyr::select(c(FiscalYear, Measure, Count))

teen_court
##   FiscalYear        Measure Count
## 1       2023 Cases Referred    20
## 2       2023     Dismissals    16
## 3       2024 Cases Referred    63
## 4       2024     Dismissals    55
library(tidyr)

# Reshape to one row per fiscal year with one column per Teen Court measure.
teen_court_summary <- tidyr::pivot_wider(
  teen_court,
  id_cols = FiscalYear,
  names_from = Measure,
  values_from = Count
)

teen_court_summary
## # A tibble: 2 × 3
##   FiscalYear `Cases Referred` Dismissals
##        <int>            <int>      <int>
## 1       2023               20         16
## 2       2024               63         55
# Court-wide measures in wide form: one row per fiscal year, one column per
# measure (column names come straight from the Measure values).
courtwide <- outcomes |>
  dplyr::filter(OutcomeType == "Court Wide") |>
  dplyr::select(c(FiscalYear, Measure, Count)) |>
  tidyr::pivot_wider(
    id_cols = FiscalYear,
    names_from = Measure,
    values_from = Count
  )

courtwide
## # A tibble: 2 × 4
##   FiscalYear `Juvenile Cases Filed` `Closed Cases` `Number of Convictions`
##        <int>                  <int>          <int>                   <int>
## 1       2023                   1311           1083                     281
## 2       2024                   1345           1056                     194
# Attach the court-wide measures to the yearly offense totals, matching on year.
combined <- total_offenses |>
  dplyr::left_join(courtwide, by = "FiscalYear")
# Year-by-category table of offense totals: one row per fiscal year, one
# column per offense category.
traffic_tab <- tidyr::pivot_wider(
  offense_categories,
  id_cols = FiscalYear,
  names_from = Offense.Category,
  values_from = Total
)

traffic_tab
## # A tibble: 2 × 3
## # Groups:   FiscalYear [2]
##   FiscalYear Nontraffic Traffic
##        <int>      <int>   <int>
## 1       2023        494     817
## 2       2024        298    1047
# Count matrix for the chi-squared test: every column except the first
# (FiscalYear) becomes a matrix column, and the years label the rows.
tab <- traffic_tab[, -1] |> as.matrix()
rownames(tab) <- traffic_tab[["FiscalYear"]]

tab
##      Nontraffic Traffic
## 2023        494     817
## 2024        298    1047
# Chi-squared test on the year-by-category count table, with the continuity
# correction (chisq.test's default) requested explicitly.
chisq_traffic <- chisq.test(x = tab, correct = TRUE)
chisq_traffic
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  tab
## X-squared = 75.722, df = 1, p-value < 2.2e-16
# Teen Court dismissal and referral counts as plain vectors. Rows are sorted
# by fiscal year first so element i of both vectors refers to the same year.
dismissals <- teen_court |>
  dplyr::arrange(FiscalYear) |>
  dplyr::filter(Measure == "Dismissals") |>
  dplyr::pull(Count)

referred <- teen_court |>
  dplyr::arrange(FiscalYear) |>
  dplyr::filter(Measure == "Cases Referred") |>
  dplyr::pull(Count)

dismissals
## [1] 16 55
referred
## [1] 20 63
# Two-sample test of equal proportions: dismissals out of cases referred,
# compared between the fiscal years (both vectors are ordered by year).
# NOTE(review): R warns here that the chi-squared approximation may be
# incorrect, which points to small expected counts — consider confirming the
# result with an exact test such as fisher.test().
teen_court_prop <- prop.test(dismissals, referred)
## Warning in prop.test(dismissals, referred): Chi-squared approximation may be
## incorrect
teen_court_prop
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  dismissals out of referred
## X-squared = 0.19717, df = 1, p-value = 0.657
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.2995793  0.1535475
## sample estimates:
##    prop 1    prop 2 
## 0.8000000 0.8730159
# Rebuild the wide court-wide table (one row per fiscal year, one column per
# measure) from the renamed outcomes data.
courtwide <- outcomes |>
  dplyr::filter(OutcomeType == "Court Wide") |>
  dplyr::select(c(FiscalYear, Measure, Count)) |>
  tidyr::pivot_wider(
    id_cols = FiscalYear,
    names_from = Measure,
    values_from = Count
  )

courtwide
## # A tibble: 2 × 4
##   FiscalYear `Juvenile Cases Filed` `Closed Cases` `Number of Convictions`
##        <int>                  <int>          <int>                   <int>
## 1       2023                   1311           1083                     281
## 2       2024                   1345           1056                     194
names(courtwide)
## [1] "FiscalYear"            "Juvenile Cases Filed"  "Closed Cases"         
## [4] "Number of Convictions"
# Conviction and filing counts as plain vectors, one element per fiscal year,
# sorted by year so position i in both vectors refers to the same year.
# arrange() needs the bare column name: a quoted "FiscalYear" is evaluated as a
# constant string, every row ties, and the rows are not sorted at all.
convictions <- courtwide %>%
  dplyr::arrange(FiscalYear) %>%
  dplyr::pull(`Number of Convictions`)

filings <- courtwide %>%
  dplyr::arrange(FiscalYear) %>%
  dplyr::pull(`Juvenile Cases Filed`)

convictions
## [1] 281 194
filings
## [1] 1311 1345
# Two-sample test of equal proportions: convictions out of cases filed,
# compared between the fiscal years.
conviction_prop <- prop.test(x = convictions, n = filings)
conviction_prop
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  convictions out of filings
## X-squared = 21.741, df = 1, p-value = 3.12e-06
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  0.04026349 0.09994107
## sample estimates:
##    prop 1    prop 2 
## 0.2143402 0.1442379
# Yearly offense totals with the court-wide measures joined on by fiscal year.
combined <- total_offenses |>
  dplyr::left_join(courtwide, by = "FiscalYear")
combined
## # A tibble: 2 × 5
##   FiscalYear TotalOffenses `Juvenile Cases Filed` `Closed Cases`
##        <int>         <int>                  <int>          <int>
## 1       2023          1311                   1311           1083
## 2       2024          1345                   1345           1056
## # ℹ 1 more variable: `Number of Convictions` <int>
names(combined)
## [1] "FiscalYear"            "TotalOffenses"         "Juvenile Cases Filed" 
## [4] "Closed Cases"          "Number of Convictions"
library(dplyr)

# Look at the combined table just to confirm
combined
## # A tibble: 2 × 5
##   FiscalYear TotalOffenses `Juvenile Cases Filed` `Closed Cases`
##        <int>         <int>                  <int>          <int>
## 1       2023          1311                   1311           1083
## 2       2024          1345                   1345           1056
## # ℹ 1 more variable: `Number of Convictions` <int>
# Conviction and filing counts per fiscal year, read from the joined table.
# Each vector is taken after sorting by year so the two line up by position.
convictions <- arrange(combined, FiscalYear)[["Number of Convictions"]]

filings <- arrange(combined, FiscalYear)[["Juvenile Cases Filed"]]

convictions
## [1] 281 194
filings
## [1] 1311 1345
# Does the share of filed cases ending in conviction differ between years?
# Two-sample test of equal proportions (convictions out of filings).
conviction_prop <- prop.test(x = convictions, n = filings)
conviction_prop
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  convictions out of filings
## X-squared = 21.741, df = 1, p-value = 3.12e-06
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  0.04026349 0.09994107
## sample estimates:
##    prop 1    prop 2 
## 0.2143402 0.1442379