library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(stringr)
# Main individual-level dataset
juvenile_raw <- read_excel("juvenile_dataset.xlsx")
# Section totals (optional for context)
section_totals <- read_excel("juvenile_section_totals.xlsx")
# Info / metadata (optional for context)
info <- read_excel("juvenile_data_information.xlsx")
# Quick look at the raw case-level data
dplyr::glimpse(juvenile_raw)
## Rows: 649
## Columns: 23
## $ FiscalYear <dbl> 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2…
## $ ViolationDate <dttm> 2023-01-04, 2023-01-05, 2023-01-05, 2023-01-09, …
## $ ViolTime <dttm> 1899-12-31 15:02:00, 1899-12-31 10:31:00, 1899-1…
## $ Filed <dttm> 2023-08-28, 2023-03-09, 2023-03-09, 2023-08-28, …
## $ StatusDate <dttm> 2025-01-14, 2023-10-19, 2023-10-06, 2024-10-23, …
## $ ConvictionDate <dttm> NA, 2023-10-19, NA, 2024-10-23, 2024-06-10, 2025…
## $ PleaDate <dttm> NA, 2023-04-04, 2023-05-02, 2023-10-10, 2023-08-…
## $ Plea <chr> NA, "NC", "NC", "NC", "NC", "NC", NA, "NC", "NC",…
## $ DefendantZipcode1 <dbl> 78229, 78228, 78228, 78228, 78224, 78224, 78242, …
## $ OffenseCode <chr> "2024A", "2017A", "2017A", "3006A", "6309A", "630…
## $ OffenseDesc <chr> "DISORDERLY CONDUCT-FIGHTING", "FAIL TO INDENTIFY…
## $ ViolPaid <dbl> 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 212…
## $ ViolFine <dbl> 0, 100, 0, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 131,…
## $ ViolFees <dbl> 81, 81, 0, 81, 81, 96, 0, 81, 0, 0, 150, 0, 0, 81…
## $ AmtDue <dbl> 81, 0, 0, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 81, 0, …
## $ Status <chr> "JR", "CL", "WD", "CL", "CL", "CL", "DP", "CL", "…
## $ StatusDescription <chr> "AUTO RESET-JUVENILE APPEARANCE", "DOCKET CLOSED"…
## $ AgeAtTimeOfViolation <dbl> 14, 14, 14, 12, 15, 15, 16, 15, 13, 16, 16, 16, 1…
## $ Race <chr> "W", "H", "H", "W", "W", "W", "H", "H", "W", "H",…
## $ Gender <chr> "M", "M", "M", "M", "M", "M", "F", "M", "M", "M",…
## $ CaseType <chr> "NTRF", "NTRF", "NTRF", "NTRF", "NTRF", "NTRF", "…
## $ PersonViolCtAll <dbl> 1, 1, 1, 1, 5, 5, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ PersonViolCtNTRF <dbl> 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
juvenile <- juvenile_raw %>%
mutate(
FiscalYear = as.factor(FiscalYear),
ViolationDate = as.Date(ViolationDate),
Filed = as.Date(Filed),
StatusDate = as.Date(StatusDate),
ConvictionDate = as.Date(ConvictionDate),
PleaDate = as.Date(PleaDate),
age = AgeAtTimeOfViolation,
gender = str_to_title(Gender),
race = str_to_title(Race),
zipcode = sprintf("%05d", DefendantZipcode1),
total_violations = PersonViolCtAll,
cited_violations = PersonViolCtNTRF,
repeat_offender = ifelse(total_violations > 1, 1, 0)
) %>%
filter(CaseType == "NTRF")
research_data <- juvenile %>%
select(
FiscalYear,
age,
race,
gender,
zipcode,
total_violations,
cited_violations,
repeat_offender,
)
head(research_data)
## # A tibble: 6 × 8
## FiscalYear age race gender zipcode total_violations cited_violations
## <fct> <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 2023 14 W M 78229 1 1
## 2 2023 14 H M 78228 1 1
## 3 2023 14 H M 78228 1 1
## 4 2023 12 W M 78228 1 1
## 5 2023 15 W M 78224 5 2
## 6 2023 15 W M 78224 5 2
## # ℹ 1 more variable: repeat_offender <dbl>
# Number of cases (rows)
n_cases <- nrow(research_data)
# Number of unique zip codes
n_zip <- dplyr::n_distinct(research_data$zipcode)
# Number of unique races and genders
n_race <- dplyr::n_distinct(research_data$race)
n_gender <- dplyr::n_distinct(research_data$gender)
cat("Total cases:", n_cases, "\n")
## Total cases: 649
cat("Unique zip codes:", n_zip, "\n")
## Unique zip codes: 77
cat("Race groups:", n_race, "\n")
## Race groups: 7
cat("Gender groups:", n_gender, "\n")
## Gender groups: 4
age_summary <- research_data %>%
summarise(
n = sum(!is.na(age)),
mean = mean(age, na.rm = TRUE),
sd = sd(age, na.rm = TRUE),
min = min(age, na.rm = TRUE),
q1 = quantile(age, 0.25, na.rm = TRUE),
median = median(age, na.rm = TRUE),
q3 = quantile(age, 0.75, na.rm = TRUE),
max = max(age, na.rm = TRUE)
)
age_summary
## # A tibble: 1 × 8
## n mean sd min q1 median q3 max
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 649 14.2 1.58 2 13 14 15 16
gender_summary <- research_data %>%
count(gender) %>%
mutate(percent = round(100 * n / sum(n), 1))
gender_summary
## # A tibble: 4 × 3
## gender n percent
## <chr> <int> <dbl>
## 1 F 270 41.6
## 2 M 333 51.3
## 3 U 23 3.5
## 4 <NA> 23 3.5
race_summary <- research_data %>%
count(race) %>%
mutate(percent = round(100 * n / sum(n), 1))
race_summary
## # A tibble: 7 × 3
## race n percent
## <chr> <int> <dbl>
## 1 A 7 1.1
## 2 B 68 10.5
## 3 H 240 37
## 4 U 26 4
## 5 W 275 42.4
## 6 X 1 0.2
## 7 <NA> 32 4.9
viol_summary <- research_data %>%
summarise(
mean_total = mean(total_violations, na.rm = TRUE),
sd_total = sd(total_violations, na.rm = TRUE),
min_total = min(total_violations, na.rm = TRUE),
max_total = max(total_violations, na.rm = TRUE),
mean_cited = mean(cited_violations, na.rm = TRUE),
sd_cited = sd(cited_violations, na.rm = TRUE),
min_cited = min(cited_violations, na.rm = TRUE),
max_cited = max(cited_violations, na.rm = TRUE),
repeat_rate = mean(repeat_offender) * 100
)
viol_summary
## # A tibble: 1 × 9
## mean_total sd_total min_total max_total mean_cited sd_cited min_cited
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1.24 0.692 1 7 1.14 0.440 1
## # ℹ 2 more variables: max_cited <dbl>, repeat_rate <dbl>
corr_vars <- research_data %>%
select(age, total_violations, cited_violations, repeat_offender)
cor_matrix <- cor(corr_vars, use = "pairwise.complete.obs")
cor_matrix
## age total_violations cited_violations repeat_offender
## age 1.000000000 0.06401434 -0.008361656 0.05107145
## total_violations 0.064014343 1.00000000 0.691738067 0.82899804
## cited_violations -0.008361656 0.69173807 1.000000000 0.77682409
## repeat_offender 0.051071446 0.82899804 0.776824094 1.00000000
cor.test(research_data$age, research_data$total_violations)
##
## Pearson's product-moment correlation
##
## data: research_data$age and research_data$total_violations
## t = 1.6316, df = 647, p-value = 0.1032
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.01301102 0.14028447
## sample estimates:
## cor
## 0.06401434
cor.test(research_data$age, research_data$cited_violations)
##
## Pearson's product-moment correlation
##
## data: research_data$age and research_data$cited_violations
## t = -0.2127, df = 647, p-value = 0.8316
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.08526805 0.06864378
## sample estimates:
## cor
## -0.008361656
cor.test(research_data$total_violations, research_data$cited_violations)
##
## Pearson's product-moment correlation
##
## data: research_data$total_violations and research_data$cited_violations
## t = 24.365, df = 647, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6493461 0.7298446
## sample estimates:
## cor
## 0.6917381
ggplot(research_data, aes(x = age)) +
geom_histogram(bins = 10) +
labs(title = "Age Distribution",
x = "Age",
y = "Case Count")

ggplot(research_data, aes(x = total_violations)) +
geom_histogram(binwidth = 1) +
labs(title = "Total Violations per Person",
x = "Total Violations",
y = "Count")

ggplot(research_data, aes(x = gender, y = total_violations)) +
geom_boxplot() +
labs(title = "Total Violations by Gender",
x = "Gender",
y = "Total Violations")

ggplot(research_data, aes(x = race, y = total_violations)) +
geom_boxplot() +
coord_flip() +
labs(title = "Total Violations by Race",
x = "Race",
y = "Total Violations")

ggplot(research_data, aes(x = age, y = total_violations)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "Age vs Total Violations",
x = "Age",
y = "Total Violations")
## `geom_smooth()` using formula = 'y ~ x'

model_data <- research_data %>%
mutate(
gender = factor(gender),
race = factor(race),
repeat_offender = factor(repeat_offender)
)
logit_model <- glm(
repeat_offender ~ age + gender + race,
data = model_data,
family = binomial
)
summary(logit_model)
##
## Call:
## glm(formula = repeat_offender ~ age + gender + race, family = binomial,
## data = model_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -17.31422 543.87728 -0.032 0.97460
## age 0.08400 0.07745 1.085 0.27809
## genderM 0.73197 0.24938 2.935 0.00333 **
## genderU 1.02225 1.15438 0.886 0.37586
## raceB 14.44249 543.87619 0.027 0.97881
## raceH 13.95809 543.87614 0.026 0.97953
## raceU 13.53256 543.87728 0.025 0.98015
## raceW 13.78665 543.87614 0.025 0.97978
## raceX 0.57217 1553.69986 0.000 0.99971
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 523.18 on 616 degrees of freedom
## Residual deviance: 506.53 on 608 degrees of freedom
## (32 observations deleted due to missingness)
## AIC: 524.53
##
## Number of Fisher Scoring iterations: 14
lm_model <- lm(
total_violations ~ age + gender + race,
data = model_data
)
summary(lm_model)
##
## Call:
## lm(formula = total_violations ~ age + gender + race, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5481 -0.3429 -0.1656 -0.0701 5.6100
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.48474 0.36579 1.325 0.186
## age 0.02389 0.01778 1.344 0.179
## genderM 0.24828 0.05766 4.306 1.94e-05 ***
## genderU 0.17977 0.28554 0.630 0.529
## raceB 0.43278 0.27644 1.566 0.118
## raceH 0.29859 0.26680 1.119 0.264
## raceU 0.16994 0.37460 0.454 0.650
## raceW 0.25151 0.26639 0.944 0.345
## raceX 0.18075 0.74467 0.243 0.808
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6955 on 608 degrees of freedom
## (32 observations deleted due to missingness)
## Multiple R-squared: 0.04059, Adjusted R-squared: 0.02797
## F-statistic: 3.216 on 8 and 608 DF, p-value: 0.001372
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.4.3
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
corr_vars <- research_data %>%
select(age, total_violations, cited_violations, repeat_offender)
cor_matrix <- cor(corr_vars, use = "pairwise.complete.obs")
# Convert to long format for ggplot
cor_melt <- melt(cor_matrix)
ggplot(cor_melt, aes(x = Var1, y = Var2, fill = value)) +
geom_tile() +
geom_text(aes(label = round(value, 2)), color = "white") +
scale_fill_gradient2(
low = "blue",
mid = "white",
high = "red",
midpoint = 0
) +
labs(
title = "Correlation Heatmap",
x = "Variable",
y = "Variable",
fill = "Correlation"
) +
theme_minimal()

heat_data <- research_data %>%
count(race, repeat_offender)
ggplot(heat_data, aes(x = race, y = repeat_offender, fill = n)) +
geom_tile(color = "white") +
geom_text(aes(label = n), color = "white") +
scale_fill_gradient(low = "lightblue", high = "red") +
labs(
title = "Heatmap of Repeat Offenders by Race",
x = "Race",
y = "Repeat Offender",
fill = "Count"
) +
theme_minimal()

shapiro.test(research_data$age)
##
## Shapiro-Wilk normality test
##
## data: research_data$age
## W = 0.87602, p-value < 2.2e-16
shapiro.test(research_data$total_violations)
##
## Shapiro-Wilk normality test
##
## data: research_data$total_violations
## W = 0.39798, p-value < 2.2e-16
shapiro.test(research_data$cited_violations)
##
## Shapiro-Wilk normality test
##
## data: research_data$cited_violations
## W = 0.36295, p-value < 2.2e-16
# QQ plots for age, total_violations, cited_violations
par(mfrow = c(1, 3))
qqnorm(research_data$age, main = "QQ Plot: Age")
qqline(research_data$age)
qqnorm(research_data$total_violations, main = "QQ Plot: Total Violations")
qqline(research_data$total_violations)
qqnorm(research_data$cited_violations, main = "QQ Plot: Cited Violations")
qqline(research_data$cited_violations)

par(mfrow = c(1, 1))
gender_tf <- research_data %>%
filter(gender %in% c("M", "F"))
table(gender_tf$gender)
##
## F M
## 270 333
t_test_gender <- t.test(total_violations ~ gender, data = gender_tf)
t_test_gender
##
## Welch Two Sample t-test
##
## data: total_violations by gender
## t = -4.4234, df = 454.82, p-value = 1.217e-05
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
## -0.3406104 -0.1310612
## sample estimates:
## mean in group F mean in group M
## 1.118519 1.354354
wilcox_gender <- wilcox.test(total_violations ~ gender, data = gender_tf)
wilcox_gender
##
## Wilcoxon rank sum test with continuity correction
##
## data: total_violations by gender
## W = 41020, p-value = 0.002949
## alternative hypothesis: true location shift is not equal to 0
# One-way ANOVA: total violations by race
anova_race <- aov(total_violations ~ race, data = research_data)
summary(anova_race)
## Df Sum Sq Mean Sq F value Pr(>F)
## race 5 2.53 0.5059 1.017 0.407
## Residuals 611 304.02 0.4976
## 32 observations deleted due to missingness
kruskal_race <- kruskal.test(total_violations ~ race, data = research_data)
kruskal_race
##
## Kruskal-Wallis rank sum test
##
## data: total_violations by race
## Kruskal-Wallis chi-squared = 5.2211, df = 5, p-value = 0.3895
# Select numeric variables
corr_vars <- research_data %>%
select(age, total_violations, cited_violations, repeat_offender)
cor_matrix <- cor(corr_vars, use = "pairwise.complete.obs")
cor_matrix
## age total_violations cited_violations repeat_offender
## age 1.000000000 0.06401434 -0.008361656 0.05107145
## total_violations 0.064014343 1.00000000 0.691738067 0.82899804
## cited_violations -0.008361656 0.69173807 1.000000000 0.77682409
## repeat_offender 0.051071446 0.82899804 0.776824094 1.00000000
# Correlation significance tests
cor.test(research_data$age, research_data$total_violations)
##
## Pearson's product-moment correlation
##
## data: research_data$age and research_data$total_violations
## t = 1.6316, df = 647, p-value = 0.1032
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.01301102 0.14028447
## sample estimates:
## cor
## 0.06401434
cor.test(research_data$age, research_data$cited_violations)
##
## Pearson's product-moment correlation
##
## data: research_data$age and research_data$cited_violations
## t = -0.2127, df = 647, p-value = 0.8316
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.08526805 0.06864378
## sample estimates:
## cor
## -0.008361656
cor.test(research_data$total_violations, research_data$cited_violations)
##
## Pearson's product-moment correlation
##
## data: research_data$total_violations and research_data$cited_violations
## t = 24.365, df = 647, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6493461 0.7298446
## sample estimates:
## cor
## 0.6917381
model_data <- research_data %>%
mutate(
gender = factor(gender),
race = factor(race),
repeat_offender = factor(repeat_offender)
) %>%
filter(!is.na(repeat_offender))
logit_model <- glm(
repeat_offender ~ age + gender + race,
data = model_data,
family = binomial
)
summary(logit_model)
##
## Call:
## glm(formula = repeat_offender ~ age + gender + race, family = binomial,
## data = model_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -17.31422 543.87728 -0.032 0.97460
## age 0.08400 0.07745 1.085 0.27809
## genderM 0.73197 0.24938 2.935 0.00333 **
## genderU 1.02225 1.15438 0.886 0.37586
## raceB 14.44249 543.87619 0.027 0.97881
## raceH 13.95809 543.87614 0.026 0.97953
## raceU 13.53256 543.87728 0.025 0.98015
## raceW 13.78665 543.87614 0.025 0.97978
## raceX 0.57217 1553.69986 0.000 0.99971
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 523.18 on 616 degrees of freedom
## Residual deviance: 506.53 on 608 degrees of freedom
## (32 observations deleted due to missingness)
## AIC: 524.53
##
## Number of Fisher Scoring iterations: 14
lm_model <- lm(
total_violations ~ age + gender + race,
data = model_data
)
summary(lm_model)
##
## Call:
## lm(formula = total_violations ~ age + gender + race, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5481 -0.3429 -0.1656 -0.0701 5.6100
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.48474 0.36579 1.325 0.186
## age 0.02389 0.01778 1.344 0.179
## genderM 0.24828 0.05766 4.306 1.94e-05 ***
## genderU 0.17977 0.28554 0.630 0.529
## raceB 0.43278 0.27644 1.566 0.118
## raceH 0.29859 0.26680 1.119 0.264
## raceU 0.16994 0.37460 0.454 0.650
## raceW 0.25151 0.26639 0.944 0.345
## raceX 0.18075 0.74467 0.243 0.808
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6955 on 608 degrees of freedom
## (32 observations deleted due to missingness)
## Multiple R-squared: 0.04059, Adjusted R-squared: 0.02797
## F-statistic: 3.216 on 8 and 608 DF, p-value: 0.001372
# Load diversion outcomes
info_all <- readxl::read_excel("juvenile_data_information.xlsx", sheet = "Outcomes")
info_all
## # A tibble: 12 × 3
## `Youth Diversion (Teen Court)` `FY 2023` `FY 2024`
## <chr> <chr> <chr>
## 1 <NA> 10/1/2022-9/30/2023 10/1/2023-9/30/2024
## 2 # Cases Referred 20 63
## 3 # Dismissals 16 55
## 4 % Cases ending in dismissal 0.8 0.87301587301587302
## 5 <NA> <NA> <NA>
## 6 <NA> <NA> <NA>
## 7 Juvenile Case Data FY 2023 FY 2024
## 8 <NA> 10/1/2022-9/30/2023 10/1/2023-9/30/2024
## 9 Juvenile Filings 1311 1345
## 10 Closed Cases 1083 1056
## 11 Number of Convictions 281 194
## 12 % Cases ending in conviction 0.25946445060018469 0.18371212121212119
diversion <- info_all[1:4, ] # first 4 rows contain diversion data
names(diversion) <- c("Metric", "FY2023", "FY2024")
diversion
## # A tibble: 4 × 3
## Metric FY2023 FY2024
## <chr> <chr> <chr>
## 1 <NA> 10/1/2022-9/30/2023 10/1/2023-9/30/2024
## 2 # Cases Referred 20 63
## 3 # Dismissals 16 55
## 4 % Cases ending in dismissal 0.8 0.87301587301587302
library(tidyr)
library(ggplot2)
diversion_long <- diversion %>%
pivot_longer(cols = c(FY2023, FY2024),
names_to = "FiscalYear",
values_to = "Value")
ggplot(diversion_long[-1,], aes(x = Metric, y = as.numeric(Value), fill = FiscalYear)) +
geom_col(position = "dodge") +
labs(
title = "Diversion Program Outcomes by Fiscal Year",
x = "Metric",
y = "Count"
) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_col()`).

library(readxl)
library(dplyr)
library(tidyr)
section_totals <- read_excel("juvenile_section_totals.xlsx", sheet = "Sheet1")
diversion_cats <- c(
"Peer Court",
"Teen Court",
"Mediations",
"Mediation Participation",
"Youth Enrichment Class",
"Youth Enrichment Class (YEC)",
"School Based Agreements",
"UTSA Classes",
"Parenting Classes"
)
diversion_totals <- section_totals %>%
filter(Category %in% diversion_cats)
diversion_totals
## # A tibble: 23 × 4
## FiscalYear Category Metric Total
## <chr> <chr> <chr> <dbl>
## 1 FY2023 Peer Court Referred 33
## 2 FY2023 Peer Court Completed 9
## 3 FY2023 Mediations Letters Sent 2615
## 4 FY2023 Mediations Showed 1201
## 5 FY2023 Youth Enrichment Class Number of Youth Served 332
## 6 FY2023 Mediation Participation % of Youth/Family attending medi… 0.46
## 7 FY2024 Mediations Letters Sent 5159
## 8 FY2024 Mediations Appeared/Resolved 2265
## 9 FY2024 Mediations Percentage Appeared/Resolved 3.03
## 10 FY2024 School Based Agreements Letters Sent 2099
## # ℹ 13 more rows
diversion_wide <- diversion_totals %>%
pivot_wider(
id_cols = c(FiscalYear, Category),
names_from = Metric,
values_from = Total
)
diversion_wide
## # A tibble: 10 × 21
## FiscalYear Category Referred Completed `Letters Sent` Showed
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 FY2023 Peer Court 33 9 NA NA
## 2 FY2023 Mediations NA NA 2615 1201
## 3 FY2023 Youth Enrichment Class NA NA NA NA
## 4 FY2023 Mediation Participation NA NA NA NA
## 5 FY2024 Mediations NA NA 5159 NA
## 6 FY2024 School Based Agreements NA NA 2099 NA
## 7 FY2024 Youth Enrichment Class (… NA NA NA NA
## 8 FY2024 Teen Court NA NA NA NA
## 9 FY2024 UTSA Classes NA NA NA NA
## 10 FY2024 Parenting Classes NA NA NA NA
## # ℹ 15 more variables: `Number of Youth Served` <dbl>,
## # `% of Youth/Family attending mediation` <dbl>, `Appeared/Resolved` <dbl>,
## # `Percentage Appeared/Resolved` <dbl>, Attended <dbl>,
## # `Percentage Appeared` <dbl>, Assigned <dbl>, `Attended/Completed` <dbl>,
## # `Percentage Attended/Completed` <dbl>, `Cases Referred` <dbl>,
## # `Cases Processed` <dbl>, `Cases Closed` <dbl>, `Appeared/Completed` <dbl>,
## # `Percentage Appeared/Completed` <dbl>, Participants <dbl>
# Load section totals
section_totals <- readxl::read_excel("juvenile_section_totals.xlsx", sheet = "Sheet1")
# Load info file (contains diversion outcomes)
info_outcomes <- readxl::read_excel("juvenile_data_information.xlsx", sheet = "Outcomes")
# Clean outcomes into readable format
diversion_outcomes <- info_outcomes[1:4, ]
colnames(diversion_outcomes) <- c("Metric", "FY2023", "FY2024")
diversion_outcomes
## # A tibble: 4 × 3
## Metric FY2023 FY2024
## <chr> <chr> <chr>
## 1 <NA> 10/1/2022-9/30/2023 10/1/2023-9/30/2024
## 2 # Cases Referred 20 63
## 3 # Dismissals 16 55
## 4 % Cases ending in dismissal 0.8 0.87301587301587302
# Identify diversion program categories
diversion_cats <- c(
"Peer Court",
"Teen Court",
"Mediations",
"Mediation Participation",
"Youth Enrichment Class",
"Youth Enrichment Class (YEC)",
"School Based Agreements",
"UTSA Classes",
"Parenting Classes"
)
div_programs <- section_totals %>%
filter(Category %in% diversion_cats)
# Reshape to wide format for easier year comparisons
div_programs_wide <- div_programs %>%
tidyr::pivot_wider(
id_cols = c(FiscalYear, Category),
names_from = Metric,
values_from = Total
)
div_programs_wide
## # A tibble: 10 × 21
## FiscalYear Category Referred Completed `Letters Sent` Showed
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 FY2023 Peer Court 33 9 NA NA
## 2 FY2023 Mediations NA NA 2615 1201
## 3 FY2023 Youth Enrichment Class NA NA NA NA
## 4 FY2023 Mediation Participation NA NA NA NA
## 5 FY2024 Mediations NA NA 5159 NA
## 6 FY2024 School Based Agreements NA NA 2099 NA
## 7 FY2024 Youth Enrichment Class (… NA NA NA NA
## 8 FY2024 Teen Court NA NA NA NA
## 9 FY2024 UTSA Classes NA NA NA NA
## 10 FY2024 Parenting Classes NA NA NA NA
## # ℹ 15 more variables: `Number of Youth Served` <dbl>,
## # `% of Youth/Family attending mediation` <dbl>, `Appeared/Resolved` <dbl>,
## # `Percentage Appeared/Resolved` <dbl>, Attended <dbl>,
## # `Percentage Appeared` <dbl>, Assigned <dbl>, `Attended/Completed` <dbl>,
## # `Percentage Attended/Completed` <dbl>, `Cases Referred` <dbl>,
## # `Cases Processed` <dbl>, `Cases Closed` <dbl>, `Appeared/Completed` <dbl>,
## # `Percentage Appeared/Completed` <dbl>, Participants <dbl>
recid_summary <- research_data %>%
group_by(FiscalYear) %>%
summarise(
avg_violations = mean(total_violations, na.rm = TRUE),
median_violations = median(total_violations, na.rm = TRUE),
repeat_rate = mean(repeat_offender, na.rm = TRUE) * 100,
n_cases = n()
)
recid_summary
## # A tibble: 2 × 5
## FiscalYear avg_violations median_violations repeat_rate n_cases
## <fct> <dbl> <dbl> <dbl> <int>
## 1 2023 1.23 1 13.6 418
## 2 2024 1.26 1 17.3 231
# Extract key diversion totals: Teen Court, YEC, Mediations
key_div <- div_programs %>%
filter(
Category %in% c("Teen Court", "Youth Enrichment Class (YEC)", "Mediations"),
Metric %in% c("Cases Referred", "Assigned", "Attended/Completed", "Appeared/Resolved")
) %>%
select(FiscalYear, Category, Metric, Total)
key_div
## # A tibble: 4 × 4
## FiscalYear Category Metric Total
## <chr> <chr> <chr> <dbl>
## 1 FY2024 Mediations Appeared/Resolved 2265
## 2 FY2024 Youth Enrichment Class (YEC) Assigned 794
## 3 FY2024 Youth Enrichment Class (YEC) Attended/Completed 378
## 4 FY2024 Teen Court Cases Referred 46
library(ggplot2)
# Bar chart: diversion referrals by year (example: Teen Court)
teen_court <- div_programs %>%
filter(Category == "Teen Court", Metric == "Cases Referred")
ggplot(teen_court, aes(x = FiscalYear, y = Total, fill = FiscalYear)) +
geom_col() +
labs(
title = "Teen Court Referrals by Year",
x = "Fiscal Year",
y = "Number of Referrals"
) +
theme_minimal()

# Line chart: repeat offender rate by fiscal year
ggplot(recid_summary, aes(x = FiscalYear, y = repeat_rate, group = 1)) +
geom_line(size = 1.4, color = "red") +
geom_point(size = 4, color = "darkred") +
labs(
title = "Repeat Offender Rate by Fiscal Year",
x = "Fiscal Year",
y = "Repeat Offender Rate (%)"
) +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

combined_summary <- recid_summary %>%
left_join(
diversion_outcomes %>%
filter(Metric == "% Cases ending in dismissal") %>%
tidyr::pivot_longer(cols = c("FY2023", "FY2024"),
names_to = "FiscalYear",
values_to = "diversion_dismissal_rate"),
by = "FiscalYear"
)
combined_summary
## # A tibble: 2 × 7
## FiscalYear avg_violations median_violations repeat_rate n_cases Metric
## <chr> <dbl> <dbl> <dbl> <int> <chr>
## 1 2023 1.23 1 13.6 418 <NA>
## 2 2024 1.26 1 17.3 231 <NA>
## # ℹ 1 more variable: diversion_dismissal_rate <chr>
library(dplyr)
library(tidyr)
# Reload section totals (your diversion program dataset)
section_totals <- readxl::read_excel("juvenile_section_totals.xlsx", sheet = "Sheet1")
# Use only the programs you are including
programs_to_use <- c(
"Parenting Classes",
"Teen Court",
"UTSA Classes",
"Youth Enrichment Class (YEC)"
)
program_data <- section_totals %>%
filter(Category %in% programs_to_use)
# Use only assignment / completion type metrics
metrics_to_use <- c(
"Assigned",
"Attended/Completed",
"Cases Closed"
)
program_summary <- program_data %>%
filter(Metric %in% metrics_to_use) %>%
select(FiscalYear, Category, Metric, Total) %>%
pivot_wider(
names_from = Metric,
values_from = Total
) %>%
mutate(
completion_rate = (`Attended/Completed` / Assigned) * 100,
closure_rate = (`Cases Closed` / Assigned) * 100
)
program_summary
## # A tibble: 4 × 7
## FiscalYear Category Assigned `Attended/Completed` `Cases Closed`
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 FY2024 Youth Enrichment Clas… 794 378 NA
## 2 FY2024 Teen Court NA NA 30
## 3 FY2024 UTSA Classes 121 NA NA
## 4 FY2024 Parenting Classes 405 NA NA
## # ℹ 2 more variables: completion_rate <dbl>, closure_rate <dbl>
library(dplyr)
library(tidyr)
section_totals <- readxl::read_excel("juvenile_section_totals.xlsx", sheet = "Sheet1")
# Map program-specific metrics
program_summary <- section_totals %>%
filter(Category %in% c("Youth Enrichment Class (YEC)",
"Parenting Classes",
"UTSA Classes",
"Teen Court")) %>%
select(FiscalYear, Category, Metric, Total) %>%
mutate(
assigned = case_when(
Category == "Youth Enrichment Class (YEC)" ~ ifelse(Metric == "Assigned", Total, NA),
Category == "UTSA Classes" ~ ifelse(Metric == "Assigned", Total, NA),
Category == "Parenting Classes" ~ ifelse(Metric == "Assigned", Total, NA),
TRUE ~ NA
),
completed = case_when(
Category == "Youth Enrichment Class (YEC)" ~ ifelse(Metric == "Attended/Completed", Total, NA),
Category == "UTSA Classes" ~ ifelse(Metric == "Appeared/Completed", Total, NA),
Category == "Parenting Classes" ~ ifelse(Metric == "Participants", Total, NA),
TRUE ~ NA
),
processed = case_when(
Category == "Teen Court" ~ ifelse(Metric == "Cases Processed", Total, NA),
TRUE ~ NA
),
closed = case_when(
Category == "Teen Court" ~ ifelse(Metric == "Cases Closed", Total, NA),
TRUE ~ NA
)
) %>%
group_by(FiscalYear, Category) %>%
summarize(
Assigned = sum(assigned, na.rm = TRUE),
Completed = sum(completed, na.rm = TRUE),
Processed = sum(processed, na.rm = TRUE),
Closed = sum(closed, na.rm = TRUE),
completion_rate = case_when(
Category %in% c("Youth Enrichment Class (YEC)", "UTSA Classes", "Parenting Classes") ~
(Completed / Assigned) * 100,
Category == "Teen Court" ~ (Closed / Processed) * 100
)
)
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'FiscalYear', 'Category'. You can override
## using the `.groups` argument.
program_summary
## # A tibble: 11 × 7
## # Groups: FiscalYear, Category [4]
## FiscalYear Category Assigned Completed Processed Closed completion_rate
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 FY2024 Parenting Cla… 405 143 0 0 35.3
## 2 FY2024 Parenting Cla… 405 143 0 0 35.3
## 3 FY2024 Teen Court 0 0 54 30 55.6
## 4 FY2024 Teen Court 0 0 54 30 55.6
## 5 FY2024 Teen Court 0 0 54 30 55.6
## 6 FY2024 UTSA Classes 121 80 0 0 66.1
## 7 FY2024 UTSA Classes 121 80 0 0 66.1
## 8 FY2024 UTSA Classes 121 80 0 0 66.1
## 9 FY2024 Youth Enrichm… 794 378 0 0 47.6
## 10 FY2024 Youth Enrichm… 794 378 0 0 47.6
## 11 FY2024 Youth Enrichm… 794 378 0 0 47.6
library(ggplot2)
ggplot(program_summary,
aes(x = Category, y = completion_rate, fill = FiscalYear)) +
geom_col(position = "dodge") +
labs(
title = "Completion / Participation Rates by Diversion Program",
y = "Completion Rate (%)",
x = "Diversion Program"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

juveniles <- juvenile %>%
mutate(
paid_fine = ifelse(ViolPaid > 0, 1, 0),
owes_money = ifelse(AmtDue > 0, 1, 0),
financial_resolution = case_when(
ViolPaid > 0 ~ "Paid Fine",
AmtDue > 0 ~ "Still Owes",
TRUE ~ "No Payment"
)
)
finance_summary <- juveniles %>%
group_by(financial_resolution) %>%
summarise(
n = n(),
repeat_rate = mean(repeat_offender, na.rm = TRUE) * 100,
avg_total_viol = mean(PersonViolCtAll, na.rm = TRUE)
)
finance_year <- juveniles %>%
group_by(FiscalYear) %>%
summarise(
pct_paid = mean(paid_fine, na.rm = TRUE) * 100,
pct_owes = mean(owes_money, na.rm = TRUE) * 100,
repeat_rate = mean(repeat_offender, na.rm = TRUE) * 100
)
diversion_year <- program_summary %>%
group_by(FiscalYear) %>%
summarise(total_assigned = sum(Assigned, na.rm = TRUE))
# Create financial behavior flags in the CLEANED juvenile dataset
juveniles <- juvenile %>%
mutate(
paid_fine = ifelse(ViolPaid > 0, 1, 0),
owes_money = ifelse(AmtDue > 0, 1, 0),
financial_resolution = case_when(
ViolPaid > 0 ~ "Paid Fine",
AmtDue > 0 ~ "Still Owes",
TRUE ~ "No Payment"
)
)
finance_summary <- juveniles %>%
group_by(financial_resolution) %>%
summarise(
n = n(),
pct_of_cases = round(100 * n / nrow(juvenile), 1),
avg_total_viol = round(mean(PersonViolCtAll, na.rm = TRUE), 2),
repeat_rate = round(mean(repeat_offender, na.rm = TRUE) * 100, 1)
)
finance_summary
## # A tibble: 3 × 5
## financial_resolution n pct_of_cases avg_total_viol repeat_rate
## <chr> <int> <dbl> <dbl> <dbl>
## 1 No Payment 405 62.4 1.25 15.8
## 2 Paid Fine 59 9.1 1.24 13.6
## 3 Still Owes 185 28.5 1.22 13.5
diversion_year <- program_summary %>%
group_by(FiscalYear) %>%
summarise(
total_assigned = sum(Assigned, na.rm = TRUE),
total_completed = sum(Completed, na.rm = TRUE),
overall_completion_rate = round(100 * total_completed / total_assigned, 1)
)
diversion_year
## # A tibble: 1 × 4
## FiscalYear total_assigned total_completed overall_completion_rate
## <chr> <dbl> <dbl> <dbl>
## 1 FY2024 3555 1660 46.7
finance_by_year <- juveniles %>%
group_by(FiscalYear) %>%
summarise(
pct_paid = round(mean(paid_fine, na.rm = TRUE) * 100, 1),
pct_owes = round(mean(owes_money, na.rm = TRUE) * 100, 1),
repeat_rate = round(mean(repeat_offender, na.rm = TRUE) * 100, 1)
)
diversion_year <- program_summary %>%
group_by(FiscalYear) %>%
summarise(
total_assigned = sum(Assigned, na.rm = TRUE),
total_completed = sum(Completed, na.rm = TRUE),
overall_completion_rate = round(100 * total_completed / total_assigned, 1)
)
finance_diversion_year <- finance_by_year %>%
left_join(diversion_year, by = "FiscalYear")
finance_diversion_year
## # A tibble: 2 × 7
## FiscalYear pct_paid pct_owes repeat_rate total_assigned total_completed
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2023 8.6 24.2 13.6 NA NA
## 2 2024 10 36.4 17.3 NA NA
## # ℹ 1 more variable: overall_completion_rate <dbl>
library(ggplot2)
ggplot(finance_summary,
aes(x = financial_resolution, y = repeat_rate)) +
geom_col(fill = "steelblue") +
labs(
title = "Repeat Offender Rate by Financial Resolution",
x = "How the Case Was Resolved Financially",
y = "Repeat Offender Rate (%)"
) +
theme_minimal()

finance_year_long <- finance_by_year %>%
tidyr::pivot_longer(
cols = c(pct_paid, pct_owes),
names_to = "type",
values_to = "percent"
) %>%
mutate(
type = dplyr::recode(
type,
pct_paid = "Paid Fine",
pct_owes = "Still Owes"
)
)
ggplot(finance_year_long,
aes(x = FiscalYear, y = percent, fill = type)) +
geom_col(position = "dodge") +
labs(
title = "Financial Resolution Patterns by Fiscal Year",
x = "Fiscal Year",
y = "Percent of Cases",
fill = "Resolution Type"
) +
theme_minimal()

juvenile <- juvenile %>%
mutate(
paid_fine = ifelse(ViolPaid > 0, "Paid Fine", "Did Not Pay")
)
status_payment_table <- table(juvenile$StatusDescription, juvenile$paid_fine)
status_payment_table
##
## Did Not Pay Paid Fine
## AUTO RESET-JUVENILE APPEARANCE 118 0
## DEFERRED DISPOSITION 0 3
## DEFERRED DISPOSITION P/P 9 0
## DISM PROS/PLEA BARGAIN 27 0
## DISMISS WAIVE FEE 279 5
## DISMISSED AFTER DEFERRED DISP. 1 40
## DISMISSED YOUTH DIVERSION 0 1
## DOCKET CLOSED 81 9
## JUDGMENT/CONVICTION 2 0
## JUVENILE APPEARANCE 36 0
## PAYMENT EXTENSION 6 0
## PLEA BARGAIN APPEARANCE 2 0
## RESET - MINOR APPEARANCE 1 0
## RESET - QOL APPEARANCE 1 0
## REVOKED PROB/DSC 10 1
## VOIDED DOCKET 1 0
## WAIVE JUVENILE JURISDICTION 16 0
chisq.test(status_payment_table)
## Warning in chisq.test(status_payment_table): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: status_payment_table
## X-squared = 468.75, df = 16, p-value < 2.2e-16
library(lsr)
## Warning: package 'lsr' was built under R version 4.4.3
cramersV(status_payment_table)
## Warning in stats::chisq.test(...): Chi-squared approximation may be incorrect
## [1] 0.849862
library(ggplot2)
status_payment_df <- as.data.frame(status_payment_table)
colnames(status_payment_df) <- c("StatusDescription", "Payment", "Count")
ggplot(status_payment_df,
aes(x = Payment, y = StatusDescription, fill = Count)) +
geom_tile(color = "white") +
scale_fill_gradient(low = "lightblue", high = "darkblue") +
labs(
title = "Heatmap of Payment Behavior by Status Description",
x = "Payment Status",
y = "Case Status",
fill = "Number of Cases"
) +
theme_minimal()

viol_vs_payment <- juvenile %>%
group_by(paid_fine) %>%
summarise(
n = n(),
avg_total_viol = mean(PersonViolCtAll, na.rm = TRUE),
median_total_viol = median(PersonViolCtAll, na.rm = TRUE),
repeat_rate = mean(repeat_offender, na.rm = TRUE) * 100
)
viol_vs_payment
## # A tibble: 2 × 5
## paid_fine n avg_total_viol median_total_viol repeat_rate
## <chr> <int> <dbl> <dbl> <dbl>
## 1 Did Not Pay 590 1.24 1 15.1
## 2 Paid Fine 59 1.24 1 13.6
viol_vs_status <- juvenile %>%
group_by(StatusDescription) %>%
summarise(
n = n(),
avg_total_viol = mean(PersonViolCtAll, na.rm = TRUE),
median_total_viol = median(PersonViolCtAll, na.rm = TRUE),
repeat_rate = mean(repeat_offender, na.rm = TRUE) * 100
) %>%
arrange(desc(avg_total_viol))
viol_vs_status
## # A tibble: 17 × 5
## StatusDescription n avg_total_viol median_total_viol repeat_rate
## <chr> <int> <dbl> <dbl> <dbl>
## 1 DEFERRED DISPOSITION P/P 9 2 1 22.2
## 2 PLEA BARGAIN APPEARANCE 2 2 2 100
## 3 RESET - QOL APPEARANCE 1 2 2 100
## 4 VOIDED DOCKET 1 2 2 100
## 5 WAIVE JUVENILE JURISDICTI… 16 1.69 1 31.2
## 6 DISM PROS/PLEA BARGAIN 27 1.56 1 40.7
## 7 DEFERRED DISPOSITION 3 1.33 1 33.3
## 8 DOCKET CLOSED 90 1.24 1 12.2
## 9 AUTO RESET-JUVENILE APPEA… 118 1.23 1 15.3
## 10 DISMISS WAIVE FEE 284 1.21 1 14.1
## 11 PAYMENT EXTENSION 6 1.17 1 16.7
## 12 DISMISSED AFTER DEFERRED … 41 1.10 1 7.32
## 13 JUVENILE APPEARANCE 36 1.03 1 2.78
## 14 DISMISSED YOUTH DIVERSION 1 1 1 0
## 15 JUDGMENT/CONVICTION 2 1 1 0
## 16 RESET - MINOR APPEARANCE 1 1 1 0
## 17 REVOKED PROB/DSC 11 1 1 0
library(ggplot2)
ggplot(juvenile,
aes(x = paid_fine, y = PersonViolCtAll)) +
geom_boxplot(fill = "skyblue") +
labs(
title = "Total Violations by Payment Behavior",
x = "Payment Behavior",
y = "Total Violations"
) +
theme_minimal()

juvenile <- juvenile %>%
mutate(
paid_fine_numeric = ifelse(paid_fine == "Paid Fine", 1, 0)
)
cor.test(juvenile$PersonViolCtAll, juvenile$paid_fine_numeric)
##
## Pearson's product-moment correlation
##
## data: juvenile$PersonViolCtAll and juvenile$paid_fine_numeric
## t = -0.035837, df = 647, p-value = 0.9714
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.07836165 0.07556057
## sample estimates:
## cor
## -0.001408884
viol_by_year <- juvenile %>%
group_by(FiscalYear) %>%
summarise(
avg_total_viol = mean(PersonViolCtAll, na.rm = TRUE),
repeat_rate = mean(repeat_offender, na.rm = TRUE) * 100
)
viol_by_year
## # A tibble: 2 × 3
## FiscalYear avg_total_viol repeat_rate
## <fct> <dbl> <dbl>
## 1 2023 1.23 13.6
## 2 2024 1.26 17.3
year_cross_reference <- viol_by_year %>%
left_join(diversion_year, by = "FiscalYear")
year_cross_reference
## # A tibble: 2 × 6
## FiscalYear avg_total_viol repeat_rate total_assigned total_completed
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2023 1.23 13.6 NA NA
## 2 2024 1.26 17.3 NA NA
## # ℹ 1 more variable: overall_completion_rate <dbl>
year_cross_reference_clean <- year_cross_reference %>%
filter(!is.na(total_assigned), !is.na(avg_total_viol))
ggplot(year_cross_reference_clean,
aes(x = total_assigned, y = avg_total_viol)) +
geom_point(size = 3, color = "blue") +
geom_smooth(method = "lm", se = FALSE, color = "red") +
labs(
title = "Diversion Assignments vs Average Violations by Fiscal Year",
x = "Total Diversion Assignments",
y = "Average Total Violations"
) +
theme_minimal()

year_cross_reference
## # A tibble: 2 × 6
## FiscalYear avg_total_viol repeat_rate total_assigned total_completed
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2023 1.23 13.6 NA NA
## 2 2024 1.26 17.3 NA NA
## # ℹ 1 more variable: overall_completion_rate <dbl>
year_cross_reference
## # A tibble: 2 × 6
## FiscalYear avg_total_viol repeat_rate total_assigned total_completed
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2023 1.23 13.6 NA NA
## 2 2024 1.26 17.3 NA NA
## # ℹ 1 more variable: overall_completion_rate <dbl>
str(year_cross_reference)
## tibble [2 × 6] (S3: tbl_df/tbl/data.frame)
## $ FiscalYear : chr [1:2] "2023" "2024"
## $ avg_total_viol : num [1:2] 1.23 1.26
## $ repeat_rate : num [1:2] 13.6 17.3
## $ total_assigned : num [1:2] NA NA
## $ total_completed : num [1:2] NA NA
## $ overall_completion_rate: num [1:2] NA NA
ggplot(year_cross_reference,
aes(x = total_assigned, y = avg_total_viol)) +
geom_point(size = 3, na.rm = TRUE) +
geom_smooth(method = "lm", se = FALSE, na.rm = TRUE) +
labs(
title = "Diversion Assignments vs Average Violations by Fiscal Year",
x = "Total Diversion Assignments",
y = "Average Total Violations"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

program_summary
## # A tibble: 11 × 7
## # Groups: FiscalYear, Category [4]
## FiscalYear Category Assigned Completed Processed Closed completion_rate
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 FY2024 Parenting Cla… 405 143 0 0 35.3
## 2 FY2024 Parenting Cla… 405 143 0 0 35.3
## 3 FY2024 Teen Court 0 0 54 30 55.6
## 4 FY2024 Teen Court 0 0 54 30 55.6
## 5 FY2024 Teen Court 0 0 54 30 55.6
## 6 FY2024 UTSA Classes 121 80 0 0 66.1
## 7 FY2024 UTSA Classes 121 80 0 0 66.1
## 8 FY2024 UTSA Classes 121 80 0 0 66.1
## 9 FY2024 Youth Enrichm… 794 378 0 0 47.6
## 10 FY2024 Youth Enrichm… 794 378 0 0 47.6
## 11 FY2024 Youth Enrichm… 794 378 0 0 47.6
# In juvenile: turn 2023 -> "FY2023", 2024 -> "FY2024"
juvenile <- juvenile %>%
mutate(FY = paste0("FY", FiscalYear))
# In program_summary: just copy FiscalYear into FY
program_summary <- program_summary %>%
mutate(FY = FiscalYear)
# Summarise once per FY & category
program_summary_fy_cat <- program_summary %>%
group_by(FY, Category) %>%
summarise(
Assigned = max(Assigned, na.rm = TRUE),
Completed = max(Completed, na.rm = TRUE),
Processed = max(Processed, na.rm = TRUE),
Closed = max(Closed, na.rm = TRUE),
.groups = "drop"
) %>%
mutate(
completion_rate = dplyr::case_when(
Category == "Teen Court" ~ 100 * Closed / Processed,
TRUE ~ 100 * Completed / Assigned
)
)
program_summary_fy_cat
## # A tibble: 4 × 7
## FY Category Assigned Completed Processed Closed completion_rate
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 FY2024 Parenting Classes 405 143 0 0 35.3
## 2 FY2024 Teen Court 0 0 54 30 55.6
## 3 FY2024 UTSA Classes 121 80 0 0 66.1
## 4 FY2024 Youth Enrichment C… 794 378 0 0 47.6
diversion_year <- program_summary_fy_cat %>%
group_by(FY) %>%
summarise(
total_assigned = sum(Assigned, na.rm = TRUE),
total_completed = sum(Completed, na.rm = TRUE),
overall_completion_rate = 100 * total_completed / total_assigned,
.groups = "drop"
)
diversion_year
## # A tibble: 1 × 4
## FY total_assigned total_completed overall_completion_rate
## <chr> <dbl> <dbl> <dbl>
## 1 FY2024 1320 601 45.5
viol_by_year <- juvenile %>%
group_by(FY) %>%
summarise(
avg_total_viol = mean(PersonViolCtAll, na.rm = TRUE),
repeat_rate = mean(repeat_offender, na.rm = TRUE) * 100,
.groups = "drop"
)
viol_by_year
## # A tibble: 2 × 3
## FY avg_total_viol repeat_rate
## <chr> <dbl> <dbl>
## 1 FY2023 1.23 13.6
## 2 FY2024 1.26 17.3
year_cross_reference <- viol_by_year %>%
left_join(diversion_year, by = "FY")
year_cross_reference
## # A tibble: 2 × 6
## FY avg_total_viol repeat_rate total_assigned total_completed
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 FY2023 1.23 13.6 NA NA
## 2 FY2024 1.26 17.3 1320 601
## # ℹ 1 more variable: overall_completion_rate <dbl>
cor.test(juveniles$PersonViolCtAll, juveniles$paid_fine, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: juveniles$PersonViolCtAll and juveniles$paid_fine
## t = -0.035837, df = 647, p-value = 0.9714
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.07836165 0.07556057
## sample estimates:
## cor
## -0.001408884
library(DescTools)
## Warning: package 'DescTools' was built under R version 4.4.3
status_payment_table <- table(juvenile$StatusDescription, juvenile$paid_fine)
status_payment_table
##
## Did Not Pay Paid Fine
## AUTO RESET-JUVENILE APPEARANCE 118 0
## DEFERRED DISPOSITION 0 3
## DEFERRED DISPOSITION P/P 9 0
## DISM PROS/PLEA BARGAIN 27 0
## DISMISS WAIVE FEE 279 5
## DISMISSED AFTER DEFERRED DISP. 1 40
## DISMISSED YOUTH DIVERSION 0 1
## DOCKET CLOSED 81 9
## JUDGMENT/CONVICTION 2 0
## JUVENILE APPEARANCE 36 0
## PAYMENT EXTENSION 6 0
## PLEA BARGAIN APPEARANCE 2 0
## RESET - MINOR APPEARANCE 1 0
## RESET - QOL APPEARANCE 1 0
## REVOKED PROB/DSC 10 1
## VOIDED DOCKET 1 0
## WAIVE JUVENILE JURISDICTION 16 0
DescTools::CramerV(status_payment_table)
## [1] 0.849862
recid_model <- glm(
repeat_offender ~ paid_fine + PersonViolCtAll + age + race + gender,
data = juvenile,
family = binomial
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(recid_model)
##
## Call:
## glm(formula = repeat_offender ~ paid_fine + PersonViolCtAll +
## age + race + gender, family = binomial, data = juvenile)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.707e+01 1.680e+05 0.000 1.000
## paid_finePaid Fine 5.054e-02 3.658e+04 0.000 1.000
## PersonViolCtAll 5.074e+01 2.154e+04 0.002 0.998
## age -1.039e-02 6.921e+03 0.000 1.000
## raceB 3.264e-01 1.387e+05 0.000 1.000
## raceH 3.076e-01 1.360e+05 0.000 1.000
## raceU 4.478e-01 1.505e+05 0.000 1.000
## raceW 3.300e-01 1.358e+05 0.000 1.000
## raceX -9.448e-02 3.811e+05 0.000 1.000
## genderM -1.520e-01 2.166e+04 0.000 1.000
## genderU 1.226e-01 7.109e+04 0.000 1.000
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 5.2318e+02 on 616 degrees of freedom
## Residual deviance: 6.9035e-09 on 606 degrees of freedom
## (32 observations deleted due to missingness)
## AIC: 22
##
## Number of Fisher Scoring iterations: 25
juveniles$diversion_binary <- ifelse(juveniles$StatusDescription %in% c("Teen Court Completed",
"Completed Counseling",
"YEC Completed",
"Mediation Successful",
"Dismissal (Completed Program)"),
1, 0)
cases <- juveniles
cases$diversion_binary <- ifelse(cases$StatusDescription %in% c("Teen Court Completed",
"Completed Counseling",
"YEC Completed",
"Mediation Successful",
"Dismissal (Completed Program)"),
1, 0)
cases$punitive_binary <- ifelse(cases$diversion_binary == 0, 1, 0)
# create a recidivism flag: 1 = more than one violation, 0 = one or fewer
cases <- cases |>
dplyr::mutate(
recid_binary = if_else(PersonViolCtAll > 1, 1L, 0L)
)
# quick check
table(cases$recid_binary)
##
## 0 1
## 552 97
tab_rd <- table(cases$recid_binary, cases$diversion_binary)
tab_rd
##
## 0
## 0 552
## 1 97
prop.table(tab_rd, 2)
##
## 0
## 0 0.8505393
## 1 0.1494607
tab_rd <- table(cases$recid_binary, cases$diversion_binary)
tab_rd
##
## 0
## 0 552
## 1 97
prop.table(tab_rd, 2)
##
## 0
## 0 0.8505393
## 1 0.1494607
unique(cases$StatusDescription)
## [1] "AUTO RESET-JUVENILE APPEARANCE" "DOCKET CLOSED"
## [3] "DISMISS WAIVE FEE" "DISM PROS/PLEA BARGAIN"
## [5] "REVOKED PROB/DSC" "DISMISSED AFTER DEFERRED DISP."
## [7] "JUVENILE APPEARANCE" "WAIVE JUVENILE JURISDICTION"
## [9] "DEFERRED DISPOSITION P/P" "DEFERRED DISPOSITION"
## [11] "PLEA BARGAIN APPEARANCE" "RESET - MINOR APPEARANCE"
## [13] "PAYMENT EXTENSION" "RESET - QOL APPEARANCE"
## [15] "DISMISSED YOUTH DIVERSION" "JUDGMENT/CONVICTION"
## [17] "VOIDED DOCKET"
# create diversion flag based on StatusDescription text
table(cases$StatusDescription) # (optional: just to inspect)
##
## AUTO RESET-JUVENILE APPEARANCE DEFERRED DISPOSITION
## 118 3
## DEFERRED DISPOSITION P/P DISM PROS/PLEA BARGAIN
## 9 27
## DISMISS WAIVE FEE DISMISSED AFTER DEFERRED DISP.
## 284 41
## DISMISSED YOUTH DIVERSION DOCKET CLOSED
## 1 90
## JUDGMENT/CONVICTION JUVENILE APPEARANCE
## 2 36
## PAYMENT EXTENSION PLEA BARGAIN APPEARANCE
## 6 2
## RESET - MINOR APPEARANCE RESET - QOL APPEARANCE
## 1 1
## REVOKED PROB/DSC VOIDED DOCKET
## 11 1
## WAIVE JUVENILE JURISDICTION
## 16
cases <- cases |>
dplyr::mutate(
diversion_binary = if_else(
grepl("DIVERSION", StatusDescription, ignore.case = TRUE),
1L, 0L
)
)
# check that we have both 0 and 1
table(cases$diversion_binary)
##
## 0 1
## 648 1
str(cases)
## tibble [649 × 36] (S3: tbl_df/tbl/data.frame)
## $ FiscalYear : Factor w/ 2 levels "2023","2024": 1 1 1 1 1 1 1 1 1 1 ...
## $ ViolationDate : Date[1:649], format: "2023-01-04" "2023-01-05" ...
## $ ViolTime : POSIXct[1:649], format: "1899-12-31 15:02:00" "1899-12-31 10:31:00" ...
## $ Filed : Date[1:649], format: "2023-08-28" "2023-03-09" ...
## $ StatusDate : Date[1:649], format: "2025-01-14" "2023-10-19" ...
## $ ConvictionDate : Date[1:649], format: NA "2023-10-19" ...
## $ PleaDate : Date[1:649], format: NA "2023-04-04" ...
## $ Plea : chr [1:649] NA "NC" "NC" "NC" ...
## $ DefendantZipcode1 : num [1:649] 78229 78228 78228 78228 78224 ...
## $ OffenseCode : chr [1:649] "2024A" "2017A" "2017A" "3006A" ...
## $ OffenseDesc : chr [1:649] "DISORDERLY CONDUCT-FIGHTING" "FAIL TO INDENTIFY (REFUSED TO REPORT NAME RESIDENCE ADDRESS OR D.O.B.)" "FAIL TO INDENTIFY (REFUSED TO REPORT NAME RESIDENCE ADDRESS OR D.O.B.)" "POSSESSING DRUG PARAPHERNALIA" ...
## $ ViolPaid : num [1:649] 0 0 0 0 0 96 0 0 0 0 ...
## $ ViolFine : num [1:649] 0 100 0 19 0 0 0 0 0 0 ...
## $ ViolFees : num [1:649] 81 81 0 81 81 96 0 81 0 0 ...
## $ AmtDue : num [1:649] 81 0 0 0 0 0 0 0 0 0 ...
## $ Status : chr [1:649] "JR" "CL" "WD" "CL" ...
## $ StatusDescription : chr [1:649] "AUTO RESET-JUVENILE APPEARANCE" "DOCKET CLOSED" "DISMISS WAIVE FEE" "DOCKET CLOSED" ...
## $ AgeAtTimeOfViolation: num [1:649] 14 14 14 12 15 15 16 15 13 16 ...
## $ Race : chr [1:649] "W" "H" "H" "W" ...
## $ Gender : chr [1:649] "M" "M" "M" "M" ...
## $ CaseType : chr [1:649] "NTRF" "NTRF" "NTRF" "NTRF" ...
## $ PersonViolCtAll : num [1:649] 1 1 1 1 5 5 2 1 1 1 ...
## $ PersonViolCtNTRF : num [1:649] 1 1 1 1 2 2 1 1 1 1 ...
## $ age : num [1:649] 14 14 14 12 15 15 16 15 13 16 ...
## $ gender : chr [1:649] "M" "M" "M" "M" ...
## $ race : chr [1:649] "W" "H" "H" "W" ...
## $ zipcode : chr [1:649] "78229" "78228" "78228" "78228" ...
## $ total_violations : num [1:649] 1 1 1 1 5 5 2 1 1 1 ...
## $ cited_violations : num [1:649] 1 1 1 1 2 2 1 1 1 1 ...
## $ repeat_offender : num [1:649] 0 0 0 0 1 1 1 0 0 0 ...
## $ paid_fine : num [1:649] 0 0 0 0 0 1 0 0 0 0 ...
## $ owes_money : num [1:649] 1 0 0 0 0 0 0 0 0 0 ...
## $ financial_resolution: chr [1:649] "Still Owes" "No Payment" "No Payment" "No Payment" ...
## $ diversion_binary : int [1:649] 0 0 0 0 0 0 0 0 0 0 ...
## $ punitive_binary : num [1:649] 1 1 1 1 1 1 1 1 1 1 ...
## $ recid_binary : int [1:649] 0 0 0 0 1 1 1 0 0 0 ...
table(cases$StatusDescription)
##
## AUTO RESET-JUVENILE APPEARANCE DEFERRED DISPOSITION
## 118 3
## DEFERRED DISPOSITION P/P DISM PROS/PLEA BARGAIN
## 9 27
## DISMISS WAIVE FEE DISMISSED AFTER DEFERRED DISP.
## 284 41
## DISMISSED YOUTH DIVERSION DOCKET CLOSED
## 1 90
## JUDGMENT/CONVICTION JUVENILE APPEARANCE
## 2 36
## PAYMENT EXTENSION PLEA BARGAIN APPEARANCE
## 6 2
## RESET - MINOR APPEARANCE RESET - QOL APPEARANCE
## 1 1
## REVOKED PROB/DSC VOIDED DOCKET
## 11 1
## WAIVE JUVENILE JURISDICTION
## 16
names(cases)
## [1] "FiscalYear" "ViolationDate" "ViolTime"
## [4] "Filed" "StatusDate" "ConvictionDate"
## [7] "PleaDate" "Plea" "DefendantZipcode1"
## [10] "OffenseCode" "OffenseDesc" "ViolPaid"
## [13] "ViolFine" "ViolFees" "AmtDue"
## [16] "Status" "StatusDescription" "AgeAtTimeOfViolation"
## [19] "Race" "Gender" "CaseType"
## [22] "PersonViolCtAll" "PersonViolCtNTRF" "age"
## [25] "gender" "race" "zipcode"
## [28] "total_violations" "cited_violations" "repeat_offender"
## [31] "paid_fine" "owes_money" "financial_resolution"
## [34] "diversion_binary" "punitive_binary" "recid_binary"
names(research_data)
## [1] "FiscalYear" "age" "race" "gender"
## [5] "zipcode" "total_violations" "cited_violations" "repeat_offender"
## Warning: package 'knitr' was built under R version 4.4.3
Table 1. Descriptive statistics for key numeric
variables.
| Age |
649 |
14.16 |
14 |
2 |
16 |
1.58 |
| Total violations |
649 |
1.24 |
1 |
1 |
7 |
0.69 |
| Cited violations |
649 |
1.14 |
1 |
1 |
4 |
0.44 |
| Repeat offender (0/1) |
649 |
0.15 |
0 |
0 |
1 |
0.36 |
## 2️⃣ Correlation Matrix Table (with significance stars)
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 4.4.3
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:DescTools':
##
## %nin%, Label, Mean, Quantile
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
library(knitr)
vars_for_corr <- research_data %>%
select(age, total_violations, cited_violations, repeat_offender)
corr_res <- rcorr(as.matrix(vars_for_corr)) # r and p
r_mat <- corr_res$r
p_mat <- corr_res$P
# build matrix with stars
stars <- ifelse(p_mat < 0.001, "***",
ifelse(p_mat < 0.01, "**",
ifelse(p_mat < 0.05, "*", "")))
r_with_stars <- paste0(round(r_mat, 2), stars)
dim(r_with_stars) <- dim(r_mat)
rownames(r_with_stars) <- rownames(r_mat)
colnames(r_with_stars) <- colnames(r_mat)
kable(r_with_stars,
caption = "Table 2. Correlation matrix for key variables (Pearson r with significance).")
Table 2. Correlation matrix for key variables (Pearson r with
significance).
| age |
1NA |
0.06 |
-0.01 |
0.05 |
| total_violations |
0.06 |
1NA |
0.69*** |
0.83*** |
| cited_violations |
-0.01 |
0.69*** |
1NA |
0.78*** |
| repeat_offender |
0.05 |
0.83*** |
0.78*** |
1NA |
## Warning: package 'broom' was built under R version 4.4.3
Table 3. Linear regression predicting total violations from
age, gender, and race.
| Intercept |
0.485 |
0.366 |
1.325 |
0.186 |
| Age |
0.024 |
0.018 |
1.344 |
0.179 |
| genderM |
0.248 |
0.058 |
4.306 |
0.000 |
| genderU |
0.180 |
0.286 |
0.630 |
0.529 |
| raceB |
0.433 |
0.276 |
1.566 |
0.118 |
| raceH |
0.299 |
0.267 |
1.119 |
0.264 |
| raceU |
0.170 |
0.375 |
0.454 |
0.650 |
| raceW |
0.252 |
0.266 |
0.944 |
0.345 |
| raceX |
0.181 |
0.745 |
0.243 |
0.808 |
## \nModel fit: R-squared = 0.041, Adjusted R-squared = 0.028, F(8, 608) = 3.22, p = 0.00137.\n
# Descriptive statistics for numeric variables used in analysis
library(dplyr)
library(tidyr)
numeric_vars <- c("age",
"total_violations",
"cited_violations",
"repeat_offender")
descriptives_table <- research_data |>
summarise(across(
.cols = all_of(numeric_vars),
.fns = list(
n = ~sum(!is.na(.)),
mean = ~mean(., na.rm = TRUE),
sd = ~sd(., na.rm = TRUE),
min = ~min(., na.rm = TRUE),
median = ~median(., na.rm = TRUE),
max = ~max(., na.rm = TRUE)
),
.names = "{.col}_{.fn}"
)) |>
pivot_longer(
cols = everything(),
names_to = c("variable", "stat"),
names_sep = "_",
values_to = "value"
) |>
pivot_wider(
names_from = stat,
values_from = value
)
## Warning: Expected 2 pieces. Additional pieces discarded in 18 rows [7, 8, 9, 10, 11, 12,
## 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24].
## Warning: Values from `value` are not uniquely identified; output will contain list-cols.
## • Use `values_fn = list` to suppress this warning.
## • Use `values_fn = {summary_fun}` to summarise duplicates.
## • Use the following dplyr code to identify duplicates.
## {data} |>
## dplyr::summarise(n = dplyr::n(), .by = c(variable, stat)) |>
## dplyr::filter(n > 1L)
descriptives_table
## # A tibble: 4 × 9
## variable n mean sd min median max violations offender
## <chr> <list> <list> <list> <list> <list> <list> <list> <list>
## 1 age <dbl [1]> <dbl [1]> <dbl> <dbl> <dbl> <dbl> <NULL> <NULL>
## 2 total <NULL> <NULL> <NULL> <NULL> <NULL> <NULL> <dbl [6]> <NULL>
## 3 cited <NULL> <NULL> <NULL> <NULL> <NULL> <NULL> <dbl [6]> <NULL>
## 4 repeat <NULL> <NULL> <NULL> <NULL> <NULL> <NULL> <NULL> <dbl [6]>
# install.packages("Hmisc") # run once in Console
library(Hmisc)
corr_vars <- research_data |>
dplyr::select(age, total_violations, cited_violations, repeat_offender)
corr_results <- Hmisc::rcorr(as.matrix(corr_vars)) # r and p-value matrices
# Helper to create significance stars
star_fun <- function(p) {
ifelse(p < 0.001, "***",
ifelse(p < 0.01, "**",
ifelse(p < 0.05, "*",
ifelse(p < 0.10, ".", ""))))
}
r_mat <- corr_results$r
p_mat <- corr_results$P
star_mat <- matrix(
star_fun(p_mat),
nrow = nrow(p_mat),
dimnames = dimnames(p_mat)
)
# Correlations with significance stars (e.g., 0.83***)
corr_with_stars <- matrix(
paste0(round(r_mat, 2), star_mat),
nrow = nrow(r_mat),
dimnames = dimnames(r_mat)
)
corr_with_stars
## age total_violations cited_violations repeat_offender
## age "1NA" "0.06" "-0.01" "0.05"
## total_violations "0.06" "1NA" "0.69***" "0.83***"
## cited_violations "-0.01" "0.69***" "1NA" "0.78***"
## repeat_offender "0.05" "0.83***" "0.78***" "1NA"
# install.packages("broom") # run once in Console
library(broom)
library(dplyr)
# Coefficient-level table
lm_coef_table <- broom::tidy(lm_model) |>
mutate(
sig = dplyr::case_when(
p.value < 0.001 ~ "***",
p.value < 0.01 ~ "**",
p.value < 0.05 ~ "*",
p.value < 0.10 ~ ".",
TRUE ~ ""
)
) |>
dplyr::select(
term, # (Intercept), age, genderM, genderU, raceB, raceH, raceU, raceW, raceX
estimate,
std.error,
statistic,
p.value,
sig
)
lm_coef_table
## # A tibble: 9 × 6
## term estimate std.error statistic p.value sig
## <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 (Intercept) 0.485 0.366 1.33 0.186 ""
## 2 age 0.0239 0.0178 1.34 0.179 ""
## 3 genderM 0.248 0.0577 4.31 0.0000194 "***"
## 4 genderU 0.180 0.286 0.630 0.529 ""
## 5 raceB 0.433 0.276 1.57 0.118 ""
## 6 raceH 0.299 0.267 1.12 0.264 ""
## 7 raceU 0.170 0.375 0.454 0.650 ""
## 8 raceW 0.252 0.266 0.944 0.345 ""
## 9 raceX 0.181 0.745 0.243 0.808 ""
lm_fit_stats <- broom::glance(lm_model) |>
transmute(
adj_r_squared = adj.r.squared,
F_statistic = statistic,
df1 = df, # numerator df
df2 = df.residual, # denominator df
model_p = p.value
)
lm_fit_stats
## # A tibble: 1 × 5
## adj_r_squared F_statistic df1 df2 model_p
## <dbl> <dbl> <dbl> <int> <dbl>
## 1 0.0280 3.22 8 608 0.00137
summary(logit_model)
##
## Call:
## glm(formula = repeat_offender ~ age + gender + race, family = binomial,
## data = model_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -17.31422 543.87728 -0.032 0.97460
## age 0.08400 0.07745 1.085 0.27809
## genderM 0.73197 0.24938 2.935 0.00333 **
## genderU 1.02225 1.15438 0.886 0.37586
## raceB 14.44249 543.87619 0.027 0.97881
## raceH 13.95809 543.87614 0.026 0.97953
## raceU 13.53256 543.87728 0.025 0.98015
## raceW 13.78665 543.87614 0.025 0.97978
## raceX 0.57217 1553.69986 0.000 0.99971
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 523.18 on 616 degrees of freedom
## Residual deviance: 506.53 on 608 degrees of freedom
## (32 observations deleted due to missingness)
## AIC: 524.53
##
## Number of Fisher Scoring iterations: 14
library(pscl)
## Warning: package 'pscl' was built under R version 4.4.3
## Classes and Methods for R originally developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University (2002-2015),
## by and under the direction of Simon Jackman.
## hurdle and zeroinfl functions by Achim Zeileis.
# Pseudo R-squared values (McFadden, etc.)
pR2(logit_model)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML
## -253.26459246 -261.59079230 16.65239969 0.03182910 0.02662835
## r2CU
## 0.04657703
# Likelihood Ratio Chi-square test
anova(logit_model, test = "Chisq")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: repeat_offender
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 616 523.18
## age 1 1.6300 615 521.55 0.20171
## gender 2 8.7861 613 512.77 0.01236 *
## race 5 6.2363 608 506.53 0.28390
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1