library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(pastecs)
##
## Attaching package: 'pastecs'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## The following object is masked from 'package:tidyr':
##
## extract
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(readr)
library(MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
# Read the CPI 3.3 extract; readr reports the guessed column types below.
CPI_3_3 <- read_csv("CPI 3.3.csv")
## Rows: 14774 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): County, Region, Disposition, Family Violence Indicated
## dbl (1): Fiscal Year
## num (1): Completed Investigations
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)

# Categorical text columns are stored as factors.
CPI_3_3[["Disposition"]] <- as.factor(CPI_3_3[["Disposition"]])
CPI_3_3[["Family Violence Indicated"]] <-
  as.factor(CPI_3_3[["Family Violence Indicated"]])
CPI_3_3[["Region"]] <- as.factor(CPI_3_3[["Region"]])

# The count column is kept as a plain numeric vector.
CPI_3_3[["Completed Investigations"]] <-
  as.numeric(CPI_3_3[["Completed Investigations"]])

# 0/1 indicators: DV flags rows where family violence is indicated ("Y"),
# RTB flags rows whose disposition is "Reason to Believe".
CPI_3_3[["DV"]] <- with(
  CPI_3_3,
  ifelse(`Family Violence Indicated` == "Y", 1, 0)
)
CPI_3_3[["RTB"]] <- with(
  CPI_3_3,
  ifelse(Disposition == "Reason to Believe", 1, 0)
)
# Logistic regression of the RTB indicator (1 = "Reason to Believe") on the
# family-violence indicator DV and Region, using the binomial family.
# Region is a factor, so each Region coefficient is a contrast against the
# baseline level (the first factor level, which has no row in the table below).
# NOTE(review): every row of CPI_3_3 also carries a `Completed Investigations`
# value, which looks like a count of investigations per row. This model gives
# each row equal weight -- confirm whether
# weights = `Completed Investigations` was intended.
log_model <- glm(RTB ~ DV + Region,
data = CPI_3_3,
family = binomial)
# Coefficient table on the log-odds scale.
summary(log_model)
##
## Call:
## glm(formula = RTB ~ DV + Region, family = binomial, data = CPI_3_3)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.817579 0.049554 -16.499 < 2e-16 ***
## DV 0.335274 0.035838 9.355 < 2e-16 ***
## Region10-El Paso 0.123674 0.133631 0.925 0.354712
## Region11-Edinburg -0.179309 0.080279 -2.234 0.025511 *
## Region2-Abilene 0.006429 0.071531 0.090 0.928389
## Region3-Arlington -0.325618 0.076753 -4.242 2.21e-05 ***
## Region4-Tyler -0.179772 0.073616 -2.442 0.014606 *
## Region5-Beaumont -0.145491 0.084523 -1.721 0.085192 .
## Region6-Houston -0.294759 0.087479 -3.369 0.000753 ***
## Region7-Austin -0.095785 0.068865 -1.391 0.164249
## Region8-San Antonio -0.100099 0.070717 -1.415 0.156927
## Region9-Midland 0.001482 0.074164 0.020 0.984055
## RegionOut of State -0.248763 0.308265 -0.807 0.419680
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 18390 on 14773 degrees of freedom
## Residual deviance: 18268 on 14761 degrees of freedom
## AIC: 18294
##
## Number of Fisher Scoring iterations: 4
# Exponentiate the log-odds coefficients to read them as odds ratios
# (the intercept becomes the baseline odds).
exp(coef(log_model))
## (Intercept) DV Region10-El Paso Region11-Edinburg
## 0.4414990 1.3983238 1.1316473 0.8358480
## Region2-Abilene Region3-Arlington Region4-Tyler Region5-Beaumont
## 1.0064493 0.7220806 0.8354611 0.8645978
## Region6-Houston Region7-Austin Region8-San Antonio Region9-Midland
## 0.7447113 0.9086590 0.9047481 1.0014833
## RegionOut of State
## 0.7797648
# Cross-tabulate DV (rows) against RTB (columns); margin 1 turns each row into
# proportions, so each DV level's row sums to 1.
# NOTE(review): the table counts data rows, not `Completed Investigations` --
# confirm that row-level (unweighted) proportions are what is wanted here.
prop.table(table(CPI_3_3$DV, CPI_3_3$RTB), 1)
##
## 0 1
## 0 0.7154920 0.2845080
## 1 0.6454298 0.3545702
# Pearson chi-squared test of independence between DV and RTB on the same
# 2 x 2 table of row counts (Yates' continuity correction is applied, as the
# output header shows).
chisq.test(table(CPI_3_3$DV, CPI_3_3$RTB))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(CPI_3_3$DV, CPI_3_3$RTB)
## X-squared = 81.821, df = 1, p-value < 2.2e-16
library(ggplot2)

# Figures 1 and 2: proportion of "Reason to Believe" (RTB) outcomes, weighted
# by `Completed Investigations`.
#
# Why the proportions are pre-computed: stat_summary() summarises `y` only and
# does not use a `weight` aesthetic, so mapping
# weight = `Completed Investigations` had no effect and the bars showed the
# unweighted mean of RTB across rows. The weighted means are therefore
# calculated explicitly and drawn with geom_col().

# Rows with a missing outcome or a missing weight cannot contribute to a
# weighted mean, so they are dropped before summarising.
rtb_rows <- dplyr::filter(
  CPI_3_3,
  !is.na(RTB),
  !is.na(`Completed Investigations`)
)

# Figure 1: weighted RTB proportion for each family-violence level.
rtb_by_dv <- dplyr::summarise(
  rtb_rows,
  RTB = weighted.mean(RTB, w = `Completed Investigations`),
  .by = DV
)

ggplot(rtb_by_dv, aes(x = factor(DV), y = RTB)) +
  geom_col() +
  labs(x = "Family Violence (0 = No, 1 = Yes)",
       y = "Proportion of RTB",
       title = "Figure 1: RTB Rates by Family Violence")

# Figure 2: the same weighted proportion, split by Region and DV.
rtb_by_region_dv <- dplyr::summarise(
  rtb_rows,
  RTB = weighted.mean(RTB, w = `Completed Investigations`),
  .by = c(Region, DV)
)

ggplot(rtb_by_region_dv, aes(x = Region, y = RTB, fill = factor(DV))) +
  geom_col(position = "dodge") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(fill = "DV",
       title = "Figure 2: RTB by Region and Family Violence")
