library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(pastecs)
##
## Attaching package: 'pastecs'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## The following object is masked from 'package:tidyr':
##
## extract
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(readr)
# Load the CPI 3.3 extract (path is relative to the working directory).
# Per the column specification printed below, the file has six columns:
# County, Region, Disposition, Family Violence Indicated, Fiscal Year and
# Completed Investigations.
CPI_3_3 <- read_csv(file = "CPI 3.3.csv")
## Rows: 14774 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): County, Region, Disposition, Family Violence Indicated
## dbl (1): Fiscal Year
## num (1): Completed Investigations
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Coerce the two categorical columns to factors and make sure the
# investigation count is stored as a plain numeric vector.
CPI_3_3 <- CPI_3_3 |>
  mutate(
    Disposition = as.factor(Disposition),
    `Family Violence Indicated` = as.factor(`Family Violence Indicated`),
    `Completed Investigations` = as.numeric(`Completed Investigations`)
  )
library(dplyr)
# Build 0/1 indicators for the outcome and the predictor:
#   RTB = 1 when the disposition is "Reason to Believe", otherwise 0
#   DV  = 1 when family violence is flagged "Y", otherwise 0
CPI_3_3 <- CPI_3_3 |>
  mutate(
    RTB = ifelse(Disposition == "Reason to Believe", 1, 0),
    DV = ifelse(`Family Violence Indicated` == "Y", 1, 0)
  )

# Linear model of RTB on DV, with each row weighted by its number of
# completed investigations.
# NOTE(review): lm() uses `weights` as precision weights, so the degrees of
# freedom and standard errors in the summary are based on the number of rows
# in CPI_3_3, not on the total number of investigations -- confirm that this
# is the intended interpretation.
model <- lm(
  RTB ~ DV,
  data = CPI_3_3,
  weights = `Completed Investigations`
)
summary(model)
##
## Call:
## lm(formula = RTB ~ DV, data = CPI_3_3, weights = `Completed Investigations`)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -26.182 -1.360 -0.555 1.090 51.372
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.198090 0.003619 54.74 <2e-16 ***
## DV 0.356993 0.010325 34.58 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.285 on 14772 degrees of freedom
## Multiple R-squared: 0.07487, Adjusted R-squared: 0.07481
## F-statistic: 1196 on 1 and 14772 DF, p-value: < 2.2e-16
# The family violence variable "DV" is statistically significant, based on the very small p-value (< 2.2e-16). Investigations in which family violence is indicated are more likely to end in a disposition of Reason to Believe.
# The model explains roughly 7.5% of the variation in whether a disposition is Reason to Believe (R-squared = 0.07487). Other factors could include substance use, mental health, family history with CPS, region, and caseworker bias.
# Diagnostic plot 1 for the fitted model (residuals vs fitted values),
# used below to judge whether a linear fit is appropriate.
plot(model, which = 1L)

# Based on this residuals-vs-fitted plot, I would say that the relationship is non-linear. The data do not seem to fit a linear model well.