library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
# Load the NIJ Recidivism Challenge CSV with base R and keep it as a
# data.frame for the summaries below.
NIJ_s_Recidivism_Challenge_Full_Dataset <- read.csv(
  file = "NIJ_s_Recidivism_Challenge_Full_Dataset.csv"
)

# Parse the same file with readr only to print a tibble preview and the
# guessed column types; the result is not stored.
read_csv(file = "NIJ_s_Recidivism_Challenge_Full_Dataset.csv")
## Rows: 25835 Columns: 54
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (22): Gender, Race, Age_at_Release, Supervision_Level_First, Education_L...
## dbl (11): ID, Residence_PUMA, Supervision_Risk_Score_First, Avg_Days_per_Dru...
## lgl (21): Gang_Affiliated, Prior_Arrest_Episodes_DVCharges, Prior_Arrest_Epi...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 25,835 × 54
## ID Gender Race Age_at_Release Residence_PUMA Gang_Affiliated
## <dbl> <chr> <chr> <chr> <dbl> <lgl>
## 1 1 M BLACK 43-47 16 FALSE
## 2 2 M BLACK 33-37 16 FALSE
## 3 3 M BLACK 48 or older 24 FALSE
## 4 4 M WHITE 38-42 16 FALSE
## 5 5 M WHITE 33-37 16 FALSE
## 6 6 M WHITE 38-42 17 FALSE
## 7 7 M BLACK 48 or older 18 FALSE
## 8 8 M BLACK 38-42 16 FALSE
## 9 9 F BLACK 43-47 5 NA
## 10 10 M BLACK 43-47 16 FALSE
## # ℹ 25,825 more rows
## # ℹ 48 more variables: Supervision_Risk_Score_First <dbl>,
## # Supervision_Level_First <chr>, Education_Level <chr>, Dependents <chr>,
## # Prison_Offense <chr>, Prison_Years <chr>,
## # Prior_Arrest_Episodes_Felony <chr>, Prior_Arrest_Episodes_Misd <chr>,
## # Prior_Arrest_Episodes_Violent <chr>, Prior_Arrest_Episodes_Property <chr>,
## # Prior_Arrest_Episodes_Drug <chr>, …
# Per-column summary of the base-R data frame: quantiles and NA counts for
# numeric columns, length/class/mode for character columns.
summary(object = NIJ_s_Recidivism_Challenge_Full_Dataset)
## ID Gender Race Age_at_Release
## Min. : 1 Length:25835 Length:25835 Length:25835
## 1st Qu.: 6626 Class :character Class :character Class :character
## Median :13270 Mode :character Mode :character Mode :character
## Mean :13314
## 3rd Qu.:20022
## Max. :26761
##
## Residence_PUMA Gang_Affiliated Supervision_Risk_Score_First
## Min. : 1.00 Length:25835 Min. : 1.000
## 1st Qu.: 6.00 Class :character 1st Qu.: 4.000
## Median :12.00 Mode :character Median : 6.000
## Mean :12.36 Mean : 6.082
## 3rd Qu.:18.00 3rd Qu.: 8.000
## Max. :25.00 Max. :10.000
## NA's :475
## Supervision_Level_First Education_Level Dependents
## Length:25835 Length:25835 Length:25835
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Prison_Offense Prison_Years Prior_Arrest_Episodes_Felony
## Length:25835 Length:25835 Length:25835
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Prior_Arrest_Episodes_Misd Prior_Arrest_Episodes_Violent
## Length:25835 Length:25835
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## Prior_Arrest_Episodes_Property Prior_Arrest_Episodes_Drug
## Length:25835 Length:25835
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## Prior_Arrest_Episodes_PPViolationCharges Prior_Arrest_Episodes_DVCharges
## Length:25835 Length:25835
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## Prior_Arrest_Episodes_GunCharges Prior_Conviction_Episodes_Felony
## Length:25835 Length:25835
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## Prior_Conviction_Episodes_Misd Prior_Conviction_Episodes_Viol
## Length:25835 Length:25835
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## Prior_Conviction_Episodes_Prop Prior_Conviction_Episodes_Drug
## Length:25835 Length:25835
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## Prior_Conviction_Episodes_PPViolationCharges
## Length:25835
## Class :character
## Mode :character
##
##
##
##
## Prior_Conviction_Episodes_DomesticViolenceCharges
## Length:25835
## Class :character
## Mode :character
##
##
##
##
## Prior_Conviction_Episodes_GunCharges Prior_Revocations_Parole
## Length:25835 Length:25835
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## Prior_Revocations_Probation Condition_MH_SA Condition_Cog_Ed
## Length:25835 Length:25835 Length:25835
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Condition_Other Violations_ElectronicMonitoring Violations_Instruction
## Length:25835 Length:25835 Length:25835
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Violations_FailToReport Violations_MoveWithoutPermission Delinquency_Reports
## Length:25835 Length:25835 Length:25835
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Program_Attendances Program_UnexcusedAbsences Residence_Changes
## Length:25835 Length:25835 Length:25835
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Avg_Days_per_DrugTest DrugTests_THC_Positive DrugTests_Cocaine_Positive
## Min. : 0.50 Min. :0.000 Min. :0.000
## 1st Qu.: 28.84 1st Qu.:0.000 1st Qu.:0.000
## Median : 55.42 Median :0.000 Median :0.000
## Mean : 93.89 Mean :0.063 Mean :0.014
## 3rd Qu.: 110.33 3rd Qu.:0.071 3rd Qu.:0.000
## Max. :1088.50 Max. :1.000 Max. :1.000
## NA's :6103 NA's :5172 NA's :5172
## DrugTests_Meth_Positive DrugTests_Other_Positive Percent_Days_Employed
## Min. :0.000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.000 Median :0.000 Median :0.4757
## Mean :0.013 Mean :0.008 Mean :0.4823
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.9693
## Max. :1.000 Max. :1.000 Max. :1.0000
## NA's :5172 NA's :5172 NA's :462
## Jobs_Per_Year Employment_Exempt Recidivism_Within_3years
## Min. :0.0000 Length:25835 Length:25835
## 1st Qu.:0.0000 Class :character Class :character
## Median :0.6352 Mode :character Mode :character
## Mean :0.7693
## 3rd Qu.:1.0000
## Max. :8.0000
## NA's :808
## Recidivism_Arrest_Year1 Recidivism_Arrest_Year2 Recidivism_Arrest_Year3
## Length:25835 Length:25835 Length:25835
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Training_Sample
## Min. :0.0000
## 1st Qu.:0.0000
## Median :1.0000
## Mean :0.6978
## 3rd Qu.:1.0000
## Max. :1.0000
##
# Load the dataset BEFORE inspecting it. If `str(data)` runs while no object
# named `data` has been assigned, the name resolves to the built-in
# utils::data() function and str() prints that function's signature instead
# of the structure of the data frame.
data <- read.csv("NIJ_s_Recidivism_Challenge_Full_Dataset.csv")

# Compact overview of every column: type plus the first few values.
str(data)
# Outcome column: read in as character, so summary() only reports
# length/class/mode.
summary(data[["Recidivism_Within_3years"]])
## Length Class Mode
## 25835 character character

# Numeric predictor: five-number summary, mean, and the count of NAs.
summary(data[["Supervision_Risk_Score_First"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.000 4.000 6.000 6.082 8.000 10.000 475
# Read the challenge CSV into a dedicated data frame for modelling.
my_data <- read.csv(file = "NIJ_s_Recidivism_Challenge_Full_Dataset.csv")

# Ordinary least squares: share of days employed regressed on the release-age
# bracket (character column, so lm() dummy-codes it), the first supervision
# risk score, and jobs per year. Rows with missing values are dropped by lm().
model <- lm(
  Percent_Days_Employed ~
    Age_at_Release + Supervision_Risk_Score_First + Jobs_Per_Year,
  data = my_data
)

# Coefficient table, residual summary, R-squared, and overall F-test.
summary(model)
##
## Call:
## lm(formula = Percent_Days_Employed ~ Age_at_Release + Supervision_Risk_Score_First +
## Jobs_Per_Year, data = my_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9635 -0.2455 -0.1115 0.2797 0.8058
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1680346 0.0103390 16.253 < 2e-16 ***
## Age_at_Release23-27 0.0570724 0.0083333 6.849 7.63e-12 ***
## Age_at_Release28-32 0.0897878 0.0084400 10.638 < 2e-16 ***
## Age_at_Release33-37 0.1116539 0.0087448 12.768 < 2e-16 ***
## Age_at_Release38-42 0.1301109 0.0094687 13.741 < 2e-16 ***
## Age_at_Release43-47 0.1409572 0.0098145 14.362 < 2e-16 ***
## Age_at_Release48 or older 0.1344625 0.0095512 14.078 < 2e-16 ***
## Supervision_Risk_Score_First -0.0058470 0.0009671 -6.046 1.50e-09 ***
## Jobs_Per_Year 0.3418593 0.0024851 137.565 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3162 on 24548 degrees of freedom
## (1278 observations deleted due to missingness)
## Multiple R-squared: 0.4421, Adjusted R-squared: 0.4419
## F-statistic: 2432 on 8 and 24548 DF, p-value: < 2.2e-16
# Diagnostic plot 1 for the fitted model: residuals versus fitted values,
# used to eyeball the linearity assumption.
plot(x = model, which = 1)
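# The overall F-test p-value is < 2.2e-16, which means the model as a whole is statistically significant in explaining the percent of days employed.
# There are no insignificant variables in this model: all of the included variables (Age at Release, Supervision Risk Score, and Jobs per Year) have significant p-values.
# Note that Percent_Days_Employed is recorded as a proportion between 0 and 1, so every coefficient is in proportion units (0.01 = 1 percentage point).
# As age at release increases, the employment percentage rises: compared with the omitted baseline age group, the older age groups are about 0.06 to 0.14 higher (roughly 6 to 14 percentage points), with each successive bracket adding only a small amount.
# The higher the risk score, the lower the employment percentage: each one-point increase lowers it by about 0.006 (roughly 0.6 percentage points).
# For each additional job per year, the percentage of days employed increases by about 0.34 (roughly 34 percentage points).
# I think my data is close to being linear: in the residuals-vs-fitted plot, the majority of the data points (bubbles) line up with the line and then change their direction.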