library(readr)
# Load the CPI extract without changing the session's working directory.
# The folder defaults to the original OneDrive location; set the APQ_DATA_DIR
# environment variable to read the same file from somewhere else.
data_dir <- Sys.getenv("APQ_DATA_DIR", unset = "C:/Users/Skye_/OneDrive/APQ")
cpi_data <- read_csv(file.path(data_dir, "CPI 3.3.csv"))
## Rows: 14774 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): County, Region, Disposition, Family Violence Indicated
## dbl (1): Fiscal Year
## num (1): Completed Investigations
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#This data covers the child protective investigations that were completed across the state, their dispositions, and whether family violence was a factor.
# Preview the first six rows to check how the columns were parsed.
head(cpi_data, n = 6L)
## # A tibble: 6 × 6
##   `Fiscal Year` County   Region  Disposition              Completed Investigat…¹
##           <dbl> <chr>    <chr>   <chr>                                     <dbl>
## 1          2025 Anderson 4-Tyler Reason to Believe                            64
## 2          2025 Anderson 4-Tyler Reason to Believe                            23
## 3          2025 Anderson 4-Tyler Ruled Out                                   168
## 4          2025 Anderson 4-Tyler Ruled Out                                     9
## 5          2025 Anderson 4-Tyler Unable to Complete Disp…                      3
## 6          2025 Anderson 4-Tyler Unable to Complete Disp…                      1
## # ℹ abbreviated name: ¹​`Completed Investigations`
## # ℹ 1 more variable: `Family Violence Indicated` <chr>
# Five-number summary plus mean of the per-row investigation counts.
summary(cpi_data[["Completed Investigations"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     1.0     3.0    11.0   108.2    46.0 17469.0
# The counts are heavily right-skewed (median 11, max 17469 in this extract),
# so a histogram on the raw scale puts almost every row in the first bar.
# Plot log10 counts instead; the minimum here is 1, so the log is defined for
# every row.
hist(
  log10(cpi_data$`Completed Investigations`),
  main = "Completed investigations per row",
  xlab = "log10(completed investigations)"
)

#Need help evaluating the character (categorical) columns in the dataset
# Each row is an aggregated count rather than a single investigation, so weight
# by `Completed Investigations` to get each disposition's share of all
# completed investigations.
proportions(xtabs(`Completed Investigations` ~ Disposition, data = cpi_data))
# Unweighted version: each disposition's share of the *rows* in the file.
proportions(table(cpi_data$Disposition))
## 
##              Reason to Believe                      Ruled Out 
##                      0.3141329                      0.3091241 
## Unable to Complete Disposition            Unable to Determine 
##                      0.1112766                      0.2654664
#am I on to something???


# Treat Disposition as a categorical variable so it can be used for grouping.
cpi_data$Disposition <- factor(cpi_data$Disposition)
#a little AI assistance with this factor function
# Box plot of the per-row counts for each disposition. The counts span several
# orders of magnitude, so the y axis is logarithmic to keep the boxes readable.
boxplot(
  `Completed Investigations` ~ Disposition,
  data = cpi_data,
  log = "y",
  xlab = "Disposition",
  ylab = "Completed investigations (log scale)"
)

#dang it. 
# Disposition is a nominal category: its levels have no inherent order. Turning
# it into integer codes and taking a Pearson correlation only measures the
# arbitrary alphabetical ordering of the labels, and overwriting the column
# would throw the labels away. Keep Disposition as it is and compare the groups
# directly instead.

# Distribution of the per-row counts within each disposition.
tapply(cpi_data$`Completed Investigations`, cpi_data$Disposition, summary)

# Rank-based test of whether the count distributions differ by disposition.
# Used in place of a correlation because the grouping variable is categorical
# and the counts are heavily right-skewed; rows with missing values are dropped
# by the formula interface.
kruskal.test(`Completed Investigations` ~ Disposition, data = cpi_data)