library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Default chunk option: echo each chunk's source code in the knitted output.
knitr::opts_chunk$set(echo = TRUE)

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

Including Plots

You can also embed plots, for example:

# Location of the human-trafficking CSV. The path used to be hard-coded to one
# user's desktop, which prevents knitting anywhere else. Set the HTD_CSV
# environment variable to point at a local copy; when it is unset the original
# path is used, so existing behaviour is unchanged.
htd_path <- Sys.getenv(
  "HTD_CSV",
  unset = "C:\\Users\\moore\\OneDrive\\Desktop\\Fall 2023\\Intro to statistics\\project\\Statistics Project\\Statistics Project\\Human Trafficking data.csv"
)

# Load the data set and preview the first rows.
htd <- read.csv(htd_path)
head(htd)
##   DATA_YEAR       ORI                    PUB_AGENCY_NAME PUB_AGENCY_UNIT
## 1      2013 MO0950000 St. Louis County Police Department                
## 2      2013 OH0020000                              Allen                
## 3      2013 OH0020000                              Allen                
## 4      2013 TN0320100                         Morristown                
## 5      2013 TNMPD0000                            Memphis                
## 6      2013 WA0173600                        Federal Way                
##   AGENCY_TYPE_NAME STATE_ABBR STATE_NAME      DIVISION_NAME        COUNTY_NAME
## 1           County         MO   Missouri West North Central           ST LOUIS
## 2           County         OH       Ohio East North Central              ALLEN
## 3           County         OH       Ohio East North Central              ALLEN
## 4             City         TN  Tennessee East South Central HAMBLEN, JEFFERSON
## 5             City         TN  Tennessee East South Central             SHELBY
## 6             City         WA Washington            Pacific               KING
##   REGION_NAME POPULATION_GROUP_CODE                POPULATION_GROUP_DESC
## 1     Midwest                    9A         MSA counties 100,000 or over
## 2     Midwest                    9C MSA counties from 10,000 thru 24,999
## 3     Midwest                    9C MSA counties from 10,000 thru 24,999
## 4       South                     4       Cities from 25,000 thru 49,999
## 5       South                    1B     Cities from 500,000 thru 999,999
## 6        West                     3       Cities from 50,000 thru 99,999
##   OFFENSE_SUBCAT_ID      OFFENSE_NAME   OFFENSE_SUBCAT_NAME ACTUAL_COUNT
## 1             HTCSA Human Trafficking   Commercial Sex Acts          384
## 2             HTCSA Human Trafficking   Commercial Sex Acts           64
## 3              HTIS Human Trafficking Involuntary Servitude           64
## 4             HTCSA Human Trafficking   Commercial Sex Acts           64
## 5             HTCSA Human Trafficking   Commercial Sex Acts          192
## 6             HTCSA Human Trafficking   Commercial Sex Acts           64
##   UNFOUNDED_COUNT CLEARED_COUNT JUVENILE_CLEARED_COUNT
## 1               0           384                      0
## 2               0            64                      0
## 3               0             0                      0
## 4               0            64                      0
## 5               0           128                      0
## 6               0            64                      0
# Per-column summary: five-number summaries for numeric columns, length/class
# for character columns, and NA counts where present.
summary(object = htd)
##    DATA_YEAR        ORI            PUB_AGENCY_NAME    PUB_AGENCY_UNIT   
##  Min.   :2013   Length:3098        Length:3098        Length:3098       
##  1st Qu.:2017   Class :character   Class :character   Class :character  
##  Median :2019   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :2019                                                           
##  3rd Qu.:2020                                                           
##  Max.   :2021                                                           
##                                                                         
##  AGENCY_TYPE_NAME    STATE_ABBR         STATE_NAME        DIVISION_NAME     
##  Length:3098        Length:3098        Length:3098        Length:3098       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  COUNTY_NAME        REGION_NAME        POPULATION_GROUP_CODE
##  Length:3098        Length:3098        Length:3098          
##  Class :character   Class :character   Class :character     
##  Mode  :character   Mode  :character   Mode  :character     
##                                                             
##                                                             
##                                                             
##                                                             
##  POPULATION_GROUP_DESC OFFENSE_SUBCAT_ID  OFFENSE_NAME      
##  Length:3098           Length:3098        Length:3098       
##  Class :character      Class :character   Class :character  
##  Mode  :character      Mode  :character   Mode  :character  
##                                                             
##                                                             
##                                                             
##                                                             
##  OFFENSE_SUBCAT_NAME  ACTUAL_COUNT     UNFOUNDED_COUNT   CLEARED_COUNT    
##  Length:3098         Min.   :  -64.0   Min.   :   0.00   Min.   :   0.00  
##  Class :character    1st Qu.:   64.0   1st Qu.:   0.00   1st Qu.:   0.00  
##  Mode  :character    Median :   64.0   Median :   0.00   Median :   0.00  
##                      Mean   :  233.1   Mean   :  33.54   Mean   :  99.83  
##                      3rd Qu.:  168.0   3rd Qu.:   0.00   3rd Qu.:  64.00  
##                      Max.   :13056.0   Max.   :3264.00   Max.   :6976.00  
##                                        NA's   :1969                       
##  JUVENILE_CLEARED_COUNT
##  Min.   :   0.000      
##  1st Qu.:   0.000      
##  Median :   0.000      
##  Mean   :   5.641      
##  3rd Qu.:   0.000      
##  Max.   :2176.000      
## 
# Number of records per offense subcategory.
table(htd[["OFFENSE_SUBCAT_NAME"]])
## 
##   Commercial Sex Acts Involuntary Servitude 
##                  2296                   802
# Number of distinct population-group descriptions.
n_distinct(htd$POPULATION_GROUP_DESC)
## [1] 20
# Number of records per population-group description.
table(htd$POPULATION_GROUP_DESC)
## 
##                                          
##                                        3 
##                 Cities 1,000,000 or over 
##                                       77 
##           Cities from 10,000 thru 24,999 
##                                      319 
##         Cities from 100,000 thru 249,999 
##                                      378 
##             Cities from 2,500 thru 9,999 
##                                      229 
##           Cities from 25,000 thru 49,999 
##                                      282 
##         Cities from 250,000 thru 499,999 
##                                      164 
##           Cities from 50,000 thru 99,999 
##                                      370 
##         Cities from 500,000 thru 999,999 
##                                      141 
##                       Cities under 2,500 
##                                       82 
##             MSA counties 100,000 or over 
##                                      312 
##     MSA counties from 10,000 thru 24,999 
##                                       50 
##     MSA counties from 25,000 thru 99,999 
##                                      210 
##                MSA counties under 10,000 
##                                      138 
##                         MSA State Police 
##                                        1 
##         Non-MSA counties 100,000 or over 
##                                       11 
## Non-MSA counties from 10,000 thru 24,999 
##                                       86 
## Non-MSA counties from 25,000 thru 99,999 
##                                       68 
##            Non-MSA counties under 10,000 
##                                      141 
##                     Non-MSA State Police 
##                                       36
# Number of records per STATE_NAME value.
table(htd[["STATE_NAME"]])
## 
##        Alabama         Alaska        Arizona       Arkansas       Colorado 
##             12             15             42             18            111 
##    Connecticut       Delaware        Federal        Florida        Georgia 
##             46             24              3            166            137 
##         Hawaii          Idaho       Illinois        Indiana           Iowa 
##              9              7            100             52             10 
##         Kansas       Kentucky      Louisiana          Maine       Maryland 
##              3            108             40             11             57 
##  Massachusetts       Michigan      Minnesota    Mississippi       Missouri 
##            113            115            273              7             86 
##        Montana       Nebraska         Nevada  New Hampshire     New Jersey 
##             14             18             33             15             14 
##     New Mexico       New York North Carolina   North Dakota           Ohio 
##             10              6             94             36             36 
##       Oklahoma         Oregon   Pennsylvania   Rhode Island South Carolina 
##             38             31              9             31             80 
##   South Dakota      Tennessee          Texas           Utah        Vermont 
##              9            117            478             16             12 
##       Virginia     Washington  West Virginia      Wisconsin        Wyoming 
##             89             90             95            144             18
# Number of distinct STATE_NAME values (every label in the column is counted,
# not only U.S. states).
length(unique(htd$STATE_NAME))
## [1] 50
# Number of records per region.
table(htd[["REGION_NAME"]])
## 
##   Midwest Northeast     Other     South      West 
##       882       257         3      1560       396
# Number of distinct region labels.
length(unique(htd$REGION_NAME))
## [1] 5
# Number of records per division.
table(htd[["DIVISION_NAME"]])
## 
## East North Central East South Central    Middle Atlantic           Mountain 
##                447                244                 29                251 
##        New England              Other            Pacific     South Atlantic 
##                228                  3                145                742 
## West North Central West South Central 
##                435                574
# Number of distinct division labels.
length(unique(htd$DIVISION_NAME))
## [1] 10
# Number of distinct reporting years in the data.
length(unique(htd$DATA_YEAR))
## [1] 9
#What state has the most Actual Counts?
#What year did the highest actual count occur?
#For each year, which state experienced the most actual counts?
#Is there a relationship between the population group description and actual counts?
#Which offense occurred the most in each region?
#The goal of this project is to observe whether there are any relationships between human trafficking and area, year, demographics, and count of offenses.
# Mean actual and unfounded counts for each population-group description.
# UNFOUNDED_COUNT contains NAs, so they are dropped before averaging; a group
# whose unfounded values are all NA yields NaN.
summarise(
  group_by(htd, POPULATION_GROUP_DESC),
  mean_actual = mean(ACTUAL_COUNT),
  mean_unfounded = mean(UNFOUNDED_COUNT, na.rm = TRUE)
)
## # A tibble: 20 × 3
##    POPULATION_GROUP_DESC                      mean_actual mean_unfounded
##    <chr>                                            <dbl>          <dbl>
##  1 ""                                                16           NaN   
##  2 "Cities 1,000,000 or over"                      2043.           79.7 
##  3 "Cities from 10,000 thru 24,999"                  88.1           7.23
##  4 "Cities from 100,000 thru 249,999"               223.           41.6 
##  5 "Cities from 2,500 thru 9,999"                    77.1          10.4 
##  6 "Cities from 25,000 thru 49,999"                  99.3           6.84
##  7 "Cities from 250,000 thru 499,999"               366.           71.3 
##  8 "Cities from 50,000 thru 99,999"                 156.            5.19
##  9 "Cities from 500,000 thru 999,999"               631.           27.4 
## 10 "Cities under 2,500"                              68.2           1.26
## 11 "MSA counties 100,000 or over"                   310.          102.  
## 12 "MSA counties from 10,000 thru 24,999"            92.2           4.92
## 13 "MSA counties from 25,000 thru 99,999"           176.            6.04
## 14 "MSA counties under 10,000"                      138.            0   
## 15 "MSA State Police"                                19           NaN   
## 16 "Non-MSA counties 100,000 or over"                89.8           1.67
## 17 "Non-MSA counties from 10,000 thru 24,999"       134.            0   
## 18 "Non-MSA counties from 25,000 thru 99,999"        96.5           0   
## 19 "Non-MSA counties under 10,000"                  122.           17.6 
## 20 "Non-MSA State Police"                            23.5           0
# Mean actual and unfounded counts for each offense subcategory.
# NAs in UNFOUNDED_COUNT are dropped before averaging.
summarise(
  group_by(htd, OFFENSE_SUBCAT_NAME),
  mean_actual = mean(ACTUAL_COUNT),
  mean_unfounded = mean(UNFOUNDED_COUNT, na.rm = TRUE)
)
## # A tibble: 2 × 3
##   OFFENSE_SUBCAT_NAME   mean_actual mean_unfounded
##   <chr>                       <dbl>          <dbl>
## 1 Commercial Sex Acts          260.           38.9
## 2 Involuntary Servitude        156.           19.4
# Mean ACTUAL_COUNT over the records whose DATA_YEAR is 2021 or later.
htd |>
  filter(DATA_YEAR >= 2021) |>  # keep rows from 2021 onward
  pull(ACTUAL_COUNT) |>         # extract the ACTUAL_COUNT column as a vector
  mean()
## [1] 197.718
# Largest single ACTUAL_COUNT among the records from 2021 or later.
htd |>
  filter(DATA_YEAR >= 2021) |>
  pull(ACTUAL_COUNT) |>
  max()
## [1] 10560
# Offense subcategories that appear in at least three distinct regions,
# ordered by the number of regions they appear in (most first).
#
# pull() extracts the column as a plain character vector. The previous
# select() + purrr::as_vector() route relied on a superseded purrr helper and
# produced a vector with auto-generated element names; the values are the same,
# so `%in% offense` comparisons are unaffected.
offense <- htd |>
  group_by(OFFENSE_SUBCAT_NAME) |>
  summarize(region = n_distinct(REGION_NAME)) |>
  arrange(desc(region)) |>
  filter(region >= 3) |>
  pull(OFFENSE_SUBCAT_NAME)
# Stacked bar chart: number of records per offense subcategory, coloured by
# region, restricted to the subcategories listed in `offense`.
p <- ggplot(
  filter(htd, OFFENSE_SUBCAT_NAME %in% offense),
  aes(x = OFFENSE_SUBCAT_NAME, fill = REGION_NAME)
) +
  geom_bar() +
  theme_minimal() +
  scale_fill_brewer(palette = "Dark2")

p

# Scatter plot of every record: ACTUAL_COUNT on the x axis against DATA_YEAR
# on the y axis.
ggplot(htd, aes(x = ACTUAL_COUNT, y = DATA_YEAR)) +
  geom_point() +
  theme_classic()

# Mean unfounded count over the records that report one (NAs are skipped).
avg_unfounded <- mean(htd$UNFOUNDED_COUNT, na.rm = TRUE)

# Average of the per-record actual-to-unfounded ratio, i.e. of the variable
# shown in the histogram below. A missing or zero UNFOUNDED_COUNT makes the
# ratio NA, NaN, or Inf; the histogram drops those rows, so they are excluded
# here as well. (Previously this was mean(ACTUAL_COUNT / avg_unfounded), which
# is not the mean of the plotted ratio, so the "Average" line was misplaced.)
mean_ratio <- with(htd, {
  ratio <- ACTUAL_COUNT / UNFOUNDED_COUNT
  mean(ratio[is.finite(ratio)])
})

# Histogram of the per-record ratio with a vertical marker at its mean.
htd |>
  mutate(actual_to_unfounded = ACTUAL_COUNT / UNFOUNDED_COUNT) |>
  ggplot() +
  geom_histogram(mapping = aes(x = actual_to_unfounded), color = "white") +
  geom_vline(xintercept = mean_ratio, color = "orange") +
  # Anchor the label to the marker line and the top of the panel instead of
  # fixed data coordinates, so it stays next to the line for any data range.
  annotate("text",
           x = mean_ratio, y = Inf, label = "Average", color = "orange",
           hjust = -0.1, vjust = 1.5) +
  theme_classic()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2949 rows containing non-finite values (stat_bin).

# Mean ACTUAL_COUNT for each region.
avg_counts <- htd |>
  group_by(REGION_NAME) |>
  summarize(avg_actual_counts = mean(ACTUAL_COUNT))

# One bar per region, with bar height equal to the regional mean.
# `fill` is mapped to REGION_NAME so that scale_fill_brewer() has an aesthetic
# to act on; without a fill mapping the Dark2 palette was silently ignored.
# The legend is hidden because the x axis already labels each region.
avg_counts |>
  ggplot() +
  geom_col(
    mapping = aes(x = REGION_NAME, y = avg_actual_counts, fill = REGION_NAME),
    show.legend = FALSE
  ) +
  theme_minimal() +
  scale_fill_brewer(palette = "Dark2")

# Mean ACTUAL_COUNT for each state abbreviation.
new_data <- summarise(
  group_by(htd, STATE_ABBR),
  mean_count = mean(ACTUAL_COUNT)
)

# Bar chart of the per-state means. This is a separate statement from the
# assignment above; x-axis labels are rotated so every abbreviation fits.
ggplot(new_data, aes(x = STATE_ABBR, y = mean_count)) +
  geom_col(fill = "skyblue") +
  labs(
    title = "Mean Actual Count by State",
    x = "State",
    y = "Mean Actual Count"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Bar chart of the number of records in each region.
htd |>
  ggplot(mapping = aes(x = REGION_NAME)) +
  geom_bar()

# Bar chart of the number of records in each offense subcategory.
htd |>
  ggplot(mapping = aes(x = OFFENSE_SUBCAT_NAME)) +
  geom_bar()

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.