library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Show the R source of every chunk in the knitted document.
knitr::opts_chunk$set(echo = TRUE)
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
You can also embed plots, for example:
# Load the human-trafficking CSV into `htd` and preview its first rows.
# NOTE(review): the path is absolute and machine-specific; confirm it exists
# before running this on another computer.
htd <- read.csv(
  file = "C:\\Users\\moore\\OneDrive\\Desktop\\Fall 2023\\Intro to statistics\\project\\Statistics Project\\Statistics Project\\Human Trafficking data.csv"
)
htd |> head()
## DATA_YEAR ORI PUB_AGENCY_NAME PUB_AGENCY_UNIT
## 1 2013 MO0950000 St. Louis County Police Department
## 2 2013 OH0020000 Allen
## 3 2013 OH0020000 Allen
## 4 2013 TN0320100 Morristown
## 5 2013 TNMPD0000 Memphis
## 6 2013 WA0173600 Federal Way
## AGENCY_TYPE_NAME STATE_ABBR STATE_NAME DIVISION_NAME COUNTY_NAME
## 1 County MO Missouri West North Central ST LOUIS
## 2 County OH Ohio East North Central ALLEN
## 3 County OH Ohio East North Central ALLEN
## 4 City TN Tennessee East South Central HAMBLEN, JEFFERSON
## 5 City TN Tennessee East South Central SHELBY
## 6 City WA Washington Pacific KING
## REGION_NAME POPULATION_GROUP_CODE POPULATION_GROUP_DESC
## 1 Midwest 9A MSA counties 100,000 or over
## 2 Midwest 9C MSA counties from 10,000 thru 24,999
## 3 Midwest 9C MSA counties from 10,000 thru 24,999
## 4 South 4 Cities from 25,000 thru 49,999
## 5 South 1B Cities from 500,000 thru 999,999
## 6 West 3 Cities from 50,000 thru 99,999
## OFFENSE_SUBCAT_ID OFFENSE_NAME OFFENSE_SUBCAT_NAME ACTUAL_COUNT
## 1 HTCSA Human Trafficking Commercial Sex Acts 384
## 2 HTCSA Human Trafficking Commercial Sex Acts 64
## 3 HTIS Human Trafficking Involuntary Servitude 64
## 4 HTCSA Human Trafficking Commercial Sex Acts 64
## 5 HTCSA Human Trafficking Commercial Sex Acts 192
## 6 HTCSA Human Trafficking Commercial Sex Acts 64
## UNFOUNDED_COUNT CLEARED_COUNT JUVENILE_CLEARED_COUNT
## 1 0 384 0
## 2 0 64 0
## 3 0 0 0
## 4 0 64 0
## 5 0 128 0
## 6 0 64 0
# Per-column summary of the whole data frame.
htd |> summary()
## DATA_YEAR ORI PUB_AGENCY_NAME PUB_AGENCY_UNIT
## Min. :2013 Length:3098 Length:3098 Length:3098
## 1st Qu.:2017 Class :character Class :character Class :character
## Median :2019 Mode :character Mode :character Mode :character
## Mean :2019
## 3rd Qu.:2020
## Max. :2021
##
## AGENCY_TYPE_NAME STATE_ABBR STATE_NAME DIVISION_NAME
## Length:3098 Length:3098 Length:3098 Length:3098
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## COUNTY_NAME REGION_NAME POPULATION_GROUP_CODE
## Length:3098 Length:3098 Length:3098
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## POPULATION_GROUP_DESC OFFENSE_SUBCAT_ID OFFENSE_NAME
## Length:3098 Length:3098 Length:3098
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## OFFENSE_SUBCAT_NAME ACTUAL_COUNT UNFOUNDED_COUNT CLEARED_COUNT
## Length:3098 Min. : -64.0 Min. : 0.00 Min. : 0.00
## Class :character 1st Qu.: 64.0 1st Qu.: 0.00 1st Qu.: 0.00
## Mode :character Median : 64.0 Median : 0.00 Median : 0.00
## Mean : 233.1 Mean : 33.54 Mean : 99.83
## 3rd Qu.: 168.0 3rd Qu.: 0.00 3rd Qu.: 64.00
## Max. :13056.0 Max. :3264.00 Max. :6976.00
## NA's :1969
## JUVENILE_CLEARED_COUNT
## Min. : 0.000
## 1st Qu.: 0.000
## Median : 0.000
## Mean : 5.641
## 3rd Qu.: 0.000
## Max. :2176.000
##
# Frequency of each value of OFFENSE_SUBCAT_NAME.
htd |>
  pull(OFFENSE_SUBCAT_NAME) |>
  table()
##
## Commercial Sex Acts Involuntary Servitude
## 2296 802
# Number of distinct values of POPULATION_GROUP_DESC.
htd |>
  pull(POPULATION_GROUP_DESC) |>
  n_distinct()
## [1] 20
# Frequency of each value of POPULATION_GROUP_DESC.
htd |>
  pull(POPULATION_GROUP_DESC) |>
  table()
##
##
## 3
## Cities 1,000,000 or over
## 77
## Cities from 10,000 thru 24,999
## 319
## Cities from 100,000 thru 249,999
## 378
## Cities from 2,500 thru 9,999
## 229
## Cities from 25,000 thru 49,999
## 282
## Cities from 250,000 thru 499,999
## 164
## Cities from 50,000 thru 99,999
## 370
## Cities from 500,000 thru 999,999
## 141
## Cities under 2,500
## 82
## MSA counties 100,000 or over
## 312
## MSA counties from 10,000 thru 24,999
## 50
## MSA counties from 25,000 thru 99,999
## 210
## MSA counties under 10,000
## 138
## MSA State Police
## 1
## Non-MSA counties 100,000 or over
## 11
## Non-MSA counties from 10,000 thru 24,999
## 86
## Non-MSA counties from 25,000 thru 99,999
## 68
## Non-MSA counties under 10,000
## 141
## Non-MSA State Police
## 36
# Frequency of each value of STATE_NAME.
htd |>
  pull(STATE_NAME) |>
  table()
##
## Alabama Alaska Arizona Arkansas Colorado
## 12 15 42 18 111
## Connecticut Delaware Federal Florida Georgia
## 46 24 3 166 137
## Hawaii Idaho Illinois Indiana Iowa
## 9 7 100 52 10
## Kansas Kentucky Louisiana Maine Maryland
## 3 108 40 11 57
## Massachusetts Michigan Minnesota Mississippi Missouri
## 113 115 273 7 86
## Montana Nebraska Nevada New Hampshire New Jersey
## 14 18 33 15 14
## New Mexico New York North Carolina North Dakota Ohio
## 10 6 94 36 36
## Oklahoma Oregon Pennsylvania Rhode Island South Carolina
## 38 31 9 31 80
## South Dakota Tennessee Texas Utah Vermont
## 9 117 478 16 12
## Virginia Washington West Virginia Wisconsin Wyoming
## 89 90 95 144 18
# Number of distinct values of STATE_NAME.
htd |>
  pull(STATE_NAME) |>
  n_distinct()
## [1] 50
# Frequency of each value of REGION_NAME.
htd |>
  pull(REGION_NAME) |>
  table()
##
## Midwest Northeast Other South West
## 882 257 3 1560 396
# Number of distinct values of REGION_NAME.
htd |>
  pull(REGION_NAME) |>
  n_distinct()
## [1] 5
# Frequency of each value of DIVISION_NAME.
htd |>
  pull(DIVISION_NAME) |>
  table()
##
## East North Central East South Central Middle Atlantic Mountain
## 447 244 29 251
## New England Other Pacific South Atlantic
## 228 3 145 742
## West North Central West South Central
## 435 574
# Number of distinct values of DIVISION_NAME.
htd |>
  pull(DIVISION_NAME) |>
  n_distinct()
## [1] 10
# Number of distinct values of DATA_YEAR.
htd |>
  pull(DATA_YEAR) |>
  n_distinct()
## [1] 9
# What state has the most actual counts?
# In what year did the highest actual count occur?
# For each year, which state experienced the most actual counts?
# Is there a relationship between the population group description and actual counts?
# Which offense occurred the most in each region?
# The goal of this project is to observe whether there are any relationships between human trafficking and area, year, demographics, and count of offenses.
# Mean actual and unfounded counts for each population group.
# Missing UNFOUNDED_COUNT values are dropped before averaging; a group with
# no non-missing value yields NaN.
htd |>
  group_by(POPULATION_GROUP_DESC) |>
  summarise(
    mean_actual = mean(ACTUAL_COUNT),
    mean_unfounded = mean(UNFOUNDED_COUNT, na.rm = TRUE)
  )
## # A tibble: 20 × 3
## POPULATION_GROUP_DESC mean_actual mean_unfounded
## <chr> <dbl> <dbl>
## 1 "" 16 NaN
## 2 "Cities 1,000,000 or over" 2043. 79.7
## 3 "Cities from 10,000 thru 24,999" 88.1 7.23
## 4 "Cities from 100,000 thru 249,999" 223. 41.6
## 5 "Cities from 2,500 thru 9,999" 77.1 10.4
## 6 "Cities from 25,000 thru 49,999" 99.3 6.84
## 7 "Cities from 250,000 thru 499,999" 366. 71.3
## 8 "Cities from 50,000 thru 99,999" 156. 5.19
## 9 "Cities from 500,000 thru 999,999" 631. 27.4
## 10 "Cities under 2,500" 68.2 1.26
## 11 "MSA counties 100,000 or over" 310. 102.
## 12 "MSA counties from 10,000 thru 24,999" 92.2 4.92
## 13 "MSA counties from 25,000 thru 99,999" 176. 6.04
## 14 "MSA counties under 10,000" 138. 0
## 15 "MSA State Police" 19 NaN
## 16 "Non-MSA counties 100,000 or over" 89.8 1.67
## 17 "Non-MSA counties from 10,000 thru 24,999" 134. 0
## 18 "Non-MSA counties from 25,000 thru 99,999" 96.5 0
## 19 "Non-MSA counties under 10,000" 122. 17.6
## 20 "Non-MSA State Police" 23.5 0
# Mean actual and unfounded counts for each offense sub-category.
# Missing UNFOUNDED_COUNT values are dropped before averaging.
htd |>
  group_by(OFFENSE_SUBCAT_NAME) |>
  summarise(
    mean_actual = mean(ACTUAL_COUNT),
    mean_unfounded = mean(UNFOUNDED_COUNT, na.rm = TRUE)
  )
## # A tibble: 2 × 3
## OFFENSE_SUBCAT_NAME mean_actual mean_unfounded
## <chr> <dbl> <dbl>
## 1 Commercial Sex Acts 260. 38.9
## 2 Involuntary Servitude 156. 19.4
# Mean ACTUAL_COUNT over the rows whose DATA_YEAR is 2021 or later.
htd |>
  filter(DATA_YEAR >= 2021) |> # keep only the qualifying years
  pull(ACTUAL_COUNT) |>        # extract the ACTUAL_COUNT column as a vector
  mean()
## [1] 197.718
# Largest ACTUAL_COUNT among the rows whose DATA_YEAR is 2021 or later.
htd |>
  filter(DATA_YEAR >= 2021) |>
  pull(ACTUAL_COUNT) |>
  max()
## [1] 10560
# Offense sub-categories that appear in at least three distinct regions,
# ordered from the most regions to the fewest.
# `pull()` returns the column as a plain character vector. It replaces
# `select() |> as_vector()`, which relies on a purrr helper that is deprecated
# as of purrr 1.0.0.
offense <- htd |>
  group_by(OFFENSE_SUBCAT_NAME) |>
  summarize(n_regions = n_distinct(REGION_NAME)) |>
  arrange(desc(n_regions)) |>
  filter(n_regions >= 3) |>
  pull(OFFENSE_SUBCAT_NAME)
# Bar chart of row counts per retained offense sub-category, with each bar
# filled by region using the Dark2 palette.
p <- htd |>
  filter(OFFENSE_SUBCAT_NAME %in% offense) |>
  ggplot(mapping = aes(x = OFFENSE_SUBCAT_NAME, fill = REGION_NAME)) +
  geom_bar() +
  scale_fill_brewer(palette = "Dark2") +
  theme_minimal()
p
# Scatter plot with ACTUAL_COUNT on the x axis and DATA_YEAR on the y axis,
# one point per row of `htd`.
ggplot(data = htd, mapping = aes(x = ACTUAL_COUNT, y = DATA_YEAR)) +
  geom_point() +
  theme_classic()
# Overall mean of UNFOUNDED_COUNT, ignoring missing values.
avg_unfounded <- mean(htd$UNFOUNDED_COUNT, na.rm = TRUE)

# Per-row ratio of actual to unfounded counts. Rows with a missing or zero
# UNFOUNDED_COUNT produce NA/NaN/Inf, so they are removed explicitly; the
# histogram and the reference line are then based on the same observations.
ratio_data <- htd |>
  mutate(actual_to_unfounded = ACTUAL_COUNT / UNFOUNDED_COUNT) |>
  filter(is.finite(actual_to_unfounded))

# Mean of the plotted ratio itself (not mean(actual) / mean(unfounded)),
# so the "Average" line really marks the average of the histogram's variable.
mean_ratio <- mean(ratio_data$actual_to_unfounded)

ratio_data |>
  ggplot() +
  geom_histogram(
    mapping = aes(x = actual_to_unfounded),
    bins = 30,
    color = "white"
  ) +
  geom_vline(xintercept = mean_ratio, color = "orange") +
  # Anchor the label to the reference line and to the top of the panel
  # instead of hard-coded data coordinates.
  annotate(
    "text",
    x = mean_ratio, y = Inf,
    label = "Average", color = "orange",
    hjust = -0.1, vjust = 1.5
  ) +
  theme_classic()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2949 rows containing non-finite values (stat_bin).
# Mean ACTUAL_COUNT for each region.
avg_counts <- htd |>
  group_by(REGION_NAME) |>
  summarize(avg_actual_counts = mean(ACTUAL_COUNT))

# One bar per region. `fill` is mapped to REGION_NAME so that the Dark2
# palette below actually applies (a fill scale has no effect when no fill
# aesthetic is mapped). The legend is hidden because the x axis already
# names each region.
avg_counts |>
  ggplot() +
  geom_col(
    mapping = aes(x = REGION_NAME, y = avg_actual_counts, fill = REGION_NAME),
    show.legend = FALSE
  ) +
  theme_minimal() +
  scale_fill_brewer(palette = "Dark2")
# Mean ACTUAL_COUNT per state abbreviation, drawn as a bar chart.
new_data <- htd |>
  group_by(STATE_ABBR) |>
  summarise(mean_count = mean(ACTUAL_COUNT))

new_data |>
  ggplot(mapping = aes(x = STATE_ABBR, y = mean_count)) +
  geom_col(fill = "skyblue") +
  labs(
    title = "Mean Actual Count by State",
    x = "State",
    y = "Mean Actual Count"
  ) +
  theme_minimal() +
  # Rotate the x-axis labels by 90 degrees.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Number of rows per region.
htd |>
  ggplot(mapping = aes(x = REGION_NAME)) +
  geom_bar()

# Number of rows per offense sub-category.
htd |>
  ggplot(mapping = aes(x = OFFENSE_SUBCAT_NAME)) +
  geom_bar()
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.