# Set the default chunk option so each chunk's source code is echoed in the
# knitted output.
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
# Import the raw workbook exactly as stored; all cleaning happens afterwards.
# NOTE(review): the path is absolute and machine-specific, so this line only
# runs where the file exists at this exact location.
human_trafficking <- read_excel(
  path = "C:/Users/hls68/OneDrive - Drexel University/human_trafficking.xlsx"
)
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
# Work on a copy so the raw import (`human_trafficking`) stays untouched.
ht <- human_trafficking

# Give the six imported columns descriptive names.
colnames(ht) <- c(
  "State", "Offense",
  "Juvenile_male", "Juvenile_female", "Adult_male", "Adult_female"
)

# Drop the first four rows, which hold title/header text rather than data.
ht <- ht[-seq_len(4), ]

# Clean the remaining rows:
#   1. Turn placeholder state labels ("NA" text or empty string) into real
#      missing values. `%in%` is used instead of `==` because it never
#      returns NA, so rows whose State is already missing are left as-is
#      without relying on NA propagation.
#   2. Carry the last seen state name down into the rows that lack one
#      (presumably the sheet lists each state only once per group of rows;
#      verify against the workbook).
#   3. Convert the four count columns to numeric in one step.
ht <- ht %>%
  mutate(State = replace(State, State %in% c("NA", ""), NA)) %>%
  fill(State, .direction = "down") %>%
  mutate(
    across(
      c(Juvenile_male, Juvenile_female, Adult_male, Adult_female),
      as.numeric
    )
  )

# Preview the cleaned data.
head(ht)
## # A tibble: 6 × 6
##   State    Offense         Juvenile_male Juvenile_female Adult_male Adult_female
##   <chr>    <chr>                   <dbl>           <dbl>      <dbl>        <dbl>
## 1 Alabama  Commercial Sex…             0               0         56            1
## 2 Alabama  Involuntary Se…             0               0          5            1
## 3 Arizona  Commercial Sex…             1               0         32            3
## 4 Arizona  Involuntary Se…             0               0          3            1
## 5 Arkansas Commercial Sex…             0               0          4            0
## 6 Arkansas Involuntary Se…             0               0          0            0
# Histogram of the Adult_male column across all rows of `ht`.
# Each bin spans 5 arrests (binwidth = 5).
ht %>%
  ggplot(mapping = aes(x = Adult_male)) +
  geom_histogram(binwidth = 5, color = "black", fill = "gray40") +
  ggtitle("Distribution of Adult Male Human Trafficking Arrests") +
  xlab("Number of Arrests") +
  ylab("Frequency") +
  theme_minimal()

# Bar chart with one bar per Offense value. geom_bar() counts rows, so the
# bar height is the number of records in `ht` for that offense, not a sum of
# the arrest columns.
ht %>%
  ggplot(mapping = aes(x = Offense)) +
  geom_bar(color = "black", fill = "gray40") +
  ggtitle("Human Trafficking Arrests by Offense Type") +
  xlab("Offense Type") +
  ylab("Number of Records") +
  theme_minimal()

# Total adult-male arrests per state (missing counts ignored), sorted from
# highest to lowest, keeping the first 15 rows.
top_states <- ht %>%
  group_by(State) %>%
  summarise(total_adult_male = sum(Adult_male, na.rm = TRUE)) %>%
  arrange(desc(total_adult_male)) %>%
  slice_head(n = 15)

# Dot-and-line chart of those totals. States are ordered on the y axis by
# their total, and `group = 1` makes geom_line() join all points as a single
# series.
top_states %>%
  ggplot(
    mapping = aes(
      x = total_adult_male,
      y = reorder(State, total_adult_male)
    )
  ) +
  geom_point(size = 3) +
  geom_line(aes(group = 1)) +
  ggtitle("Top 15 States: Adult Male Human Trafficking Arrests") +
  xlab("Number of Arrests") +
  ylab("State") +
  theme_minimal()

## R Markdown