Part 1: Data Loading and Cleaning

# Load required library
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Column specification shared by both voyage files. cols_only() reads just
# the columns named here and skips everything else in the CSV.
# Compact readr type codes: "i" = integer, "d" = double, "c" = character.
col_types_spec <- cols_only(
  id = "i",
  voyage_id = "i",
  voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year = "d",
  voyage_slaves_numbers__imp_total_num_slaves_disembarked = "d",
  voyage_slaves_numbers__imp_total_num_slaves_embarked = "d",
  voyage_dates__length_middle_passage_days = "d",
  voyage_dates__imp_length_home_to_disembark = "d",
  voyage_crew__crew_first_landing = "d",
  voyage_crew__crew_voyage_outset = "d",
  voyage_ship__tonnage_mod = "d",
  voyage_slaves_numbers__imp_jamaican_cash_price = "d",
  voyage_slaves_numbers__imp_mortality_ratio = "d",
  voyage_slaves_numbers__percentage_women_among_embarked_slaves = "d",
  voyage_outcome__vessel_captured_outcome__name = "c",
  voyage_ship__imputed_nationality__name = "c",
  voyage_itinerary__imp_region_voyage_begin__name = "c",
  voyage_ship__rig_of_vessel__name = "c",
  voyage_itinerary__place_voyage_ended__name = "c",  # Force as character
  voyage_dates__slave_purchase_began_sparsedate__month = "d",
  voyage_slaves_numbers__percentage_men = "d",
  voyage_dates__voyage_completed_sparsedate__month = "d",
  voyage_itinerary__region_of_return__name = "c",
  voyage_slaves_numbers__percentage_boy = "d",
  voyage_itinerary__imp_principal_region_slave_dis__name = "c",
  voyage_itinerary__imp_principal_region_of_slave_purchase__name = "c",
  voyage_dates__date_departed_africa_sparsedate__month = "d",
  voyage_dates__voyage_began_sparsedate__month = "d",
  voyage_itinerary__imp_port_voyage_begin__name = "c",
  voyage_dates__first_dis_of_slaves_sparsedate__month = "d",
  voyage_itinerary__imp_broad_region_slave_dis__name = "c",
  voyage_slaves_numbers__percentage_girl = "d",
  voyage_outcome__particular_outcome__name = "c",
  voyage_itinerary__imp_principal_port_slave_dis__name = "c",
  voyage_slaves_numbers__percentage_child = "d",
  voyage_slaves_numbers__percentage_women = "d",
  voyage_dates__departure_last_place_of_landing_sparsedate__month = "d",
  voyage_outcome__outcome_owner__name = "c",
  voyage_outcome__outcome_slaves__name = "c",
  voyage_itinerary__imp_principal_place_of_slave_purchase__name = "c",
  voyage_outcome__resistance__name = "c",
  voyage_slaves_numbers__percentage_male = "d",
  voyage_slaves_numbers__percentage_female = "d",
  voyage_itinerary__imp_broad_region_voyage_begin__name = "c",
  voyage_itinerary__imp_broad_region_of_slave_purchase__name = "c",
  voyage_sources = "c",
  enslavers = "c"
)


# Load the datasets
# Both CSVs live in the same GitHub folder, so the shared prefix is written
# once and each file name is appended to it.
data_url <- paste0(
  "https://raw.githubusercontent.com/imowerman-prog/data-3210/",
  "refs/heads/main/Data/"
)

# The same column specification is applied to both files, so the two tables
# come back with matching column names and types.
trans <- read_csv(
  paste0(data_url, "trans-atlantic.csv"),
  col_types = col_types_spec
)
intra <- read_csv(
  paste0(data_url, "intra-american.csv"),
  col_types = col_types_spec
)

# Rename, clean, and add new variables

#' Clean one voyage table
#'
#' Applies the same renaming, filtering, and derived columns to either raw
#' voyage table so the two results can be stacked with `bind_rows()`.
#'
#' @param raw A tibble read with `col_types_spec` (`trans` or `intra`).
#' @param source_label String stored in the `source_type` column.
#' @return `raw` with six columns renamed (`year`, `slaves_embarked`,
#'   `slaves_disembarked`, `outcome`, `dis_broad`, `dis_port`), restricted to
#'   voyages that disembarked at least one person and whose outcome matches
#'   one of the accepted labels, plus the new columns `decade`,
#'   `estimated_deaths`, `is_us`, and `source_type`.
clean_voyages <- function(raw, source_label) {
  raw %>%
    rename(
      year = voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year,
      slaves_embarked = voyage_slaves_numbers__imp_total_num_slaves_embarked,
      slaves_disembarked =
        voyage_slaves_numbers__imp_total_num_slaves_disembarked,
      outcome = voyage_outcome__outcome_slaves__name,
      dis_broad = voyage_itinerary__imp_broad_region_slave_dis__name,
      dis_port = voyage_itinerary__imp_principal_port_slave_dis__name
    ) %>%
    # year and the two counts are already read as doubles by col_types_spec,
    # so no as.numeric() conversion is needed here.
    filter(
      !is.na(slaves_disembarked),
      slaves_disembarked > 0,
      !is.na(outcome),
      str_detect(outcome, "Slaves disembarked|Voyage completed|Sold slaves")
    ) %>%
    mutate(
      # 1787 -> 1780
      decade = floor(year / 10) * 10,
      # NOTE(review): NA when slaves_embarked is missing; could be negative
      # if the disembarked count exceeds the embarked count -- confirm.
      estimated_deaths = slaves_embarked - slaves_disembarked,
      # A missing region or port would otherwise leave is_us as NA; treat
      # "cannot tell" as not-US so the flag is always TRUE or FALSE.
      is_us = coalesce(
        dis_broad == "Mainland North America" |
          str_detect(dis_port, "New Orleans|Virginia|Charleston|Savannah"),
        FALSE
      ),
      source_type = source_label
    )
}

clean_trans <- clean_voyages(trans, "Trans-Atlantic")

# Clean Intra-American dataset
clean_intra <- clean_voyages(intra, "Intra-American")

# Combine datasets
voyages <- bind_rows(clean_trans, clean_intra)

Part 2: Analysis and Questions

Total slaves imported to the US

# Total people disembarked on voyages flagged as US-bound, across both the
# Trans-Atlantic and Intra-American tables.
us_total <- summarise(
  filter(voyages, is_us),
  total_us_imported = sum(slaves_disembarked, na.rm = TRUE)
)

us_total
## # A tibble: 1 × 1
##   total_us_imported
##               <dbl>
## 1            419492

Proportion of all slaves taken from Africa

# Denominator: everyone embarked on the cleaned Trans-Atlantic voyages.
# NOTE(review): clean_trans keeps only voyages that passed the disembarkation
# and outcome filters, so this is presumably lower than the number embarked
# on all recorded voyages -- confirm that this is the intended baseline.
africa_total <- summarise(
  clean_trans,
  total_from_africa = sum(slaves_embarked, na.rm = TRUE)
)

# Share of that total that was disembarked at US destinations.
proportion_us <-
  us_total[["total_us_imported"]] / africa_total[["total_from_africa"]]

proportion_us
## [1] 0.05058466

Graph slave imports by decade to the US

# Arrivals at US destinations, totalled within each decade.
us_decade <- voyages %>%
  filter(is_us) %>%
  group_by(decade) %>%
  summarise(total_slaves = sum(slaves_disembarked, na.rm = TRUE))

# The totals are already computed, so geom_col() draws them directly as bar
# heights (the same as geom_bar(stat = "identity")).
ggplot(us_decade) +
  geom_col(aes(x = decade, y = total_slaves)) +
  ggtitle("Slave Imports to the United States by Decade") +
  xlab("Decade") +
  ylab("Slaves Disembarked")

Imports to the US by decade and ports/region

# US arrivals broken down by decade, broad region, port, and a state label
# derived from the port name.
us_ports <- voyages %>%
  filter(is_us == TRUE) %>%
  mutate(
    # Conditions are tried top to bottom; ports matching none of them
    # (including missing port names) fall through to "Other".
    state = case_when(
      str_detect(dis_port, "New Orleans") ~ "Louisiana",
      str_detect(dis_port, "Charleston") ~ "South Carolina",
      str_detect(dis_port, "Savannah") ~ "Georgia",
      str_detect(dis_port, "Virginia") ~ "Virginia",
      .default = "Other"
    )
  ) %>%
  group_by(decade, dis_broad, dis_port, state) %>%
  summarise(
    total_slaves = sum(slaves_disembarked, na.rm = TRUE),
    # Same grouping summarise() would pick by default (drops `state`), stated
    # explicitly so no "grouped output" message is emitted.
    .groups = "drop_last"
  )

us_ports
## # A tibble: 329 × 5
## # Groups:   decade, dis_broad, dis_port [329]
##    decade dis_broad              dis_port                   state   total_slaves
##     <dbl> <chr>                  <chr>                      <chr>          <dbl>
##  1   1610 Mainland North America Hampton                    Other             29
##  2   1620 Mainland North America Virginia, port unspecified Virgin…            3
##  3   1630 Mainland North America Boston                     Other              7
##  4   1630 Mainland North America New York                   Other             53
##  5   1630 Mainland North America Virginia, port unspecified Virgin…           13
##  6   1640 Mainland North America New York                   Other             69
##  7   1640 Mainland North America Virginia, port unspecified Virgin…           12
##  8   1650 Mainland North America Maryland, port unspecified Other              5
##  9   1650 Mainland North America New York                   Other            477
## 10   1650 Mainland North America Virginia, port unspecified Virgin…          125
## # ℹ 319 more rows
  # One panel per state. us_ports has a row per port, and bars that share a
  # decade within a panel are stacked, so each bar shows the combined total.
  ggplot(us_ports, aes(x = decade, y = total_slaves)) +
    geom_col() +
    facet_wrap(vars(state)) +
    labs(
      title = "Slave Imports to the US by State and Decade",
      x = "Decade",
      y = "Slaves Disembarked"
    )

Countries exporting from Africa by decade

# Number of voyages and people embarked per decade and ship nationality,
# using the cleaned Trans-Atlantic voyages only.
# NOTE(review): the printed result contains a country value of "0";
# presumably a placeholder for an unknown nationality -- confirm and recode
# if so.
exports_by_country <- clean_trans %>%
  rename(country = voyage_ship__imputed_nationality__name) %>%
  group_by(decade, country) %>%
  summarise(
    voyages = n_distinct(voyage_id),
    slaves_embarked = sum(slaves_embarked, na.rm = TRUE),
    # Same grouping summarise() would pick by default (result stays grouped
    # by decade), stated explicitly so no message is emitted.
    .groups = "drop_last"
  ) %>%
  # Within each decade, largest exporters first.
  arrange(decade, desc(slaves_embarked))

exports_by_country
## # A tibble: 210 × 4
## # Groups:   decade [36]
##    decade country           voyages slaves_embarked
##     <dbl> <chr>               <int>           <dbl>
##  1   1510 Portugal / Brazil       2             624
##  2   1510 0                       9             223
##  3   1510 Spain / Uruguay         8             144
##  4   1520 Spain / Uruguay         3            1043
##  5   1520 0                       3             597
##  6   1530 0                       8            1774
##  7   1530 Portugal / Brazil       2             560
##  8   1530 Spain / Uruguay         1             224
##  9   1540 0                      23            7750
## 10   1540 Portugal / Brazil       1             160
## # ℹ 200 more rows

Part 3: Visualizations and Publication

Plot 1 - US Slave Imports by Decade

# Decade totals for US-bound voyages, recomputed here so this section can be
# run on its own.
us_decade <- voyages %>%
  filter(is_us) %>%
  group_by(decade) %>%
  summarise(total_slaves = sum(slaves_disembarked, na.rm = TRUE))

# Publication version of the decade chart: pre-computed totals drawn as bars
# on a minimal theme.
ggplot(us_decade, aes(x = decade, y = total_slaves)) +
  geom_col() +
  ggtitle("Slave Imports to the United States by Decade") +
  xlab("Decade") +
  ylab("Number of Enslaved People Disembarked") +
  theme_minimal()

Plot 2 – US Imports by State and Decade (Faceted)

# Label each US arrival with a state inferred from its port name, then total
# the arrivals per decade and state (one row per bar in the plot below).
us_ports <- voyages %>%
  filter(is_us == TRUE) %>%
  mutate(
    # Conditions are tried top to bottom; anything unmatched is "Other".
    state = case_when(
      str_detect(dis_port, "New Orleans") ~ "Louisiana",
      str_detect(dis_port, "Charleston") ~ "South Carolina",
      str_detect(dis_port, "Savannah") ~ "Georgia",
      str_detect(dis_port, "Virginia") ~ "Virginia",
      TRUE ~ "Other"
    )
  ) %>%
  group_by(decade, state) %>%
  summarise(
    total_slaves = sum(slaves_disembarked, na.rm = TRUE),
    # Return a plain ungrouped tibble; this also silences the "grouped
    # output" message.
    .groups = "drop"
  )

# One panel per state, one bar per decade.
ggplot(us_ports, aes(x = decade, y = total_slaves)) +
  geom_col() +
  facet_wrap(~ state) +
  labs(
    title = "Slave Imports to the US by State and Decade",
    x = "Decade",
    y = "Number of Enslaved People Disembarked"
  ) +
  theme_minimal()

Summary

This analysis examined data from both the Trans-Atlantic and Intra-American slave trades to better understand the scale and patterns of enslaved people brought to the United States. Combining the two datasets and keeping only voyages that successfully disembarked enslaved people made it possible to estimate the number of individuals transported and to analyze trends over time.

The results of this analysis show that slave imports to the United States varied significantly from decade to decade, with noticeable increases during certain periods of the late eighteenth century. Many enslaved individuals were transported to the United States through the Trans-Atlantic trade. However, they represented only a portion, about 5 percent in this analysis, of the total number of enslaved people taken from Africa. The regional analysis showed that major ports such as New Orleans, Charleston, and Savannah played key roles in receiving enslaved people, which reflects the economic importance of slavery in those areas. Overall, the data from this analysis demonstrate the scale of the slave trade and the geographic concentration of slave imports within the United States.