Load & Clean the Data:

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Column specification shared by both voyage files. cols_only() reads just the
# columns named here, each with the stated collector; columns that are not
# listed are not read.
col_types_spec <- cols_only(
  # Identifiers, parsed as integers
  id = col_integer(),
  voyage_id = col_integer(),
  # Numeric fields, parsed as doubles
  voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year = col_double(),
  voyage_slaves_numbers__imp_total_num_slaves_disembarked = col_double(),
  voyage_slaves_numbers__imp_total_num_slaves_embarked = col_double(),
  voyage_dates__length_middle_passage_days = col_double(),
  voyage_dates__imp_length_home_to_disembark = col_double(),
  voyage_crew__crew_first_landing = col_double(),
  voyage_crew__crew_voyage_outset = col_double(),
  voyage_ship__tonnage_mod = col_double(),
  voyage_slaves_numbers__imp_jamaican_cash_price = col_double(),
  voyage_slaves_numbers__imp_mortality_ratio = col_double(),
  voyage_slaves_numbers__percentage_women_among_embarked_slaves = col_double(),
  # Name fields, parsed as character
  voyage_outcome__vessel_captured_outcome__name = col_character(),
  voyage_ship__imputed_nationality__name = col_character(),
  voyage_itinerary__imp_region_voyage_begin__name = col_character(),
  voyage_ship__rig_of_vessel__name = col_character(),
  voyage_itinerary__place_voyage_ended__name = col_character(),
  # Remaining columns: month and percentage fields as doubles, all others as
  # character
  voyage_dates__slave_purchase_began_sparsedate__month = col_double(),
  voyage_slaves_numbers__percentage_men = col_double(),
  voyage_dates__voyage_completed_sparsedate__month = col_double(),
  voyage_itinerary__region_of_return__name = col_character(),
  voyage_slaves_numbers__percentage_boy = col_double(),
  voyage_itinerary__imp_principal_region_slave_dis__name = col_character(),
  voyage_itinerary__imp_principal_region_of_slave_purchase__name = col_character(),
  voyage_dates__date_departed_africa_sparsedate__month = col_double(),
  voyage_dates__voyage_began_sparsedate__month = col_double(),
  voyage_itinerary__imp_port_voyage_begin__name = col_character(),
  voyage_dates__first_dis_of_slaves_sparsedate__month = col_double(),
  voyage_itinerary__imp_broad_region_slave_dis__name = col_character(),
  voyage_slaves_numbers__percentage_girl = col_double(),
  voyage_outcome__particular_outcome__name = col_character(),
  voyage_itinerary__imp_principal_port_slave_dis__name = col_character(),
  voyage_slaves_numbers__percentage_child = col_double(),
  voyage_slaves_numbers__percentage_women = col_double(),
  voyage_dates__departure_last_place_of_landing_sparsedate__month = col_double(),
  voyage_outcome__outcome_owner__name = col_character(),
  voyage_outcome__outcome_slaves__name = col_character(),
  voyage_itinerary__imp_principal_place_of_slave_purchase__name = col_character(),
  voyage_outcome__resistance__name = col_character(),
  voyage_slaves_numbers__percentage_male = col_double(),
  voyage_slaves_numbers__percentage_female = col_double(),
  voyage_itinerary__imp_broad_region_voyage_begin__name = col_character(),
  voyage_itinerary__imp_broad_region_of_slave_purchase__name = col_character(),
  voyage_sources = col_character(),
  enslavers = col_character()
)

# Read both voyage tables from GitHub. The same column spec is applied to each
# file so the two tibbles come back with matching column types.
trans_url <- "https://raw.githubusercontent.com/imowerman-prog/data-3210/refs/heads/main/Data/trans-atlantic.csv"
intra_url <- "https://raw.githubusercontent.com/imowerman-prog/data-3210/refs/heads/main/Data/intra-american.csv"

trans <- read_csv(trans_url, col_types = col_types_spec)
intra <- read_csv(intra_url, col_types = col_types_spec)

# Define successful outcomes
# Values of the voyage's "particular outcome" that this analysis treats as a
# successful delivery; voyages with any other outcome are filtered out when the
# trans-Atlantic and intra-American tables are processed.
# NOTE(review): the match is an exact string comparison (%in%), so every entry
# must equal the dataset's label character-for-character -- verify against the
# data if the source files are refreshed.
successful_outcomes <- c(
  "Sold slaves in Americas - subsequent fate unknown",
  "Voyage completed as intended",
  "Captured by pirates or privateers - after disembarkation",
  "Condemned - Americas after disembarkation",
  "Detained and condemned in the United States after slaves disembarked",
  "Condemned in the Americas by British after slaves disembarked",
  "Captured by pirates - slaves sold in Americas from another ship",
  "Shipwrecked or destroyed, after disembarkation",
  "Privateer captured slaves at sea and delivered for sale in America",
  "Prisoners of war stole slaves during escape and carried to port of sale",
  "Captives seized from vessel by Spanish officials and sold",
  "Captured by Dutch - after disembarkation",
  "Shipwrecked, slaves salvaged",
  "Captured by slaves, recaptured and landed slaves in the Americas"
)

# Flag locations that this analysis counts as "US".
#
# A location is flagged when its broad region is "Mainland North America" or
# when its port name contains "New Orleans" (case-insensitive).
# NOTE(review): New Orleans is matched by name in addition to the broad region,
# presumably because it is filed under a different broad region in the data --
# verify.
# NOTE(review): the result is NA (not FALSE) when the broad region is missing
# and the port does not match; filter() drops such rows for both `== TRUE` and
# `== FALSE` tests -- confirm that is intended.
#
# Args:
#   broad_region: character vector of broad-region names.
#   port:         character vector of port names, same length.
#
# Returns: logical vector, one element per location.
is_us_location <- function(broad_region, port) {
  broad_region == "Mainland North America" |
    grepl("New Orleans", port, ignore.case = TRUE)
}

# Shared cleaning pipeline for one voyages table.
#
# The trans-Atlantic and intra-American files are processed identically, so
# the steps live in one place instead of two copy-pasted pipelines:
#   1. give the columns used by the analysis short names;
#   2. coerce year to integer and the two head-counts to numeric;
#   3. keep voyages that disembarked at least one person and whose particular
#      outcome is listed in `keep_outcomes`;
#   4. derive decade, estimated deaths, the US arrival/departure flags, and a
#      label recording which file the row came from.
#
# Args:
#   voyages:       tibble read with `col_types_spec`.
#   source_label:  value stored in the new `source_type` column.
#   keep_outcomes: outcome labels to retain; defaults to `successful_outcomes`.
#
# Returns: the filtered tibble with the renamed and derived columns.
clean_voyages <- function(voyages, source_label,
                          keep_outcomes = successful_outcomes) {
  voyages %>%
    rename(
      year = voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year,
      slaves_embarked = voyage_slaves_numbers__imp_total_num_slaves_embarked,
      slaves_disembarked = voyage_slaves_numbers__imp_total_num_slaves_disembarked,
      particular_outcome = voyage_outcome__particular_outcome__name,
      dis_broad = voyage_itinerary__imp_broad_region_slave_dis__name,
      dis_port = voyage_itinerary__imp_principal_port_slave_dis__name,
      embark_broad = voyage_itinerary__imp_broad_region_of_slave_purchase__name,
      embark_port = voyage_itinerary__imp_principal_place_of_slave_purchase__name
    ) %>%
    mutate(
      year = as.integer(year),
      slaves_embarked = as.numeric(slaves_embarked),
      slaves_disembarked = as.numeric(slaves_disembarked)
    ) %>%
    filter(
      !is.na(slaves_disembarked),
      slaves_disembarked > 0,
      particular_outcome %in% keep_outcomes
    ) %>%
    mutate(
      # Round the arrival year down to the start of its decade (1787 -> 1780).
      decade = floor(year / 10) * 10,
      estimated_deaths = slaves_embarked - slaves_disembarked,
      is_us = is_us_location(dis_broad, dis_port),
      embark_is_us = is_us_location(embark_broad, embark_port),
      source_type = source_label
    )
}

# Process trans-atlantic data
trans <- clean_voyages(trans, "Trans-Atlantic")

# Process intra-american data
intra <- clean_voyages(intra, "Intra-American")

# Combine the datasets
combined <- bind_rows(trans, intra)

Question 1:

# Gross arrivals: everyone disembarked on a voyage flagged as landing in the US.
# NOTE(review): only the arrival flag is tested, so intra-American voyages that
# both started and ended in the US are included -- confirm that is intended.
gross_us <- combined %>%
  filter(is_us) %>%
  pull(slaves_disembarked) %>%
  sum(na.rm = TRUE)

# Re-exports: people embarked on intra-American voyages that left a US location
# for a destination flagged as non-US. Note this is measured at embarkation,
# whereas gross arrivals are measured at disembarkation.
re_exports <- combined %>%
  filter(
    source_type == "Intra-American",
    embark_is_us,
    !is_us
  ) %>%
  pull(slaves_embarked) %>%
  sum(na.rm = TRUE)

# Net figure: arrivals minus re-exports.
net_us <- gross_us - re_exports

cat("Gross slaves imported to the US:", gross_us, "\n")
## Gross slaves imported to the US: 395919
cat("Slaves re-exported from the US:", re_exports, "\n")
## Slaves re-exported from the US: 7652
cat("Net slaves retained in the US:", net_us, "\n")
## Net slaves retained in the US: 388267

Question 2:

# Denominator: everyone embarked on the cleaned trans-Atlantic voyages.
trans_total_embarked <- sum(trans$slaves_embarked, na.rm = TRUE)

# Share of that total represented by the net US figure.
proportion <- net_us / trans_total_embarked

cat("Proportion of all slaves taken from Africa (net US):", proportion, "\n")
## Proportion of all slaves taken from Africa (net US): 0.05107614

Question 3:

library(ggplot2)

# Total US disembarkations per decade, sorted chronologically.
us_by_decade <- combined %>%
  filter(is_us) %>%
  summarise(
    total_disembarked = sum(slaves_disembarked, na.rm = TRUE),
    .by = decade
  ) %>%
  arrange(decade)

# Bar chart: one bar per decade (decade is treated as a discrete category).
us_by_decade %>%
  ggplot(aes(factor(decade), total_disembarked)) +
  geom_col(fill = "#4682B4") +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Slaves Disembarked in US by Decade",
    x = "Decade",
    y = "Number of Slaves Disembarked"
  )

Question 4:

# Attach an approximate state to each US arrival by looking for a known port
# name inside the disembarkation port (case-insensitive). Rules are tried in
# order and the first match wins; anything unmatched, including a missing
# port, falls through to "Other/Unknown".
us_ports_state <- combined %>%
  filter(is_us) %>%
  mutate(
    state = case_when(
      str_detect(dis_port, regex("New Orleans", ignore_case = TRUE)) ~
        "Louisiana",
      str_detect(dis_port, regex("Charleston", ignore_case = TRUE)) ~
        "South Carolina",
      str_detect(dis_port, regex("Savannah", ignore_case = TRUE)) ~
        "Georgia",
      str_detect(dis_port, regex("Baltimore", ignore_case = TRUE)) ~
        "Maryland",
      str_detect(dis_port, regex("Philadelphia", ignore_case = TRUE)) ~
        "Pennsylvania",
      str_detect(dis_port, regex("New York", ignore_case = TRUE)) ~
        "New York",
      str_detect(dis_port, regex("Boston", ignore_case = TRUE)) ~
        "Massachusetts",
      str_detect(dis_port, regex("Norfolk", ignore_case = TRUE)) ~
        "Virginia",
      .default = "Other/Unknown"
    )
  )

# Table: total by decade and state
# count() with `wt` sums the weight column within each decade/state pair and
# returns an ungrouped tibble sorted by the grouping columns.
us_state_table <- us_ports_state %>%
  count(decade, state, wt = slaves_disembarked, name = "total_disembarked")

print(us_state_table)
## # A tibble: 85 × 3
##    decade state         total_disembarked
##     <dbl> <chr>                     <dbl>
##  1   1610 Other/Unknown                29
##  2   1620 Other/Unknown                 3
##  3   1630 Massachusetts                 7
##  4   1630 New York                     53
##  5   1630 Other/Unknown                13
##  6   1640 New York                     69
##  7   1640 Other/Unknown                12
##  8   1650 New York                    477
##  9   1650 Other/Unknown               130
## 10   1660 New York                    436
## # ℹ 75 more rows
# Faceted plot: one panel per state, each with its own y-axis scale. The fill
# legend is hidden because the panel titles already name the state.
us_state_table %>%
  ggplot(aes(factor(decade), total_disembarked, fill = state)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(state), scales = "free_y") +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "US Slave Imports by Decade and State (Approx.)",
    x = "Decade",
    y = "Slaves Disembarked"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Optional:

# Ten ship nationalities with the largest disembarked totals across both
# datasets. count() sums the weight column per nationality and, with
# sort = TRUE, orders the result from largest to smallest.
# NOTE(review): the printed result includes a nationality of "0" -- presumably
# a placeholder for a missing value; verify against the source data.
top_countries <- combined %>%
  count(
    voyage_ship__imputed_nationality__name,
    wt = slaves_disembarked,
    name = "total_disembarked",
    sort = TRUE
  ) %>%
  slice_head(n = 10)

print(top_countries)
## # A tibble: 10 × 2
##    voyage_ship__imputed_nationality__name total_disembarked
##    <chr>                                              <dbl>
##  1 Great Britain                                    2414960
##  2 Portugal / Brazil                                2154060
##  3 France                                            890381
##  4 Spain / Uruguay                                   480527
##  5 Netherlands                                       471709
##  6 0                                                 401948
##  7 U.S.A.                                            289462
##  8 Denmark / Baltic                                   83221
##  9 Austria                                             2098
## 10 Other                                               1962
# OPTIONAL: Visualization
library(ggplot2)

# Horizontal bar chart of the top nationalities. The nationality is turned
# into a factor ordered by total so the bars are sorted by size after the
# axes are flipped.
top_countries %>%
  mutate(
    nationality = reorder(
      voyage_ship__imputed_nationality__name,
      total_disembarked
    )
  ) %>%
  ggplot(aes(x = nationality, y = total_disembarked)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Top 10 Nationalities by Total Slaves Disembarked",
    x = "Ship Nationality",
    y = "Total Slaves Disembarked"
  )

Summary:

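This project showed that only a small share of enslaved Africans were brought to the U.S.: the net U.S. figure of about 388,000 people is roughly 5% of all those embarked on the trans-Atlantic voyages in this dataset, while most were taken to the Caribbean and South America. Some enslaved people (about 7,650) were re-exported from the U.S. to other regions. Ships sailing under the flags of Great Britain, Portugal/Brazil, France, and Spain carried the largest numbers of captives, making those countries the major players in the slave trade. Overall, the U.S. was part of a much larger and tragic system.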