Rename long column names for readability (e.g., year, slaves_embarked, slaves_disembarked).

# Lookup of short, readable column names (left) for the verbose export
# column names (right). Both voyage tables share the same layout, so a single
# mapping is applied to each of them.
short_names <- c(
  # Dates and counts
  year = "voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year",
  slaves_embarked = "voyage_slaves_numbers__imp_total_num_slaves_embarked",
  slaves_disembarked = "voyage_slaves_numbers__imp_total_num_slaves_disembarked",
  mortality_ratio = "voyage_slaves_numbers__imp_mortality_ratio",

  # Disembarkation location, from broadest to most specific
  dis_broad = "voyage_itinerary__imp_broad_region_slave_dis__name",
  dis_region = "voyage_itinerary__imp_principal_region_slave_dis__name",
  dis_port = "voyage_itinerary__imp_principal_port_slave_dis__name",

  # Purchase location
  purchase_region = "voyage_itinerary__imp_principal_region_of_slave_purchase__name",
  purchase_place = "voyage_itinerary__imp_principal_place_of_slave_purchase__name",

  # Ship
  ship_nationality = "voyage_ship__imputed_nationality__name",

  # Voyage outcome descriptions
  outcome_slaves = "voyage_outcome__outcome_slaves__name",
  outcome_owner = "voyage_outcome__outcome_owner__name",
  outcome_type = "voyage_outcome__particular_outcome__name"
)

# all_of() makes the rename strict: a missing source column is an error.
trans <- rename(trans, all_of(short_names))
intra <- rename(intra, all_of(short_names))

Clean the data:

Convert year to integer, slave numbers to numeric.

# Coerce the year to integer and both head-count columns to numeric,
# identically for the two voyage tables.
trans <- trans %>%
  mutate(
    year = as.integer(year),
    across(c(slaves_embarked, slaves_disembarked), as.numeric)
  )

intra <- intra %>%
  mutate(
    year = as.integer(year),
    across(c(slaves_embarked, slaves_disembarked), as.numeric)
  )

Filter out rows where slaves_disembarked is 0 or NA (incomplete voyages).

# Keep only voyages with a recorded, strictly positive number disembarked.
trans <- filter(trans, !is.na(slaves_disembarked) & slaves_disembarked > 0)

intra <- filter(intra, !is.na(slaves_disembarked) & slaves_disembarked > 0)

Filter for successful outcomes (e.g., “Slaves disembarked”, “Voyage completed”, “Sold slaves”).

# Case-insensitive pattern for outcome descriptions treated as successful.
# The short stems (disembark, completed, sold) already match everything the
# longer phrases listed before them would match.
# NOTE(review): this is plain substring matching, so a negated description
# containing one of these stems would also match -- confirm against the
# actual outcome values.
success_pattern <- regex(
  "slaves disembarked|voyage completed|sold slaves|disembark|completed|sold|arriv|landed|delivered",
  ignore_case = TRUE
)

# TRUE where an outcome description matches success_pattern; a missing
# description is treated as the empty string and therefore does not match.
is_success <- function(outcome) {
  str_detect(coalesce(outcome, ""), success_pattern)
}

# A voyage is kept when either outcome description matches.
trans <- filter(trans, is_success(outcome_slaves) | is_success(outcome_type))

intra <- filter(intra, is_success(outcome_slaves) | is_success(outcome_type))

Add new columns: decade (e.g., floor(year / 10) * 10), estimated_deaths (slaves_embarked - slaves_disembarked), and is_us (TRUE if the disembarkation location is US-based, i.e., dis_broad == “Mainland North America”, or dis_port / dis_region matches a specific US port or region such as “New Orleans”).

# Case-insensitive pattern of port names treated as US ports.
us_port_pattern <- regex(
  "New Orleans|Charleston|Savannah|Norfolk|New York|Newport|Baltimore|Philadelphia|Mobile|Pensacola|Galveston|Boston",
  ignore_case = TRUE
)

# Case-insensitive pattern of region labels treated as US regions.
us_region_pattern <- regex("United States|USA", ignore_case = TRUE)

# Add the derived analysis columns to a voyage table.
#
# df: a data frame with columns year, slaves_embarked, slaves_disembarked,
#     dis_broad, dis_port and dis_region.
# Returns df with three extra columns:
#   decade           - first year of the decade, e.g. 1807 -> 1800
#   estimated_deaths - slaves_embarked minus slaves_disembarked
#   is_us            - TRUE/FALSE flag for a US disembarkation location
add_derived_columns <- function(df) {
  df %>%
    mutate(
      decade = floor(year / 10) * 10,
      estimated_deaths = slaves_embarked - slaves_disembarked,
      # %in% (rather than ==) yields FALSE for a missing dis_broad, so is_us
      # is never NA; the port and region checks are NA-safe via coalesce().
      is_us =
        dis_broad %in% "Mainland North America" |
        str_detect(coalesce(dis_port, ""), us_port_pattern) |
        str_detect(coalesce(dis_region, ""), us_region_pattern)
    )
}

trans <- add_derived_columns(trans)

intra <- add_derived_columns(intra)

Combine the datasets with bind_rows(), adding a source_type column (“Trans-Atlantic” or “Intra-American”).

# Tag each table with the dataset it came from, then stack them
# (Trans-Atlantic rows first).
trans_tagged <- mutate(trans, source_type = "Trans-Atlantic")
intra_tagged <- mutate(intra, source_type = "Intra-American")

all_trades <- bind_rows(trans_tagged, intra_tagged)

Part 2: Analysis and Questions (5 points)

Total slaves imported to the US: Filter for is_us == TRUE, sum slaves_disembarked from both datasets.

# Total disembarked across all US-flagged voyages in the combined table.
us_total_disembarked <- all_trades %>%
  filter(is_us) %>%
  pull(slaves_disembarked) %>%
  sum(na.rm = TRUE)

us_total_disembarked
## [1] 420091

Proportion of all slaves taken from Africa: Calculate US total / total slaves_embarked from Trans-Atlantic dataset (as this represents slaves taken from Africa).

# Denominator: everyone embarked in the (already filtered) Trans-Atlantic table.
africa_total_embarked <- sum(trans$slaves_embarked, na.rm = TRUE)

# Share of that total represented by US disembarkations.
prop_us_of_africa <- us_total_disembarked / africa_total_embarked

# One-row summary; the first two columns take their names from the variables.
tibble(
  us_total_disembarked,
  africa_total_embarked,
  proportion_us_of_africa_embarked = prop_us_of_africa
)
## # A tibble: 1 × 3
##   us_total_disembarked africa_total_embarked proportion_us_of_africa_embarked
##                  <dbl>                 <dbl>                            <dbl>
## 1               420091               8899661                           0.0472

Graph slave imports by decade to the US: Filter for US, group by decade, sum slaves_disembarked, plot as a bar graph with ggplot2.

# Total disembarked in the US per decade; count() with a weight column
# sums that column within each decade.
us_by_decade <- all_trades %>%
  filter(is_us) %>%
  count(decade, wt = slaves_disembarked, name = "total_disembarked") %>%
  arrange(decade)

# Bar chart with one bar per decade (decade treated as a category).
ggplot(us_by_decade) +
  geom_col(aes(x = factor(decade), y = total_disembarked)) +
  labs(
    title = "Slave Imports to the US by Decade (Trans-Atlantic + Intra-American)",
    x = "Decade",
    y = "Total slaves disembarked"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Imports to the US by decade and region/port/state: Filter for US, group by decade, dis_region, dis_port (approximate state from port/region, e.g., “New Orleans” -> “Louisiana”), sum slaves_disembarked. Use a table and faceted bar plot.

# TRUE where the port name contains `port_name`, ignoring case; a missing
# port is treated as the empty string and never matches.
port_is <- function(port, port_name) {
  str_detect(coalesce(port, ""), regex(port_name, ignore_case = TRUE))
}

# US disembarkations by decade, region and port, with a state inferred from
# the port name only (the first matching port wins).
# NOTE(review): ports outside this list become "Other/Unknown" even when
# dis_region already names a state (see the Virginia and Maryland rows in the
# table below) -- consider falling back to dis_region.
us_by_decade_region_port <- all_trades %>%
  filter(is_us) %>%
  mutate(
    approx_state = case_when(
      port_is(dis_port, "New Orleans") ~ "Louisiana",
      port_is(dis_port, "Charleston") ~ "South Carolina",
      port_is(dis_port, "Savannah") ~ "Georgia",
      port_is(dis_port, "Norfolk") ~ "Virginia",
      port_is(dis_port, "Baltimore") ~ "Maryland",
      port_is(dis_port, "Philadelphia") ~ "Pennsylvania",
      port_is(dis_port, "New York") ~ "New York",
      port_is(dis_port, "Boston") ~ "Massachusetts",
      port_is(dis_port, "Mobile") ~ "Alabama",
      port_is(dis_port, "Pensacola") ~ "Florida",
      port_is(dis_port, "Galveston") ~ "Texas",
      TRUE ~ "Other/Unknown"
    )
  ) %>%
  group_by(decade, dis_region, dis_port, approx_state) %>%
  summarise(
    total_disembarked = sum(slaves_disembarked, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(decade, desc(total_disembarked))

Table view (top 25 rows by total)

# The 25 decade/region/port combinations with the largest totals.
top_25 <- us_by_decade_region_port %>%
  arrange(desc(total_disembarked)) %>%
  slice_head(n = 25)

knitr::kable(top_25)
decade dis_region dis_port approx_state total_disembarked
1800 South Carolina Charleston South Carolina 47743
1760 South Carolina Charleston South Carolina 28070
1730 South Carolina Charleston South Carolina 23306
1770 South Carolina Charleston South Carolina 23086
1750 South Carolina Charleston South Carolina 18901
1830 Gulf coast New Orleans Louisiana 18792
1840 Gulf coast New Orleans Louisiana 18493
1820 Gulf coast New Orleans Louisiana 18265
1780 South Carolina Charleston South Carolina 10061
1720 Virginia York River Other/Unknown 9058
1730 Virginia York River Other/Unknown 8088
1790 Georgia Savannah Georgia 7725
1850 Gulf coast New Orleans Louisiana 7235
1720 South Carolina Charleston South Carolina 5346
1700 Virginia Virginia, port unspecified Other/Unknown 5238
1740 Virginia York River Other/Unknown 5087
1780 Gulf coast New Orleans Louisiana 4644
1710 Virginia York River Other/Unknown 4404
1760 Virginia Upper James River Other/Unknown 4149
1770 Georgia Savannah Georgia 3797
1730 Maryland Maryland, port unspecified Other/Unknown 3604
1740 Virginia Upper James River Other/Unknown 3563
1760 Georgia Savannah Georgia 3557
1800 Gulf coast New Orleans Louisiana 3313
1700 Virginia York River Other/Unknown 3041

Faceted bar plot by approximate state

# Drop rows without an inferred state, then total per decade and state.
known_states <- filter(us_by_decade_region_port, approx_state != "Other/Unknown")

us_state_plot <- known_states %>%
  group_by(decade, approx_state) %>%
  summarise(total_disembarked = sum(total_disembarked), .groups = "drop")

# One panel per state; each panel gets its own y-axis scale.
ggplot(us_state_plot) +
  geom_col(aes(x = factor(decade), y = total_disembarked)) +
  facet_wrap(vars(approx_state), scales = "free_y") +
  labs(
    title = "US Slave Imports by Decade and Approximate State (from Port)",
    x = "Decade",
    y = "Total slaves disembarked"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Countries participating in export from Africa, by decade: From the Trans-Atlantic dataset, group by decade and ship_nationality (renamed from voyage_ship__imputed_nationality__name and used here as “country”), then count unique voyages and sum slaves_embarked. Display in a table.

# Per decade and ship nationality: number of distinct voyages and total
# embarked, ordered within each decade by total embarked (largest first).
# NOTE(review): the output below shows "0" as a nationality; presumably a
# placeholder for a missing value -- confirm and recode if so.
countries_by_decade <- trans %>%
  group_by(decade, ship_nationality) %>%
  summarise(
    voyages = length(unique(voyage_id)),
    total_embarked = sum(slaves_embarked, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(decade, desc(total_embarked))

# At most 10 nationalities per decade; ties are cut rather than kept.
top_countries <- countries_by_decade %>%
  group_by(decade) %>%
  slice_max(total_embarked, n = 10, with_ties = FALSE) %>%
  ungroup()

top_countries
## # A tibble: 215 × 4
##    decade ship_nationality  voyages total_embarked
##     <dbl> <chr>               <int>          <dbl>
##  1   1510 Portugal / Brazil       2            624
##  2   1510 0                       9            223
##  3   1510 Spain / Uruguay         8            144
##  4   1520 Spain / Uruguay         3           1043
##  5   1520 0                       3            597
##  6   1530 0                       9           1777
##  7   1530 Portugal / Brazil       2            560
##  8   1530 Spain / Uruguay         1            224
##  9   1540 0                      23           7750
## 10   1540 Portugal / Brazil       1            160
## # ℹ 205 more rows

Part 3: Visualizations + Summary

library(tidyverse)
library(scales)

Plot 1: US imports by decade (bar chart)

# Bar chart of US disembarkations per decade, with thousands separators on
# the y axis and slanted decade labels.
plot_us_by_decade <- ggplot(us_by_decade) +
  geom_col(aes(x = factor(decade), y = total_disembarked)) +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Enslaved People Disembarked in the US by Decade",
    subtitle = "Combined Trans-Atlantic and Intra-American datasets (successful + complete voyages only)",
    x = "Decade",
    y = "Total disembarked"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

plot_us_by_decade

Plot 2: US imports by decade, split by source dataset (stacked bar chart)

# US disembarkations per decade, kept separate for each source dataset.
us_arrivals <- filter(all_trades, is_us)

us_by_decade_source <- us_arrivals %>%
  group_by(decade, source_type) %>%
  summarise(
    total_disembarked = sum(slaves_disembarked, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(decade)

# Stacked bars: one bar per decade, coloured by source dataset.
ggplot(us_by_decade_source) +
  geom_col(aes(x = factor(decade), y = total_disembarked, fill = source_type)) +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "US Slave Imports by Decade, Split by Dataset",
    subtitle = "Stacked totals show the relative contribution of Trans-Atlantic vs Intra-American arrivals",
    x = "Decade",
    y = "Total disembarked",
    fill = "Source type"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Summary of findings

  • After cleaning the data, the total number of enslaved people disembarked in the United States across both datasets was 420,091.

  • The total number embarked from Africa in the Trans-Atlantic dataset was 8,899,661. This means the US disembarkations in our cleaned data represent about 4.7% of the total embarked from Africa.

  • The decade-level bar chart shows that US arrivals were concentrated in specific decades rather than evenly spread over time.

  • The destination breakdown by inferred state (based on major ports) suggests that a small number of locations, most notably South Carolina (Charleston) and Louisiana (New Orleans), account for large shares of arrivals in multiple decades. ```