library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Column specification passed as col_types to both read_csv() calls below.
# cols_only() reads just the columns named here, each with the type given.
col_types_spec <- cols_only(
  id = col_integer(),
  voyage_id = col_integer(),
  voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year = col_double(),
  voyage_slaves_numbers__imp_total_num_slaves_disembarked = col_double(),
  voyage_slaves_numbers__imp_total_num_slaves_embarked = col_double(),
  voyage_dates__length_middle_passage_days = col_double(),
  voyage_dates__imp_length_home_to_disembark = col_double(),
  voyage_crew__crew_first_landing = col_double(),
  voyage_crew__crew_voyage_outset = col_double(),
  voyage_ship__tonnage_mod = col_double(),
  voyage_slaves_numbers__imp_jamaican_cash_price = col_double(),
  voyage_slaves_numbers__imp_mortality_ratio = col_double(),
  voyage_slaves_numbers__percentage_women_among_embarked_slaves = col_double(),
  voyage_outcome__vessel_captured_outcome__name = col_character(),
  voyage_ship__imputed_nationality__name = col_character(),
  voyage_itinerary__imp_region_voyage_begin__name = col_character(),
  voyage_ship__rig_of_vessel__name = col_character(),
  voyage_itinerary__place_voyage_ended__name = col_character(),
  # Remaining columns: month and percentage fields are read as doubles,
  # name / source / enslaver fields as character.
  voyage_dates__slave_purchase_began_sparsedate__month = col_double(),
  voyage_slaves_numbers__percentage_men = col_double(),
  voyage_dates__voyage_completed_sparsedate__month = col_double(),
  voyage_itinerary__region_of_return__name = col_character(),
  voyage_slaves_numbers__percentage_boy = col_double(),
  voyage_itinerary__imp_principal_region_slave_dis__name = col_character(),
  voyage_itinerary__imp_principal_region_of_slave_purchase__name = col_character(),
  voyage_dates__date_departed_africa_sparsedate__month = col_double(),
  voyage_dates__voyage_began_sparsedate__month = col_double(),
  voyage_itinerary__imp_port_voyage_begin__name = col_character(),
  voyage_dates__first_dis_of_slaves_sparsedate__month = col_double(),
  voyage_itinerary__imp_broad_region_slave_dis__name = col_character(),
  voyage_slaves_numbers__percentage_girl = col_double(),
  voyage_outcome__particular_outcome__name = col_character(),
  voyage_itinerary__imp_principal_port_slave_dis__name = col_character(),
  voyage_slaves_numbers__percentage_child = col_double(),
  voyage_slaves_numbers__percentage_women = col_double(),
  voyage_dates__departure_last_place_of_landing_sparsedate__month = col_double(),
  voyage_outcome__outcome_owner__name = col_character(),
  voyage_outcome__outcome_slaves__name = col_character(),
  voyage_itinerary__imp_principal_place_of_slave_purchase__name = col_character(),
  voyage_outcome__resistance__name = col_character(),
  voyage_slaves_numbers__percentage_male = col_double(),
  voyage_slaves_numbers__percentage_female = col_double(),
  voyage_itinerary__imp_broad_region_voyage_begin__name = col_character(),
  voyage_itinerary__imp_broad_region_of_slave_purchase__name = col_character(),
  voyage_sources = col_character(),
  enslavers = col_character()
)

# Read both voyage tables from GitHub, parsing them with the shared column spec
trans <-
  "https://raw.githubusercontent.com/imowerman-prog/data-3210/refs/heads/main/Data/trans-atlantic.csv" %>%
  read_csv(col_types = col_types_spec)

intra <-
  "https://raw.githubusercontent.com/imowerman-prog/data-3210/refs/heads/main/Data/intra-american.csv" %>%
  read_csv(col_types = col_types_spec)

# Define successful outcomes: the particular_outcome values that the
# processing steps below keep (matched with %in%); voyages with any other
# outcome, or with a missing outcome, are filtered out.
successful_outcomes <- c(
  "Sold slaves in Americas - subsequent fate unknown",
  "Voyage completed as intended",
  "Captured by pirates or privateers - after disembarkation",
  "Condemned - Americas after disembarkation",
  "Detained and condemned in the United States after slaves disembarked",
  "Condemned in the Americas by British after slaves disembarked",
  "Captured by pirates - slaves sold in Americas from another ship",
  "Shipwrecked or destroyed, after disembarkation",
  "Privateer captured slaves at sea and delivered for sale in America",
  "Prisoners of war stole slaves during escape and carried to port of sale",
  "Captives seized from vessel by Spanish officials and sold",
  "Captured by Dutch - after disembarkation",
  "Shipwrecked, slaves salvaged",
  "Captured by slaves, recaptured and landed slaves in the Americas"
)

# Standardise one voyages table so both sources share the same shape.
#
# The trans-atlantic and intra-american tables used to go through two
# copy-pasted pipelines that differed only in the source_type label; a single
# helper keeps the two from drifting apart.
#
# Arguments:
#   voyages       tibble read with col_types_spec
#   source_label  string stored in the source_type column
# Returns:
#   the tibble restricted to voyages with slaves_disembarked > 0 and a
#   particular_outcome listed in the global successful_outcomes, with short
#   column names plus decade, estimated_deaths, is_us, embark_is_us and
#   source_type added.
prepare_voyages <- function(voyages, source_label) {
  voyages %>%
    rename(
      year = voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year,
      slaves_embarked = voyage_slaves_numbers__imp_total_num_slaves_embarked,
      slaves_disembarked = voyage_slaves_numbers__imp_total_num_slaves_disembarked,
      particular_outcome = voyage_outcome__particular_outcome__name,
      dis_broad = voyage_itinerary__imp_broad_region_slave_dis__name,
      dis_port = voyage_itinerary__imp_principal_port_slave_dis__name,
      embark_broad = voyage_itinerary__imp_broad_region_of_slave_purchase__name,
      embark_port = voyage_itinerary__imp_principal_place_of_slave_purchase__name
    ) %>%
    mutate(
      year = as.integer(year),
      slaves_embarked = as.numeric(slaves_embarked),
      slaves_disembarked = as.numeric(slaves_disembarked)
    ) %>%
    filter(
      !is.na(slaves_disembarked),
      slaves_disembarked > 0,
      particular_outcome %in% successful_outcomes
    ) %>%
    mutate(
      # Decade of arrival, e.g. 1807 -> 1800
      decade = floor(year / 10) * 10,
      estimated_deaths = slaves_embarked - slaves_disembarked,
      # Both flags are NA (not FALSE) when the broad region is missing and the
      # port does not mention New Orleans; filter() drops such rows either way.
      is_us = dis_broad == "Mainland North America" |
        grepl("New Orleans", dis_port, ignore.case = TRUE),
      embark_is_us = embark_broad == "Mainland North America" |
        grepl("New Orleans", embark_port, ignore.case = TRUE),
      source_type = source_label
    )
}

# Process trans-atlantic data
trans <- prepare_voyages(trans, "Trans-Atlantic")

# Process intra-american data
intra <- prepare_voyages(intra, "Intra-American")

# Stack the two processed tables; source_type tells the rows apart
combined <- trans %>%
  bind_rows(intra)

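Question 1: Total slaves imported to the US. Filter for is_us == TRUE and sum slaves_disembarked across both datasets (gross imports); then subtract the slaves embarked in the US on intra-American voyages that landed elsewhere (re-exports) to get the net number retained.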

# Gross arrivals: everyone disembarked on voyages flagged as landing in the US
gross_us <- combined %>%
  filter(is_us) %>%
  pull(slaves_disembarked) %>%
  sum(na.rm = TRUE)

# Re-exports: captives embarked in the US on intra-American voyages whose
# destination is flagged as outside the US
re_exports <- combined %>%
  filter(source_type == "Intra-American" & embark_is_us & !is_us) %>%
  pull(slaves_embarked) %>%
  sum(na.rm = TRUE)

# Net = gross arrivals minus re-exports
net_us <- gross_us - re_exports

cat("Gross slaves imported to the US:", gross_us, "\n")
## Gross slaves imported to the US: 395919
cat("Slaves re-exported from the US:", re_exports, "\n")
## Slaves re-exported from the US: 7652
cat("Net slaves retained in the US:", net_us, "\n")
## Net slaves retained in the US: 388267

Question 2: Proportion of all slaves taken from Africa. Divide the net US total by the total slaves_embarked in the Trans-Atlantic dataset (which represents the slaves taken from Africa).

# Denominator: captives embarked on the trans-Atlantic voyages held in `trans`.
# At this point `trans` has already been restricted to voyages with a
# successful outcome and slaves_disembarked > 0, so other voyages are excluded.
trans_total_embarked <- trans %>%
  pull(slaves_embarked) %>%
  sum(na.rm = TRUE)

# Share of those embarked captives that ended up (net) in the US
proportion <- net_us / trans_total_embarked

cat("Proportion of all slaves taken from Africa (net US):", proportion, "\n")
## Proportion of all slaves taken from Africa (net US): 0.05107614

Question 3: Graph of slave imports to the US by decade. Filter for US arrivals, group by decade, sum slaves_disembarked, and plot the totals as a bar chart with ggplot2.

# One row per decade: total captives disembarked on US-bound voyages
us_imports_by_decade <- combined %>%
  filter(is_us) %>%
  group_by(decade) %>%
  summarise(
    total_disembarked = sum(slaves_disembarked, na.rm = TRUE),
    .groups = "drop"
  )

# Bar chart of the decade totals; decade is treated as a discrete axis
us_imports_by_decade %>%
  ggplot(aes(x = factor(decade), y = total_disembarked)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Slave Imports to the US by Decade",
    x = "Decade",
    y = "Number of Slaves Disembarked"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Approximate the US state for each disembarkation port.
#
# Arguments:
#   port    character vector of port names (may contain NA)
#   region  character vector of broad regions, same length as port (or length 1)
# Returns:
#   character vector of state names; "Unknown US State" when the port is
#   missing but the region is "Mainland North America"; "Other/Unknown" for
#   everything else.
#
# Fix: ports recorded only by state name (e.g. "Maryland, port unspecified")
# and Hampton previously fell through to "Other/Unknown" because only the
# Virginia, New York and Rhode Island rules matched on the state name itself.
# Every rule now also matches its own state name.
map_port_to_state <- function(port, region) {
  # Ordered rules, matched case-insensitively: the first matching state wins.
  state_patterns <- c(
    "Louisiana"      = "New Orleans|Louisiana",
    "South Carolina" = "Charleston|Charles Town|South Carolina",
    "Georgia"        = "Savannah|Georgia",
    "Maryland"       = "Baltimore|Annapolis|Maryland",
    # \\b keeps "Hampton" from matching inside e.g. "Southampton"
    "Virginia"       = "Norfolk|\\bHampton\\b|Virginia",
    "Massachusetts"  = "Boston|Massachusetts",
    "New York"       = "New York",
    "Pennsylvania"   = "Philadelphia|Pennsylvania",
    "Rhode Island"   = "Rhode Island|Newport|Providence"
  )

  # Recycle a length-1 argument against the other; empty port -> empty result
  n <- if (length(port) == 0L) 0L else max(length(port), length(region))
  port <- rep_len(as.character(port), n)
  region <- rep_len(as.character(region), n)

  state <- rep("Other/Unknown", n)

  # %in% never yields NA, so a missing region simply does not match
  state[is.na(port) & region %in% "Mainland North America"] <- "Unknown US State"

  # Apply rules last-to-first so earlier (higher-priority) rules overwrite
  # later ones, giving first-match-wins semantics. grepl() is FALSE for NA.
  for (state_name in rev(names(state_patterns))) {
    hit <- grepl(state_patterns[[state_name]], port, ignore.case = TRUE)
    state[hit] <- state_name
  }

  state
}

Question 4: Imports to the US by decade and region/port/state. Filter for US arrivals, group by decade, dis_broad and dis_port (with the state approximated from the port name, e.g., “New Orleans” -> “Louisiana”), and sum slaves_disembarked. Show the result as a table and as a bar plot faceted by state.

# Totals per decade / broad region / port, with the state derived from the
# port name while grouping
us_imports_by_decade_region_port <- combined %>%
  filter(is_us) %>%
  group_by(
    decade, dis_broad, dis_port,
    state = map_port_to_state(dis_port, dis_broad)
  ) %>%
  summarise(
    total_disembarked = sum(slaves_disembarked, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(decade, dis_broad, state, dis_port)

print(us_imports_by_decade_region_port)
## # A tibble: 272 × 5
##    decade dis_broad              dis_port                state total_disembarked
##     <dbl> <chr>                  <chr>                   <chr>             <dbl>
##  1   1610 Mainland North America Hampton                 Othe…                29
##  2   1620 Mainland North America Virginia, port unspeci… Virg…                 3
##  3   1630 Mainland North America Boston                  Mass…                 7
##  4   1630 Mainland North America New York                New …                53
##  5   1630 Mainland North America Virginia, port unspeci… Virg…                13
##  6   1640 Mainland North America New York                New …                69
##  7   1640 Mainland North America Virginia, port unspeci… Virg…                12
##  8   1650 Mainland North America New York                New …               477
##  9   1650 Mainland North America Maryland, port unspeci… Othe…                 5
## 10   1650 Mainland North America Virginia, port unspeci… Virg…               125
## # ℹ 262 more rows
# One panel per state; ports within a state stack into the same decade bar,
# and each panel gets its own y scale
us_imports_by_decade_region_port %>%
  ggplot(aes(x = factor(decade), y = total_disembarked)) +
  geom_col(fill = "darkgreen") +
  facet_wrap(~ state, scales = "free_y") +
  labs(
    title = "Slave Imports to the US by Decade and State",
    x = "Decade",
    y = "Number of Slaves Disembarked"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    strip.text = element_text(size = 10)
  )

Question 5: Countries participating in export from Africa, by decade. From the Trans-Atlantic dataset, group by decade and voyage_ship__imputed_nationality__name (as “country”), then both count unique voyages and sum slaves_embarked. Display each result in a table.

# Number of distinct voyages per decade and ship nationality ("country"),
# largest first within each decade.
# NOTE(review): the printed table contains a country value "0"; presumably a
# code for an unrecorded nationality - confirm against the data dictionary.
voyages_by_country_decade <- trans %>%
  group_by(decade, country = voyage_ship__imputed_nationality__name) %>%
  summarise(num_voyages = n_distinct(voyage_id), .groups = "drop") %>%
  arrange(decade, desc(num_voyages))

print(voyages_by_country_decade)
## # A tibble: 203 × 3
##    decade country           num_voyages
##     <dbl> <chr>                   <int>
##  1   1510 0                           9
##  2   1510 Spain / Uruguay             8
##  3   1520 Spain / Uruguay             3
##  4   1520 0                           2
##  5   1530 0                           8
##  6   1530 Portugal / Brazil           2
##  7   1530 Spain / Uruguay             1
##  8   1540 0                          23
##  9   1540 Portugal / Brazil           1
## 10   1550 0                          27
## # ℹ 193 more rows
# Captives embarked per decade and ship nationality, largest first within
# each decade
slaves_by_country_decade <- trans %>%
  group_by(decade, country = voyage_ship__imputed_nationality__name) %>%
  summarise(
    total_embarked = sum(slaves_embarked, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(decade, desc(total_embarked))

print(slaves_by_country_decade)
## # A tibble: 203 × 3
##    decade country           total_embarked
##     <dbl> <chr>                      <dbl>
##  1   1510 0                            223
##  2   1510 Spain / Uruguay              144
##  3   1520 Spain / Uruguay             1043
##  4   1520 0                            373
##  5   1530 0                           1418
##  6   1530 Portugal / Brazil            560
##  7   1530 Spain / Uruguay              224
##  8   1540 0                           7750
##  9   1540 Portugal / Brazil            160
## 10   1550 0                           9390
## # ℹ 193 more rows
 # Decade totals of captives disembarked on US-bound voyages
 us_imports_by_decade <- combined %>%
   filter(is_us) %>%
   group_by(decade) %>%
   summarise(
     total_disembarked = sum(slaves_disembarked, na.rm = TRUE),
     .groups = "drop"
   )

 # Bar chart of the decade totals; decade is treated as a discrete axis
 us_imports_by_decade %>%
   ggplot(aes(x = factor(decade), y = total_disembarked)) +
   geom_col(fill = "steelblue") +
   labs(
     title = "Slave Imports to the US by Decade",
     x = "Decade",
     y = "Number of Slaves Disembarked"
   ) +
   theme_minimal() +
   theme(axis.text.x = element_text(angle = 45, hjust = 1))

 # Approximate the US state for each disembarkation port.
 #
 # Arguments:
 #   port    character vector of port names (may contain NA)
 #   region  character vector of broad regions, same length as port (or length 1)
 # Returns:
 #   character vector of state names; "Unknown US State" when the port is
 #   missing but the region is "Mainland North America"; "Other/Unknown" for
 #   everything else.
 #
 # Fix: ports recorded only by state name (e.g. "Maryland, port unspecified")
 # and Hampton previously fell through to "Other/Unknown" because only the
 # Virginia, New York and Rhode Island rules matched on the state name itself.
 # Every rule now also matches its own state name.
 map_port_to_state <- function(port, region) {
   # Ordered rules, matched case-insensitively: the first matching state wins.
   state_patterns <- c(
     "Louisiana"      = "New Orleans|Louisiana",
     "South Carolina" = "Charleston|Charles Town|South Carolina",
     "Georgia"        = "Savannah|Georgia",
     "Maryland"       = "Baltimore|Annapolis|Maryland",
     # \\b keeps "Hampton" from matching inside e.g. "Southampton"
     "Virginia"       = "Norfolk|\\bHampton\\b|Virginia",
     "Massachusetts"  = "Boston|Massachusetts",
     "New York"       = "New York",
     "Pennsylvania"   = "Philadelphia|Pennsylvania",
     "Rhode Island"   = "Rhode Island|Newport|Providence"
   )

   # Recycle a length-1 argument against the other; empty port -> empty result
   n <- if (length(port) == 0L) 0L else max(length(port), length(region))
   port <- rep_len(as.character(port), n)
   region <- rep_len(as.character(region), n)

   state <- rep("Other/Unknown", n)

   # %in% never yields NA, so a missing region simply does not match
   state[is.na(port) & region %in% "Mainland North America"] <- "Unknown US State"

   # Apply rules last-to-first so earlier (higher-priority) rules overwrite
   # later ones, giving first-match-wins semantics. grepl() is FALSE for NA.
   for (state_name in rev(names(state_patterns))) {
     hit <- grepl(state_patterns[[state_name]], port, ignore.case = TRUE)
     state[hit] <- state_name
   }

   state
 }
 
 # Totals per decade and approximate state (ports collapsed into their state)
 us_imports_by_decade_state <- combined %>%
   filter(is_us) %>%
   group_by(decade, state = map_port_to_state(dis_port, dis_broad)) %>%
   summarise(
     total_disembarked = sum(slaves_disembarked, na.rm = TRUE),
     .groups = "drop"
   ) %>%
   arrange(decade, state)

 # One panel per state, each with its own y scale
 us_imports_by_decade_state %>%
   ggplot(aes(x = factor(decade), y = total_disembarked)) +
   geom_col(fill = "darkgreen") +
   facet_wrap(~ state, scales = "free_y") +
   labs(
     title = "Slave Imports to the US by Decade and State",
     x = "Decade",
     y = "Number of Slaves Disembarked"
   ) +
   theme_minimal() +
   theme(
     axis.text.x = element_text(angle = 45, hjust = 1),
     strip.text = element_text(size = 10)
   )

 # Distinct voyages per decade and ship nationality ("country"), largest
 # first within each decade
 voyages_by_country_decade <- trans %>%
   group_by(decade, country = voyage_ship__imputed_nationality__name) %>%
   summarise(num_voyages = n_distinct(voyage_id), .groups = "drop") %>%
   arrange(decade, desc(num_voyages))

Summary of Insights

From the data, several key takeaways stand out. The net number of enslaved people retained in the U.S. (about 388,000) was found by subtracting re-exports on intra-American voyages (about 7,700) from gross imports (about 396,000). That net figure is roughly 5% of the people embarked in Africa on the trans-Atlantic voyages analysed, which shows that while the U.S. was an important destination, it wasn’t the main one compared to regions like the Caribbean. Most arrivals were concentrated in the southern states—particularly South Carolina, Georgia, and Louisiana—with Charleston and New Orleans serving as major ports. Northern ports played a much smaller role. On the broader scale, European nations such as Britain, Portugal, and France led the trans-Atlantic trade, with differences in voyage numbers and embarkations by decade reflecting each empire’s shifting colonial and economic goals.