Data Loading and Cleaning

# Column types
# Shared readr column specification for both voyage files. `cols_only()`
# reads just the columns named here and skips every other column in the CSV.
# Types are given as readr's compact codes:
#   "i" = integer, "d" = double, "c" = character.
col_types_spec <- cols_only(
  id = "i",
  voyage_id = "i",
  voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year = "d",
  voyage_slaves_numbers__imp_total_num_slaves_disembarked = "d",
  voyage_slaves_numbers__imp_total_num_slaves_embarked = "d",
  voyage_dates__length_middle_passage_days = "d",
  voyage_dates__imp_length_home_to_disembark = "d",
  voyage_crew__crew_first_landing = "d",
  voyage_crew__crew_voyage_outset = "d",
  voyage_ship__tonnage_mod = "d",
  voyage_slaves_numbers__imp_jamaican_cash_price = "d",
  voyage_slaves_numbers__imp_mortality_ratio = "d",
  voyage_slaves_numbers__percentage_women_among_embarked_slaves = "d",
  voyage_outcome__vessel_captured_outcome__name = "c",
  voyage_ship__imputed_nationality__name = "c",
  voyage_itinerary__imp_region_voyage_begin__name = "c",
  voyage_ship__rig_of_vessel__name = "c",
  voyage_itinerary__place_voyage_ended__name = "c",
  voyage_dates__slave_purchase_began_sparsedate__month = "d",
  voyage_slaves_numbers__percentage_men = "d",
  voyage_dates__voyage_completed_sparsedate__month = "d",
  voyage_itinerary__region_of_return__name = "c",
  voyage_slaves_numbers__percentage_boy = "d",
  voyage_itinerary__imp_principal_region_slave_dis__name = "c",
  voyage_itinerary__imp_principal_region_of_slave_purchase__name = "c",
  voyage_dates__date_departed_africa_sparsedate__month = "d",
  voyage_dates__voyage_began_sparsedate__month = "d",
  voyage_itinerary__imp_port_voyage_begin__name = "c",
  voyage_dates__first_dis_of_slaves_sparsedate__month = "d",
  voyage_itinerary__imp_broad_region_slave_dis__name = "c",
  voyage_slaves_numbers__percentage_girl = "d",
  voyage_outcome__particular_outcome__name = "c",
  voyage_itinerary__imp_principal_port_slave_dis__name = "c",
  voyage_slaves_numbers__percentage_child = "d",
  voyage_slaves_numbers__percentage_women = "d",
  voyage_dates__departure_last_place_of_landing_sparsedate__month = "d",
  voyage_outcome__outcome_owner__name = "c",
  voyage_outcome__outcome_slaves__name = "c",
  voyage_itinerary__imp_principal_place_of_slave_purchase__name = "c",
  voyage_outcome__resistance__name = "c",
  voyage_slaves_numbers__percentage_male = "d",
  voyage_slaves_numbers__percentage_female = "d",
  voyage_itinerary__imp_broad_region_voyage_begin__name = "c",
  voyage_itinerary__imp_broad_region_of_slave_purchase__name = "c",
  voyage_sources = "c",
  enslavers = "c"
)

# Load datasets
# Each CSV is fetched from its GitHub raw URL and parsed with the shared
# column specification `col_types_spec`.
trans_url <- "https://raw.githubusercontent.com/imowerman-prog/data-3210/refs/heads/main/Data/trans-atlantic.csv"
intra_url <- "https://raw.githubusercontent.com/imowerman-prog/data-3210/refs/heads/main/Data/intra-american.csv"

trans <- read_csv(file = trans_url, col_types = col_types_spec)
intra <- read_csv(file = intra_url, col_types = col_types_spec)

# Diagnostic: list the columns that were read into `trans`
writeLines("Columns in trans dataset:")
## Columns in trans dataset:
print(names(trans))
##  [1] "id"                                                             
##  [2] "voyage_id"                                                      
##  [3] "voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year"      
##  [4] "voyage_slaves_numbers__imp_total_num_slaves_disembarked"        
##  [5] "voyage_slaves_numbers__imp_total_num_slaves_embarked"           
##  [6] "voyage_dates__length_middle_passage_days"                       
##  [7] "voyage_dates__imp_length_home_to_disembark"                     
##  [8] "voyage_crew__crew_first_landing"                                
##  [9] "voyage_crew__crew_voyage_outset"                                
## [10] "voyage_ship__tonnage_mod"                                       
## [11] "voyage_slaves_numbers__imp_jamaican_cash_price"                 
## [12] "voyage_slaves_numbers__imp_mortality_ratio"                     
## [13] "voyage_slaves_numbers__percentage_women_among_embarked_slaves"  
## [14] "voyage_outcome__vessel_captured_outcome__name"                  
## [15] "voyage_ship__imputed_nationality__name"                         
## [16] "voyage_itinerary__imp_region_voyage_begin__name"                
## [17] "voyage_ship__rig_of_vessel__name"                               
## [18] "voyage_itinerary__place_voyage_ended__name"                     
## [19] "voyage_dates__slave_purchase_began_sparsedate__month"           
## [20] "voyage_slaves_numbers__percentage_men"                          
## [21] "voyage_dates__voyage_completed_sparsedate__month"               
## [22] "voyage_itinerary__region_of_return__name"                       
## [23] "voyage_slaves_numbers__percentage_boy"                          
## [24] "voyage_itinerary__imp_principal_region_slave_dis__name"         
## [25] "voyage_itinerary__imp_principal_region_of_slave_purchase__name" 
## [26] "voyage_dates__date_departed_africa_sparsedate__month"           
## [27] "voyage_dates__voyage_began_sparsedate__month"                   
## [28] "voyage_itinerary__imp_port_voyage_begin__name"                  
## [29] "voyage_dates__first_dis_of_slaves_sparsedate__month"            
## [30] "voyage_itinerary__imp_broad_region_slave_dis__name"             
## [31] "voyage_slaves_numbers__percentage_girl"                         
## [32] "voyage_outcome__particular_outcome__name"                       
## [33] "voyage_itinerary__imp_principal_port_slave_dis__name"           
## [34] "voyage_slaves_numbers__percentage_child"                        
## [35] "voyage_slaves_numbers__percentage_women"                        
## [36] "voyage_dates__departure_last_place_of_landing_sparsedate__month"
## [37] "voyage_outcome__outcome_owner__name"                            
## [38] "voyage_outcome__outcome_slaves__name"                           
## [39] "voyage_itinerary__imp_principal_place_of_slave_purchase__name"  
## [40] "voyage_outcome__resistance__name"                               
## [41] "voyage_slaves_numbers__percentage_male"                         
## [42] "voyage_slaves_numbers__percentage_female"                       
## [43] "voyage_itinerary__imp_broad_region_voyage_begin__name"          
## [44] "voyage_itinerary__imp_broad_region_of_slave_purchase__name"     
## [45] "voyage_sources"                                                 
## [46] "enslavers"
# Diagnostic: list the columns that were read into `intra`
# (a blank line first, to separate it from the previous listing).
writeLines(c("", "Columns in intra dataset:"))
## 
## Columns in intra dataset:
print(names(intra))
##  [1] "id"                                                             
##  [2] "voyage_id"                                                      
##  [3] "voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year"      
##  [4] "voyage_slaves_numbers__imp_total_num_slaves_disembarked"        
##  [5] "voyage_slaves_numbers__imp_total_num_slaves_embarked"           
##  [6] "voyage_dates__length_middle_passage_days"                       
##  [7] "voyage_dates__imp_length_home_to_disembark"                     
##  [8] "voyage_crew__crew_first_landing"                                
##  [9] "voyage_crew__crew_voyage_outset"                                
## [10] "voyage_ship__tonnage_mod"                                       
## [11] "voyage_slaves_numbers__imp_jamaican_cash_price"                 
## [12] "voyage_slaves_numbers__imp_mortality_ratio"                     
## [13] "voyage_slaves_numbers__percentage_women_among_embarked_slaves"  
## [14] "voyage_outcome__vessel_captured_outcome__name"                  
## [15] "voyage_ship__imputed_nationality__name"                         
## [16] "voyage_itinerary__imp_region_voyage_begin__name"                
## [17] "voyage_ship__rig_of_vessel__name"                               
## [18] "voyage_itinerary__place_voyage_ended__name"                     
## [19] "voyage_dates__slave_purchase_began_sparsedate__month"           
## [20] "voyage_slaves_numbers__percentage_men"                          
## [21] "voyage_dates__voyage_completed_sparsedate__month"               
## [22] "voyage_itinerary__region_of_return__name"                       
## [23] "voyage_slaves_numbers__percentage_boy"                          
## [24] "voyage_itinerary__imp_principal_region_slave_dis__name"         
## [25] "voyage_itinerary__imp_principal_region_of_slave_purchase__name" 
## [26] "voyage_dates__date_departed_africa_sparsedate__month"           
## [27] "voyage_dates__voyage_began_sparsedate__month"                   
## [28] "voyage_itinerary__imp_port_voyage_begin__name"                  
## [29] "voyage_dates__first_dis_of_slaves_sparsedate__month"            
## [30] "voyage_itinerary__imp_broad_region_slave_dis__name"             
## [31] "voyage_slaves_numbers__percentage_girl"                         
## [32] "voyage_outcome__particular_outcome__name"                       
## [33] "voyage_itinerary__imp_principal_port_slave_dis__name"           
## [34] "voyage_slaves_numbers__percentage_child"                        
## [35] "voyage_slaves_numbers__percentage_women"                        
## [36] "voyage_dates__departure_last_place_of_landing_sparsedate__month"
## [37] "voyage_outcome__outcome_owner__name"                            
## [38] "voyage_outcome__outcome_slaves__name"                           
## [39] "voyage_itinerary__imp_principal_place_of_slave_purchase__name"  
## [40] "voyage_outcome__resistance__name"                               
## [41] "voyage_slaves_numbers__percentage_male"                         
## [42] "voyage_slaves_numbers__percentage_female"                       
## [43] "voyage_itinerary__imp_broad_region_voyage_begin__name"          
## [44] "voyage_itinerary__imp_broad_region_of_slave_purchase__name"     
## [45] "voyage_sources"                                                 
## [46] "enslavers"
# Define successful outcomes
# Values of `particular_outcome` accepted by the cleaning step below; voyages
# with any other outcome (or a missing one) are dropped.
successful_outcomes <- c(
  "Sold slaves in Americas - subsequent fate unknown",
  "Voyage completed as intended",
  "Captured by pirates or privateers - after disembarkation",
  "Condemned - Americas after disembarkation",
  "Detained and condemned in the United States after slaves disembarked",
  "Condemned in the Americas by British after slaves disembarked",
  "Captured by pirates - slaves sold in Americas from another ship",
  "Shipwrecked or destroyed, after disembarkation",
  "Privateer captured slaves at sea and delivered for sale in America",
  "Prisoners of war stole slaves during escape and carried to port of sale",
  "Captives seized from vessel by Spanish officials and sold",
  "Captured by Dutch - after disembarkation",
  "Shipwrecked, slaves salvaged",
  "Captured by slaves, recaptured and landed slaves in the Americas"
)

# Clean one raw voyage table.
#
# The trans-Atlantic and intra-American files share the same columns and need
# exactly the same treatment, so the pipeline lives in one place:
#   1. rename the long source column names to short working names;
#   2. derive `decade`, `estimated_deaths`, the two US flags and `source_type`;
#   3. keep voyages with a positive number disembarked and an accepted outcome.
#
# Args:
#   voyages:      tibble read with `col_types_spec`.
#   source_label: string stored in the `source_type` column of every row.
#   outcomes:     character vector of accepted `particular_outcome` values.
#
# Returns:
#   The cleaned tibble: original columns (ten of them renamed) followed by
#   `decade`, `estimated_deaths`, `is_us`, `embark_is_us`, `source_type`.
clean_voyages <- function(voyages, source_label,
                          outcomes = successful_outcomes) {
  voyages %>%
    rename(
      year = voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year,
      slaves_embarked = voyage_slaves_numbers__imp_total_num_slaves_embarked,
      slaves_disembarked =
        voyage_slaves_numbers__imp_total_num_slaves_disembarked,
      particular_outcome = voyage_outcome__particular_outcome__name,
      dis_broad = voyage_itinerary__imp_broad_region_slave_dis__name,
      dis_port = voyage_itinerary__imp_principal_port_slave_dis__name,
      dis_region = voyage_itinerary__imp_principal_region_slave_dis__name,
      embark_broad =
        voyage_itinerary__imp_broad_region_of_slave_purchase__name,
      embark_port =
        voyage_itinerary__imp_principal_place_of_slave_purchase__name,
      country = voyage_ship__imputed_nationality__name
    ) %>%
    mutate(
      year = as.integer(year),
      slaves_embarked = as.numeric(slaves_embarked),
      slaves_disembarked = as.numeric(slaves_disembarked),
      # Start year of the decade, e.g. 1776 -> 1770.
      decade = floor(year / 10) * 10,
      # Floored at zero so a voyage that lands more people than it embarked
      # does not produce a negative death count.
      estimated_deaths = pmax(0, slaves_embarked - slaves_disembarked),
      # Both flags are NA (not FALSE) when the broad region is missing and the
      # port does not mention New Orleans: grepl() returns FALSE for NA input,
      # and NA | FALSE is NA.
      is_us = dis_broad == "Mainland North America" |
        grepl("New Orleans", dis_port, ignore.case = TRUE),
      embark_is_us = embark_broad == "Mainland North America" |
        grepl("New Orleans", embark_port, ignore.case = TRUE),
      source_type = source_label
    ) %>%
    filter(
      # filter() drops rows where the condition is NA, so this also removes
      # voyages with a missing disembarked count.
      slaves_disembarked > 0,
      particular_outcome %in% outcomes
    )
}

# Process trans-atlantic data
trans <- clean_voyages(trans, "Trans-Atlantic")

# Process intra-american data
intra <- clean_voyages(intra, "Intra-American")

# Combine datasets
# Stack the two cleaned tables row-wise into a single table.
combined <- bind_rows(list(trans, intra))

# Diagnostic: list the columns of the stacked table
writeLines("Columns in combined dataset:")
## Columns in combined dataset:
print(names(combined))
##  [1] "id"                                                             
##  [2] "voyage_id"                                                      
##  [3] "year"                                                           
##  [4] "slaves_disembarked"                                             
##  [5] "slaves_embarked"                                                
##  [6] "voyage_dates__length_middle_passage_days"                       
##  [7] "voyage_dates__imp_length_home_to_disembark"                     
##  [8] "voyage_crew__crew_first_landing"                                
##  [9] "voyage_crew__crew_voyage_outset"                                
## [10] "voyage_ship__tonnage_mod"                                       
## [11] "voyage_slaves_numbers__imp_jamaican_cash_price"                 
## [12] "voyage_slaves_numbers__imp_mortality_ratio"                     
## [13] "voyage_slaves_numbers__percentage_women_among_embarked_slaves"  
## [14] "voyage_outcome__vessel_captured_outcome__name"                  
## [15] "country"                                                        
## [16] "voyage_itinerary__imp_region_voyage_begin__name"                
## [17] "voyage_ship__rig_of_vessel__name"                               
## [18] "voyage_itinerary__place_voyage_ended__name"                     
## [19] "voyage_dates__slave_purchase_began_sparsedate__month"           
## [20] "voyage_slaves_numbers__percentage_men"                          
## [21] "voyage_dates__voyage_completed_sparsedate__month"               
## [22] "voyage_itinerary__region_of_return__name"                       
## [23] "voyage_slaves_numbers__percentage_boy"                          
## [24] "dis_region"                                                     
## [25] "voyage_itinerary__imp_principal_region_of_slave_purchase__name" 
## [26] "voyage_dates__date_departed_africa_sparsedate__month"           
## [27] "voyage_dates__voyage_began_sparsedate__month"                   
## [28] "voyage_itinerary__imp_port_voyage_begin__name"                  
## [29] "voyage_dates__first_dis_of_slaves_sparsedate__month"            
## [30] "dis_broad"                                                      
## [31] "voyage_slaves_numbers__percentage_girl"                         
## [32] "particular_outcome"                                             
## [33] "dis_port"                                                       
## [34] "voyage_slaves_numbers__percentage_child"                        
## [35] "voyage_slaves_numbers__percentage_women"                        
## [36] "voyage_dates__departure_last_place_of_landing_sparsedate__month"
## [37] "voyage_outcome__outcome_owner__name"                            
## [38] "voyage_outcome__outcome_slaves__name"                           
## [39] "embark_port"                                                    
## [40] "voyage_outcome__resistance__name"                               
## [41] "voyage_slaves_numbers__percentage_male"                         
## [42] "voyage_slaves_numbers__percentage_female"                       
## [43] "voyage_itinerary__imp_broad_region_voyage_begin__name"          
## [44] "embark_broad"                                                   
## [45] "voyage_sources"                                                 
## [46] "enslavers"                                                      
## [47] "decade"                                                         
## [48] "estimated_deaths"                                               
## [49] "is_us"                                                          
## [50] "embark_is_us"                                                   
## [51] "source_type"

Analysis and Questions

1. Net Slaves Imported to US

# Gross arrivals: total disembarked on voyages flagged as landing in the US.
# Rows whose `is_us` flag is NA are not counted (filter() drops them).
# NOTE(review): `combined` includes intra-American voyages; one that both
# embarks and disembarks in the US (embark_is_us and is_us both TRUE) is
# counted here as an import - confirm such rows should be included.
gross_us <- combined %>%
  filter(is_us) %>%
  pull(slaves_disembarked) %>%
  sum(na.rm = TRUE)

# Re-exports: total embarked on intra-American voyages that left the US and
# landed somewhere else. A row is kept only when all three conditions are
# TRUE, so voyages with an NA flag on either end are excluded.
re_exports <- combined %>%
  filter(source_type == "Intra-American" & embark_is_us & !is_us) %>%
  pull(slaves_embarked) %>%
  sum(na.rm = TRUE)

# Net = arrivals minus departures.
net_us <- gross_us - re_exports

cat("Gross slaves imported to the US:", gross_us, "\n")
## Gross slaves imported to the US: 395919
cat("Slaves re-exported from the US:", re_exports, "\n")
## Slaves re-exported from the US: 7652
cat("Net slaves retained in the US:", net_us, "\n")
## Net slaves retained in the US: 388267

2. Proportion of Slaves Taken from Africa

# Denominator: total embarked across the rows of `trans`.
# NOTE(review): `trans` at this point holds only the voyages that passed the
# cleaning filter (positive disembarked count, accepted outcome), so this is
# the total for those voyages rather than for every recorded voyage - confirm
# this is the intended base for the label printed below.
trans_total_embarked <- sum(trans$slaves_embarked, na.rm = TRUE)

# Share of that total represented by the net US figure.
proportion <- net_us / trans_total_embarked

cat("Proportion of all slaves taken from Africa (net US):", proportion, "\n")
## Proportion of all slaves taken from Africa (net US): 0.05107614

3. U.S. Slave Imports by Decade

# Total disembarked in the US per decade of arrival.
# Voyages without a decade (missing year) are left out.
us_by_decade <- combined %>%
  filter(is_us, !is.na(decade)) %>%
  group_by(decade) %>%
  summarise(
    total_disembarked = sum(slaves_disembarked, na.rm = TRUE),
    .groups = "drop"
  )
us_by_decade
## # A tibble: 26 × 2
##    decade total_disembarked
##     <dbl>             <dbl>
##  1   1610                29
##  2   1620                 3
##  3   1630                73
##  4   1640                81
##  5   1650               607
##  6   1660               459
##  7   1670              1330
##  8   1680              2317
##  9   1690              3936
## 10   1700             11284
## # ℹ 16 more rows

# One axis tick per decade, from the first to the last decade present.
decade_breaks <- seq(
  from = min(us_by_decade$decade, na.rm = TRUE),
  to = max(us_by_decade$decade, na.rm = TRUE),
  by = 10
)

ggplot(us_by_decade, aes(x = decade, y = total_disembarked)) +
  geom_col(fill = "steelblue") +
  scale_x_continuous(breaks = decade_breaks) +
  labs(
    title = "US Slave Imports by Decade",
    x = "Decade",
    y = "Total Slaves Disembarked"
  ) +
  theme_minimal() +
  # Applied after theme_minimal() so the rotated labels are not reset.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

4. U.S. Slave Imports by Decade, Region, and State

# Label each voyage with a US state based on the name of its principal
# disembarkation port. Patterns are tried top to bottom and the first match
# wins; any port that matches none of them - including a missing port and
# every non-US port - is labelled "Other", so `state` is never NA.
# NOTE(review): in the result printed below, rows whose dis_region is
# "Virginia" or "Maryland" carry state "Other", so these port patterns do not
# cover every US landing place - confirm whether dis_region should be used as
# a fallback.
combined <- combined %>%
  mutate(
    state = case_when(
      grepl("New Orleans", dis_port, ignore.case = TRUE) ~ "Louisiana",
      grepl("Charleston", dis_port, ignore.case = TRUE) ~ "South Carolina",
      grepl("Savannah", dis_port, ignore.case = TRUE) ~ "Georgia",
      grepl("Norfolk|Richmond", dis_port, ignore.case = TRUE) ~ "Virginia",
      grepl("Baltimore", dis_port, ignore.case = TRUE) ~ "Maryland",
      grepl("New York", dis_port, ignore.case = TRUE) ~ "New York",
      grepl("Philadelphia", dis_port, ignore.case = TRUE) ~ "Pennsylvania",
      grepl("Mobile", dis_port, ignore.case = TRUE) ~ "Alabama",
      grepl("Boston", dis_port, ignore.case = TRUE) ~ "Massachusetts",
      TRUE ~ "Other"
    )
  )

# Grouping keys for the summary. `dis_region` is included when the column
# exists; otherwise fall back to decade and state only, with a warning.
state_group_cols <- c("decade", "dis_region", "state")
if (!("dis_region" %in% names(combined))) {
  cat("Warning: dis_region not found, grouping by decade and state only\n")
  state_group_cols <- setdiff(state_group_cols, "dis_region")
}

# Total disembarked in the US per combination of the grouping keys.
# `.groups = "drop"` returns an ungrouped tibble and avoids the message that
# summarise() prints when the grouping of a multi-key result is left implicit.
us_by_region_state_decade <- combined %>%
  filter(is_us) %>%
  group_by(across(all_of(state_group_cols))) %>%
  summarise(
    total_disembarked = sum(slaves_disembarked, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Drop combinations where any grouping key is missing.
  filter(if_all(all_of(state_group_cols), ~ !is.na(.x)))
us_by_region_state_decade
## # A tibble: 151 × 4
##    decade dis_region    state         total_disembarked
##     <dbl> <chr>         <chr>                     <dbl>
##  1   1610 Virginia      Other                        29
##  2   1620 Virginia      Other                         3
##  3   1630 Massachusetts Massachusetts                 7
##  4   1630 New York      New York                     53
##  5   1630 Virginia      Other                        13
##  6   1640 New York      New York                     69
##  7   1640 Virginia      Other                        12
##  8   1650 Maryland      Other                         5
##  9   1650 New York      New York                    477
## 10   1650 Virginia      Other                       125
## # ℹ 141 more rows

# One axis tick every 20 years across the decades present.
state_decade_breaks <- seq(
  from = min(us_by_region_state_decade$decade, na.rm = TRUE),
  to = max(us_by_region_state_decade$decade, na.rm = TRUE),
  by = 20
)

# One panel per state, each with its own y scale. Rows that share a decade
# and state (different regions) are stacked into a single bar.
ggplot(us_by_region_state_decade, aes(x = decade, y = total_disembarked)) +
  geom_col(fill = "darkgreen") +
  facet_wrap(~ state, scales = "free_y") +
  scale_x_continuous(breaks = state_decade_breaks) +
  labs(
    title = "US Slave Imports by Decade and State",
    x = "Decade",
    y = "Total Slaves Disembarked"
  ) +
  theme_minimal() +
  # Applied after theme_minimal() so the rotated labels are not reset.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

5. Countries Exporting from Africa by Decade

# Voyages and captives embarked per decade and carrier country, largest
# carrier first within each decade.
exporting_summary <- trans %>%
  filter(!is.na(country)) %>%
  # The source data contains the literal value "0" in the country column, so
  # the NA filter above does not catch it and it would be reported as if it
  # were a country. Presumably it is a placeholder for an unrecorded
  # nationality - TODO confirm against the data dictionary. The rows are
  # relabelled rather than dropped so the per-decade totals are unchanged.
  mutate(country = if_else(country == "0", "Unknown", country)) %>%
  group_by(decade, country) %>%
  summarise(
    total_voyages = n_distinct(voyage_id),
    total_embarked = sum(slaves_embarked, na.rm = TRUE),
    # Return an ungrouped tibble (and avoid the implicit-grouping message).
    .groups = "drop"
  ) %>%
  arrange(decade, desc(total_embarked))
exporting_summary
## # A tibble: 203 × 4
##    decade country           total_voyages total_embarked
##     <dbl> <chr>                     <int>          <dbl>
##  1   1510 Unknown                       9            223
##  2   1510 Spain / Uruguay               8            144
##  3   1520 Spain / Uruguay               3           1043
##  4   1520 Unknown                       2            373
##  5   1530 Unknown                       8           1418
##  6   1530 Portugal / Brazil             2            560
##  7   1530 Spain / Uruguay               1            224
##  8   1540 Unknown                      23           7750
##  9   1540 Portugal / Brazil             1            160
## 10   1550 Unknown                      27           9390
## # ℹ 193 more rows

Summary

This analysis of the trans-Atlantic and intra-American slave trade datasets reveals the scope of the U.S.’s role in this dark chapter of history. The U.S. retained a net of 388,267 slaves: 395,919 were disembarked in “Mainland North America” or at New Orleans, and 7,652 were re-exported to other destinations. This net figure equals 5.11% of the roughly 7.6 million slaves embarked in Africa on the trans-Atlantic voyages kept in the cleaned dataset, showing the U.S. was a significant but smaller player compared to places like the Caribbean. U.S. imports grew from just 3 slaves in the 1620s to 11,284 in the 1700s (figures are totals per decade), with data indicating higher amounts in later decades. Early imports were concentrated in regions like Virginia (29 slaves in the 1610s) and New York (477 in the 1650s), based on 151 decade–region–state combinations. African exports in the early 1500s were led by Spain/Uruguay (1,043 slaves in the 1520s) and by voyages with no identified national flag (223 slaves in the 1510s), based on 203 country-decade records. These findings highlight the growth of U.S. imports over time and the diverse origins of the trans-Atlantic trade.