Data Loading and Cleaning
# Column types
# Explicit readr specification shared by both voyage extracts. cols_only()
# keeps just the columns named below; identifiers are read as integers,
# counts/ratios/months as doubles, and names, sources and enslavers as text.
col_types_spec <- cols_only(
  id = col_integer(),
  voyage_id = col_integer(),
  voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year = col_double(),
  voyage_slaves_numbers__imp_total_num_slaves_disembarked = col_double(),
  voyage_slaves_numbers__imp_total_num_slaves_embarked = col_double(),
  voyage_dates__length_middle_passage_days = col_double(),
  voyage_dates__imp_length_home_to_disembark = col_double(),
  voyage_crew__crew_first_landing = col_double(),
  voyage_crew__crew_voyage_outset = col_double(),
  voyage_ship__tonnage_mod = col_double(),
  voyage_slaves_numbers__imp_jamaican_cash_price = col_double(),
  voyage_slaves_numbers__imp_mortality_ratio = col_double(),
  voyage_slaves_numbers__percentage_women_among_embarked_slaves = col_double(),
  voyage_outcome__vessel_captured_outcome__name = col_character(),
  voyage_ship__imputed_nationality__name = col_character(),
  voyage_itinerary__imp_region_voyage_begin__name = col_character(),
  voyage_ship__rig_of_vessel__name = col_character(),
  voyage_itinerary__place_voyage_ended__name = col_character(),
  voyage_dates__slave_purchase_began_sparsedate__month = col_double(),
  voyage_slaves_numbers__percentage_men = col_double(),
  voyage_dates__voyage_completed_sparsedate__month = col_double(),
  voyage_itinerary__region_of_return__name = col_character(),
  voyage_slaves_numbers__percentage_boy = col_double(),
  voyage_itinerary__imp_principal_region_slave_dis__name = col_character(),
  voyage_itinerary__imp_principal_region_of_slave_purchase__name = col_character(),
  voyage_dates__date_departed_africa_sparsedate__month = col_double(),
  voyage_dates__voyage_began_sparsedate__month = col_double(),
  voyage_itinerary__imp_port_voyage_begin__name = col_character(),
  voyage_dates__first_dis_of_slaves_sparsedate__month = col_double(),
  voyage_itinerary__imp_broad_region_slave_dis__name = col_character(),
  voyage_slaves_numbers__percentage_girl = col_double(),
  voyage_outcome__particular_outcome__name = col_character(),
  voyage_itinerary__imp_principal_port_slave_dis__name = col_character(),
  voyage_slaves_numbers__percentage_child = col_double(),
  voyage_slaves_numbers__percentage_women = col_double(),
  voyage_dates__departure_last_place_of_landing_sparsedate__month = col_double(),
  voyage_outcome__outcome_owner__name = col_character(),
  voyage_outcome__outcome_slaves__name = col_character(),
  voyage_itinerary__imp_principal_place_of_slave_purchase__name = col_character(),
  voyage_outcome__resistance__name = col_character(),
  voyage_slaves_numbers__percentage_male = col_double(),
  voyage_slaves_numbers__percentage_female = col_double(),
  voyage_itinerary__imp_broad_region_voyage_begin__name = col_character(),
  voyage_itinerary__imp_broad_region_of_slave_purchase__name = col_character(),
  voyage_sources = col_character(),
  enslavers = col_character()
)
# Load datasets
# Both CSV extracts are fetched over HTTPS and parsed with the same column
# specification, so the two tibbles start out with identical columns.
trans_url <- "https://raw.githubusercontent.com/imowerman-prog/data-3210/refs/heads/main/Data/trans-atlantic.csv"
intra_url <- "https://raw.githubusercontent.com/imowerman-prog/data-3210/refs/heads/main/Data/intra-american.csv"

trans <- read_csv(file = trans_url, col_types = col_types_spec)
intra <- read_csv(file = intra_url, col_types = col_types_spec)
# Diagnostic: list the columns actually read for the trans-Atlantic extract
cat("Columns in trans dataset:\n")
## Columns in trans dataset:
print(names(trans))
## [1] "id"
## [2] "voyage_id"
## [3] "voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year"
## [4] "voyage_slaves_numbers__imp_total_num_slaves_disembarked"
## [5] "voyage_slaves_numbers__imp_total_num_slaves_embarked"
## [6] "voyage_dates__length_middle_passage_days"
## [7] "voyage_dates__imp_length_home_to_disembark"
## [8] "voyage_crew__crew_first_landing"
## [9] "voyage_crew__crew_voyage_outset"
## [10] "voyage_ship__tonnage_mod"
## [11] "voyage_slaves_numbers__imp_jamaican_cash_price"
## [12] "voyage_slaves_numbers__imp_mortality_ratio"
## [13] "voyage_slaves_numbers__percentage_women_among_embarked_slaves"
## [14] "voyage_outcome__vessel_captured_outcome__name"
## [15] "voyage_ship__imputed_nationality__name"
## [16] "voyage_itinerary__imp_region_voyage_begin__name"
## [17] "voyage_ship__rig_of_vessel__name"
## [18] "voyage_itinerary__place_voyage_ended__name"
## [19] "voyage_dates__slave_purchase_began_sparsedate__month"
## [20] "voyage_slaves_numbers__percentage_men"
## [21] "voyage_dates__voyage_completed_sparsedate__month"
## [22] "voyage_itinerary__region_of_return__name"
## [23] "voyage_slaves_numbers__percentage_boy"
## [24] "voyage_itinerary__imp_principal_region_slave_dis__name"
## [25] "voyage_itinerary__imp_principal_region_of_slave_purchase__name"
## [26] "voyage_dates__date_departed_africa_sparsedate__month"
## [27] "voyage_dates__voyage_began_sparsedate__month"
## [28] "voyage_itinerary__imp_port_voyage_begin__name"
## [29] "voyage_dates__first_dis_of_slaves_sparsedate__month"
## [30] "voyage_itinerary__imp_broad_region_slave_dis__name"
## [31] "voyage_slaves_numbers__percentage_girl"
## [32] "voyage_outcome__particular_outcome__name"
## [33] "voyage_itinerary__imp_principal_port_slave_dis__name"
## [34] "voyage_slaves_numbers__percentage_child"
## [35] "voyage_slaves_numbers__percentage_women"
## [36] "voyage_dates__departure_last_place_of_landing_sparsedate__month"
## [37] "voyage_outcome__outcome_owner__name"
## [38] "voyage_outcome__outcome_slaves__name"
## [39] "voyage_itinerary__imp_principal_place_of_slave_purchase__name"
## [40] "voyage_outcome__resistance__name"
## [41] "voyage_slaves_numbers__percentage_male"
## [42] "voyage_slaves_numbers__percentage_female"
## [43] "voyage_itinerary__imp_broad_region_voyage_begin__name"
## [44] "voyage_itinerary__imp_broad_region_of_slave_purchase__name"
## [45] "voyage_sources"
## [46] "enslavers"
# Same diagnostic for the intra-American extract; the leading "\n" prints a
# blank separator line before the label.
cat("\nColumns in intra dataset:\n")
##
## Columns in intra dataset:
print(names(intra))
## [1] "id"
## [2] "voyage_id"
## [3] "voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year"
## [4] "voyage_slaves_numbers__imp_total_num_slaves_disembarked"
## [5] "voyage_slaves_numbers__imp_total_num_slaves_embarked"
## [6] "voyage_dates__length_middle_passage_days"
## [7] "voyage_dates__imp_length_home_to_disembark"
## [8] "voyage_crew__crew_first_landing"
## [9] "voyage_crew__crew_voyage_outset"
## [10] "voyage_ship__tonnage_mod"
## [11] "voyage_slaves_numbers__imp_jamaican_cash_price"
## [12] "voyage_slaves_numbers__imp_mortality_ratio"
## [13] "voyage_slaves_numbers__percentage_women_among_embarked_slaves"
## [14] "voyage_outcome__vessel_captured_outcome__name"
## [15] "voyage_ship__imputed_nationality__name"
## [16] "voyage_itinerary__imp_region_voyage_begin__name"
## [17] "voyage_ship__rig_of_vessel__name"
## [18] "voyage_itinerary__place_voyage_ended__name"
## [19] "voyage_dates__slave_purchase_began_sparsedate__month"
## [20] "voyage_slaves_numbers__percentage_men"
## [21] "voyage_dates__voyage_completed_sparsedate__month"
## [22] "voyage_itinerary__region_of_return__name"
## [23] "voyage_slaves_numbers__percentage_boy"
## [24] "voyage_itinerary__imp_principal_region_slave_dis__name"
## [25] "voyage_itinerary__imp_principal_region_of_slave_purchase__name"
## [26] "voyage_dates__date_departed_africa_sparsedate__month"
## [27] "voyage_dates__voyage_began_sparsedate__month"
## [28] "voyage_itinerary__imp_port_voyage_begin__name"
## [29] "voyage_dates__first_dis_of_slaves_sparsedate__month"
## [30] "voyage_itinerary__imp_broad_region_slave_dis__name"
## [31] "voyage_slaves_numbers__percentage_girl"
## [32] "voyage_outcome__particular_outcome__name"
## [33] "voyage_itinerary__imp_principal_port_slave_dis__name"
## [34] "voyage_slaves_numbers__percentage_child"
## [35] "voyage_slaves_numbers__percentage_women"
## [36] "voyage_dates__departure_last_place_of_landing_sparsedate__month"
## [37] "voyage_outcome__outcome_owner__name"
## [38] "voyage_outcome__outcome_slaves__name"
## [39] "voyage_itinerary__imp_principal_place_of_slave_purchase__name"
## [40] "voyage_outcome__resistance__name"
## [41] "voyage_slaves_numbers__percentage_male"
## [42] "voyage_slaves_numbers__percentage_female"
## [43] "voyage_itinerary__imp_broad_region_voyage_begin__name"
## [44] "voyage_itinerary__imp_broad_region_of_slave_purchase__name"
## [45] "voyage_sources"
## [46] "enslavers"
# Define successful outcomes
# Values of particular_outcome that the cleaning step keeps; voyages with any
# other outcome label are filtered out. Matching is exact, so the labels must
# stay spelled precisely as they appear in the data.
successful_outcomes <- c(
  "Sold slaves in Americas - subsequent fate unknown",
  "Voyage completed as intended",
  "Captured by pirates or privateers - after disembarkation",
  "Condemned - Americas after disembarkation",
  "Detained and condemned in the United States after slaves disembarked",
  "Condemned in the Americas by British after slaves disembarked",
  "Captured by pirates - slaves sold in Americas from another ship",
  "Shipwrecked or destroyed, after disembarkation",
  "Privateer captured slaves at sea and delivered for sale in America",
  "Prisoners of war stole slaves during escape and carried to port of sale",
  "Captives seized from vessel by Spanish officials and sold",
  "Captured by Dutch - after disembarkation",
  "Shipwrecked, slaves salvaged",
  "Captured by slaves, recaptured and landed slaves in the Americas"
)
# Process trans-atlantic and intra-american data
#
# Both extracts share one schema, so a single helper cleans them; only the
# source_type label differs between the two calls below.
#
# clean_voyages(): standardise one raw voyage extract.
#   voyages      - tibble as read with col_types_spec.
#   source_label - value stored in the new source_type column.
#   outcomes     - particular_outcome values to keep; defaults to
#                  successful_outcomes.
# Returns the extract with short column names, the derived columns decade,
# estimated_deaths, is_us, embark_is_us and source_type, restricted to rows
# with a positive disembarked count and an outcome listed in `outcomes`.
clean_voyages <- function(voyages, source_label,
                          outcomes = successful_outcomes) {
  voyages %>%
    rename(
      year = voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year,
      slaves_embarked = voyage_slaves_numbers__imp_total_num_slaves_embarked,
      slaves_disembarked = voyage_slaves_numbers__imp_total_num_slaves_disembarked,
      particular_outcome = voyage_outcome__particular_outcome__name,
      dis_broad = voyage_itinerary__imp_broad_region_slave_dis__name,
      dis_port = voyage_itinerary__imp_principal_port_slave_dis__name,
      dis_region = voyage_itinerary__imp_principal_region_slave_dis__name,
      embark_broad = voyage_itinerary__imp_broad_region_of_slave_purchase__name,
      embark_port = voyage_itinerary__imp_principal_place_of_slave_purchase__name,
      country = voyage_ship__imputed_nationality__name
    ) %>%
    mutate(
      year = as.integer(year),
      slaves_embarked = as.numeric(slaves_embarked),
      slaves_disembarked = as.numeric(slaves_disembarked),
      # First year of the decade, e.g. 1787 -> 1780.
      decade = floor(year / 10) * 10,
      # Clamp at zero so rows with more disembarked than embarked do not
      # produce negative deaths.
      estimated_deaths = pmax(0, slaves_embarked - slaves_disembarked),
      # TRUE when the broad region is "Mainland North America" or the port
      # name mentions New Orleans. The result is NA (not FALSE) when the
      # broad region is missing and the port does not match.
      is_us = dis_broad == "Mainland North America" |
        grepl("New Orleans", dis_port, ignore.case = TRUE),
      embark_is_us = embark_broad == "Mainland North America" |
        grepl("New Orleans", embark_port, ignore.case = TRUE),
      source_type = source_label
    ) %>%
    filter(
      !is.na(slaves_disembarked),
      slaves_disembarked > 0,
      particular_outcome %in% outcomes
    )
}

trans <- clean_voyages(trans, "Trans-Atlantic")
intra <- clean_voyages(intra, "Intra-American")
# Combine datasets
# Stack the two cleaned extracts row-wise into one table.
combined <- bind_rows(list(trans, intra))
# Diagnostic: Check column names in combined
cat("Columns in combined dataset:\n")
## Columns in combined dataset:
print(names(combined))
## [1] "id"
## [2] "voyage_id"
## [3] "year"
## [4] "slaves_disembarked"
## [5] "slaves_embarked"
## [6] "voyage_dates__length_middle_passage_days"
## [7] "voyage_dates__imp_length_home_to_disembark"
## [8] "voyage_crew__crew_first_landing"
## [9] "voyage_crew__crew_voyage_outset"
## [10] "voyage_ship__tonnage_mod"
## [11] "voyage_slaves_numbers__imp_jamaican_cash_price"
## [12] "voyage_slaves_numbers__imp_mortality_ratio"
## [13] "voyage_slaves_numbers__percentage_women_among_embarked_slaves"
## [14] "voyage_outcome__vessel_captured_outcome__name"
## [15] "country"
## [16] "voyage_itinerary__imp_region_voyage_begin__name"
## [17] "voyage_ship__rig_of_vessel__name"
## [18] "voyage_itinerary__place_voyage_ended__name"
## [19] "voyage_dates__slave_purchase_began_sparsedate__month"
## [20] "voyage_slaves_numbers__percentage_men"
## [21] "voyage_dates__voyage_completed_sparsedate__month"
## [22] "voyage_itinerary__region_of_return__name"
## [23] "voyage_slaves_numbers__percentage_boy"
## [24] "dis_region"
## [25] "voyage_itinerary__imp_principal_region_of_slave_purchase__name"
## [26] "voyage_dates__date_departed_africa_sparsedate__month"
## [27] "voyage_dates__voyage_began_sparsedate__month"
## [28] "voyage_itinerary__imp_port_voyage_begin__name"
## [29] "voyage_dates__first_dis_of_slaves_sparsedate__month"
## [30] "dis_broad"
## [31] "voyage_slaves_numbers__percentage_girl"
## [32] "particular_outcome"
## [33] "dis_port"
## [34] "voyage_slaves_numbers__percentage_child"
## [35] "voyage_slaves_numbers__percentage_women"
## [36] "voyage_dates__departure_last_place_of_landing_sparsedate__month"
## [37] "voyage_outcome__outcome_owner__name"
## [38] "voyage_outcome__outcome_slaves__name"
## [39] "embark_port"
## [40] "voyage_outcome__resistance__name"
## [41] "voyage_slaves_numbers__percentage_male"
## [42] "voyage_slaves_numbers__percentage_female"
## [43] "voyage_itinerary__imp_broad_region_voyage_begin__name"
## [44] "embark_broad"
## [45] "voyage_sources"
## [46] "enslavers"
## [47] "decade"
## [48] "estimated_deaths"
## [49] "is_us"
## [50] "embark_is_us"
## [51] "source_type"
Analysis and Questions
1. Net Slaves Imported to the U.S.
# Gross arrivals: everyone disembarked on a voyage flagged is_us, from either
# extract. Rows where is_us is NA are dropped by filter().
# NOTE(review): this also counts intra-American voyages that both began and
# ended in the US (embark_is_us and is_us both TRUE) - confirm that is intended.
gross_us <- combined %>%
  filter(is_us) %>%
  pull(slaves_disembarked) %>%
  sum(na.rm = TRUE)

# Re-exports: people embarked on intra-American voyages that left the US and
# landed somewhere not flagged is_us (rows with is_us NA are excluded).
re_exports <- combined %>%
  filter(source_type == "Intra-American", embark_is_us, !is_us) %>%
  pull(slaves_embarked) %>%
  sum(na.rm = TRUE)

net_us <- gross_us - re_exports

cat("Gross slaves imported to the US:", gross_us, "\n")
## Gross slaves imported to the US: 395919
cat("Slaves re-exported from the US:", re_exports, "\n")
## Slaves re-exported from the US: 7652
cat("Net slaves retained in the US:", net_us, "\n")
## Net slaves retained in the US: 388267
2. Proportion of Slaves Taken from Africa Who Were Retained in the U.S.
# Denominator: total embarked across the cleaned trans-Atlantic voyages.
# NOTE(review): `trans` has already been restricted to successful outcomes
# with a positive disembarked count, so voyages dropped by that filter do not
# contribute to this total - confirm that is the intended base.
trans_total_embarked <- trans %>%
  pull(slaves_embarked) %>%
  sum(na.rm = TRUE)

# Share of that total represented by the net US figure.
proportion <- net_us / trans_total_embarked
cat("Proportion of all slaves taken from Africa (net US):", proportion, "\n")
## Proportion of all slaves taken from Africa (net US): 0.05107614
3. U.S. Slave Imports by Decade
# Total disembarked per decade for voyages flagged is_us. Rows with an
# unknown decade are removed before grouping, which yields the same table as
# dropping the NA group afterwards.
us_by_decade <- combined %>%
  filter(is_us, !is.na(decade)) %>%
  group_by(decade) %>%
  summarise(total_disembarked = sum(slaves_disembarked, na.rm = TRUE)) %>%
  ungroup()
us_by_decade
## # A tibble: 26 × 2
## decade total_disembarked
## <dbl> <dbl>
## 1 1610 29
## 2 1620 3
## 3 1630 73
## 4 1640 81
## 5 1650 607
## 6 1660 459
## 7 1670 1330
## 8 1680 2317
## 9 1690 3936
## 10 1700 11284
## # ℹ 16 more rows
# Bar chart with one bar per decade; the x axis is labelled at every decade
# between the first and last one present in the table.
decade_span <- range(us_by_decade$decade, na.rm = TRUE)
ggplot(us_by_decade, aes(x = decade, y = total_disembarked)) +
  geom_col(fill = "steelblue") +
  scale_x_continuous(
    breaks = seq(decade_span[1], decade_span[2], by = 10)
  ) +
  labs(
    title = "US Slave Imports by Decade",
    x = "Decade",
    y = "Total Slaves Disembarked"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

4. U.S. Slave Imports by Decade, Region, and State
# Port-name patterns (case-insensitive regular expressions) and the state
# each one maps to, in priority order: the first matching pattern wins.
port_state_patterns <- c(
  "New Orleans" = "Louisiana",
  "Charleston" = "South Carolina",
  "Savannah" = "Georgia",
  "Norfolk|Richmond" = "Virginia",
  "Baltimore" = "Maryland",
  "New York" = "New York",
  "Philadelphia" = "Pennsylvania",
  "Mobile" = "Alabama",
  "Boston" = "Massachusetts"
)

# Label each port with a state; ports matching no pattern (including missing
# ports) become "Other".
state_from_port <- function(port) {
  state <- rep("Other", length(port))
  # Apply patterns from last to first so earlier entries overwrite later ones.
  for (pattern in rev(names(port_state_patterns))) {
    hit <- grepl(pattern, port, ignore.case = TRUE)
    state[hit] <- port_state_patterns[[pattern]]
  }
  state
}

# NOTE(review): in the captured output below several rows pair a named
# dis_region (e.g. Virginia) with state "Other", because only the listed port
# names are recognised - confirm whether dis_region should be the fallback.
combined <- combined %>%
  mutate(state = state_from_port(dis_port))

# Check if dis_region exists
if (!("dis_region" %in% names(combined))) {
  cat("Warning: dis_region not found, grouping by decade and state only\n")
  us_by_region_state_decade <- combined %>%
    filter(is_us, !is.na(decade), !is.na(state)) %>%
    group_by(decade, state) %>%
    summarise(total_disembarked = sum(slaves_disembarked, na.rm = TRUE)) %>%
    ungroup()
} else {
  us_by_region_state_decade <- combined %>%
    filter(is_us, !is.na(decade), !is.na(dis_region), !is.na(state)) %>%
    group_by(decade, dis_region, state) %>%
    summarise(total_disembarked = sum(slaves_disembarked, na.rm = TRUE)) %>%
    ungroup()
}
us_by_region_state_decade
## # A tibble: 151 × 4
## decade dis_region state total_disembarked
## <dbl> <chr> <chr> <dbl>
## 1 1610 Virginia Other 29
## 2 1620 Virginia Other 3
## 3 1630 Massachusetts Massachusetts 7
## 4 1630 New York New York 53
## 5 1630 Virginia Other 13
## 6 1640 New York New York 69
## 7 1640 Virginia Other 12
## 8 1650 Maryland Other 5
## 9 1650 New York New York 477
## 10 1650 Virginia Other 125
## # ℹ 141 more rows
# One panel per state, each with its own y scale; x axis labelled every
# twenty years across the decades present in the table.
state_decade_span <- range(us_by_region_state_decade$decade, na.rm = TRUE)
ggplot(us_by_region_state_decade, aes(x = decade, y = total_disembarked)) +
  geom_col(fill = "darkgreen") +
  facet_wrap(~ state, scales = "free_y") +
  scale_x_continuous(
    breaks = seq(state_decade_span[1], state_decade_span[2], by = 20)
  ) +
  labs(
    title = "US Slave Imports by Decade and State",
    x = "Decade",
    y = "Total Slaves Disembarked"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

5. Countries Exporting from Africa by Decade
# Distinct voyages and total embarked per decade and ship nationality, using
# the cleaned trans-Atlantic voyages only, sorted within each decade from the
# largest to the smallest embarked total.
#
# Rows without a usable country are excluded: NA, and the literal "0" that
# appears in the nationality column. "0" is not a country name (presumably a
# missing-value code - confirm against the data dictionary), and leaving it in
# made it rank as an exporting "country" in the table.
exporting_summary <- trans %>%
  filter(!is.na(country), country != "0") %>%
  group_by(decade, country) %>%
  summarise(
    total_voyages = n_distinct(voyage_id),
    total_embarked = sum(slaves_embarked, na.rm = TRUE),
    # Return an ungrouped table explicitly instead of relying on ungroup().
    .groups = "drop"
  ) %>%
  arrange(decade, desc(total_embarked))
exporting_summary
# NOTE: the captured output that follows was produced before the "0"
# placeholder was excluded; re-knit the report to refresh it.
## # A tibble: 203 × 4
## decade country total_voyages total_embarked
## <dbl> <chr> <int> <dbl>
## 1 1510 0 9 223
## 2 1510 Spain / Uruguay 8 144
## 3 1520 Spain / Uruguay 3 1043
## 4 1520 0 2 373
## 5 1530 0 8 1418
## 6 1530 Portugal / Brazil 2 560
## 7 1530 Spain / Uruguay 1 224
## 8 1540 0 23 7750
## 9 1540 Portugal / Brazil 1 160
## 10 1550 0 27 9390
## # ℹ 193 more rows