=============================================================================
Assignment: Analyzing Trans-Atlantic and Intra-American Slave Trade
with Tidyverse
PART 1: DATA LOADING AND CLEANING
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Column specification shared by both read_csv() calls below.
# cols_only() parses just the columns named here; every other column in the
# CSV is skipped. The two id columns are read as integers, the lookup fields
# ending in "__name" plus voyage_sources and enslavers as character, and all
# remaining columns (year, counts, percentages, months, ...) as double.
col_types_spec <- cols_only(
id = col_integer(),
voyage_id = col_integer(),
voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year = col_double(),
voyage_slaves_numbers__imp_total_num_slaves_disembarked = col_double(),
voyage_slaves_numbers__imp_total_num_slaves_embarked = col_double(),
voyage_dates__length_middle_passage_days = col_double(),
voyage_dates__imp_length_home_to_disembark = col_double(),
voyage_crew__crew_first_landing = col_double(),
voyage_crew__crew_voyage_outset = col_double(),
voyage_ship__tonnage_mod = col_double(),
voyage_slaves_numbers__imp_jamaican_cash_price = col_double(),
voyage_slaves_numbers__imp_mortality_ratio = col_double(),
voyage_slaves_numbers__percentage_women_among_embarked_slaves = col_double(),
voyage_outcome__vessel_captured_outcome__name = col_character(),
voyage_ship__imputed_nationality__name = col_character(),
voyage_itinerary__imp_region_voyage_begin__name = col_character(),
voyage_ship__rig_of_vessel__name = col_character(),
voyage_itinerary__place_voyage_ended__name = col_character(),
voyage_dates__slave_purchase_began_sparsedate__month = col_double(),
voyage_slaves_numbers__percentage_men = col_double(),
voyage_dates__voyage_completed_sparsedate__month = col_double(),
voyage_itinerary__region_of_return__name = col_character(),
voyage_slaves_numbers__percentage_boy = col_double(),
voyage_itinerary__imp_principal_region_slave_dis__name = col_character(),
voyage_itinerary__imp_principal_region_of_slave_purchase__name = col_character(),
voyage_dates__date_departed_africa_sparsedate__month = col_double(),
voyage_dates__voyage_began_sparsedate__month = col_double(),
voyage_itinerary__imp_port_voyage_begin__name = col_character(),
voyage_dates__first_dis_of_slaves_sparsedate__month = col_double(),
voyage_itinerary__imp_broad_region_slave_dis__name = col_character(),
voyage_slaves_numbers__percentage_girl = col_double(),
voyage_outcome__particular_outcome__name = col_character(),
voyage_itinerary__imp_principal_port_slave_dis__name = col_character(),
voyage_slaves_numbers__percentage_child = col_double(),
voyage_slaves_numbers__percentage_women = col_double(),
voyage_dates__departure_last_place_of_landing_sparsedate__month = col_double(),
voyage_outcome__outcome_owner__name = col_character(),
voyage_outcome__outcome_slaves__name = col_character(),
voyage_itinerary__imp_principal_place_of_slave_purchase__name = col_character(),
voyage_outcome__resistance__name = col_character(),
voyage_slaves_numbers__percentage_male = col_double(),
voyage_slaves_numbers__percentage_female = col_double(),
voyage_itinerary__imp_broad_region_voyage_begin__name = col_character(),
voyage_itinerary__imp_broad_region_of_slave_purchase__name = col_character(),
voyage_sources = col_character(),
enslavers = col_character()
)
cat("Loading datasets...\n")
## Loading datasets...
# Download both voyage tables from GitHub, parsing only the columns declared
# in col_types_spec.
trans_url <- "https://raw.githubusercontent.com/imowerman-prog/data-3210/refs/heads/main/Data/trans-atlantic.csv"
intra_url <- "https://raw.githubusercontent.com/imowerman-prog/data-3210/refs/heads/main/Data/intra-american.csv"
trans <- read_csv(file = trans_url, col_types = col_types_spec)
intra <- read_csv(file = intra_url, col_types = col_types_spec)
cat("Trans-Atlantic rows loaded:", nrow(trans), "\n")
## Trans-Atlantic rows loaded: 36340
cat("Intra-American rows loaded:", nrow(intra), "\n")
## Intra-American rows loaded: 37777
Step 1: Rename columns for readability
# Replace the long voyage-database column names with short analysis names.
#
# df:      a data frame containing every original column listed in the
#          lookup below (a missing column is an error).
# returns: df with those columns renamed; other columns are left untouched.
rename_cols <- function(df) {
  # short name = original column name
  short_names <- c(
    year = "voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year",
    slaves_disembarked = "voyage_slaves_numbers__imp_total_num_slaves_disembarked",
    slaves_embarked = "voyage_slaves_numbers__imp_total_num_slaves_embarked",
    middle_passage_days = "voyage_dates__length_middle_passage_days",
    voyage_length = "voyage_dates__imp_length_home_to_disembark",
    crew_landing = "voyage_crew__crew_first_landing",
    crew_outset = "voyage_crew__crew_voyage_outset",
    tonnage = "voyage_ship__tonnage_mod",
    cash_price = "voyage_slaves_numbers__imp_jamaican_cash_price",
    mortality_ratio = "voyage_slaves_numbers__imp_mortality_ratio",
    pct_women_embarked = "voyage_slaves_numbers__percentage_women_among_embarked_slaves",
    vessel_outcome = "voyage_outcome__vessel_captured_outcome__name",
    nationality = "voyage_ship__imputed_nationality__name",
    voyage_begin_region = "voyage_itinerary__imp_region_voyage_begin__name",
    rig = "voyage_ship__rig_of_vessel__name",
    place_voyage_ended = "voyage_itinerary__place_voyage_ended__name",
    month_slave_purchase = "voyage_dates__slave_purchase_began_sparsedate__month",
    pct_men = "voyage_slaves_numbers__percentage_men",
    month_voyage_complete = "voyage_dates__voyage_completed_sparsedate__month",
    region_of_return = "voyage_itinerary__region_of_return__name",
    pct_boy = "voyage_slaves_numbers__percentage_boy",
    dis_region = "voyage_itinerary__imp_principal_region_slave_dis__name",
    purchase_region = "voyage_itinerary__imp_principal_region_of_slave_purchase__name",
    month_depart_africa = "voyage_dates__date_departed_africa_sparsedate__month",
    month_voyage_began = "voyage_dates__voyage_began_sparsedate__month",
    port_voyage_begin = "voyage_itinerary__imp_port_voyage_begin__name",
    month_first_dis = "voyage_dates__first_dis_of_slaves_sparsedate__month",
    dis_broad = "voyage_itinerary__imp_broad_region_slave_dis__name",
    pct_girl = "voyage_slaves_numbers__percentage_girl",
    particular_outcome = "voyage_outcome__particular_outcome__name",
    dis_port = "voyage_itinerary__imp_principal_port_slave_dis__name",
    pct_child = "voyage_slaves_numbers__percentage_child",
    pct_women = "voyage_slaves_numbers__percentage_women",
    month_depart_landing = "voyage_dates__departure_last_place_of_landing_sparsedate__month",
    outcome_owner = "voyage_outcome__outcome_owner__name",
    outcome_slaves = "voyage_outcome__outcome_slaves__name",
    purchase_place = "voyage_itinerary__imp_principal_place_of_slave_purchase__name",
    resistance = "voyage_outcome__resistance__name",
    pct_male = "voyage_slaves_numbers__percentage_male",
    pct_female = "voyage_slaves_numbers__percentage_female",
    broad_region_begin = "voyage_itinerary__imp_broad_region_voyage_begin__name",
    broad_purchase_region = "voyage_itinerary__imp_broad_region_of_slave_purchase__name"
  )
  rename(df, all_of(short_names))
}
trans <- rename_cols(trans)
intra <- rename_cols(intra)
cat("Columns renamed successfully.\n")
## Columns renamed successfully.
Step 2: Convert types
# Coerce the year to integer and the two head-count columns to numeric.
# Both tables go through the same helper so they cannot drift apart.
convert_core_types <- function(df) {
  mutate(
    df,
    year = as.integer(year),
    across(c(slaves_disembarked, slaves_embarked), as.numeric)
  )
}
trans <- convert_core_types(trans)
intra <- convert_core_types(intra)
Step 3: Filter out rows where slaves_disembarked is 0 or NA
# Keep only voyages with a known, strictly positive number disembarked,
# then report how many rows each table has left.
keep_disembarked <- function(df) {
  filter(df, !is.na(slaves_disembarked) & slaves_disembarked > 0)
}
trans <- keep_disembarked(trans)
intra <- keep_disembarked(intra)
cat("After filtering 0/NA disembarked — Trans:", nrow(trans), "| Intra:", nrow(intra), "\n")
## After filtering 0/NA disembarked — Trans: 34412 | Intra: 37773
Step 4: Filter for successful outcomes
# Keep voyages whose recorded outcome mentions one of the listed terms
# (case-insensitive). Voyages with no recorded outcome are kept as well.
keep_successful <- function(df) {
  outcome_pattern <- "disembarked|completed|sold|captives disembarked"
  # grepl() returns FALSE for NA, so missing outcomes are admitted by the
  # explicit is.na() test below rather than by the pattern.
  outcome_ok <- grepl(outcome_pattern, df$particular_outcome, ignore.case = TRUE)
  filter(df, is.na(particular_outcome) | outcome_ok)
}
trans <- keep_successful(trans)
intra <- keep_successful(intra)
cat("After outcome filter — Trans:", nrow(trans), "| Intra:", nrow(intra), "\n")
## After outcome filter — Trans: 23952 | Intra: 33266
Step 5: Add derived columns
# Lower-case place names that mark a US disembarkation, joined with "|" into
# a single regex alternation for use with str_detect().
us_port_keywords <- paste0(
  c(
    "new orleans",
    "charleston",
    "savannah",
    "baltimore",
    "norfolk",
    "virginia",
    "maryland",
    "georgia",
    "south carolina",
    "north carolina",
    "louisiana",
    "mississippi",
    "alabama",
    "florida",
    "texas",
    "rhode island",
    "new york",
    "boston",
    "philadelphia"
  ),
  collapse = "|"
)
# Add the columns the analysis questions rely on.
#
#   decade           - year rounded down to the start of its decade
#   estimated_deaths - embarked minus disembarked; NA when either is missing
#   is_us            - TRUE when dis_broad contains "Mainland North America"
#                      (any case) or dis_port / dis_region contains one of
#                      the us_port_keywords terms; FALSE otherwise, never NA
#
# df:      a data frame with year, slaves_embarked, slaves_disembarked,
#          dis_broad, dis_port and dis_region columns.
# returns: df with the three columns above appended.
add_derived_cols <- function(df) {
  # A missing place name counts as "no match" rather than NA.
  has_us_keyword <- function(place) {
    coalesce(str_detect(str_to_lower(place), us_port_keywords), FALSE)
  }
  is_mainland_na <- function(broad) {
    mainland <- regex("Mainland North America", ignore_case = TRUE)
    coalesce(str_detect(broad, mainland), FALSE)
  }

  mutate(
    df,
    decade = floor(year / 10) * 10,
    # Subtraction already yields NA when either count is missing.
    estimated_deaths = slaves_embarked - slaves_disembarked,
    is_us = is_mainland_na(dis_broad) |
      has_us_keyword(dis_port) |
      has_us_keyword(dis_region)
  )
}
trans <- add_derived_cols(trans)
intra <- add_derived_cols(intra)
cat("\nTrans-Atlantic US voyages:", sum(trans$is_us, na.rm = TRUE), "\n")
##
## Trans-Atlantic US voyages: 1610
cat("Intra-American US voyages:", sum(intra$is_us, na.rm = TRUE), "\n")
## Intra-American US voyages: 6725
Step 6: Combine datasets
# Stack the two cleaned tables, tagging every row with the dataset it came
# from (Trans-Atlantic rows first).
combined <- list(
  mutate(trans, source_type = "Trans-Atlantic"),
  mutate(intra, source_type = "Intra-American")
) %>%
  bind_rows()
cat("\nCombined rows:", nrow(combined), "\n")
##
## Combined rows: 57218
cat("Source type breakdown:\n")
## Source type breakdown:
print(table(combined$source_type))
##
## Intra-American Trans-Atlantic
## 33266 23952
PART 2: ANALYSIS
cat("\n--- PART 2: Analysis ---\n")
##
## --- PART 2: Analysis ---
Q1: Total slaves imported to the US
# Q1: restrict to voyages flagged as landing in the US and total the people
# disembarked across both datasets.
us_data <- filter(combined, is_us)
us_total <- us_data %>%
  pull(slaves_disembarked) %>%
  sum(na.rm = TRUE)
cat("\nQ1 — Total enslaved people imported to the US:", scales::comma(us_total), "\n")
##
## Q1 — Total enslaved people imported to the US: 392,394
Q2: US imports as a proportion of all enslaved people embarked from Africa
# Q2: US arrivals (people disembarked, both datasets) as a fraction of the
# people embarked on the Trans-Atlantic voyages in `trans`.
# NOTE(review): `trans` has already been cut down by the disembarked and
# outcome filters, so this denominator is not the full database total —
# confirm that is the intended base for the share.
total_embarked_africa <- trans %>%
  pull(slaves_embarked) %>%
  sum(na.rm = TRUE)
us_proportion <- us_total / total_embarked_africa
cat("Q2 — Total embarked from Africa:", scales::comma(total_embarked_africa), "\n")
## Q2 — Total embarked from Africa: 7,484,499
cat("Q2 — US share of Africa-embarked:", round(us_proportion * 100, 2), "%\n")
## Q2 — US share of Africa-embarked: 5.24 %
Q3: Slave imports to the US by decade — bar graph
# Q3: total people disembarked in the US per decade. Rows without a decade
# are dropped before grouping.
us_by_decade <- us_data %>%
  drop_na(decade) %>%
  group_by(decade) %>%
  summarise(
    total_disembarked = sum(slaves_disembarked, na.rm = TRUE),
    .groups = "drop"
  )
print(us_by_decade)
## # A tibble: 26 × 2
## decade total_disembarked
## <dbl> <dbl>
## 1 1610 29
## 2 1620 3
## 3 1630 73
## 4 1640 81
## 5 1650 563
## 6 1660 459
## 7 1670 1330
## 8 1680 2187
## 9 1690 3928
## 10 1700 11451
## # ℹ 16 more rows
# Bar chart of the per-decade US totals. The decade is converted to a factor
# so it is plotted as a discrete axis, one bar per decade present.
plot_q3 <- us_by_decade %>%
  ggplot(mapping = aes(x = factor(decade), y = total_disembarked)) +
  geom_col(fill = "#8B1A1A", colour = "black", linewidth = 0.3) +
  labs(
    title = "Enslaved People Imported to the United States by Decade",
    subtitle = "Combined Trans-Atlantic and Intra-American voyages",
    x = "Decade",
    y = "Total Enslaved People Disembarked",
    caption = "Source: Slave Voyages Database"
  ) +
  scale_y_continuous(labels = scales::comma) +
  # The base theme goes first so the tweaks below override it.
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.grid.major.x = element_blank()
  )
print(plot_q3)

Q4: US imports by decade and state
# Map a vector of place names to US state labels.
#
# Matching is by lower-cased substring/regex. Rules are tried top to bottom
# and the first rule that matches an element decides its label. Elements
# matching no rule, including NA, become "Other/Unknown".
#
# port:    character vector of place names.
# returns: character vector of labels, same length as port.
map_to_state <- function(port) {
  place <- str_to_lower(port)

  # label = pattern, in priority order
  rules <- c(
    "Louisiana" = "new orleans|louisian",
    "South Carolina" = "charleston|south carolina",
    "Georgia" = "savannah|georgia",
    "Virginia" = "virginia|norfolk|richmond",
    "Maryland" = "maryland|baltimore|annapolis",
    "North Carolina" = "north carolina",
    "Mississippi" = "mississippi|natchez",
    "Alabama" = "alabama|mobile",
    "Florida" = "florida",
    "Texas" = "texas|galveston",
    "Rhode Island" = "rhode island|newport|providence",
    "New York" = "new york",
    "Massachusetts" = "massachusetts|boston",
    "Pennsylvania" = "pennsylvania|philadelphia",
    "Connecticut" = "connecticut",
    "USA (unspecified)" = "mainland north america"
  )

  label <- rep(NA_character_, length(place))
  for (state_name in names(rules)) {
    # Only still-unlabelled elements may take this rule's label;
    # which() drops the NA results that str_detect() gives for NA input.
    hit <- is.na(label) & str_detect(place, rules[[state_name]])
    label[which(hit)] <- state_name
  }
  label[is.na(label)] <- "Other/Unknown"
  label
}
# Q4: total people disembarked in the US per decade and destination state.
#
# Choose one state label per voyage from its three location columns.
# Previously only the first non-NA column was mapped, so a port name that was
# present but matched no state hid a region or broad region that did. Now
# each column is mapped separately and the first *specific* state wins (port,
# then region, then broad region). If none is specific, the label is
# "USA (unspecified)" when any column maps to it, otherwise "Other/Unknown".
#
# port, region, broad: character vectors of equal length.
# returns: character vector of state labels, never NA.
pick_state <- function(port, region, broad) {
  generic_labels <- c("Other/Unknown", "USA (unspecified)")
  # map_to_state() labels NA input "Other/Unknown", so these are never NA.
  mapped <- list(map_to_state(port), map_to_state(region), map_to_state(broad))

  specific <- lapply(mapped, function(label) {
    if_else(label %in% generic_labels, NA_character_, label)
  })
  unspecified <- lapply(mapped, function(label) {
    if_else(label == "USA (unspecified)", label, NA_character_)
  })

  coalesce(!!!specific, !!!unspecified, "Other/Unknown")
}
us_by_decade_region <- us_data %>%
  filter(!is.na(decade)) %>%
  mutate(state = pick_state(dis_port, dis_region, dis_broad)) %>%
  group_by(decade, state) %>%
  summarise(
    total_disembarked = sum(slaves_disembarked, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(decade, desc(total_disembarked))
print(us_by_decade_region)
## # A tibble: 131 × 3
## decade state total_disembarked
## <dbl> <chr> <dbl>
## 1 1610 Other/Unknown 29
## 2 1620 Virginia 3
## 3 1630 New York 53
## 4 1630 Virginia 13
## 5 1630 Massachusetts 7
## 6 1640 New York 69
## 7 1640 Virginia 12
## 8 1650 New York 433
## 9 1650 Virginia 125
## 10 1650 Maryland 5
## # ℹ 121 more rows
# The eight state labels with the largest all-time totals.
top_states <- us_by_decade_region %>%
  group_by(state) %>%
  summarise(grand_total = sum(total_disembarked)) %>%
  slice_max(order_by = grand_total, n = 8) %>%
  pull(state)

# One facet per top state, each with its own y scale so that small states
# remain readable. The fill legend is hidden because the facet strip already
# names the state.
plot_q4 <- us_by_decade_region %>%
  filter(state %in% top_states) %>%
  ggplot(mapping = aes(x = factor(decade), y = total_disembarked, fill = state)) +
  geom_col(colour = "black", linewidth = 0.2) +
  facet_wrap(vars(state), ncol = 2, scales = "free_y") +
  scale_fill_brewer(palette = "Set2") +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "US Slave Imports by Decade and Destination State/Region",
    subtitle = "Top 8 destination states shown",
    x = "Decade",
    y = "Enslaved People Disembarked",
    caption = "Source: Slave Voyages Database"
  ) +
  theme_minimal(base_size = 10) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold"),
    axis.text.x = element_text(angle = 60, hjust = 1, size = 7)
  )
print(plot_q4)

Q5: Countries in Trans-Atlantic trade by decade
# Q5: voyages and people embarked per decade and carrier nationality in the
# Trans-Atlantic data, ordered by decade and then by number of voyages.
#
# The nationality column contains the literal string "0", which appeared as a
# "country" in this table and could be picked as a top nation. It is not a
# country name, so it is excluded here together with NA.
# NOTE(review): presumably "0" is the source's code for an unknown flag —
# confirm against the database documentation.
countries_by_decade <- trans %>%
  filter(
    !is.na(decade),
    !is.na(nationality),
    nationality != "0"
  ) %>%
  group_by(decade, country = nationality) %>%
  summarise(
    n_voyages = n(),
    total_embarked = sum(slaves_embarked, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(decade, desc(n_voyages))
print(head(countries_by_decade, 30))
## # A tibble: 30 × 4
## decade country n_voyages total_embarked
## <dbl> <chr> <int> <dbl>
## 1 1510 0 9 223
## 2 1510 Spain / Uruguay 8 144
## 3 1510 Portugal / Brazil 2 624
## 4 1520 Spain / Uruguay 3 1043
## 5 1520 0 2 373
## 6 1530 0 8 1418
## 7 1530 Portugal / Brazil 2 560
## 8 1530 Spain / Uruguay 1 224
## 9 1540 0 23 7750
## 10 1540 Portugal / Brazil 1 160
## # ℹ 20 more rows
# The seven nationalities with the most voyages overall.
top_countries <- countries_by_decade %>%
  group_by(country) %>%
  summarise(total = sum(n_voyages)) %>%
  slice_max(order_by = total, n = 7) %>%
  pull(country)

# Stacked area chart of voyages per decade for those nationalities. The
# decade stays numeric here so the x axis is continuous.
plot_q5 <- countries_by_decade %>%
  filter(!is.na(decade), country %in% top_countries) %>%
  ggplot(mapping = aes(x = decade, y = n_voyages, fill = country)) +
  geom_area(alpha = 0.85, colour = "white", linewidth = 0.2) +
  scale_x_continuous(breaks = seq(1500, 1900, by = 50)) +
  scale_y_continuous(labels = scales::comma) +
  scale_fill_brewer(palette = "Dark2") +
  labs(
    title = "Trans-Atlantic Slave Trade Voyages by Country and Decade",
    subtitle = "Top 7 participating nations",
    x = "Decade",
    y = "Number of Voyages",
    fill = "Country",
    caption = "Source: Slave Voyages Database"
  ) +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold"))
print(plot_q5)

Summary stats
cat("\n--- Summary ---\n")
##
## --- Summary ---
cat("Total combined voyages:", nrow(combined), "\n")
## Total combined voyages: 57218
cat("Year range:", min(combined$year, na.rm = TRUE), "-", max(combined$year, na.rm = TRUE), "\n")
## Year range: 1514 - 1887
cat("Total estimated deaths:", scales::comma(sum(combined$estimated_deaths, na.rm = TRUE)), "\n")
## Total estimated deaths: 1,013,508
# Count named nationalities only. unique() also returns NA as a value, and
# the column carries a literal "0" placeholder that is not a country; both
# are removed before counting.
known_nationalities <- setdiff(unique(trans$nationality), c(NA, "0"))
cat("Unique nationalities:", length(known_nationalities), "\n")
## Unique nationalities: 12
cat("\nScript complete.\n")
##
## Script complete.