=============================================================================

Assignment: Analyzing Trans-Atlantic and Intra-American Slave Trade with Tidyverse

PART 1: DATA LOADING AND CLEANING

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Column specification shared by both CSV imports below.
# cols_only() limits the import to the columns named here. The two identifier
# columns are parsed as integers, the counts, ratios, percentages, and
# year/month fields as doubles, and the "__name" label columns plus
# voyage_sources and enslavers as character.
col_types_spec <- cols_only(
  id = col_integer(),
  voyage_id = col_integer(),
  voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year = col_double(),
  voyage_slaves_numbers__imp_total_num_slaves_disembarked = col_double(),
  voyage_slaves_numbers__imp_total_num_slaves_embarked = col_double(),
  voyage_dates__length_middle_passage_days = col_double(),
  voyage_dates__imp_length_home_to_disembark = col_double(),
  voyage_crew__crew_first_landing = col_double(),
  voyage_crew__crew_voyage_outset = col_double(),
  voyage_ship__tonnage_mod = col_double(),
  voyage_slaves_numbers__imp_jamaican_cash_price = col_double(),
  voyage_slaves_numbers__imp_mortality_ratio = col_double(),
  voyage_slaves_numbers__percentage_women_among_embarked_slaves = col_double(),
  voyage_outcome__vessel_captured_outcome__name = col_character(),
  voyage_ship__imputed_nationality__name = col_character(),
  voyage_itinerary__imp_region_voyage_begin__name = col_character(),
  voyage_ship__rig_of_vessel__name = col_character(),
  voyage_itinerary__place_voyage_ended__name = col_character(),
  voyage_dates__slave_purchase_began_sparsedate__month = col_double(),
  voyage_slaves_numbers__percentage_men = col_double(),
  voyage_dates__voyage_completed_sparsedate__month = col_double(),
  voyage_itinerary__region_of_return__name = col_character(),
  voyage_slaves_numbers__percentage_boy = col_double(),
  voyage_itinerary__imp_principal_region_slave_dis__name = col_character(),
  voyage_itinerary__imp_principal_region_of_slave_purchase__name = col_character(),
  voyage_dates__date_departed_africa_sparsedate__month = col_double(),
  voyage_dates__voyage_began_sparsedate__month = col_double(),
  voyage_itinerary__imp_port_voyage_begin__name = col_character(),
  voyage_dates__first_dis_of_slaves_sparsedate__month = col_double(),
  voyage_itinerary__imp_broad_region_slave_dis__name = col_character(),
  voyage_slaves_numbers__percentage_girl = col_double(),
  voyage_outcome__particular_outcome__name = col_character(),
  voyage_itinerary__imp_principal_port_slave_dis__name = col_character(),
  voyage_slaves_numbers__percentage_child = col_double(),
  voyage_slaves_numbers__percentage_women = col_double(),
  voyage_dates__departure_last_place_of_landing_sparsedate__month = col_double(),
  voyage_outcome__outcome_owner__name = col_character(),
  voyage_outcome__outcome_slaves__name = col_character(),
  voyage_itinerary__imp_principal_place_of_slave_purchase__name = col_character(),
  voyage_outcome__resistance__name = col_character(),
  voyage_slaves_numbers__percentage_male = col_double(),
  voyage_slaves_numbers__percentage_female = col_double(),
  voyage_itinerary__imp_broad_region_voyage_begin__name = col_character(),
  voyage_itinerary__imp_broad_region_of_slave_purchase__name = col_character(),
  voyage_sources = col_character(),
  enslavers = col_character()
)

# Download both voyage tables, parsing each with the shared column
# specification, then report how many rows each one contains.
cat("Loading datasets...\n")
## Loading datasets...
trans_url <- "https://raw.githubusercontent.com/imowerman-prog/data-3210/refs/heads/main/Data/trans-atlantic.csv"
intra_url <- "https://raw.githubusercontent.com/imowerman-prog/data-3210/refs/heads/main/Data/intra-american.csv"

trans <- read_csv(file = trans_url, col_types = col_types_spec)
intra <- read_csv(file = intra_url, col_types = col_types_spec)

cat("Trans-Atlantic rows loaded:", nrow(trans), "\n")
## Trans-Atlantic rows loaded: 36340
cat("Intra-American rows loaded:", nrow(intra), "\n")
## Intra-American rows loaded: 37777

Step 1: Rename columns for readability

# Replace the long database column names with short analysis names.
#
# df: a data frame that still carries the original `voyage_*` column names
#     listed in the lookup below.
# Returns df with those columns renamed; every other column is left as is.
# Fails if any of the listed original columns is absent.
rename_cols <- function(df) {
  # Lookup written as short_name = "original_name".
  short_names <- c(
    year = "voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year",
    slaves_disembarked = "voyage_slaves_numbers__imp_total_num_slaves_disembarked",
    slaves_embarked = "voyage_slaves_numbers__imp_total_num_slaves_embarked",
    middle_passage_days = "voyage_dates__length_middle_passage_days",
    voyage_length = "voyage_dates__imp_length_home_to_disembark",
    crew_landing = "voyage_crew__crew_first_landing",
    crew_outset = "voyage_crew__crew_voyage_outset",
    tonnage = "voyage_ship__tonnage_mod",
    cash_price = "voyage_slaves_numbers__imp_jamaican_cash_price",
    mortality_ratio = "voyage_slaves_numbers__imp_mortality_ratio",
    pct_women_embarked = "voyage_slaves_numbers__percentage_women_among_embarked_slaves",
    vessel_outcome = "voyage_outcome__vessel_captured_outcome__name",
    nationality = "voyage_ship__imputed_nationality__name",
    voyage_begin_region = "voyage_itinerary__imp_region_voyage_begin__name",
    rig = "voyage_ship__rig_of_vessel__name",
    place_voyage_ended = "voyage_itinerary__place_voyage_ended__name",
    month_slave_purchase = "voyage_dates__slave_purchase_began_sparsedate__month",
    pct_men = "voyage_slaves_numbers__percentage_men",
    month_voyage_complete = "voyage_dates__voyage_completed_sparsedate__month",
    region_of_return = "voyage_itinerary__region_of_return__name",
    pct_boy = "voyage_slaves_numbers__percentage_boy",
    dis_region = "voyage_itinerary__imp_principal_region_slave_dis__name",
    purchase_region = "voyage_itinerary__imp_principal_region_of_slave_purchase__name",
    month_depart_africa = "voyage_dates__date_departed_africa_sparsedate__month",
    month_voyage_began = "voyage_dates__voyage_began_sparsedate__month",
    port_voyage_begin = "voyage_itinerary__imp_port_voyage_begin__name",
    month_first_dis = "voyage_dates__first_dis_of_slaves_sparsedate__month",
    dis_broad = "voyage_itinerary__imp_broad_region_slave_dis__name",
    pct_girl = "voyage_slaves_numbers__percentage_girl",
    particular_outcome = "voyage_outcome__particular_outcome__name",
    dis_port = "voyage_itinerary__imp_principal_port_slave_dis__name",
    pct_child = "voyage_slaves_numbers__percentage_child",
    pct_women = "voyage_slaves_numbers__percentage_women",
    month_depart_landing = "voyage_dates__departure_last_place_of_landing_sparsedate__month",
    outcome_owner = "voyage_outcome__outcome_owner__name",
    outcome_slaves = "voyage_outcome__outcome_slaves__name",
    purchase_place = "voyage_itinerary__imp_principal_place_of_slave_purchase__name",
    resistance = "voyage_outcome__resistance__name",
    pct_male = "voyage_slaves_numbers__percentage_male",
    pct_female = "voyage_slaves_numbers__percentage_female",
    broad_region_begin = "voyage_itinerary__imp_broad_region_voyage_begin__name",
    broad_purchase_region = "voyage_itinerary__imp_broad_region_of_slave_purchase__name"
  )

  # all_of() with a named vector renames each listed column to its name.
  rename(df, all_of(short_names))
}

# Give both voyage tables the same short column names.
trans <- trans %>% rename_cols()
intra <- intra %>% rename_cols()

cat("Columns renamed successfully.\n")
## Columns renamed successfully.

Step 2: Convert types

# Coerce the three core analysis columns: year becomes an integer and the
# two head counts are (re)cast to numeric. The same conversion is applied to
# both voyage tables.
convert_core_types <- function(df) {
  df %>%
    mutate(
      year = as.integer(year),
      across(c(slaves_disembarked, slaves_embarked), as.numeric)
    )
}

trans <- convert_core_types(trans)
intra <- convert_core_types(intra)

Step 3: Filter out rows where slaves_disembarked is 0 or NA

# Keep only voyages with a recorded, strictly positive number of people
# disembarked; rows with a missing or zero count are dropped.
drop_empty_landings <- function(df) {
  filter(df, !is.na(slaves_disembarked) & slaves_disembarked > 0)
}

trans <- drop_empty_landings(trans)
intra <- drop_empty_landings(intra)

cat("After filtering 0/NA disembarked — Trans:", nrow(trans), "| Intra:", nrow(intra), "\n")
## After filtering 0/NA disembarked — Trans: 34412 | Intra: 37773

Step 4: Filter for successful outcomes

# Keep voyages whose recorded outcome text mentions a landing, completion, or
# sale (case-insensitive). Voyages with no recorded outcome are kept as well.
success_pattern <- "disembarked|completed|sold|captives disembarked"

keep_successful <- function(df) {
  df %>%
    filter(
      is.na(particular_outcome) |
        grepl(success_pattern, particular_outcome, ignore.case = TRUE)
    )
}

trans <- keep_successful(trans)
intra <- keep_successful(intra)

cat("After outcome filter — Trans:", nrow(trans), "| Intra:", nrow(intra), "\n")
## After outcome filter — Trans: 23952 | Intra: 33266

Step 5: Add derived columns

# Lower-case place names that mark a US destination, joined into a single
# regular-expression alternation ("a|b|c").
us_place_names <- c(
  "new orleans", "charleston", "savannah", "baltimore", "norfolk",
  "virginia", "maryland", "georgia", "south carolina", "north carolina",
  "louisiana", "mississippi", "alabama", "florida", "texas",
  "rhode island", "new york", "boston", "philadelphia"
)
us_port_keywords <- paste0(us_place_names, collapse = "|")

# Add three derived columns to a voyage table.
#
# df: a voyage table with year, slaves_embarked, slaves_disembarked,
#     dis_broad, dis_port, and dis_region columns.
# Returns df plus:
#   decade           - year rounded down to the start of its decade
#   estimated_deaths - embarked minus disembarked; NA when either is missing
#   is_us            - TRUE when the broad region is "Mainland North America"
#                      or the port/region name contains one of the
#                      us_port_keywords; FALSE otherwise (never NA)
add_derived_cols <- function(df) {
  mainland <- regex("Mainland North America", ignore_case = TRUE)

  # Pattern test in which a missing value counts as "no match".
  hits <- function(text, pattern) {
    coalesce(str_detect(text, pattern), FALSE)
  }

  df %>%
    mutate(
      decade = (year %/% 10) * 10,
      estimated_deaths = if_else(
        is.na(slaves_embarked) | is.na(slaves_disembarked),
        NA_real_,
        slaves_embarked - slaves_disembarked
      ),
      is_us = hits(dis_broad, mainland) |
        hits(str_to_lower(dis_port), us_port_keywords) |
        hits(str_to_lower(dis_region), us_port_keywords)
    )
}

# Derive decade, estimated deaths, and the US flag for both tables, then
# report how many voyages in each table are flagged as US arrivals.
trans <- trans %>% add_derived_cols()
intra <- intra %>% add_derived_cols()

cat("\nTrans-Atlantic US voyages:", sum(trans[["is_us"]], na.rm = TRUE), "\n")
## 
## Trans-Atlantic US voyages: 1610
cat("Intra-American US voyages:", sum(intra[["is_us"]], na.rm = TRUE), "\n")
## Intra-American US voyages: 6725

Step 6: Combine datasets

# Stack the two voyage tables, tagging every row with the table it came from.
tag_source <- function(df, label) {
  mutate(df, source_type = label)
}

combined <- bind_rows(
  tag_source(trans, "Trans-Atlantic"),
  tag_source(intra, "Intra-American")
)

cat("\nCombined rows:", nrow(combined), "\n")
## 
## Combined rows: 57218
cat("Source type breakdown:\n")
## Source type breakdown:
print(table(combined$source_type))
## 
## Intra-American Trans-Atlantic 
##          33266          23952

PART 2: ANALYSIS

# Console banner marking the start of the analysis section.
cat("\n--- PART 2: Analysis ---\n", sep = "")
## 
## --- PART 2: Analysis ---

Q1: Total slaves imported to the US

# Voyages flagged as US arrivals (from either table) and the total number of
# people they disembarked.
us_data <- combined %>%
  filter(is_us)

us_total <- us_data %>%
  pull(slaves_disembarked) %>%
  sum(na.rm = TRUE)

cat("\nQ1 — Total enslaved people imported to the US:", scales::comma(us_total), "\n")
## 
## Q1 — Total enslaved people imported to the US: 392,394

Q2: Proportion of all slaves taken from Africa

# US arrivals (Q1 total) expressed as a share of everyone embarked on the
# trans-Atlantic voyages that remain after the filters above.
total_embarked_africa <- trans %>%
  pull(slaves_embarked) %>%
  sum(na.rm = TRUE)

us_proportion <- us_total / total_embarked_africa

cat("Q2 — Total embarked from Africa:", scales::comma(total_embarked_africa), "\n")
## Q2 — Total embarked from Africa: 7,484,499
cat("Q2 — US share of Africa-embarked:", round(100 * us_proportion, 2), "%\n")
## Q2 — US share of Africa-embarked: 5.24 %

Q3: Slave imports to the US by decade — bar graph

# People disembarked in the US per decade, in chronological order.
us_by_decade <- us_data %>%
  filter(!is.na(decade)) %>%
  summarise(
    total_disembarked = sum(slaves_disembarked, na.rm = TRUE),
    .by = decade
  ) %>%
  arrange(decade)

print(us_by_decade)
## # A tibble: 26 × 2
##    decade total_disembarked
##     <dbl>             <dbl>
##  1   1610                29
##  2   1620                 3
##  3   1630                73
##  4   1640                81
##  5   1650               563
##  6   1660               459
##  7   1670              1330
##  8   1680              2187
##  9   1690              3928
## 10   1700             11451
## # ℹ 16 more rows

# Bar chart: one bar per decade. Decade is shown as a discrete axis so every
# bar gets its own label.
q3_labels <- labs(
  title    = "Enslaved People Imported to the United States by Decade",
  subtitle = "Combined Trans-Atlantic and Intra-American voyages",
  x        = "Decade",
  y        = "Total Enslaved People Disembarked",
  caption  = "Source: Slave Voyages Database"
)

q3_theme <- theme_minimal(base_size = 12) +
  theme(
    axis.text.x        = element_text(angle = 45, hjust = 1),
    plot.title         = element_text(face = "bold"),
    panel.grid.major.x = element_blank()
  )

plot_q3 <- us_by_decade %>%
  ggplot(aes(x = factor(decade), y = total_disembarked)) +
  geom_col(fill = "#8B1A1A", color = "black", linewidth = 0.3) +
  scale_y_continuous(labels = scales::comma) +
  q3_labels +
  q3_theme

print(plot_q3)

Q4: US imports by decade and state

# Classify place names by the US state they refer to.
#
# port: character vector of place names (port, region, or broad region).
# Returns a character vector of the same length holding a state name,
# "USA (unspecified)" for the generic mainland label, or "Other/Unknown" when
# nothing matches (including missing input). Matching is case-insensitive
# substring matching, and when several rules match the one listed first wins.
map_to_state <- function(port) {
  port_lower <- str_to_lower(port)

  # Ordered rule table: label = pattern. Earlier entries take precedence.
  state_patterns <- c(
    "Louisiana"         = "new orleans|louisian",
    "South Carolina"    = "charleston|south carolina",
    "Georgia"           = "savannah|georgia",
    "Virginia"          = "virginia|norfolk|richmond",
    "Maryland"          = "maryland|baltimore|annapolis",
    "North Carolina"    = "north carolina",
    "Mississippi"       = "mississippi|natchez",
    "Alabama"           = "alabama|mobile",
    "Florida"           = "florida",
    "Texas"             = "texas|galveston",
    "Rhode Island"      = "rhode island|newport|providence",
    "New York"          = "new york",
    "Massachusetts"     = "massachusetts|boston",
    "Pennsylvania"      = "pennsylvania|philadelphia",
    "Connecticut"       = "connecticut",
    "USA (unspecified)" = "mainland north america"
  )

  state <- rep("Other/Unknown", length(port_lower))

  # Apply the rules last-to-first so that an earlier rule overwrites a later
  # one wherever both match.
  for (label in rev(names(state_patterns))) {
    matched <- str_detect(port_lower, state_patterns[[label]])
    state[which(matched)] <- label # which() skips missing values
  }

  state
}

# People disembarked in the US by decade and destination state.
#
# Each location field is classified on its own and the first recognised label
# wins, going from most to least specific (port, then region, then broad
# region). Coalescing the raw fields before classifying would stop at the
# first non-missing value, so a voyage whose port name is not in
# map_to_state()'s keyword list would be labelled "Other/Unknown" even when
# its region or broad region identifies the destination.
unknown_state <- "Other/Unknown" # fallback label returned by map_to_state()

us_by_decade_region <- us_data %>%
  filter(!is.na(decade)) %>%
  mutate(
    state = coalesce(
      na_if(map_to_state(dis_port), unknown_state),
      na_if(map_to_state(dis_region), unknown_state),
      na_if(map_to_state(dis_broad), unknown_state),
      unknown_state
    )
  ) %>%
  group_by(decade, state) %>%
  summarise(
    total_disembarked = sum(slaves_disembarked, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(decade, desc(total_disembarked))

# NOTE: any printed result saved alongside this script predates the per-field
# classification above and must be regenerated.
print(us_by_decade_region)
## # A tibble: 131 × 3
##    decade state         total_disembarked
##     <dbl> <chr>                     <dbl>
##  1   1610 Other/Unknown                29
##  2   1620 Virginia                      3
##  3   1630 New York                     53
##  4   1630 Virginia                     13
##  5   1630 Massachusetts                 7
##  6   1640 New York                     69
##  7   1640 Virginia                     12
##  8   1650 New York                    433
##  9   1650 Virginia                    125
## 10   1650 Maryland                      5
## # ℹ 121 more rows
# The eight destination labels with the largest overall totals.
state_totals <- us_by_decade_region %>%
  group_by(state) %>%
  summarise(grand_total = sum(total_disembarked))

top_states <- state_totals %>%
  slice_max(grand_total, n = 8) %>%
  pull(state)

# One panel per top state, each with its own y-axis so that smaller
# destinations stay readable next to the largest ones.
q4_labels <- labs(
  title    = "US Slave Imports by Decade and Destination State/Region",
  subtitle = "Top 8 destination states shown",
  x        = "Decade",
  y        = "Enslaved People Disembarked",
  caption  = "Source: Slave Voyages Database"
)

q4_theme <- theme_minimal(base_size = 10) +
  theme(
    axis.text.x     = element_text(angle = 60, hjust = 1, size = 7),
    plot.title      = element_text(face = "bold"),
    legend.position = "none"
  )

q4_data <- us_by_decade_region %>%
  filter(state %in% top_states)

plot_q4 <- ggplot(
  q4_data,
  aes(x = factor(decade), y = total_disembarked, fill = state)
) +
  geom_col(color = "black", linewidth = 0.2) +
  facet_wrap(~state, scales = "free_y", ncol = 2) +
  scale_y_continuous(labels = scales::comma) +
  scale_fill_brewer(palette = "Set2") +
  q4_labels +
  q4_theme

print(plot_q4)

Q5: Countries in Trans-Atlantic trade by decade

# Trans-Atlantic voyages and people embarked, per decade and ship nationality.
#
# Besides true missing values, the nationality column contains the literal
# string "0", which shows up as a "country" in the results if left in.
# NOTE(review): "0" looks like the source's placeholder for an unrecorded
# nationality; confirm against the data dictionary. It is excluded here so
# that only named nations are counted and ranked.
unknown_nationality <- "0"

countries_by_decade <- trans %>%
  filter(
    !is.na(decade),
    !is.na(nationality),
    nationality != unknown_nationality
  ) %>%
  group_by(decade, country = nationality) %>%
  summarise(
    n_voyages      = n(),
    total_embarked = sum(slaves_embarked, na.rm = TRUE),
    .groups        = "drop"
  ) %>%
  arrange(decade, desc(n_voyages))

# NOTE: any printed result saved alongside this script predates the exclusion
# of the "0" placeholder and must be regenerated.
print(head(countries_by_decade, 30))
## # A tibble: 30 × 4
##    decade country           n_voyages total_embarked
##     <dbl> <chr>                 <int>          <dbl>
##  1   1510 0                         9            223
##  2   1510 Spain / Uruguay           8            144
##  3   1510 Portugal / Brazil         2            624
##  4   1520 Spain / Uruguay           3           1043
##  5   1520 0                         2            373
##  6   1530 0                         8           1418
##  7   1530 Portugal / Brazil         2            560
##  8   1530 Spain / Uruguay           1            224
##  9   1540 0                        23           7750
## 10   1540 Portugal / Brazil         1            160
## # ℹ 20 more rows
# The seven nationality labels with the most voyages overall.
voyages_per_country <- countries_by_decade %>%
  group_by(country) %>%
  summarise(total = sum(n_voyages))

top_countries <- voyages_per_country %>%
  slice_max(total, n = 7) %>%
  pull(country)

# Stacked area chart of voyage counts over time for those nationalities.
q5_labels <- labs(
  title    = "Trans-Atlantic Slave Trade Voyages by Country and Decade",
  subtitle = "Top 7 participating nations",
  x        = "Decade",
  y        = "Number of Voyages",
  fill     = "Country",
  caption  = "Source: Slave Voyages Database"
)

q5_theme <- theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold"))

q5_data <- countries_by_decade %>%
  filter(country %in% top_countries, !is.na(decade))

plot_q5 <- ggplot(q5_data, aes(x = decade, y = n_voyages, fill = country)) +
  geom_area(alpha = 0.85, color = "white", linewidth = 0.2) +
  scale_fill_brewer(palette = "Dark2") +
  scale_x_continuous(breaks = seq(1500, 1900, by = 50)) +
  scale_y_continuous(labels = scales::comma) +
  q5_labels +
  q5_theme

print(plot_q5)

Summary stats

# Closing summary of the combined data. The nationality count is the number
# of distinct values in the column, so a missing value counts as one.
cat("\n--- Summary ---\n")
## 
## --- Summary ---
cat("Total combined voyages:", nrow(combined), "\n")
## Total combined voyages: 57218
year_span <- range(combined$year, na.rm = TRUE)
cat("Year range:", year_span[1], "-", year_span[2], "\n")
## Year range: 1514 - 1887
total_deaths <- sum(combined$estimated_deaths, na.rm = TRUE)
cat("Total estimated deaths:", scales::comma(total_deaths), "\n")
## Total estimated deaths: 1,013,508
cat("Unique nationalities:", n_distinct(trans$nationality), "\n")
## Unique nationalities: 12
cat("\nScript complete.\n")
## 
## Script complete.