JACOB STOUGHTON AND JAKUB KEPA

Load required libraries

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)


# Column specification shared by both voyage CSVs. `cols_only()` keeps just the
# columns named here and skips every other column in the file. Entries are
# grouped by parsed type and then by topic; readr matches them to the file by
# column name, so the grouping is purely for readability.
col_types_spec <- cols_only(
  # --- Identifiers (integer) -------------------------------------------------
  id = col_integer(),
  voyage_id = col_integer(),

  # --- Dates and durations (double) ------------------------------------------
  voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year = col_double(),
  voyage_dates__voyage_began_sparsedate__month = col_double(),
  voyage_dates__slave_purchase_began_sparsedate__month = col_double(),
  voyage_dates__date_departed_africa_sparsedate__month = col_double(),
  voyage_dates__first_dis_of_slaves_sparsedate__month = col_double(),
  voyage_dates__departure_last_place_of_landing_sparsedate__month = col_double(),
  voyage_dates__voyage_completed_sparsedate__month = col_double(),
  voyage_dates__length_middle_passage_days = col_double(),
  voyage_dates__imp_length_home_to_disembark = col_double(),

  # --- Crew and vessel measurements (double) ---------------------------------
  voyage_crew__crew_voyage_outset = col_double(),
  voyage_crew__crew_first_landing = col_double(),
  voyage_ship__tonnage_mod = col_double(),

  # --- Counts, prices, ratios and percentages (double) -----------------------
  voyage_slaves_numbers__imp_total_num_slaves_embarked = col_double(),
  voyage_slaves_numbers__imp_total_num_slaves_disembarked = col_double(),
  voyage_slaves_numbers__imp_jamaican_cash_price = col_double(),
  voyage_slaves_numbers__imp_mortality_ratio = col_double(),
  voyage_slaves_numbers__percentage_women_among_embarked_slaves = col_double(),
  voyage_slaves_numbers__percentage_men = col_double(),
  voyage_slaves_numbers__percentage_women = col_double(),
  voyage_slaves_numbers__percentage_boy = col_double(),
  voyage_slaves_numbers__percentage_girl = col_double(),
  voyage_slaves_numbers__percentage_child = col_double(),
  voyage_slaves_numbers__percentage_male = col_double(),
  voyage_slaves_numbers__percentage_female = col_double(),

  # --- Voyage outcomes (character) -------------------------------------------
  voyage_outcome__particular_outcome__name = col_character(),
  voyage_outcome__vessel_captured_outcome__name = col_character(),
  voyage_outcome__outcome_owner__name = col_character(),
  voyage_outcome__outcome_slaves__name = col_character(),
  voyage_outcome__resistance__name = col_character(),

  # --- Vessel descriptors (character) ----------------------------------------
  voyage_ship__imputed_nationality__name = col_character(),
  voyage_ship__rig_of_vessel__name = col_character(),

  # --- Itinerary: ports and regions (character) ------------------------------
  voyage_itinerary__imp_port_voyage_begin__name = col_character(),
  voyage_itinerary__imp_region_voyage_begin__name = col_character(),
  voyage_itinerary__imp_broad_region_voyage_begin__name = col_character(),
  voyage_itinerary__imp_principal_place_of_slave_purchase__name = col_character(),
  voyage_itinerary__imp_principal_region_of_slave_purchase__name = col_character(),
  voyage_itinerary__imp_broad_region_of_slave_purchase__name = col_character(),
  voyage_itinerary__imp_principal_port_slave_dis__name = col_character(),
  voyage_itinerary__imp_principal_region_slave_dis__name = col_character(),
  voyage_itinerary__imp_broad_region_slave_dis__name = col_character(),
  voyage_itinerary__place_voyage_ended__name = col_character(),
  voyage_itinerary__region_of_return__name = col_character(),

  # --- Free-text provenance fields (character) -------------------------------
  voyage_sources = col_character(),
  enslavers = col_character()
)

Load the datasets

# Read both voyage tables straight from GitHub. The same column specification
# is applied to each file, so the two tibbles share column types.
trans <- read_csv(
  file = "https://raw.githubusercontent.com/imowerman-prog/data-3210/refs/heads/main/Data/trans-atlantic.csv",
  col_types = col_types_spec
)
intra <- read_csv(
  file = "https://raw.githubusercontent.com/imowerman-prog/data-3210/refs/heads/main/Data/intra-american.csv",
  col_types = col_types_spec
)

Define successful outcomes

# Values of the voyage "particular outcome" field that this analysis counts as
# successful. The processing steps for the trans-Atlantic and intra-American
# tables keep only voyages whose outcome is one of these strings, so each
# entry must match the dataset's wording exactly.
successful_outcomes <- c(
  "Sold slaves in Americas - subsequent fate unknown",
  "Voyage completed as intended",
  "Captured by pirates or privateers - after disembarkation",
  "Condemned - Americas after disembarkation",
  "Detained and condemned in the United States after slaves disembarked",
  "Condemned in the Americas by British after slaves disembarked",
  "Captured by pirates - slaves sold in Americas from another ship",
  "Shipwrecked or destroyed, after disembarkation",
  "Privateer captured slaves at sea and delivered for sale in America",
  "Prisoners of war stole slaves during escape and carried to port of sale",
  "Captives seized from vessel by Spanish officials and sold",
  "Captured by Dutch - after disembarkation",
  "Shipwrecked, slaves salvaged",
  "Captured by slaves, recaptured and landed slaves in the Americas"
)

Process trans-atlantic data

# Tidy the trans-Atlantic table in four steps:
#   1. give the long source columns short working names,
#   2. coerce the year and the two head-count columns,
#   3. keep voyages with a successful outcome that disembarked at least one
#      person,
#   4. derive the decade, death estimate, US flags and source label.
trans <- trans %>%
  rename(
    nationality = voyage_ship__imputed_nationality__name,
    particular_outcome = voyage_outcome__particular_outcome__name,
    year = voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year,
    slaves_embarked = voyage_slaves_numbers__imp_total_num_slaves_embarked,
    slaves_disembarked = voyage_slaves_numbers__imp_total_num_slaves_disembarked,
    embark_broad = voyage_itinerary__imp_broad_region_of_slave_purchase__name,
    embark_port = voyage_itinerary__imp_principal_place_of_slave_purchase__name,
    dis_broad = voyage_itinerary__imp_broad_region_slave_dis__name,
    dis_port = voyage_itinerary__imp_principal_port_slave_dis__name
  ) %>%
  mutate(
    year = as.integer(year),
    across(c(slaves_embarked, slaves_disembarked), as.numeric)
  ) %>%
  filter(
    !is.na(slaves_disembarked) &
      slaves_disembarked > 0 &
      particular_outcome %in% successful_outcomes
  ) %>%
  mutate(
    # Decade the voyage arrived in, e.g. 1787 -> 1780.
    decade = 10 * floor(year / 10),
    # Difference between people embarked and people disembarked.
    estimated_deaths = slaves_embarked - slaves_disembarked,
    # A place counts as "US" when its broad region is "Mainland North America"
    # or its port name contains "New Orleans" (case-insensitive).
    is_us = grepl("New Orleans", dis_port, ignore.case = TRUE) |
      dis_broad == "Mainland North America",
    embark_is_us = grepl("New Orleans", embark_port, ignore.case = TRUE) |
      embark_broad == "Mainland North America",
    source_type = "Trans-Atlantic"
  )

Process intra-american data

# Tidy the intra-American table with the same steps used for the
# trans-Atlantic one: short column names, numeric coercion, successful voyages
# that disembarked at least one person, then the derived analysis columns.
intra <- intra %>%
  rename(
    particular_outcome = voyage_outcome__particular_outcome__name,
    year = voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year,
    slaves_embarked = voyage_slaves_numbers__imp_total_num_slaves_embarked,
    slaves_disembarked = voyage_slaves_numbers__imp_total_num_slaves_disembarked,
    embark_broad = voyage_itinerary__imp_broad_region_of_slave_purchase__name,
    embark_port = voyage_itinerary__imp_principal_place_of_slave_purchase__name,
    dis_broad = voyage_itinerary__imp_broad_region_slave_dis__name,
    dis_port = voyage_itinerary__imp_principal_port_slave_dis__name
  ) %>%
  mutate(
    year = as.integer(year),
    across(c(slaves_embarked, slaves_disembarked), as.numeric)
  ) %>%
  filter(
    !is.na(slaves_disembarked) &
      slaves_disembarked > 0 &
      particular_outcome %in% successful_outcomes
  ) %>%
  mutate(
    # Decade the voyage arrived in, e.g. 1787 -> 1780.
    decade = 10 * floor(year / 10),
    # Difference between people embarked and people disembarked.
    estimated_deaths = slaves_embarked - slaves_disembarked,
    # A place counts as "US" when its broad region is "Mainland North America"
    # or its port name contains "New Orleans" (case-insensitive).
    is_us = grepl("New Orleans", dis_port, ignore.case = TRUE) |
      dis_broad == "Mainland North America",
    embark_is_us = grepl("New Orleans", embark_port, ignore.case = TRUE) |
      embark_broad == "Mainland North America",
    source_type = "Intra-American"
  )

Combine the datasets

# Stack the two processed tables; `source_type` records which one each row
# came from.
combined <- list(trans, intra) %>%
  bind_rows()

Part 2: Net slaves imported to the US (gross imports minus re-exports)

# Gross arrivals: everyone disembarked on a voyage flagged as landing in the US.
gross_us <- combined %>%
  filter(is_us) %>%
  pull(slaves_disembarked) %>%
  sum(na.rm = TRUE)

# Re-exports: people embarked on intra-American voyages that left a US place
# and landed somewhere that is not flagged as US.
re_exports <- combined %>%
  filter(source_type == "Intra-American", embark_is_us, !is_us) %>%
  pull(slaves_embarked) %>%
  sum(na.rm = TRUE)

# Net figure used in the rest of Part 2.
net_us <- gross_us - re_exports

# Print the gross, re-exported and net totals computed above. Each `##` line
# is the output captured when the report was rendered.
cat("Gross slaves imported to the US:", gross_us, "\n")
## Gross slaves imported to the US: 395919
cat("Slaves re-exported from the US:", re_exports, "\n")
## Slaves re-exported from the US: 7652
cat("Net slaves retained in the US:", net_us, "\n")
## Net slaves retained in the US: 388267

Part 2: Proportion of all slaves taken from Africa (using net US)

# Denominator: total people embarked across the processed trans-Atlantic
# voyages (missing counts are ignored).
trans_total_embarked <- sum(trans$slaves_embarked, na.rm = TRUE)

# Net US arrivals as a share of that total.
proportion <- net_us / trans_total_embarked

cat("Proportion of all slaves taken from Africa (net US):", proportion, "\n")
## Proportion of all slaves taken from Africa (net US): 0.05107614

Part 2: visualizations

# Approximate the US state for a vector of disembarkation port labels.
#
# Arguments:
#   port  Character vector of port labels (may contain NA).
#
# Returns:
#   Character vector the same length as `port` holding a state name, or
#   "Other/Unknown" when no rule matches (including NA labels).
#
# Rules are regular expressions matched case-insensitively, and the first
# matching rule wins. Well-known port names are checked first. After those, a
# state name appearing in the label itself is used, so that labels such as
# "Virginia, port unspecified" are assigned to Virginia instead of falling
# through to "Other/Unknown".
approximate_state <- function(port) {
  rules <- c(
    # Port-name rules.
    "New Orleans" = "Louisiana",
    "Charleston" = "South Carolina",
    "Savannah" = "Georgia",
    "Mobile" = "Alabama",
    "Norfolk|Hampton" = "Virginia",
    "Baltimore" = "Maryland",
    "New York" = "New York",
    "Philadelphia" = "Pennsylvania",
    "Boston" = "Massachusetts",
    "Rhode Island|Providence|Newport" = "Rhode Island",
    # State-name rules, consulted only for labels no port rule matched.
    "Louisiana" = "Louisiana",
    "South Carolina" = "South Carolina",
    "Georgia" = "Georgia",
    "Alabama" = "Alabama",
    "Virginia" = "Virginia",
    "Maryland" = "Maryland",
    "Pennsylvania" = "Pennsylvania",
    "Massachusetts" = "Massachusetts"
  )

  state <- rep("Other/Unknown", length(port))
  unassigned <- rep(TRUE, length(port))

  for (pattern in names(rules)) {
    # grepl() returns FALSE for NA labels, so they keep the fallback value.
    hit <- unassigned & grepl(pattern, port, ignore.case = TRUE)
    state[hit] <- rules[[pattern]]
    unassigned[hit] <- FALSE
  }

  state
}


# Bar chart: total people disembarked in the US per decade.
combined %>%
  filter(is_us) %>%
  group_by(decade) %>%
  summarise(total = sum(slaves_disembarked, na.rm = TRUE), .groups = "drop") %>%
  ggplot(mapping = aes(decade, total)) +
  geom_col(fill = "steelblue") +
  scale_y_continuous(labels = scales::comma) +
  ggtitle("Slave Imports to the United States by Decade") +
  xlab("Decade") +
  ylab("Number of Enslaved People Disembarked") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# US arrivals broken down by decade, broad region, port and approximate state.
# Rows are ordered by decade, with the largest totals first within a decade.
us_detailed <- combined %>%
  filter(is_us) %>%
  mutate(state = approximate_state(dis_port)) %>%
  count(
    decade, dis_broad, dis_port, state,
    wt = slaves_disembarked,
    name = "total"
  ) %>%
  arrange(decade, desc(total))

cat("\n=== Slave Imports to US by Decade, Region, Port, and State ===\n")
## 
## === Slave Imports to US by Decade, Region, Port, and State ===
print(us_detailed)
## # A tibble: 272 × 5
##    decade dis_broad              dis_port                   state         total
##     <dbl> <chr>                  <chr>                      <chr>         <dbl>
##  1   1610 Mainland North America Hampton                    Virginia         29
##  2   1620 Mainland North America Virginia, port unspecified Other/Unknown     3
##  3   1630 Mainland North America New York                   New York         53
##  4   1630 Mainland North America Virginia, port unspecified Other/Unknown    13
##  5   1630 Mainland North America Boston                     Massachusetts     7
##  6   1640 Mainland North America New York                   New York         69
##  7   1640 Mainland North America Virginia, port unspecified Other/Unknown    12
##  8   1650 Mainland North America New York                   New York        477
##  9   1650 Mainland North America Virginia, port unspecified Other/Unknown   125
## 10   1650 Mainland North America Maryland, port unspecified Other/Unknown     5
## # ℹ 262 more rows
# Decade totals of US arrivals split by approximate state: one facet per
# state, each with its own y-axis scale. The legend is hidden because the
# facet strips already show the state names.
combined %>%
  filter(is_us) %>%
  mutate(state = approximate_state(dis_port)) %>%
  group_by(decade, state) %>%
  summarise(total = sum(slaves_disembarked, na.rm = TRUE), .groups = "drop") %>%
  ggplot(mapping = aes(decade, total, fill = state)) +
  geom_col() +
  facet_wrap(vars(state), scales = "free_y") +
  scale_y_continuous(labels = scales::comma) +
  ggtitle("Slave Imports to the US by Decade and State") +
  xlab("Decade") +
  ylab("Number of Enslaved People Disembarked") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )

# Number of voyages and total people embarked per decade and ship nationality,
# using the processed trans-Atlantic voyages. Rows are ordered by decade, with
# the largest embarked totals first within a decade.
countries <- trans %>%
  filter(
    !is.na(nationality),
    # Besides NA and "", the nationality column contains the literal "0",
    # which is not a country and otherwise shows up as the largest
    # "participant" in early decades.
    # NOTE(review): "0" is treated here as a missing-value code; confirm
    # against the dataset's documentation.
    !nationality %in% c("", "0"),
    !is.na(decade)
  ) %>%
  group_by(decade, nationality) %>%
  summarise(
    num_voyages = n(),
    total_embarked = sum(slaves_embarked, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(decade, desc(total_embarked))

cat("\n=== Countries Participating in Export from Africa by Decade ===\n")
## 
## === Countries Participating in Export from Africa by Decade ===
print(countries)
## # A tibble: 203 × 4
##    decade nationality       num_voyages total_embarked
##     <dbl> <chr>                   <int>          <dbl>
##  1   1510 0                           9            223
##  2   1510 Spain / Uruguay             8            144
##  3   1520 Spain / Uruguay             3           1043
##  4   1520 0                           2            373
##  5   1530 0                           8           1418
##  6   1530 Portugal / Brazil           2            560
##  7   1530 Spain / Uruguay             1            224
##  8   1540 0                          23           7750
##  9   1540 Portugal / Brazil           1            160
## 10   1550 0                          27           9390
## # ℹ 193 more rows
# The eight nationalities with the largest total embarked across all decades
# (ties at the cut-off are kept, which is `slice_max()`'s default).
top_8 <- countries %>%
  group_by(nationality) %>%
  summarise(n = sum(total_embarked, na.rm = TRUE), .groups = "drop") %>%
  slice_max(order_by = n, n = 8) %>%
  pull(nationality)

# Side-by-side bars of people embarked per decade for the top nationalities.
countries %>%
  filter(nationality %in% top_8) %>%
  ggplot(mapping = aes(decade, total_embarked, fill = nationality)) +
  geom_col(position = "dodge") +
  scale_y_continuous(labels = scales::comma) +
  ggtitle("Enslaved People Embarked from Africa by Country and Decade (Top 8)") +
  xlab("Decade") +
  ylab("Number of Enslaved People Embarked") +
  labs(fill = "Country") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Part 3: Visualizations

Trans-Atlantic vs. Intra-American to US

# Stacked bars of US arrivals per decade, coloured by which dataset the voyage
# came from (trans-Atlantic vs. intra-American).
combined %>%
  filter(is_us) %>%
  group_by(decade, source_type) %>%
  summarise(total = sum(slaves_disembarked, na.rm = TRUE), .groups = "drop") %>%
  ggplot(mapping = aes(decade, total, fill = source_type)) +
  geom_col(position = "stack") +
  scale_y_continuous(labels = scales::comma) +
  scale_fill_manual(
    values = c(
      "Trans-Atlantic" = "#2c7bb6",
      "Intra-American" = "#d7191c"
    )
  ) +
  ggtitle(
    label = "Source of Enslaved People Arriving in the United States",
    subtitle = "Trans-Atlantic voyages from Africa vs. Intra-American transfers"
  ) +
  xlab("Decade") +
  ylab("Number of Enslaved People Disembarked") +
  labs(fill = "Voyage Type") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Part 3: Visualization 2

# Per decade and voyage type: the mean of each voyage's mortality rate
# (estimated deaths / people embarked) and the summed estimated deaths.
# Voyages with no one embarked or a negative death estimate are excluded.
mortality_analysis <- combined %>%
  filter(slaves_embarked > 0 & estimated_deaths >= 0) %>%
  group_by(decade, source_type) %>%
  summarise(
    avg_mortality = mean(estimated_deaths / slaves_embarked, na.rm = TRUE),
    total_deaths = sum(estimated_deaths, na.rm = TRUE),
    .groups = "drop"
  )

# Line chart of the average voyage mortality rate per decade, one line per
# voyage type, with the y-axis shown as a percentage.
ggplot(mortality_analysis, aes(x = decade, y = avg_mortality, color = source_type)) +
  # Line thickness is set with `linewidth`; using `size` for lines has been
  # deprecated since ggplot2 3.4.0 and triggers a warning.
  geom_line(linewidth = 1.2) +
  geom_point(size = 3) +
  scale_y_continuous(labels = scales::percent) +
  labs(title = "Average Mortality Rates During Slave Voyages",
       subtitle = "Comparing Trans-Atlantic and Intra-American journeys",
       x = "Decade", y = "Average Mortality Rate",
       color = "Voyage Type") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Part 3: Summary

Looking at the data and the visualizations, we found several interesting things. The first and most notable comes from the intra-American versus trans-Atlantic numbers in the bar graph. Early on, the trans-Atlantic trade accounts for much higher numbers, reflecting the period when slavery was being established in the US and enslaved people were being transported across the Atlantic. Later, it is clear when the trans-Atlantic trade stopped: the intra-American numbers rise immensely while the trans-Atlantic numbers plummet. Our next realization was how poor travel conditions were and how little they improved over time. It is surprising that the intra-American voyages had a higher mortality rate; our guess is that smaller groups were being transported, so each death during a journey carries more weight in the rate. Still, it is surprising that as many people died on voyages that were likely shorter and less brutal. The visuals help greatly with putting together the historical context, as we can see the slave trade peak around the mid-to-late 1700s. Overall, the assignment put into perspective the scale and the brutal conditions of the slave trade.