Load required library

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Column specification passed as `col_types` to both read_csv() calls below.
# It is built with cols_only(), so only the columns named here are requested,
# each with an explicit parser: integer ids, double for counts, ratios, prices
# and date parts, character for every `__name` field and the free-text columns.
col_types_spec <- cols_only(
  id = col_integer(),
  voyage_id = col_integer(),
  voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year = col_double(),
  voyage_slaves_numbers__imp_total_num_slaves_disembarked = col_double(),
  voyage_slaves_numbers__imp_total_num_slaves_embarked = col_double(),
  voyage_dates__length_middle_passage_days = col_double(),
  voyage_dates__imp_length_home_to_disembark = col_double(),
  voyage_crew__crew_first_landing = col_double(),
  voyage_crew__crew_voyage_outset = col_double(),
  voyage_ship__tonnage_mod = col_double(),
  voyage_slaves_numbers__imp_jamaican_cash_price = col_double(),
  voyage_slaves_numbers__imp_mortality_ratio = col_double(),
  voyage_slaves_numbers__percentage_women_among_embarked_slaves = col_double(),
  voyage_outcome__vessel_captured_outcome__name = col_character(),
  voyage_ship__imputed_nationality__name = col_character(),
  voyage_itinerary__imp_region_voyage_begin__name = col_character(),
  voyage_ship__rig_of_vessel__name = col_character(),
  voyage_itinerary__place_voyage_ended__name = col_character(),  # Force as character
  voyage_dates__slave_purchase_began_sparsedate__month = col_double(),
  voyage_slaves_numbers__percentage_men = col_double(),
  voyage_dates__voyage_completed_sparsedate__month = col_double(),
  voyage_itinerary__region_of_return__name = col_character(),
  voyage_slaves_numbers__percentage_boy = col_double(),
  voyage_itinerary__imp_principal_region_slave_dis__name = col_character(),
  voyage_itinerary__imp_principal_region_of_slave_purchase__name = col_character(),
  voyage_dates__date_departed_africa_sparsedate__month = col_double(),
  voyage_dates__voyage_began_sparsedate__month = col_double(),
  voyage_itinerary__imp_port_voyage_begin__name = col_character(),
  voyage_dates__first_dis_of_slaves_sparsedate__month = col_double(),
  voyage_itinerary__imp_broad_region_slave_dis__name = col_character(),
  voyage_slaves_numbers__percentage_girl = col_double(),
  voyage_outcome__particular_outcome__name = col_character(),
  voyage_itinerary__imp_principal_port_slave_dis__name = col_character(),
  voyage_slaves_numbers__percentage_child = col_double(),
  voyage_slaves_numbers__percentage_women = col_double(),
  voyage_dates__departure_last_place_of_landing_sparsedate__month = col_double(),
  voyage_outcome__outcome_owner__name = col_character(),
  voyage_outcome__outcome_slaves__name = col_character(),
  voyage_itinerary__imp_principal_place_of_slave_purchase__name = col_character(),
  voyage_outcome__resistance__name = col_character(),
  voyage_slaves_numbers__percentage_male = col_double(),
  voyage_slaves_numbers__percentage_female = col_double(),
  voyage_itinerary__imp_broad_region_voyage_begin__name = col_character(),
  voyage_itinerary__imp_broad_region_of_slave_purchase__name = col_character(),
  voyage_sources = col_character(),
  enslavers = col_character()
)

Load the datasets

# Read the two voyage tables straight from GitHub; both share one column
# specification so their columns line up when they are stacked later.
trans <- read_csv(
  file = "https://raw.githubusercontent.com/imowerman-prog/data-3210/refs/heads/main/Data/trans-atlantic.csv",
  col_types = col_types_spec
)
intra <- read_csv(
  file = "https://raw.githubusercontent.com/imowerman-prog/data-3210/refs/heads/main/Data/intra-american.csv",
  col_types = col_types_spec
)

Clean the data:

Convert year to integer, slave numbers to numeric.

library(tidyverse)

Define a renaming vector for clarity

# Lookup of short analysis names (vector names) to the raw CSV column names
# (vector values). Consumed by clean_voyage_data() when renaming.
rename_cols <- c(
  # Timing and head counts
  year = "voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year",
  slaves_disembarked = "voyage_slaves_numbers__imp_total_num_slaves_disembarked",
  slaves_embarked = "voyage_slaves_numbers__imp_total_num_slaves_embarked",
  middle_passage_days = "voyage_dates__length_middle_passage_days",
  home_to_disembark_days = "voyage_dates__imp_length_home_to_disembark",
  # Crew and vessel
  crew_first_landing = "voyage_crew__crew_first_landing",
  crew_outset = "voyage_crew__crew_voyage_outset",
  ship_tonnage = "voyage_ship__tonnage_mod",
  # Price and mortality
  jamaican_cash_price = "voyage_slaves_numbers__imp_jamaican_cash_price",
  mortality_ratio = "voyage_slaves_numbers__imp_mortality_ratio",
  # Destination and outcome
  dis_broad = "voyage_itinerary__imp_broad_region_slave_dis__name",
  # NOTE(review): dis_port is taken from the "place voyage ended" column, not
  # the principal disembarkation port column - confirm this is intended.
  dis_port = "voyage_itinerary__place_voyage_ended__name",
  outcome = "voyage_outcome__particular_outcome__name"
)

Function to clean a dataset

# Clean one voyage table and add the derived analysis columns.
#
# Args:
#   df:           Data frame holding every raw column named in `rename_cols`.
#   source_label: Value stored in the `source_type` column of every row
#                 (e.g. "Trans-Atlantic").
#
# Returns:
#   `df` with the `rename_cols` columns renamed, restricted to rows with a
#   positive, non-missing `slaves_disembarked` and an `outcome` matching the
#   disembarked/completed/sold pattern, plus four new columns: `decade`,
#   `estimated_deaths`, `is_us` and `source_type`.
clean_voyage_data <- function(df, source_label) {
  stopifnot(is.data.frame(df))

  # Port/colony names in `dis_port` that count as a US landing even when the
  # broad region does not say "Mainland North America".
  us_places <- c(
    "New Orleans", "Charleston", "Savannah",
    "Virginia", "South Carolina", "North Carolina"
  )

  df %>%
    # all_of() renames from the lookup vector and fails loudly on a missing
    # source column.
    rename(all_of(rename_cols)) %>%
    mutate(
      year = as.integer(year),
      slaves_embarked = as.numeric(slaves_embarked),
      slaves_disembarked = as.numeric(slaves_disembarked)
    ) %>%
    filter(
      !is.na(slaves_disembarked),
      slaves_disembarked > 0,
      # Rows with a missing outcome yield NA here and are dropped by filter().
      str_detect(
        outcome,
        regex("Slaves disembarked|Voyage completed|Sold slaves", ignore_case = TRUE)
      )
    ) %>%
    mutate(
      decade = floor(year / 10) * 10,
      estimated_deaths = slaves_embarked - slaves_disembarked,
      # %in% never returns NA, so a missing broad region gives FALSE rather
      # than an NA flag that would silently drop out of counts and negations.
      is_us = dis_broad %in% "Mainland North America" | dis_port %in% us_places,
      source_type = source_label
    )
}

Clean both datasets

# Run each raw table through the shared cleaner, tagging rows with their source.
trans_clean <- trans %>% clean_voyage_data("Trans-Atlantic")
intra_clean <- intra %>% clean_voyage_data("Intra-American")

Combine into one dataset

# Stack the two cleaned tables row-wise (trans-Atlantic rows first).
voyages_clean <- list(trans_clean, intra_clean) %>%
  bind_rows()

Total number of slaves disembarked in the US

# One-row tibble: disembarked head count summed over voyages flagged is_us.
us_total <- summarise(
  filter(voyages_clean, is_us),
  total_slaves_us = sum(slaves_disembarked, na.rm = TRUE)
)
us_total
## # A tibble: 1 × 1
##   total_slaves_us
##             <dbl>
## 1          391971

Total number of slaves disembarked across all destinations

# One-row tibble: disembarked head count summed over every cleaned voyage.
# The column is named for this all-destination total so it cannot be confused
# with the US-only figure held in `us_total$total_slaves_us`.
americas_total <- voyages_clean %>%
  summarise(total_slaves_americas = sum(slaves_disembarked, na.rm = TRUE))
americas_total
## # A tibble: 1 × 1
##   total_slaves_americas
##                   <dbl>
## 1               7054559

Total embarked from Africa (Trans-Atlantic only)

# One-row tibble: embarked head count summed over the trans-Atlantic voyages.
africa_total <- summarise(
  trans_clean,
  total_embarked_africa = sum(slaves_embarked, na.rm = TRUE)
)

Proportion US imports / African total embarked

# Ratio of the US disembarked total to the trans-Atlantic embarked total.
proportion_us <- pull(us_total, total_slaves_us) /
  pull(africa_total, total_embarked_africa)
proportion_us
## [1] 0.05268403

#Graph slave imports by decade to the US: Filter for US, group by decade, sum slaves_disembarked, plot as a bar graph with ggplot2.

library(ggplot2)
# Bar chart of disembarked head count per decade for voyages flagged is_us.
ggplot(
  data = voyages_clean %>%
    filter(is_us) %>%
    group_by(decade) %>%
    summarise(total_imports = sum(slaves_disembarked, na.rm = TRUE)),
  mapping = aes(x = decade, y = total_imports)
) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Slave Imports to the US by Decade",
    x = "Decade",
    y = "Slaves Disembarked"
  ) +
  theme_minimal()

#Imports to the US by decade and region/port/state: Filter for US, group by decade, dis_region, dis_port (approximate state from port/region, e.g., “New Orleans” -> “Louisiana”), sum slaves_disembarked. Use a table and faceted bar plot.

Simple port -> state mapping

# Lookup from disembarkation port label (vector names) to US state (values).
# The one unspecified-location label maps to a missing state.
# NOTE(review): a few assignments look debatable and should be checked against
# a gazetteer, e.g. "Osborne's Landing", "Lower Cedar Point", "Roanoke",
# "Beaufort", "Georgetown", "Delaware River" and "Potomac river".
port_to_state <- c(
  "Georgia, port unspecified" = "Georgia",
  "Maryland, port unspecified" = "Maryland",
  "Virginia, port unspecified" = "Virginia",
  "New York" = "New York",
  "Florida, port unspecified" = "Florida",
  "New Orleans" = "Louisiana",
  "York River" = "Virginia",
  "Patuxent" = "Maryland",
  "USA, location unspecified" = NA_character_, # state unknown
  "Charleston" = "South Carolina",
  "Rappahannock" = "Virginia",
  "South Carolina, place unspecified" = "South Carolina",
  "Lower James River" = "Virginia",
  "Upper James River" = "Virginia",
  "South Potomac" = "Virginia",
  "North Potomac" = "Maryland",
  "Bermuda Hundred" = "Virginia",
  "Annapolis" = "Maryland",
  "Savannah" = "Georgia",
  "Osborne's Landing" = "North Carolina",
  "Newport" = "Rhode Island",
  "Philadelphia" = "Pennsylvania",
  "North Carolina, port unspecified" = "North Carolina",
  "Eastern New Jersey" = "New Jersey",
  "St. Augustine" = "Florida",
  "Mississippi" = "Mississippi",
  "Pensacola" = "Florida",
  "Rhode Island, port unspecified" = "Rhode Island",
  "Portsmouth (NH)" = "New Hampshire",
  "Boston" = "Massachusetts",
  "Hampton" = "Virginia",
  "Perth Amboy" = "New Jersey",
  "Delaware River" = "Delaware",
  "Louisiana" = "Louisiana",
  "Londontowne" = "Maryland",
  "Biloxi" = "Mississippi",
  "La Balise" = "Louisiana",
  "Roanoke" = "Virginia",
  "Tybee Island" = "Georgia",
  "Mobile" = "Alabama",
  "Middletown" = "Connecticut",
  "New London" = "Connecticut",
  "Beaufort" = "North Carolina",
  "Portsmouth" = "New Hampshire",
  "Lower Cedar Point" = "Virginia",
  "Oxford" = "Maryland",
  "Nottingham" = "Maryland",
  "Nanjemoy" = "Maryland",
  "Georgetown" = "South Carolina",
  "Brunswick" = "Georgia",
  "Potomac river" = "Maryland",
  "Accomac" = "Virginia",
  "Sunbury" = "Georgia",
  "Pocomoke" = "Maryland",
  "Cecil County" = "Maryland",
  "New Bern" = "North Carolina",
  "Piscataqua" = "New Hampshire",
  "Kittery Point" = "Maine",
  "Galveston" = "Texas",
  "Pass Cavallo" = "Texas",
  "Houston" = "Texas",
  "Edenton" = "North Carolina"
)

Bar Chart for States

library(dplyr)
# Voyages whose broad disembarkation region is mainland North America, reduced
# to the principal disembarkation port and the head count, with each port
# translated to a state. Ports missing from port_to_state become "Other".
us_ports <- voyages_clean %>%
  filter(dis_broad == "Mainland North America") %>%
  select(voyage_itinerary__imp_principal_port_slave_dis__name, slaves_disembarked) %>%
  mutate(
    state = recode(
      voyage_itinerary__imp_principal_port_slave_dis__name,
      !!!port_to_state,
      .default = "Other"
    )
  )
# Per-state totals, largest first.
us_states_disembark <- us_ports %>%
  group_by(state) %>%
  summarise(
    total_disembarked = sum(slaves_disembarked, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(total_disembarked))
library(ggplot2)
# Bars ordered by descending total; x labels tilted so state names stay legible.
us_states_disembark %>%
  ggplot(
    aes(
      x = reorder(state, -total_disembarked),
      y = total_disembarked,
      fill = state
    )
  ) +
  geom_col() +
  labs(
    title = "Slave Disembarkation by U.S. State",
    x = "State",
    y = "Total Slaves Disembarked",
    fill = "State"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Countries exporting slaves from Africa by decade

# Per decade and ship nationality: number of distinct voyages and total
# embarked head count, for trans-Atlantic voyages with a known year.
countries_exporting <- trans_clean %>%
  filter(!is.na(year)) %>%
  mutate(country = voyage_ship__imputed_nationality__name) %>%
  group_by(decade, country) %>%
  summarise(
    voyages = n_distinct(voyage_id),
    total_embarked = sum(slaves_embarked, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Chronological, with the largest carriers first inside each decade.
  arrange(decade, desc(total_embarked))
countries_exporting
## # A tibble: 204 × 4
##    decade country           voyages total_embarked
##     <dbl> <chr>               <int>          <dbl>
##  1   1510 Portugal / Brazil       2            624
##  2   1510 0                       9            223
##  3   1510 Spain / Uruguay         8            144
##  4   1520 Spain / Uruguay         3           1043
##  5   1520 0                       2            373
##  6   1530 0                       8           1418
##  7   1530 Portugal / Brazil       2            560
##  8   1530 Spain / Uruguay         1            224
##  9   1540 0                      23           7750
## 10   1540 Portugal / Brazil       1            160
## # ℹ 194 more rows
# Pie chart of the US share of all disembarked captives.
#
# NOTE: from here on `americas_total` and `us_total` are plain numbers; the
# one-row tibbles assigned to the same names earlier in the script are replaced.
# `us_total` is now the sum of the per-state table, i.e. voyages whose broad
# region is "Mainland North America".
americas_total <- sum(voyages_clean$slaves_disembarked, na.rm = TRUE)
us_total <- sum(us_states_disembark$total_disembarked, na.rm = TRUE)
library(tibble)
# `americas_total` already contains the US voyages, so the second slice must be
# the remainder. Plotting both totals as slices would count the US twice and
# shrink its apparent share.
pie_data <- tibble(
  region = c("Rest of Americas (Disembarked)", "US (Disembarked)"),
  total = c(americas_total - us_total, us_total),
  # Percent of the whole, printed on each slice so the chart matches its title.
  label = sprintf("%.1f%%", 100 * total / sum(total))
)
library(ggplot2)
ggplot(pie_data, aes(x = "", y = total, fill = region)) +
  geom_col(width = 1, color = "white") +    # one stacked bar segment per slice
  geom_text(aes(label = label), position = position_stack(vjust = 0.5)) +
  coord_polar(theta = "y") +                # wrap the stacked bar into a pie
  labs(
    title = "Percent of Slaves to U.S.",
    fill = "Region"
  ) +
  theme_minimal() +
  theme(axis.title = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank())

library(dplyr)
# Disembarked head count per broad region, largest first, leaving out the
# "Other", "0", "Europe" and "Africa" labels.
dis_broad_summary <- voyages_clean %>%
  filter(!(dis_broad %in% c("Other", "0", "Europe", "Africa"))) %>%
  group_by(dis_broad) %>%
  summarise(
    total_disembarked = sum(slaves_disembarked, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(total_disembarked))
library(ggplot2)
# Bars ordered by descending total; x labels tilted for readability.
dis_broad_summary %>%
  ggplot(
    aes(
      x = reorder(dis_broad, -total_disembarked),
      y = total_disembarked,
      fill = dis_broad
    )
  ) +
  geom_col() +
  labs(
    title = "Slave Disembarkation by Broad Region",
    x = "Broad Region",
    y = "Total Slaves Disembarked",
    fill = "Region"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))