library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Column specification shared by both voyage CSVs. Only the columns named
# here are requested, each with an explicit parser, grouped by field family.
col_types_spec <- cols_only(
  # Identifiers
  id = col_integer(),
  voyage_id = col_integer(),

  # Dates and durations
  voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year = col_double(),
  voyage_dates__length_middle_passage_days = col_double(),
  voyage_dates__imp_length_home_to_disembark = col_double(),
  voyage_dates__voyage_began_sparsedate__month = col_double(),
  voyage_dates__slave_purchase_began_sparsedate__month = col_double(),
  voyage_dates__date_departed_africa_sparsedate__month = col_double(),
  voyage_dates__first_dis_of_slaves_sparsedate__month = col_double(),
  voyage_dates__departure_last_place_of_landing_sparsedate__month = col_double(),
  voyage_dates__voyage_completed_sparsedate__month = col_double(),

  # Counts, prices and ratios for the people carried
  voyage_slaves_numbers__imp_total_num_slaves_embarked = col_double(),
  voyage_slaves_numbers__imp_total_num_slaves_disembarked = col_double(),
  voyage_slaves_numbers__imp_jamaican_cash_price = col_double(),
  voyage_slaves_numbers__imp_mortality_ratio = col_double(),
  voyage_slaves_numbers__percentage_women_among_embarked_slaves = col_double(),
  voyage_slaves_numbers__percentage_men = col_double(),
  voyage_slaves_numbers__percentage_women = col_double(),
  voyage_slaves_numbers__percentage_boy = col_double(),
  voyage_slaves_numbers__percentage_girl = col_double(),
  voyage_slaves_numbers__percentage_child = col_double(),
  voyage_slaves_numbers__percentage_male = col_double(),
  voyage_slaves_numbers__percentage_female = col_double(),

  # Crew and ship
  voyage_crew__crew_voyage_outset = col_double(),
  voyage_crew__crew_first_landing = col_double(),
  voyage_ship__tonnage_mod = col_double(),
  voyage_ship__imputed_nationality__name = col_character(),
  voyage_ship__rig_of_vessel__name = col_character(),

  # Outcomes
  voyage_outcome__particular_outcome__name = col_character(),
  voyage_outcome__vessel_captured_outcome__name = col_character(),
  voyage_outcome__outcome_owner__name = col_character(),
  voyage_outcome__outcome_slaves__name = col_character(),
  voyage_outcome__resistance__name = col_character(),

  # Itinerary: departure, purchase, disembarkation and return places
  voyage_itinerary__imp_port_voyage_begin__name = col_character(),
  voyage_itinerary__imp_region_voyage_begin__name = col_character(),
  voyage_itinerary__imp_broad_region_voyage_begin__name = col_character(),
  voyage_itinerary__imp_principal_place_of_slave_purchase__name = col_character(),
  voyage_itinerary__imp_principal_region_of_slave_purchase__name = col_character(),
  voyage_itinerary__imp_broad_region_of_slave_purchase__name = col_character(),
  voyage_itinerary__imp_principal_port_slave_dis__name = col_character(),
  voyage_itinerary__imp_principal_region_slave_dis__name = col_character(),
  voyage_itinerary__imp_broad_region_slave_dis__name = col_character(),
  # Explicitly forced to character
  voyage_itinerary__place_voyage_ended__name = col_character(),
  voyage_itinerary__region_of_return__name = col_character(),

  # Free-text fields
  voyage_sources = col_character(),
  enslavers = col_character()
)
# Load the two voyage extracts from GitHub. Both are parsed with the same
# column specification so their columns share names and types.
trans <- read_csv(
  file = "https://raw.githubusercontent.com/imowerman-prog/data-3210/refs/heads/main/Data/trans-atlantic.csv",
  col_types = col_types_spec
)
intra <- read_csv(
  file = "https://raw.githubusercontent.com/imowerman-prog/data-3210/refs/heads/main/Data/intra-american.csv",
  col_types = col_types_spec
)
library(tidyverse)
# Short analysis names (left) for the verbose source column names (right),
# written as a `new_name = "old_name"` lookup.
rename_cols <- c(
  # Arrival year
  year = "voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year",

  # People carried, mortality and price
  slaves_embarked = "voyage_slaves_numbers__imp_total_num_slaves_embarked",
  slaves_disembarked = "voyage_slaves_numbers__imp_total_num_slaves_disembarked",
  mortality_ratio = "voyage_slaves_numbers__imp_mortality_ratio",
  jamaican_cash_price = "voyage_slaves_numbers__imp_jamaican_cash_price",

  # Durations
  middle_passage_days = "voyage_dates__length_middle_passage_days",
  home_to_disembark_days = "voyage_dates__imp_length_home_to_disembark",

  # Crew and ship
  crew_outset = "voyage_crew__crew_voyage_outset",
  crew_first_landing = "voyage_crew__crew_first_landing",
  ship_tonnage = "voyage_ship__tonnage_mod",

  # Places and outcome
  dis_broad = "voyage_itinerary__imp_broad_region_slave_dis__name",
  dis_port = "voyage_itinerary__place_voyage_ended__name",
  outcome = "voyage_outcome__particular_outcome__name"
)
#' Standardise one voyages extract for analysis
#'
#' Renames the verbose source columns to short analysis names, keeps voyages
#' that disembarked at least one person and whose recorded outcome matches
#' "Slaves disembarked", "Voyage completed" or "Sold slaves" (case-insensitive),
#' and derives `decade`, `estimated_deaths`, `is_us` and `source_type`.
#'
#' @param df Data frame containing every source column named in `col_map`.
#' @param source_label Value stored in the `source_type` column of every row.
#' @param col_map Named character vector of the form `new_name = "old_name"`.
#'   Defaults to the script-level `rename_cols`, so existing two-argument
#'   calls behave as before.
#' @return `df` with renamed columns, filtered rows and the derived columns.
clean_voyage_data <- function(df, source_label, col_map = rename_cols) {
  df %>%
    # `all_of()` is the documented way to rename from a lookup vector and
    # fails loudly if any expected source column is missing.
    rename(all_of(col_map)) %>%
    mutate(
      year = as.integer(year),
      slaves_embarked = as.numeric(slaves_embarked),
      slaves_disembarked = as.numeric(slaves_disembarked)
    ) %>%
    filter(!is.na(slaves_disembarked), slaves_disembarked > 0) %>%
    # Rows with a missing outcome are dropped here too (NA is not TRUE).
    filter(
      str_detect(
        outcome,
        regex("Slaves disembarked|Voyage completed|Sold slaves", ignore_case = TRUE)
      )
    ) %>%
    mutate(
      decade = floor(year / 10) * 10,
      estimated_deaths = slaves_embarked - slaves_disembarked,
      # `%in%` never returns NA, so a missing region yields FALSE instead of
      # an NA flag.
      # NOTE(review): with the default `rename_cols`, `dis_port` is the
      # "place voyage ended" column, which may differ from where people were
      # disembarked — confirm this is the intended US test.
      is_us = dis_broad %in% "Mainland North America" |
        dis_port %in% c(
          "New Orleans", "Charleston", "Savannah",
          "Virginia", "South Carolina", "North Carolina"
        ),
      source_type = source_label
    )
}
# Clean each extract with its own source label, then stack them into one
# table for the combined analyses.
trans_clean <- trans %>% clean_voyage_data("Trans-Atlantic")
intra_clean <- intra %>% clean_voyage_data("Intra-American")
voyages_clean <- list(trans_clean, intra_clean) %>% bind_rows()
# People disembarked on voyages flagged as US arrivals (`is_us`).
us_total <- voyages_clean %>%
  filter(is_us) %>%
  summarise(total_slaves_us = sum(slaves_disembarked, na.rm = TRUE))
us_total
## # A tibble: 1 × 1
## total_slaves_us
## <dbl>
## 1 391971

# People disembarked across every cleaned voyage. The column is named for
# the Americas total (it was previously mislabelled `total_slaves_us`).
# NOTE(review): `voyages_clean` stacks trans-Atlantic and intra-American
# voyages, so people re-shipped within the Americas may be counted on both
# legs — confirm that is intended.
americas_total <- voyages_clean %>%
  summarise(total_slaves_americas = sum(slaves_disembarked, na.rm = TRUE))
americas_total
## # A tibble: 1 × 1
## total_slaves_americas
## <dbl>
## 1 7054559

# People embarked on the cleaned trans-Atlantic voyages only.
africa_total <- trans_clean %>%
  summarise(total_embarked_africa = sum(slaves_embarked, na.rm = TRUE))

# US arrivals as a share of everyone embarked on trans-Atlantic voyages.
proportion_us <- us_total$total_slaves_us / africa_total$total_embarked_africa
proportion_us
## [1] 0.05268403
# Graph slave imports to the US by decade: filter for US voyages, group by decade, sum slaves_disembarked, and plot the result as a bar graph with ggplot2.
# Bar chart of people disembarked on US-flagged voyages, one bar per decade.
voyages_clean %>%
  filter(is_us) %>%
  # Weighted count = per-decade sum of slaves_disembarked (NAs ignored).
  count(decade, wt = slaves_disembarked, name = "total_imports") %>%
  ggplot(mapping = aes(x = decade, y = total_imports)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Slave Imports to the US by Decade",
    x = "Decade",
    y = "Slaves Disembarked"
  ) +
  theme_minimal()
# Imports to the US by decade and region/port/state: filter for US voyages, group by decade, dis_region and dis_port (approximating the state from the port/region, e.g. "New Orleans" -> "Louisiana"), and sum slaves_disembarked. Present the result as a table and a faceted bar plot.
# Lookup from disembarkation place names to (approximate) US states, grouped
# by state. Names are matched exactly, so spelling and capitalisation must
# stay as written.
# NOTE(review): a few assignments look doubtful and should be checked against
# the data source's own geography (e.g. "Osborne's Landing",
# "Lower Cedar Point", "Roanoke", "Beaufort", "Delaware River").
port_to_state <- c(
  # Alabama
  "Mobile" = "Alabama",

  # Connecticut
  "Middletown" = "Connecticut",
  "New London" = "Connecticut",

  # Delaware
  "Delaware River" = "Delaware",

  # Florida
  "Florida, port unspecified" = "Florida",
  "St. Augustine" = "Florida",
  "Pensacola" = "Florida",

  # Georgia
  "Georgia, port unspecified" = "Georgia",
  "Savannah" = "Georgia",
  "Tybee Island" = "Georgia",
  "Brunswick" = "Georgia",
  "Sunbury" = "Georgia",

  # Louisiana
  "Louisiana" = "Louisiana",
  "New Orleans" = "Louisiana",
  "La Balise" = "Louisiana",

  # Maine
  "Kittery Point" = "Maine",

  # Maryland
  "Maryland, port unspecified" = "Maryland",
  "Patuxent" = "Maryland",
  "North Potomac" = "Maryland",
  "Annapolis" = "Maryland",
  "Londontowne" = "Maryland",
  "Oxford" = "Maryland",
  "Nottingham" = "Maryland",
  "Nanjemoy" = "Maryland",
  "Potomac river" = "Maryland",
  "Pocomoke" = "Maryland",
  "Cecil County" = "Maryland",

  # Massachusetts
  "Boston" = "Massachusetts",

  # Mississippi
  "Mississippi" = "Mississippi",
  "Biloxi" = "Mississippi",

  # New Hampshire
  "Portsmouth (NH)" = "New Hampshire",
  "Portsmouth" = "New Hampshire",
  "Piscataqua" = "New Hampshire",

  # New Jersey
  "Eastern New Jersey" = "New Jersey",
  "Perth Amboy" = "New Jersey",

  # New York
  "New York" = "New York",

  # North Carolina
  "North Carolina, port unspecified" = "North Carolina",
  "Osborne's Landing" = "North Carolina",
  "Beaufort" = "North Carolina",
  "New Bern" = "North Carolina",
  "Edenton" = "North Carolina",

  # Pennsylvania
  "Philadelphia" = "Pennsylvania",

  # Rhode Island
  "Rhode Island, port unspecified" = "Rhode Island",
  "Newport" = "Rhode Island",

  # South Carolina
  "South Carolina, place unspecified" = "South Carolina",
  "Charleston" = "South Carolina",
  "Georgetown" = "South Carolina",

  # Texas
  "Galveston" = "Texas",
  "Pass Cavallo" = "Texas",
  "Houston" = "Texas",

  # Virginia
  "Virginia, port unspecified" = "Virginia",
  "York River" = "Virginia",
  "Rappahannock" = "Virginia",
  "Lower James River" = "Virginia",
  "Upper James River" = "Virginia",
  "South Potomac" = "Virginia",
  "Bermuda Hundred" = "Virginia",
  "Hampton" = "Virginia",
  "Roanoke" = "Virginia",
  "Lower Cedar Point" = "Virginia",
  "Accomac" = "Virginia",

  # State unknown: kept as a missing value so it can be filtered later
  "USA, location unspecified" = NA_character_
)
# Principal disembarkation port and head-count for every voyage whose broad
# disembarkation region is mainland North America, with the port mapped to a
# state. Ports absent from `port_to_state` become "Other"; ports mapped to NA
# there stay NA.
# NOTE(review): this selects on `dis_broad` only, not on the `is_us` flag —
# confirm which definition of "US" is intended.
us_ports <- voyages_clean %>%
  filter(dis_broad == "Mainland North America") %>%
  select(voyage_itinerary__imp_principal_port_slave_dis__name, slaves_disembarked) %>%
  mutate(
    state = recode(
      voyage_itinerary__imp_principal_port_slave_dis__name,
      !!!port_to_state,
      .default = "Other"
    )
  )

# Totals per state, largest first. The NA (unknown state) row is kept here so
# that summing this table still covers every selected voyage.
us_states_disembark <- us_ports %>%
  group_by(state) %>%
  summarise(total_disembarked = sum(slaves_disembarked, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(total_disembarked))

# Chart known states only: rows without a state would otherwise show up as
# an "NA" bar and legend entry in a chart titled "by U.S. State".
us_states_disembark %>%
  filter(!is.na(state)) %>%
  ggplot(aes(x = reorder(state, -total_disembarked), y = total_disembarked, fill = state)) +
  geom_col() +
  labs(
    title = "Slave Disembarkation by U.S. State",
    x = "State",
    y = "Total Slaves Disembarked",
    fill = "State"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Distinct voyages and people embarked per decade and ship nationality on the
# cleaned trans-Atlantic voyages, ordered by decade and then by the largest
# embarked totals.
# NOTE(review): the recorded output contains a country value of "0"; it is
# presumably a placeholder for an unknown nationality — confirm.
countries_exporting <- trans_clean %>%
  # Voyages without a year cannot be placed in a decade
  drop_na(year) %>%
  rename(country = voyage_ship__imputed_nationality__name) %>%
  group_by(decade, country) %>%
  summarise(
    voyages = n_distinct(voyage_id),
    total_embarked = sum(slaves_embarked, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(decade, desc(total_embarked))
countries_exporting
## # A tibble: 204 × 4
## decade country voyages total_embarked
## <dbl> <chr> <int> <dbl>
## 1 1510 Portugal / Brazil 2 624
## 2 1510 0 9 223
## 3 1510 Spain / Uruguay 8 144
## 4 1520 Spain / Uruguay 3 1043
## 5 1520 0 2 373
## 6 1530 0 8 1418
## 7 1530 Portugal / Brazil 2 560
## 8 1530 Spain / Uruguay 1 224
## 9 1540 0 23 7750
## 10 1540 Portugal / Brazil 1 160
## # ℹ 194 more rows
# Scalar totals for the pie chart. Both names are assigned plain numbers
# here: everyone disembarked in `voyages_clean`, and the sum of the per-state
# table.
americas_total <- sum(voyages_clean$slaves_disembarked, na.rm = TRUE)
us_total <- sum(us_states_disembark$total_disembarked, na.rm = TRUE)

# Pie slices must not overlap. `americas_total` already contains the US
# voyages, so the non-US slice is the remainder; plotting the full Americas
# total next to the US total would understate the US share.
pie_data <- tibble(
  region = c("Rest of Americas (Disembarked)", "US (Disembarked)"),
  total = c(americas_total - us_total, us_total)
) %>%
  mutate(share_label = sprintf("%.1f%%", 100 * total / sum(total)))

ggplot(pie_data, aes(x = "", y = total, fill = region)) +
  geom_col(width = 1, color = "white") + # one stacked bar segment per slice
  # Print each slice's percentage at the middle of its segment
  geom_text(
    aes(label = share_label),
    position = position_stack(vjust = 0.5),
    color = "white"
  ) +
  coord_polar(theta = "y") + # wrap the stacked bar into a pie
  labs(
    title = "Percent of Slaves to U.S.",
    fill = "Region"
  ) +
  theme_minimal() +
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  )
# People disembarked per broad disembarkation region, largest first. The
# regions "Other", "0", "Europe" and "Africa" are left out.
dis_broad_summary <- voyages_clean %>%
  filter(!(dis_broad %in% c("Other", "0", "Europe", "Africa"))) %>%
  # Weighted count = per-region sum of slaves_disembarked, sorted descending
  count(dis_broad, wt = slaves_disembarked, name = "total_disembarked", sort = TRUE)

# Bar chart of the summary, bars ordered from largest to smallest total.
dis_broad_summary %>%
  ggplot(
    mapping = aes(
      x = reorder(dis_broad, -total_disembarked),
      y = total_disembarked,
      fill = dis_broad
    )
  ) +
  geom_col() +
  labs(
    title = "Slave Disembarkation by Broad Region",
    x = "Broad Region",
    y = "Total Slaves Disembarked",
    fill = "Region"
  ) +
  theme_minimal() +
  # Tilt the region names so long labels stay readable
  theme(axis.text.x = element_text(angle = 45, hjust = 1))