# Load required library
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Column specification passed to read_csv() for the voyage CSVs.
# cols_only() reads just the columns named here; the entries are grouped by
# topic and each name must match the CSV header exactly.
col_types_spec <- cols_only(
  # --- Integer ID columns ---------------------------------------------------
  id = col_integer(),
  voyage_id = col_integer(),

  # --- voyage_dates__*: year, durations and month fields (numeric) ----------
  voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year = col_double(),
  voyage_dates__length_middle_passage_days = col_double(),
  voyage_dates__imp_length_home_to_disembark = col_double(),
  voyage_dates__voyage_began_sparsedate__month = col_double(),
  voyage_dates__slave_purchase_began_sparsedate__month = col_double(),
  voyage_dates__date_departed_africa_sparsedate__month = col_double(),
  voyage_dates__first_dis_of_slaves_sparsedate__month = col_double(),
  voyage_dates__departure_last_place_of_landing_sparsedate__month = col_double(),
  voyage_dates__voyage_completed_sparsedate__month = col_double(),

  # --- voyage_slaves_numbers__*: totals, ratios, price, shares (numeric) ----
  voyage_slaves_numbers__imp_total_num_slaves_embarked = col_double(),
  voyage_slaves_numbers__imp_total_num_slaves_disembarked = col_double(),
  voyage_slaves_numbers__imp_mortality_ratio = col_double(),
  voyage_slaves_numbers__imp_jamaican_cash_price = col_double(),
  voyage_slaves_numbers__percentage_women_among_embarked_slaves = col_double(),
  voyage_slaves_numbers__percentage_men = col_double(),
  voyage_slaves_numbers__percentage_women = col_double(),
  voyage_slaves_numbers__percentage_boy = col_double(),
  voyage_slaves_numbers__percentage_girl = col_double(),
  voyage_slaves_numbers__percentage_child = col_double(),
  voyage_slaves_numbers__percentage_male = col_double(),
  voyage_slaves_numbers__percentage_female = col_double(),

  # --- voyage_crew__* and ship tonnage (numeric) ----------------------------
  voyage_crew__crew_voyage_outset = col_double(),
  voyage_crew__crew_first_landing = col_double(),
  voyage_ship__tonnage_mod = col_double(),

  # --- voyage_ship__*: descriptive names (text) -----------------------------
  voyage_ship__imputed_nationality__name = col_character(),
  voyage_ship__rig_of_vessel__name = col_character(),

  # --- voyage_itinerary__*: place, port and region names (text) -------------
  voyage_itinerary__imp_port_voyage_begin__name = col_character(),
  voyage_itinerary__imp_region_voyage_begin__name = col_character(),
  voyage_itinerary__imp_broad_region_voyage_begin__name = col_character(),
  voyage_itinerary__imp_principal_place_of_slave_purchase__name = col_character(),
  voyage_itinerary__imp_principal_region_of_slave_purchase__name = col_character(),
  voyage_itinerary__imp_broad_region_of_slave_purchase__name = col_character(),
  voyage_itinerary__imp_principal_port_slave_dis__name = col_character(),
  voyage_itinerary__imp_principal_region_slave_dis__name = col_character(),
  voyage_itinerary__imp_broad_region_slave_dis__name = col_character(),
  voyage_itinerary__region_of_return__name = col_character(),
  # Explicitly forced to character rather than left to type guessing.
  voyage_itinerary__place_voyage_ended__name = col_character(),

  # --- voyage_outcome__*: outcome labels (text) -----------------------------
  voyage_outcome__particular_outcome__name = col_character(),
  voyage_outcome__outcome_slaves__name = col_character(),
  voyage_outcome__outcome_owner__name = col_character(),
  voyage_outcome__vessel_captured_outcome__name = col_character(),
  voyage_outcome__resistance__name = col_character(),

  # --- Free-text columns ----------------------------------------------------
  voyage_sources = col_character(),
  enslavers = col_character()
)
# Read both voyage datasets from GitHub, applying the same column
# specification to each so the two tables share column names and types.
trans <- read_csv(
  file = "https://raw.githubusercontent.com/imowerman-prog/data-3210/refs/heads/main/Data/trans-atlantic.csv",
  col_types = col_types_spec
)
intra <- read_csv(
  file = "https://raw.githubusercontent.com/imowerman-prog/data-3210/refs/heads/main/Data/intra-american.csv",
  col_types = col_types_spec
)
#' Standardise a voyages table for analysis
#'
#' Renames the long source column names to short ones, keeps only rows with
#' a non-missing, positive number of people disembarked, and adds derived
#' columns.
#'
#' @param df A data frame containing the eight source columns renamed below.
#' @param source_type Value written to the new `source` column on every row
#'   (for example `"Trans-Atlantic"`).
#' @return The filtered data frame with the renamed columns, `year` converted
#'   to integer, and the added columns:
#'   * `decade`: `year` rounded down to a multiple of 10.
#'   * `estimated_deaths`: `slaves_embarked - slaves_disembarked` (`NA` when
#'     the embarked count is missing).
#'   * `is_us`: `TRUE` when `dis_broad` is "Mainland North America" or the
#'     lower-cased `dis_port` matches one of the listed port names; otherwise
#'     `FALSE`. Never `NA`.
#'   * `source`: `source_type`.
clean_data <- function(df, source_type) {
  df %>%
    rename(
      year = voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year,
      slaves_embarked = voyage_slaves_numbers__imp_total_num_slaves_embarked,
      slaves_disembarked = voyage_slaves_numbers__imp_total_num_slaves_disembarked,
      dis_broad = voyage_itinerary__imp_broad_region_slave_dis__name,
      dis_region = voyage_itinerary__imp_principal_region_slave_dis__name,
      dis_port = voyage_itinerary__imp_principal_port_slave_dis__name,
      outcome = voyage_outcome__outcome_slaves__name,
      nationality = voyage_ship__imputed_nationality__name
    ) %>%
    filter(!is.na(slaves_disembarked), slaves_disembarked > 0) %>%
    mutate(
      year = as.integer(year),
      decade = floor(year / 10) * 10,
      estimated_deaths = slaves_embarked - slaves_disembarked,
      # A missing region or port makes its comparison NA, and `NA | FALSE`
      # is NA. Coalesce each test to FALSE so the flag is strictly
      # TRUE/FALSE and is safe to use as a logical index.
      is_us = coalesce(dis_broad == "Mainland North America", FALSE) |
        coalesce(
          str_detect(
            tolower(dis_port),
            "new orleans|charleston|savannah|norfolk|new york|philadelphia|boston"
          ),
          FALSE
        ),
      source = source_type
    )
}
# Clean each dataset with its source label, then stack the two cleaned
# tables row-wise into a single table.
trans_clean <- trans %>% clean_data("Trans-Atlantic")
intra_clean <- intra %>% clean_data("Intra-American")
combined <- bind_rows(trans_clean, intra_clean)
# Total number disembarked over all rows of the combined table that are
# flagged `is_us`.
us_total <- summarise(
  filter(combined, is_us),
  total = sum(slaves_disembarked, na.rm = TRUE)
)
us_total
## # A tibble: 1 × 1
##    total
##    <dbl>
## 1 439667
# Denominator: everyone embarked on the cleaned trans-Atlantic voyages
# (rows with no positive disembarked count were already dropped).
total_africa <- trans_clean %>%
  pull(slaves_embarked) %>%
  sum(na.rm = TRUE)

# Numerator: everyone disembarked on trans-Atlantic voyages flagged `is_us`.
us_from_africa <- trans_clean %>%
  filter(is_us) %>%
  pull(slaves_disembarked) %>%
  sum(na.rm = TRUE)

# Ratio of US disembarkations to total embarkations.
us_from_africa / total_africa
## [1] 0.02896453
# Number disembarked per decade, restricted to rows flagged `is_us`.
us_by_decade <- combined %>%
  filter(is_us) %>%
  group_by(decade) %>%
  summarise(
    total = sum(slaves_disembarked, na.rm = TRUE),
    .groups = "drop"
  )

# Bar chart of the per-decade totals; decade is shown as a discrete axis.
ggplot(data = us_by_decade, mapping = aes(x = factor(decade), y = total)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "US Slave Imports by Decade",
    x = "Decade",
    y = "Slaves Disembarked"
  )

# Total embarked per decade and ship nationality on the trans-Atlantic
# voyages, listed decade by decade with the largest totals first.
# count() with `wt` sums the weight column (ignoring NAs) within each group.
# NOTE(review): the printed result contains a nationality of "0" --
# presumably a placeholder for an unknown flag; confirm against the data.
countries <- trans_clean %>%
  count(decade, nationality, wt = slaves_embarked, name = "total_embarked") %>%
  arrange(decade, desc(total_embarked))

slice_head(countries, n = 20)
## # A tibble: 20 × 3
##    decade nationality       total_embarked
##     <dbl> <chr>                      <dbl>
##  1   1510 Portugal / Brazil            624
##  2   1510 0                            223
##  3   1510 Spain / Uruguay              144
##  4   1520 Spain / Uruguay             1043
##  5   1520 0                            597
##  6   1530 0                           1777
##  7   1530 Portugal / Brazil            919
##  8   1530 Spain / Uruguay              224
##  9   1540 0                          19385
## 10   1540 Portugal / Brazil            160
## 11   1550 0                          16949
## 12   1550 Portugal / Brazil            718
## 13   1560 0                          11791
## 14   1560 Great Britain               1749
## 15   1560 Spain                        400
## 16   1560 Spain / Uruguay              295
## 17   1560 Portugal / Brazil            176
## 18   1570 0                          29608
## 19   1570 Portugal / Brazil            856
## 20   1570 France                       104