library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Column specification shared by both read_csv() calls below. cols_only()
# keeps just the columns named here and parses each with the stated collector;
# any other column in the CSV is skipped.
col_types_spec <- cols_only(
  id = col_integer(),
  voyage_id = col_integer(),
  voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year = col_double(),
  voyage_slaves_numbers__imp_total_num_slaves_disembarked = col_double(),
  voyage_slaves_numbers__imp_total_num_slaves_embarked = col_double(),
  voyage_dates__length_middle_passage_days = col_double(),
  voyage_dates__imp_length_home_to_disembark = col_double(),
  voyage_crew__crew_first_landing = col_double(),
  voyage_crew__crew_voyage_outset = col_double(),
  voyage_ship__tonnage_mod = col_double(),
  voyage_slaves_numbers__imp_jamaican_cash_price = col_double(),
  voyage_slaves_numbers__imp_mortality_ratio = col_double(),
  voyage_slaves_numbers__percentage_women_among_embarked_slaves = col_double(),
  voyage_outcome__vessel_captured_outcome__name = col_character(),
  voyage_ship__imputed_nationality__name = col_character(),
  voyage_itinerary__imp_region_voyage_begin__name = col_character(),
  voyage_ship__rig_of_vessel__name = col_character(),
  voyage_itinerary__place_voyage_ended__name = col_character(),
  # Remaining columns: the *__month and percentage_* fields are parsed as
  # double; the *__name fields, voyage_sources and enslavers as character.
  voyage_dates__slave_purchase_began_sparsedate__month = col_double(),
  voyage_slaves_numbers__percentage_men = col_double(),
  voyage_dates__voyage_completed_sparsedate__month = col_double(),
  voyage_itinerary__region_of_return__name = col_character(),
  voyage_slaves_numbers__percentage_boy = col_double(),
  voyage_itinerary__imp_principal_region_slave_dis__name = col_character(),
  voyage_itinerary__imp_principal_region_of_slave_purchase__name = col_character(),
  voyage_dates__date_departed_africa_sparsedate__month = col_double(),
  voyage_dates__voyage_began_sparsedate__month = col_double(),
  voyage_itinerary__imp_port_voyage_begin__name = col_character(),
  voyage_dates__first_dis_of_slaves_sparsedate__month = col_double(),
  voyage_itinerary__imp_broad_region_slave_dis__name = col_character(),
  voyage_slaves_numbers__percentage_girl = col_double(),
  voyage_outcome__particular_outcome__name = col_character(),
  voyage_itinerary__imp_principal_port_slave_dis__name = col_character(),
  voyage_slaves_numbers__percentage_child = col_double(),
  voyage_slaves_numbers__percentage_women = col_double(),
  voyage_dates__departure_last_place_of_landing_sparsedate__month = col_double(),
  voyage_outcome__outcome_owner__name = col_character(),
  voyage_outcome__outcome_slaves__name = col_character(),
  voyage_itinerary__imp_principal_place_of_slave_purchase__name = col_character(),
  voyage_outcome__resistance__name = col_character(),
  voyage_slaves_numbers__percentage_male = col_double(),
  voyage_slaves_numbers__percentage_female = col_double(),
  voyage_itinerary__imp_broad_region_voyage_begin__name = col_character(),
  voyage_itinerary__imp_broad_region_of_slave_purchase__name = col_character(),
  voyage_sources = col_character(),
  enslavers = col_character()
)

# Download the two voyage extracts from the course repository. Both files are
# parsed with the same column specification, so they end up with identical
# column names and types.
trans <- read_csv(
  file = "https://raw.githubusercontent.com/imowerman-prog/data-3210/refs/heads/main/Data/trans-atlantic.csv",
  col_types = col_types_spec
)

intra <- read_csv(
  file = "https://raw.githubusercontent.com/imowerman-prog/data-3210/refs/heads/main/Data/intra-american.csv",
  col_types = col_types_spec
)

# Values of the particular-outcome column that this analysis treats as a
# "successful" voyage. The cleaning step below keeps only voyages whose outcome
# appears in this vector; the strings are matched exactly with %in%.
successful_outcomes <- c(
  "Sold slaves in Americas - subsequent fate unknown",
  "Voyage completed as intended",
  "Captured by pirates or privateers - after disembarkation",
  "Condemned - Americas after disembarkation",
  "Detained and condemned in the United States after slaves disembarked",
  "Condemned in the Americas by British after slaves disembarked",
  "Captured by pirates - slaves sold in Americas from another ship",
  "Shipwrecked or destroyed, after disembarkation",
  "Privateer captured slaves at sea and delivered for sale in America",
  "Prisoners of war stole slaves during escape and carried to port of sale",
  "Captives seized from vessel by Spanish officials and sold",
  "Captured by Dutch - after disembarkation",
  "Shipwrecked, slaves salvaged",
  "Captured by slaves, recaptured and landed slaves in the Americas"
)

# Standardise one raw voyages table for analysis.
#
# The trans-Atlantic and intra-American extracts share the same columns and
# need exactly the same cleaning, so the pipeline lives in one function rather
# than being written out twice.
#
# Arguments:
#   voyages      A tibble as returned by read_csv() with col_types_spec.
#   source_label Single string stored in the `source_type` column.
#   outcomes     Character vector of particular-outcome values to keep;
#                defaults to the file-level `successful_outcomes`.
#
# Returns the input with short column names, restricted to voyages that
# disembarked at least one person and ended in one of `outcomes`, plus the
# derived columns decade, estimated_deaths, is_us, embark_is_us, source_type.
prepare_voyages <- function(voyages, source_label,
                            outcomes = successful_outcomes) {
  voyages %>%
    rename(
      year = voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year,
      slaves_embarked = voyage_slaves_numbers__imp_total_num_slaves_embarked,
      slaves_disembarked = voyage_slaves_numbers__imp_total_num_slaves_disembarked,
      particular_outcome = voyage_outcome__particular_outcome__name,
      dis_broad = voyage_itinerary__imp_broad_region_slave_dis__name,
      dis_port = voyage_itinerary__imp_principal_port_slave_dis__name,
      embark_broad = voyage_itinerary__imp_broad_region_of_slave_purchase__name,
      embark_port = voyage_itinerary__imp_principal_place_of_slave_purchase__name
    ) %>%
    mutate(
      year = as.integer(year),
      slaves_embarked = as.numeric(slaves_embarked),
      slaves_disembarked = as.numeric(slaves_disembarked)
    ) %>%
    filter(
      !is.na(slaves_disembarked),
      slaves_disembarked > 0,
      particular_outcome %in% outcomes
    ) %>%
    mutate(
      # 1807 -> 1800, 1810 -> 1810, ...
      decade = floor(year / 10) * 10,
      estimated_deaths = slaves_embarked - slaves_disembarked,
      # `==` propagates NA: when the broad region is missing and the port does
      # not mention New Orleans the flag is NA, not FALSE. Downstream filters
      # on these flags therefore drop such voyages either way.
      is_us = dis_broad == "Mainland North America" |
        grepl("New Orleans", dis_port, ignore.case = TRUE),
      embark_is_us = embark_broad == "Mainland North America" |
        grepl("New Orleans", embark_port, ignore.case = TRUE),
      source_type = source_label
    )
}

trans <- prepare_voyages(trans, "Trans-Atlantic")
intra <- prepare_voyages(intra, "Intra-American")

# Stack both cleaned tables; `source_type` records where each row came from.
combined <- bind_rows(trans, intra)

Question 1: Net slaves imported to the US (gross imports minus re-exports)

# Gross imports: everyone disembarked on a voyage flagged as landing in the
# US, counted across both datasets.
# NOTE(review): intra-American voyages that also embarked in the US are
# included here - confirm whether domestic transfers should count as imports.
gross_us <- combined %>%
  filter(is_us) %>%
  pull(slaves_disembarked) %>%
  sum(na.rm = TRUE)

# Re-exports: people embarked on intra-American voyages that left a US port
# and landed somewhere flagged as non-US.
re_exports <- combined %>%
  filter(source_type == "Intra-American", embark_is_us, !is_us) %>%
  pull(slaves_embarked) %>%
  sum(na.rm = TRUE)

net_us <- gross_us - re_exports

cat("Gross slaves imported to the US:", gross_us, "\n")
## Gross slaves imported to the US: 395919
cat("Slaves re-exported from the US:", re_exports, "\n")
## Slaves re-exported from the US: 7652
cat("Net slaves retained in the US:", net_us, "\n")
## Net slaves retained in the US: 388267

Question 2: Proportion of all slaves taken from Africa (using net US)

# Denominator: everyone embarked on the trans-Atlantic voyages that survived
# the cleaning filters above (voyages with a missing embarked count add 0).
trans_total_embarked <- sum(trans$slaves_embarked, na.rm = TRUE)

# Share of that total represented by the net US figure from Question 1.
proportion <- net_us / trans_total_embarked

cat("Proportion of all slaves taken from Africa (net US):", proportion, "\n")
## Proportion of all slaves taken from Africa (net US): 0.05107614

Question 3: Graph slave imports by decade to the US: Filter for US, group by decade, sum slaves_disembarked, plot as a bar graph with ggplot2.

# One row per decade: total people disembarked on voyages flagged as US.
us_imports_by_decade <- combined %>%
  filter(is_us) %>%
  group_by(decade) %>%
  summarise(total_disembarked = sum(slaves_disembarked, na.rm = TRUE))

# Bar chart of the decade totals; decade is treated as a category so each
# decade gets its own labelled bar.
ggplot(us_imports_by_decade, aes(x = factor(decade), y = total_disembarked)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Slave Imports to the US by Decade",
    x = "Decade",
    y = "Number of Slaves Disembarked"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Question 4: Imports to the US by decade and region/port/state: Filter for US, group by decade, dis_region, dis_port (approximate state from port/region, e.g., “New Orleans” -> “Louisiana”), sum slaves_disembarked. Use a table and faceted bar plot.

# Approximate the US state for each voyage from its disembarkation port name
# and region.
#
# Arguments:
#   port   Character vector of port names (matched case-insensitively).
#   region Character vector of region names, same length as `port`.
#
# Returns a character vector of state names. Rules are tried top to bottom and
# the first match wins, so "New Orleans" always maps to "Louisiana". Rows whose
# region is "Mainland North America" but match no state rule become "Other US";
# everything else becomes "Unknown".
# NOTE(review): ports not named in any pattern (e.g. "Hampton" in the knitted
# table) land in "Other US" - confirm that is intended.
map_port_to_state <- function(port, region) {
  # TRUE where the port name contains `pattern`; an NA port gives FALSE.
  port_has <- function(pattern) grepl(pattern, port, ignore.case = TRUE)

  case_when(
    port_has("New Orleans") ~ "Louisiana",
    port_has("Charleston|South Carolina") | region == "South Carolina" ~
      "South Carolina",
    port_has("Savannah|Georgia") | region == "Georgia" ~
      "Georgia",
    port_has("Virginia") | region == "Virginia" ~
      "Virginia",
    port_has("Maryland|Baltimore") | region == "Maryland" ~
      "Maryland",
    port_has("New York") | region == "New York" ~
      "New York",
    port_has("Philadelphia|Pennsylvania") | region == "Pennsylvania" ~
      "Pennsylvania",
    port_has("Rhode Island|Providence|Newport") | region == "Rhode Island" ~
      "Rhode Island",
    port_has("Boston|Massachusetts") | region == "Massachusetts" ~
      "Massachusetts",
    port_has("North Carolina") | region == "North Carolina" ~
      "North Carolina",
    region == "Mainland North America" ~ "Other US",
    .default = "Unknown"
  )
}

# Question 4 data: US arrivals totalled per decade, broad region, port and
# approximated state. The state is derived inside group_by() so it becomes a
# grouping key alongside the columns it is computed from.
us_imports_by_decade_region <- combined %>%
  filter(is_us) %>%
  group_by(
    decade,
    dis_broad,
    dis_port,
    state = map_port_to_state(dis_port, dis_broad)
  ) %>%
  summarise(
    total_disembarked = sum(slaves_disembarked, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(decade, state, dis_broad, dis_port)

# Show the table (tibble printing truncates to the first 10 rows)
print("Table of Slave Imports to the US by Decade, Region, Port, and State:")
## [1] "Table of Slave Imports to the US by Decade, Region, Port, and State:"
print(us_imports_by_decade_region)
## # A tibble: 272 × 5
##    decade dis_broad              dis_port                state total_disembarked
##     <dbl> <chr>                  <chr>                   <chr>             <dbl>
##  1   1610 Mainland North America Hampton                 Othe…                29
##  2   1620 Mainland North America Virginia, port unspeci… Virg…                 3
##  3   1630 Mainland North America Boston                  Mass…                 7
##  4   1630 Mainland North America New York                New …                53
##  5   1630 Mainland North America Virginia, port unspeci… Virg…                13
##  6   1640 Mainland North America New York                New …                69
##  7   1640 Mainland North America Virginia, port unspeci… Virg…                12
##  8   1650 Mainland North America Maryland, port unspeci… Mary…                 5
##  9   1650 Mainland North America New York                New …               477
## 10   1650 Mainland North America Virginia, port unspeci… Virg…               125
## # ℹ 262 more rows
# One panel per state; bars for several ports in the same decade stack, and
# each panel gets its own y-axis range.
ggplot(
  us_imports_by_decade_region,
  aes(x = factor(decade), y = total_disembarked)
) +
  geom_col(fill = "steelblue") +
  facet_wrap(~ state, scales = "free_y") +
  labs(
    title = "Slave Imports to the US by Decade and State",
    x = "Decade",
    y = "Number of Slaves Disembarked"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    strip.text = element_text(size = 10, face = "bold"),
    panel.spacing = unit(1, "lines")
  )

Question 5: Countries participating in export from Africa, by decade: From Trans-Atlantic dataset, group by decade and voyage_ship__imputed_nationality__name (as “country”), count unique voyages or sum slaves_embarked. Display in a table.

# Question 5 data: for each decade and ship nationality in the trans-Atlantic
# table, the number of distinct voyages and the total people embarked.
trans_exports_by_decade_country <- trans %>%
  rename(country = voyage_ship__imputed_nationality__name) %>%
  group_by(decade, country) %>%
  summarise(
    unique_voyages = n_distinct(voyage_id),
    total_slaves_embarked = sum(slaves_embarked, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(decade, country)

# Show the table (tibble printing truncates to the first 10 rows)
print("Table of Countries Participating in Slave Exports from Africa by Decade:")
## [1] "Table of Countries Participating in Slave Exports from Africa by Decade:"
print(trans_exports_by_decade_country)
## # A tibble: 203 × 4
##    decade country           unique_voyages total_slaves_embarked
##     <dbl> <chr>                      <int>                 <dbl>
##  1   1510 0                              9                   223
##  2   1510 Spain / Uruguay                8                   144
##  3   1520 0                              2                   373
##  4   1520 Spain / Uruguay                3                  1043
##  5   1530 0                              8                  1418
##  6   1530 Portugal / Brazil              2                   560
##  7   1530 Spain / Uruguay                1                   224
##  8   1540 0                             23                  7750
##  9   1540 Portugal / Brazil              1                   160
## 10   1550 0                             27                  9390
## # ℹ 193 more rows

Question 6: Create at least 2 plots (e.g., bar for US imports by decade, faceted bar for US by decade/region).

# Three-state variant of the port/region lookup used for the Question 6 plots.
# This definition replaces the earlier map_port_to_state() from this point on.
#
# Arguments:
#   port   Character vector of port names (matched case-insensitively).
#   region Character vector of region names, same length as `port`.
#
# Returns "South Carolina", "Virginia" or "Rhode Island" for the first rule
# that matches, and "Other" for every remaining row (callers filter those out).
map_port_to_state <- function(port, region) {
  # TRUE where the port name contains `pattern`; an NA port gives FALSE.
  port_has <- function(pattern) grepl(pattern, port, ignore.case = TRUE)

  case_when(
    port_has("Charleston|South Carolina") | region == "South Carolina" ~
      "South Carolina",
    port_has("Virginia|Norfolk|Richmond") | region == "Virginia" ~
      "Virginia",
    port_has("Rhode Island|Providence|Newport") | region == "Rhode Island" ~
      "Rhode Island",
    .default = "Other"
  )
}

# US arrivals restricted to the three focus states. Rows without a decade or a
# disembarked count are removed first, then each row is labelled with its
# state and everything labelled "Other" is dropped.
us_data <- combined %>%
  filter(is_us) %>%
  drop_na(decade, slaves_disembarked) %>%
  mutate(state = map_port_to_state(dis_port, dis_broad)) %>%
  filter(state %in% c("Rhode Island", "South Carolina", "Virginia"))

# Diagnostics: which states and decades actually occur after filtering
print("Available States in Filtered US Data (Rhode Island, South Carolina, Virginia):")
## [1] "Available States in Filtered US Data (Rhode Island, South Carolina, Virginia):"
print(unique(us_data$state))
## [1] "Virginia"       "South Carolina" "Rhode Island"
print("Available Decades in Filtered US Data:")
## [1] "Available Decades in Filtered US Data:"
print(unique(us_data$decade))
##  [1] 1680 1670 1690 1700 1710 1720 1730 1740 1750 1760 1770 1780 1800 1790 1650
## [16] 1640 1620 1630 1660 1850
# Plot 1 data: total people disembarked in the US per decade, all US rows.
# NOTE(review): complete(decade) only expands over decade values already in
# the data, so it cannot add a decade that has no rows - confirm intent.
us_imports_by_decade <- combined %>%
  filter(is_us) %>%
  drop_na(decade, slaves_disembarked) %>%
  group_by(decade) %>%
  summarise(
    total_disembarked = sum(slaves_disembarked, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  complete(decade, fill = list(total_disembarked = 0))

# Show the data behind Plot 1
print("Data for Plot 1 (Total US Imports by Decade):")
## [1] "Data for Plot 1 (Total US Imports by Decade):"
print(us_imports_by_decade)
## # A tibble: 26 × 2
##    decade total_disembarked
##     <dbl>             <dbl>
##  1   1610                29
##  2   1620                 3
##  3   1630                73
##  4   1640                81
##  5   1650               607
##  6   1660               459
##  7   1670              1330
##  8   1680              2317
##  9   1690              3936
## 10   1700             11284
## # ℹ 16 more rows
# Fail loudly rather than draw an empty chart
if (nrow(us_imports_by_decade) == 0) {
  stop("No data available for Plot 1. Check the 'is_us' filter or 'decade' column.")
}

# Every decade from the earliest to the latest in the combined data; used as
# the factor levels for the x-axis in both plots.
all_decades <- seq(
  from = min(combined$decade, na.rm = TRUE),
  to = max(combined$decade, na.rm = TRUE),
  by = 10
)

plot1 <- ggplot(
  us_imports_by_decade,
  aes(x = factor(decade, levels = all_decades), y = total_disembarked)
) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Total Slave Imports to the US by Decade",
    x = "Decade",
    y = "Number of Slaves Disembarked"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    axis.title = element_text(size = 12),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
    axis.text.y = element_text(size = 10)
  )

# Plot 2 data: people disembarked per decade in each of the three focus
# states. complete() adds a zero row for every state in each decade that
# occurs in us_data, so all three states have a value for those decades.
us_imports_by_decade_state <- us_data %>%
  group_by(decade, state) %>%
  summarise(
    total_disembarked = sum(slaves_disembarked, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  complete(
    decade,
    state = c("Rhode Island", "South Carolina", "Virginia"),
    fill = list(total_disembarked = 0)
  )

# Show the data behind Plot 2
print("Data for Plot 2 (Imports by Decade for Rhode Island, South Carolina, Virginia):")
## [1] "Data for Plot 2 (Imports by Decade for Rhode Island, South Carolina, Virginia):"
print(us_imports_by_decade_state)
## # A tibble: 60 × 3
##    decade state          total_disembarked
##     <dbl> <chr>                      <dbl>
##  1   1620 Rhode Island                   0
##  2   1620 South Carolina                 0
##  3   1620 Virginia                       3
##  4   1630 Rhode Island                   0
##  5   1630 South Carolina                 0
##  6   1630 Virginia                      13
##  7   1640 Rhode Island                   0
##  8   1640 South Carolina                 0
##  9   1640 Virginia                      12
## 10   1650 Rhode Island                   0
## # ℹ 50 more rows
# Fail loudly rather than draw an empty chart
if (nrow(us_imports_by_decade_state) == 0) {
  stop("No data available for Plot 2. Check the 'state' filter or data for Rhode Island, South Carolina, Virginia.")
}

# Decades to draw on the x-axis: every decade between the first and last one
# present in the plotted data. Discrete scales in ggplot2 drop unused factor
# levels by default, so setting factor levels alone does not keep empty
# decades on the axis; without explicit limits the decades that have no rows
# disappear and non-adjacent decades are drawn next to each other.
plot2_decades <- seq(
  min(us_imports_by_decade_state$decade),
  max(us_imports_by_decade_state$decade),
  by = 10
)

# Faceted bar chart: one panel per state, each with its own y-axis range and a
# shared, gap-free decade axis.
plot2 <- ggplot(data = us_imports_by_decade_state,
                aes(x = factor(decade, levels = all_decades), y = total_disembarked)) +
  geom_col(fill = "darkgreen") +
  facet_wrap(~ state, scales = "free_y", ncol = 3) +
  scale_x_discrete(limits = as.character(plot2_decades)) +
  labs(
    title = "Slave Imports to Rhode Island, South Carolina, and Virginia by Decade",
    x = "Decade",
    y = "Number of Slaves Disembarked"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
    axis.text.y = element_text(size = 8),
    axis.title = element_text(size = 10),
    strip.text = element_text(size = 10, face = "bold"),
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    panel.spacing = unit(1, "lines")
  )

# Display the plots. Both were assigned to variables above rather than
# evaluated at top level, so they are rendered with explicit print() calls.
print(plot1)

print(plot2)

Question 7: Write a summary of what you have uncovered from this assignment.

The analysis gives a clear look at how the slave trade affected the United States. It shows that the large majority of enslaved people brought to the U.S. were not re-exported: only about 7,650 of the roughly 396,000 people disembarked there were shipped out again. The assignment also showed that the net number retained in the U.S. is only about 5 percent of all the people embarked from Africa in the trans-Atlantic data, which was a very surprising find. After reflecting on the data from Question 3, I learned that the U.S. received the most imports in the decade beginning in 1800. Lastly, I learned that enslaved people were imported to South Carolina and Rhode Island after they were first imported to Virginia. After completing this assignment I have a much clearer picture of how the slave trade affected the United States.