library(tidyverse)
# Column specification shared by both voyage downloads. `cols_only()` reads
# just the columns named here. Types are given with readr's one-letter codes:
# "i" = integer, "d" = double, "c" = character.
col_types_spec <- cols_only(
  id = "i",
  voyage_id = "i",
  voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year = "d",
  voyage_slaves_numbers__imp_total_num_slaves_disembarked = "d",
  voyage_slaves_numbers__imp_total_num_slaves_embarked = "d",
  voyage_dates__length_middle_passage_days = "d",
  voyage_dates__imp_length_home_to_disembark = "d",
  voyage_crew__crew_first_landing = "d",
  voyage_crew__crew_voyage_outset = "d",
  voyage_ship__tonnage_mod = "d",
  voyage_slaves_numbers__imp_jamaican_cash_price = "d",
  voyage_slaves_numbers__imp_mortality_ratio = "d",
  voyage_slaves_numbers__percentage_women_among_embarked_slaves = "d",
  voyage_outcome__vessel_captured_outcome__name = "c",
  voyage_ship__imputed_nationality__name = "c",
  voyage_itinerary__imp_region_voyage_begin__name = "c",
  voyage_ship__rig_of_vessel__name = "c",
  voyage_itinerary__place_voyage_ended__name = "c",
  voyage_dates__slave_purchase_began_sparsedate__month = "d",
  voyage_slaves_numbers__percentage_men = "d",
  voyage_dates__voyage_completed_sparsedate__month = "d",
  voyage_itinerary__region_of_return__name = "c",
  voyage_slaves_numbers__percentage_boy = "d",
  voyage_itinerary__imp_principal_region_slave_dis__name = "c",
  voyage_itinerary__imp_principal_region_of_slave_purchase__name = "c",
  voyage_dates__date_departed_africa_sparsedate__month = "d",
  voyage_dates__voyage_began_sparsedate__month = "d",
  voyage_itinerary__imp_port_voyage_begin__name = "c",
  voyage_dates__first_dis_of_slaves_sparsedate__month = "d",
  voyage_itinerary__imp_broad_region_slave_dis__name = "c",
  voyage_slaves_numbers__percentage_girl = "d",
  voyage_outcome__particular_outcome__name = "c",
  voyage_itinerary__imp_principal_port_slave_dis__name = "c",
  voyage_slaves_numbers__percentage_child = "d",
  voyage_slaves_numbers__percentage_women = "d",
  voyage_dates__departure_last_place_of_landing_sparsedate__month = "d",
  voyage_outcome__outcome_owner__name = "c",
  voyage_outcome__outcome_slaves__name = "c",
  voyage_itinerary__imp_principal_place_of_slave_purchase__name = "c",
  voyage_outcome__resistance__name = "c",
  voyage_slaves_numbers__percentage_male = "d",
  voyage_slaves_numbers__percentage_female = "d",
  voyage_itinerary__imp_broad_region_voyage_begin__name = "c",
  voyage_itinerary__imp_broad_region_of_slave_purchase__name = "c",
  voyage_sources = "c",
  enslavers = "c"
)
# Locations of the two raw CSV files, keyed by the dataset they hold.
voyage_urls <- c(
  trans = "https://raw.githubusercontent.com/imowerman-prog/data-3210/refs/heads/main/Data/trans-atlantic.csv",
  intra = "https://raw.githubusercontent.com/imowerman-prog/data-3210/refs/heads/main/Data/intra-american.csv"
)

# Both files are parsed with the same column specification so the resulting
# tables share column names and types.
trans <- read_csv(voyage_urls[["trans"]], col_types = col_types_spec)
intra <- read_csv(voyage_urls[["intra"]], col_types = col_types_spec)
# Lookup of short analysis names (left) to the original CSV headers (right).
# The same mapping is applied to both datasets; columns not listed keep their
# original names.
short_names <- c(
  year = "voyage_dates__imp_arrival_at_port_of_dis_sparsedate__year",
  slaves_embarked = "voyage_slaves_numbers__imp_total_num_slaves_embarked",
  slaves_disembarked = "voyage_slaves_numbers__imp_total_num_slaves_disembarked",
  outcome_slaves = "voyage_outcome__outcome_slaves__name",
  outcome_particular = "voyage_outcome__particular_outcome__name",
  dis_broad = "voyage_itinerary__imp_broad_region_slave_dis__name",
  dis_region = "voyage_itinerary__imp_principal_region_slave_dis__name",
  dis_port = "voyage_itinerary__imp_principal_port_slave_dis__name",
  country = "voyage_ship__imputed_nationality__name"
)

trans_renamed <- rename(trans, all_of(short_names))
intra_renamed <- rename(intra, all_of(short_names))
# NOTE(review): `success_values` is not referenced anywhere in the visible
# script; presumably these are outcome labels meant for a later filter --
# confirm before removing.
success_values <- c("Slaves disembarked", "Voyage completed", "Sold slaves")

# Coerce the key columns to the types used downstream and keep only voyages
# with a recorded, positive number of people disembarked.
#
# The original script built `trans_clean` / `intra_clean` twice (a step-wise
# version immediately overwritten by an identical piped version); this helper
# performs the same cleaning exactly once per dataset.
#
# voyages: a renamed voyage table with `year`, `slaves_embarked` and
#          `slaves_disembarked` columns.
# Returns: the same table with coerced columns and the filtered rows.
clean_voyages <- function(voyages) {
  voyages %>%
    mutate(
      year = as.integer(year),
      slaves_embarked = as.numeric(slaves_embarked),
      slaves_disembarked = as.numeric(slaves_disembarked)
    ) %>%
    filter(!is.na(slaves_disembarked), slaves_disembarked > 0)
}

trans_clean <- clean_voyages(trans_renamed)
intra_clean <- clean_voyages(intra_renamed)
# Values that mark a disembarkation as being in the United States. A voyage is
# flagged when ANY of its broad region, region, or port matches one of these.
# They are defined once so the two datasets cannot drift apart.
us_broad_regions <- c("Mainland North America", "North America")
us_regions <- c("New York", "Massachusetts", "Virginia", "Louisiana",
                "South Carolina", "North Carolina", "Georgia",
                "Florida", "Maryland", "Texas")
us_ports <- c("New Orleans", "New York", "Boston", "Charleston",
              "Savannah", "Norfolk", "Mobile", "Annapolis",
              "Baltimore", "Newport")

# Add the derived analysis columns to a cleaned voyage table.
#
# voyages:      cleaned voyage table (needs `year`, `slaves_embarked`,
#               `slaves_disembarked`, `dis_broad`, `dis_region`, `dis_port`).
# source_label: string stored in `source_type` to identify the dataset.
# Returns: `voyages` plus
#   decade           - `year` rounded down to a multiple of 10 (stays double),
#   estimated_deaths - embarked minus disembarked (NA if either is missing),
#   is_us            - TRUE/FALSE; `%in%` never yields NA, so neither does this,
#   source_type      - `source_label`.
add_voyage_features <- function(voyages, source_label) {
  voyages %>%
    mutate(
      decade = floor(year / 10) * 10,
      estimated_deaths = slaves_embarked - slaves_disembarked,
      is_us = dis_broad %in% us_broad_regions |
        dis_region %in% us_regions |
        dis_port %in% us_ports,
      source_type = source_label
    )
}

trans_final <- add_voyage_features(trans_clean, "Trans-Atlantic")
intra_final <- add_voyage_features(intra_clean, "Intra-American")
# Stack the trans-Atlantic and intra-American voyage tables into one.
combined_data <- bind_rows(trans_final, intra_final)

# Total people disembarked across all voyages flagged as U.S. arrivals.
us_total <- summarise(
  filter(combined_data, is_us),
  total_slaves_imported_to_us = sum(slaves_disembarked, na.rm = TRUE)
)
us_total
## # A tibble: 1 × 1
## total_slaves_imported_to_us
## <dbl>
## 1 439667
# Scalar totals: people disembarked on U.S.-flagged voyages (both datasets)
# and people embarked on trans-Atlantic voyages.
us_total_value <- sum(
  combined_data$slaves_disembarked[combined_data$is_us],
  na.rm = TRUE
)
africa_total_value <- sum(trans_final$slaves_embarked, na.rm = TRUE)

# One-row summary: the two totals and their ratio.
proportion_africa_to_us <- tibble(
  us_total = us_total_value,
  total_taken_from_africa = africa_total_value,
  proportion = us_total_value / africa_total_value
)
proportion_africa_to_us
## # A tibble: 1 × 3
## us_total total_taken_from_africa proportion
## <dbl> <dbl> <dbl>
## 1 439667 10575764 0.0416
# People disembarked on U.S.-flagged voyages, totalled per decade.
us_by_decade <- summarise(
  group_by(filter(combined_data, is_us), decade),
  total_imported = sum(slaves_disembarked, na.rm = TRUE),
  .groups = "drop"
)

# Bar chart of the per-decade totals; decade is treated as a discrete axis and
# the tick labels are tilted so they do not overlap.
us_by_decade %>%
  ggplot(aes(factor(decade), total_imported)) +
  geom_col() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Slave Imports to the United States by Decade",
    x = "Decade",
    y = "Total Slaves Disembarked"
  )
# Case-insensitive "does `text` contain `pattern`?" test. Missing text is
# treated as an empty string, so the result is never NA.
contains_ci <- function(text, pattern) {
  str_detect(coalesce(text, ""), regex(pattern, ignore_case = TRUE))
}

# U.S. arrivals totalled by decade, region and port, with an approximate state
# label. `case_when()` uses the first rule that matches, so the port rules
# (listed first) take precedence over the region rules; anything unmatched is
# labelled "Other/Unknown".
us_region_port_state <- combined_data %>%
  filter(is_us & !is.na(decade)) %>%
  mutate(
    state_approx = case_when(
      # Port-based rules
      contains_ci(dis_port, "new orleans") ~ "Louisiana",
      contains_ci(dis_port, "new york") ~ "New York",
      contains_ci(dis_port, "boston") ~ "Massachusetts",
      contains_ci(dis_port, "charleston") ~ "South Carolina",
      contains_ci(dis_port, "savannah") ~ "Georgia",
      contains_ci(dis_port, "norfolk") ~ "Virginia",
      contains_ci(dis_port, "mobile") ~ "Alabama",
      contains_ci(dis_port, "annapolis") ~ "Maryland",
      contains_ci(dis_port, "baltimore") ~ "Maryland",
      contains_ci(dis_port, "newport") ~ "Rhode Island",
      # Region-based fallbacks
      contains_ci(dis_region, "louisiana") ~ "Louisiana",
      contains_ci(dis_region, "new york") ~ "New York",
      contains_ci(dis_region, "massachusetts") ~ "Massachusetts",
      contains_ci(dis_region, "south carolina") ~ "South Carolina",
      contains_ci(dis_region, "north carolina") ~ "North Carolina",
      contains_ci(dis_region, "georgia") ~ "Georgia",
      contains_ci(dis_region, "virginia") ~ "Virginia",
      contains_ci(dis_region, "florida") ~ "Florida",
      contains_ci(dis_region, "maryland") ~ "Maryland",
      contains_ci(dis_region, "texas") ~ "Texas",
      TRUE ~ "Other/Unknown"
    )
  ) %>%
  group_by(decade, dis_region, dis_port, state_approx) %>%
  summarise(
    total_imported = sum(slaves_disembarked, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Earliest decade first; within a decade, largest total first.
  arrange(decade, desc(total_imported))

head(us_region_port_state, 25)
## # A tibble: 25 × 5
## decade dis_region dis_port state_approx total_imported
## <dbl> <chr> <chr> <chr> <dbl>
## 1 1610 Virginia Hampton Virginia 29
## 2 1620 Virginia Virginia, port unspecified Virginia 3
## 3 1630 New York New York New York 53
## 4 1630 Virginia Virginia, port unspecified Virginia 13
## 5 1630 Massachusetts Boston Massachusetts 7
## 6 1640 Virginia Virginia, port unspecified Virginia 435
## 7 1640 New York New York New York 69
## 8 1650 New York New York New York 477
## 9 1650 Virginia Virginia, port unspecified Virginia 469
## 10 1650 Maryland Maryland, port unspecified Maryland 5
## # ℹ 15 more rows
# Trans-Atlantic activity per decade and ship nationality: number of distinct
# voyages and total people embarked. Rows with a missing decade, or a missing
# or empty nationality, are excluded before grouping.
countries_by_decade <- trans_final %>%
  filter(!(is.na(decade) | is.na(country) | country == "")) %>%
  group_by(decade, country) %>%
  summarise(
    voyages = n_distinct(voyage_id),
    total_embarked = sum(slaves_embarked, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Earliest decade first; within a decade, largest embarkation total first.
  arrange(decade, desc(total_embarked))
# Per-decade U.S. totals as a bar chart with steel-blue bars; decade is a
# discrete axis with tilted tick labels.
us_by_decade %>%
  ggplot(aes(factor(decade), total_imported)) +
  geom_col(fill = "steelblue") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Slave Imports to the United States by Decade",
    x = "Decade",
    y = "Total Slaves Disembarked"
  )
# The 20 ports with the largest overall totals.
#
# `us_region_port_state` holds one row per decade/region/port and is sorted by
# decade, so taking its first 20 rows would plot the earliest decades rather
# than the busiest ports. Collapse to one row per port first, then keep the 20
# largest totals. Rows without a port name are dropped because they cannot be
# attributed to a port.
top_us_ports <- us_region_port_state %>%
  filter(!is.na(dis_port)) %>%
  group_by(dis_port) %>%
  summarise(
    total_imported = sum(total_imported, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # with_ties = FALSE caps the result at exactly 20 rows.
  slice_max(total_imported, n = 20, with_ties = FALSE)

# Horizontal bar chart, ports ordered by total so the largest is at the top.
ggplot(
  top_us_ports,
  aes(x = reorder(dis_port, total_imported), y = total_imported)
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top U.S. Ports by Slave Imports",
    x = "Port",
    y = "Total Slaves Disembarked"
  ) +
  theme_minimal()
In this assignment, I cleaned and analyzed data from both the Trans-Atlantic and Intra-American slave trade datasets using R and the tidyverse package. I renamed long variable names, converted key variables to numeric types, filtered out voyages with a missing or zero number of people disembarked, and created new variables: the decade of arrival, estimated deaths during each voyage (people embarked minus people disembarked), and an indicator for whether enslaved people were disembarked in the United States. After cleaning the data, I combined the two datasets to analyze overall patterns.
The analysis showed how slave imports to the United States changed over time by decade. The results indicate that slave imports were concentrated in particular time periods and were associated with specific ports and regions. Ports such as New Orleans and other major coastal trading locations appeared frequently in the data, showing how important these areas were in the slave trade network.
The visualizations helped highlight how the number of enslaved people arriving in the United States varied across decades and locations. Additionally, examining the trans-Atlantic dataset revealed that several countries were involved in exporting enslaved people from Africa, demonstrating the international nature of the slave trade.
Overall, the analysis demonstrates the scale and geographic complexity of the slave trade. By cleaning and combining the datasets, it becomes easier to observe patterns in where enslaved people were transported, which ports were most active, and how participation by exporting countries changed over time.