library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the voyage records from the CSV file and preview the first rows.
data <- read.csv(file = "I-Am1.0.csv")
head(data)
library(dplyr)
# Keep only the voyages whose `fate2` code equals 1.
# NOTE(review): the script treats fate2 == 1 as "arrived in America" --
# confirm against the dataset codebook.
data_America <- filter(data, fate2 == 1)
head(data_America)
# Total of `slaarriv` over the retained voyages; NA entries are skipped.
num_slave_arrived <- sum(data_America[["slaarriv"]], na.rm = TRUE)
sprintf("Number of slave arrived in America : %d", num_slave_arrived)
## [1] "Number of slave arrived in America : 310383"
# One-row summary: total of `slaarriv` over the voyages in `data_America`
# whose `fate` code is 49, skipping NA entries.
slave_sold_America <- data_America |>
  filter(fate == 49) |>
  summarise(
    `Number of slaves sold in America` = sum(slaarriv, na.rm = TRUE)
  )
sprintf(
  "The approximated number of slaves sold in America is %d",
  slave_sold_America[["Number of slaves sold in America"]]
)
## [1] "The approximated number of slaves sold in America is 261053"
# Difference between the arrivals total and the total sold in America.
# `slave_sold_America` is a one-row data frame, so its single value is pulled
# out before subtracting; subtracting the data frame itself yields a data
# frame whose only column is still labelled "Number of slaves sold in America".
unslaved <- num_slave_arrived -
  slave_sold_America[["Number of slaves sold in America"]]
unslaved
# Comments:
# The findings show that 310383 slaves arrived in America, while approximately
# 261053 slaves were sold in America; the difference of 49330 is the number
# stored in `unslaved`.
library(ggplot2)
# Two-row summary of `newData`: the total of `slaximp` (labelled "Embarked")
# and the total of `slaarriv` (labelled "Arrived in America"), NA skipped.
summary_slaves <- reframe(
  newData,
  Slave_Status = c("Embarked", "Arrived in America"),
  Number_of_Slaves = c(
    sum(slaximp, na.rm = TRUE),
    sum(slaarriv, na.rm = TRUE)
  )
)
summary_slaves
# Comments: The findings show that a total of 455193 slaves were initially
# embarked on ships and that 310383 of them are recorded as having arrived in
# America. The difference between "Embarked" and "Arrived in America" is
# 144,810, i.e. 144,810 slaves are not recorded as arriving in America. Note
# that `slaarriv` is missing for some voyages (see the missing-value check
# below), so part of this gap reflects missing records.
# Attach scales with library() so a missing package stops the script;
# require() would only return FALSE and let later code fail.
library(scales)
## Loading required package: scales
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# Bar chart of the totals in `summary_slaves`: one bar per status, filled by
# status, with comma-formatted labels on the y axis.
ggplot(
  summary_slaves,
  aes(Slave_Status, Number_of_Slaves, fill = Slave_Status)
) +
  geom_col() +
  scale_y_continuous(labels = scales::comma)
# Comments: The figure shows that the number of slaves initially embarked on
# ships is higher than the number of slaves who arrived in America.
# Three-row breakdown computed from `newData1` (NA entries skipped).
# NOTE(review): "Male" is not counted directly -- it is the `slaarriv` total
# minus the rounded `female7` total, and the `child7` total is not subtracted
# from it. Confirm this is the intended definition.
slaves_Gend_dis <- reframe(
  newData1,
  Slave_Status = c("Male", "Females", "Children"),
  Number_of_Slaves = c(
    sum(slaarriv, na.rm = TRUE) - round(sum(female7, na.rm = TRUE)),
    round(sum(female7, na.rm = TRUE)),
    round(sum(child7, na.rm = TRUE))
  )
)
slaves_Gend_dis
# Comments: The findings show 292676 male slaves, 17707 female slaves and
# 22706 enslaved children (the male figure is computed as total arrivals minus
# females). The number of male slaves is higher than the number of females and
# children.
# Flag every cell of `newData` that is NA (one logical column per variable).
missing_values <- is.na(newData)
# Share of NA cells in each column, expressed in percent.
missing_percentage <- 100 * colMeans(missing_values)
# Report the per-column NA counts first, then the per-column percentages.
print("Number of missing values for each column:")
## [1] "Number of missing values for each column:"
print(colSums(missing_values))
## slaximp slaarriv
## 0 4406
print("Percentage of missing values for each column:")
## [1] "Percentage of missing values for each column:"
print(missing_percentage)
## slaximp slaarriv
## 0.00000 38.27311
# Comments: `slaximp` has no missing values (0%), while `slaarriv` is missing
# in 4406 rows (38.27311% of the rows).
# Numeric nation codes and the matching nation names, paired by position.
national <- c(
  1, 2, 4, 5, 7,
  8, 9, 10, 11, 12,
  13, 14, 16, 17, 18,
  19, 24, 25, 26, 27
)
country <- c(
  "Spain", "Uruguay", "Portugal", "Brazil", "Great Britain",
  "Netherlands", "U.S.A.", "France", "Denmark", "Hanse Towns, Brandenburg",
  "Sweden", "Norway", "Argentina", "Russia", "Sardinia",
  "Mexico", "Genoa", "Duchy of Courland", "Prussia", "Bremen"
)
# Lookup table: one row per code, with its name in `own_country`.
countryData <- data.frame(national, own_country = country)
head(countryData)
# Comments: The table maps each country code to its country name.
# Count the rows of `newData` for each nation code: one output row per entry
# of `countryData`, plus a final row labelled "NA" for rows whose `national`
# value is missing.
# The codes and names are taken from `countryData` instead of being typed out
# a second time, so the two tables cannot drift apart.
ships_data <- newData |>
  reframe(
    Nation = c(countryData$own_country, "NA"),
    Number_of_Ships = c(
      # Inside reframe(), `national` resolves to the column of `newData`.
      vapply(
        countryData$national,
        \(code) sum(national == code, na.rm = TRUE),
        integer(1)
      ),
      sum(is.na(national))
    )
  )
ships_data
# Comments: The findings show that 3905 ships trading slaves were registered
# in Great Britain. The U.S.A. registered 861 ships involved in the slave
# trade, which is fewer than Great Britain. Spain ranks second, with 1479
# registered ships trading slaves.
# library() stops with an error if scales is not installed, whereas require()
# would only return FALSE.
library(scales)
# Bar chart of the ship counts in `ships_data`, one bar per nation, with the
# x-axis labels rotated 90 degrees.
ggplot(ships_data, aes(x = Nation, y = Number_of_Ships, fill = Nation)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    title = "Owner of ships involved in slave trade",
    x = "Nation of ship",
    y = "Number of Ships"
  )
# Comments: The graph also shows that Great Britain had the largest number of
# ships carrying slaves, followed by Spain and then the U.S.A. These ships
# forcefully captured people and traded them in these countries.
# fate codes used below:
# fate = 11  => Captured by British (after embarkation of slaves)
# fate = 15  => Captured by Spanish (after embarkation of slaves)
# fate = 51  => Captured by French (after embarkation of slaves)
# fate = 161 => Captured by USA (after embarkation of slaves)
# Number of rows in `data` for each of the `fate` codes 11, 15, 51 and 161;
# rows with any other (or missing) `fate` are dropped before counting.
data |>
  filter(fate %in% c(11, 15, 51, 161)) |>
  group_by(fate) |>
  summarise(total_ship = n())
# Comments: This also shows how many ships with slaves were captured by the
# U.S.A., Great Britain, Spain and France. The results show that only 4 ships
# with slaves were captured by the British, 5 by the Spanish, 2 by the French
# and only 4 by the U.S.A.
# This can be done using sum(), but there are NA values in the column
# 'slaarriv', so we have to use na.rm = TRUE inside sum().
# Total of `slaarriv` in `newData`, skipping NA entries, reported as a
# formatted sentence.
num_slave_arrived <- sum(newData[["slaarriv"]], na.rm = TRUE)
sprintf(
  "Number of slaves arrived in America using the data are %d",
  num_slave_arrived
)
## [1] "Number of slaves arrived in America using the data are 310383"
# Comments: According to the data, 310383 slaves arrived in America.