Discussion thread created by: Subhalaxmi Rout
This dataset describes people who have migrated between countries all over the world; it was prepared and published by the United Nations. Each origin country (where the migrants come from) is presented in a column, and each destination country (where the migrants go to) is represented in a row. The file contains several worksheets covering different years, with data broken down by total / male / female. For this post, I am going to import ‘Table 16’, which contains the total migrant data for 2015.
Link: Dataset link
# One-time package installation (uncomment to run)
#install.packages("dplyr")
#install.packages("tidyr")
#install.packages("ggplot2")
#install.packages("DT")
library(dplyr)
## Console output echoed when dplyr is attached:
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# The code below calls ggplot() and datatable(), so the packages that
# provide them must be attached as well.
library(ggplot2)
library(DT)
The data is stored on GitHub and is loaded from GitHub into RStudio using the read.csv() function.
# Read the CSV file from GitHub. `skip = 16` discards the first 16 lines of
# the file; the next line is then used as the header row.
data <- read.csv(
  "https://raw.githubusercontent.com/SubhalaxmiRout002/Data-607-Project-2-Dataset-1/master/US_immigrants_data2.csv",
  skip = 16,
  header = TRUE,
  fileEncoding = "Latin1",
  check.names = FALSE,
  stringsAsFactors = FALSE
)
# Rebuild the object as a data frame.
# NOTE: data.frame() defaults to check.names = TRUE, so the column names that
# read.csv() preserved above are passed through make.names() here (for
# example, spaces become dots).
data <- data.frame(data)
# Display the data as an interactive, horizontally scrollable table
datatable(
  data,
  options = list(scrollX = TRUE, paging = TRUE, fixedHeader = TRUE)
)
There are many pairs of countries between which people did not migrate, so those pairs are removed.
This part shows the relationship between origin country and destination country. Dark red tiles indicate a low number of immigrants, and blue tiles indicate a higher number of immigrants.
Note: Because of the filter applied, the immigrant counts shown are in the millions.
# Coerce data1 to a plain data frame before plotting
data1 <- data.frame(data1)
# Heatmap of migrant counts: destination countries on the x axis, origin
# countries on the y axis, tile colour running from dark red (low) to
# midnight blue (high).
ggplot(
  data1,
  aes(x = destination_country, y = origin_country, fill = migrants)
) +
  geom_tile() +
  scale_fill_gradient(low = "darkred", high = "midnightblue") +
  labs(title = "Migration above 1M (million)") +
  theme(
    # hide every grid line; the tiles themselves form the grid
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.background = element_rect(fill = "gray90"),
    # rotate the destination labels so long country names fit
    axis.text.x = element_text(
      angle = 90, hjust = 1, vjust = 1, size = 12, face = "bold"
    ),
    axis.text.y = element_text(size = 12, face = "bold"),
    # large, bold, horizontally centred title
    plot.title = element_text(size = 20, face = "bold", hjust = 0.5)
  )
# filter with destination country
# Rows whose destination is the United States, ordered from the largest to
# the smallest migrant count.
# Columns are referenced by bare name inside filter(): dplyr evaluates them in
# the piped data frame, so reaching back to the global `data` with `$` is
# unnecessary there.
data2 <- data.frame(data) %>%
  filter(destination_country == "United States of America") %>%
  arrange(desc(migrants))
# Display the filtered data as an interactive table
datatable(
  data2,
  options = list(scrollX = TRUE, paging = TRUE, fixedHeader = TRUE)
)
# keep the 20 countries from which most people migrated to the United States,
# stored in data2
# Keep the first 20 rows of data2
data2 <- head(data2, 20)
# Horizontal bar chart of migrants per origin country.
# reorder() sits in the shared aes so the bars and their text labels use one
# x mapping, sorted by migrant count. geom_col() takes bar lengths directly
# from the `migrants` values.
ggplot(data2, aes(x = reorder(origin_country, migrants), y = migrants)) +
  geom_col(fill = "steelblue") +
  # print the count just past the end of each bar
  geom_text(aes(label = migrants), hjust = -0.20, color = "black", size = 3.5) +
  coord_flip() +
  xlab("Origin Country") +
  ylab("#immigrants") +
  ggtitle("Top 20 Countries - people migrated to United States") +
  theme(
    plot.title = element_text(hjust = 0.5),
    panel.background = element_rect(fill = "white", color = NA)
  )
# filter with origin country
# Rows whose origin is the United States, ordered from the largest to the
# smallest migrant count.
# The origin value is written with dots ("United.States.of.America");
# presumably origin names come from column names sanitised by make.names()
# -- confirm against the reshaping step.
# Columns are referenced by bare name inside filter(): dplyr evaluates them in
# the piped data frame, so reaching back to the global `data` with `$` is
# unnecessary there.
data3 <- data.frame(data) %>%
  filter(origin_country == "United.States.of.America") %>%
  arrange(desc(migrants))
# Display the filtered data as an interactive table
datatable(
  data3,
  options = list(scrollX = TRUE, paging = TRUE, fixedHeader = TRUE)
)
# top 20 countries to which Americans mostly migrated, stored in data3
# Keep the first 20 rows of data3
data3 <- head(data3, 20)
# Horizontal bar chart of migrants per destination country.
# reorder() sits in the shared aes so the bars and their text labels use one
# x mapping, sorted by migrant count. geom_col() takes bar lengths directly
# from the `migrants` values.
ggplot(data3, aes(x = reorder(destination_country, migrants), y = migrants)) +
  geom_col(fill = "steelblue") +
  # print the count just past the end of each bar
  geom_text(aes(label = migrants), hjust = -0.20, color = "black", size = 3.5) +
  coord_flip() +
  xlab("Destination Country") +
  ylab("#immigrants") +
  ggtitle("Top 20 Countries - Americans migrated") +
  theme(
    plot.title = element_text(hjust = 0.5),
    panel.background = element_rect(fill = "white", color = NA)
  )
The data set contains 232 columns; using different methods from tidyr and dplyr, those columns were converted to rows. Filter conditions were then applied to produce the analysis below.