Exploratory Question Analysis
Q7 - Which countries have the dominant immigrant populations? Visualized with a waffle chart and a word cloud.
Import the necessary libraries
library(tidyr) #For data manipulation and ggplot
library(dplyr) #For data manipulation
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggpubr) #For sub plotting
## Loading required package: ggplot2
library(waffle) # for plotting waffle chart
library(wordcloud) # for plotting word cloud
## Loading required package: RColorBrewer
Import the Dataset
# Load the immigration table from the local CSV file, then preview its first six rows.
DF <- read.csv(file = "/Users/salahkaf/Desktop/UpdatedDF.csv")
head(x = DF, n = 6L)
## Country Continent Region DevName X1980 X1981 X1982
## 1 Afghanistan Asia Southern Asia Developing regions 16 39 39
## 2 Albania Europe Southern Europe Developed regions 1 0 0
## 3 Algeria Africa Northern Africa Developing regions 80 67 71
## 4 American Samoa Oceania Polynesia Developing regions 0 1 0
## 5 Andorra Europe Southern Europe Developed regions 0 0 0
## 6 Angola Africa Middle Africa Developing regions 1 3 6
## X1983 X1984 X1985 X1986 X1987 X1988 X1989 X1990 X1991 X1992 X1993 X1994 X1995
## 1 47 71 340 496 741 828 1076 1028 1378 1170 713 858 1537
## 2 0 0 0 1 2 2 3 3 21 56 96 71 63
## 3 69 63 44 69 132 242 434 491 872 795 717 595 1106
## 4 0 0 0 0 1 0 1 2 0 0 0 0 0
## 5 0 0 0 2 0 0 0 3 0 1 0 0 0
## 6 6 4 3 5 5 11 6 8 23 26 22 8 26
## X1996 X1997 X1998 X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008
## 1 2212 2555 1999 2395 3326 4067 3697 3479 2978 3436 3009 2652 2111
## 2 113 307 574 1264 1816 1602 1021 853 1450 1223 856 702 560
## 3 2054 1842 2292 2389 2867 3418 3406 3072 3616 3626 4807 3623 4005
## 4 0 0 0 0 0 0 0 0 0 0 1 0 0
## 5 0 0 2 0 0 1 0 2 0 0 1 1 0
## 6 38 27 58 49 70 169 168 165 268 295 184 106 76
## X2009 X2010 X2011 X2012 X2013 Total
## 1 1746 1758 2203 2635 2004 58639
## 2 716 561 539 620 603 15699
## 3 5393 4752 4325 3774 4331 69439
## 4 0 0 0 0 0 6
## 5 0 0 0 1 1 15
## 6 62 61 39 70 45 2113
Wrangling the table part 1
# Keep only the country name and its Total column.
Q7DF <- DF[c("Country", "Total")]
# Drop the two trailing rows (noted in the original as "Unknown" and "Total").
# head() with a negative n returns all but the last n rows, and gives an empty
# frame instead of a spurious NA row when the table has no rows.
Q7DF <- head(Q7DF, -2L)
Plot the word cloud
# Draw a word cloud of countries sized by their Total value. Words are plotted
# in decreasing frequency (random.order = FALSE) and a 0.35 share is rotated.
# Labels that cannot be fit on the page are skipped with a warning.
Q7Word_plot <- wordcloud(
  words = Q7DF$Country,
  freq = Q7DF$Total,
  min.freq = 1,
  max.words = 2000,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(n = 8, name = "Dark2")
)
## Warning in wordcloud(words = Q7DF$Country, freq = Q7DF$Total, min.freq = 1, :
## United Kingdom of Great Britain and Northern Ireland could not be fit on page.
## It will not be plotted.

# NOTE(review): the cloud is drawn as a side effect of the wordcloud() call;
# the stored value prints as NULL, so this line does not redraw the plot.
Q7Word_plot
## NULL
Wrangling the table part 2
# Rank countries by Total, largest first.
Q7DF2 <- arrange(Q7DF, desc(Total))
# Keep the top 8. head() returns fewer rows rather than NA-filled rows when
# fewer than 8 countries are available (unlike indexing with 1:8).
Q7DF2 <- head(Q7DF2, 8L)
# Integer-divide by 10000 so the counts are small enough to plot as squares;
# remainders below 10000 are dropped.
Q7DF2$Total <- Q7DF2$Total %/% 10000
Q7DF2
## Country Total
## 1 India 69
## 2 China 65
## 3 United Kingdom of Great Britain and Northern Ireland 55
## 4 Philippines 51
## 5 Pakistan 24
## 6 United States of America 24
## 7 Iran (Islamic Republic of) 17
## 8 Sri Lanka 14
# Build the named numeric vector passed to waffle(): names are countries,
# values are the scaled totals. setNames() keeps the descending order of
# Q7DF2, whereas split() would regroup the values alphabetically by country.
waffle_list <- setNames(Q7DF2$Total, Q7DF2$Country)
Plot the waffle chart
# Totals were integer-divided by 10000 before plotting, so state the unit on
# the chart: each square stands for 10,000 immigrants.
waffle(waffle_list, xlab = "1 square = 10,000 immigrants")

Selecting individual countries for the waffle chart
# Restrict to the hand-picked countries (selection still to be automated),
# keeping only rows whose Total exceeds 100.
Q7DF3 <- filter(
  Q7DF,
  Total > 100,
  Country %in% c("India", "China", "Sudan")
)
# Express each total as a whole multiple of the smallest selected total, so
# the smallest country is drawn as a single square.
Q7DF3 <- mutate(Q7DF3, Total = Total %/% min(Total))
# Named vector (country -> number of squares); split() builds a list keyed by
# country and unlist() flattens it because waffle() is not given a list.
waffle_list2 <- unlist(split(Q7DF3$Total, Q7DF3$Country))
Plot the waffle chart for three chosen countries
# Build the waffle chart for the selected countries, keep the plot object,
# then display it.
Q7waffle <- waffle(parts = waffle_list2)
Q7waffle
