Exploratory Question Analysis

Q7 - Which countries account for the largest immigrant populations? Visualized with a waffle chart and a word cloud.

Import the necessary libraries

library(tidyr) #For data manipulation and ggplot
library(dplyr) #For data manipulation
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggpubr) #For sub plotting
## Loading required package: ggplot2
library(waffle) # for plotting waffle chart
library(wordcloud) # for plotting word cloud
## Loading required package: RColorBrewer

Import the Dataset

# Load the immigration table (one row per country, yearly counts plus a Total
# column) and preview its first rows to confirm the columns parsed as expected.
DF <- read.csv(file = "/Users/salahkaf/Desktop/UpdatedDF.csv")
head(x = DF)
##          Country Continent          Region            DevName X1980 X1981 X1982
## 1    Afghanistan      Asia   Southern Asia Developing regions    16    39    39
## 2        Albania    Europe Southern Europe  Developed regions     1     0     0
## 3        Algeria    Africa Northern Africa Developing regions    80    67    71
## 4 American Samoa   Oceania       Polynesia Developing regions     0     1     0
## 5        Andorra    Europe Southern Europe  Developed regions     0     0     0
## 6         Angola    Africa   Middle Africa Developing regions     1     3     6
##   X1983 X1984 X1985 X1986 X1987 X1988 X1989 X1990 X1991 X1992 X1993 X1994 X1995
## 1    47    71   340   496   741   828  1076  1028  1378  1170   713   858  1537
## 2     0     0     0     1     2     2     3     3    21    56    96    71    63
## 3    69    63    44    69   132   242   434   491   872   795   717   595  1106
## 4     0     0     0     0     1     0     1     2     0     0     0     0     0
## 5     0     0     0     2     0     0     0     3     0     1     0     0     0
## 6     6     4     3     5     5    11     6     8    23    26    22     8    26
##   X1996 X1997 X1998 X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008
## 1  2212  2555  1999  2395  3326  4067  3697  3479  2978  3436  3009  2652  2111
## 2   113   307   574  1264  1816  1602  1021   853  1450  1223   856   702   560
## 3  2054  1842  2292  2389  2867  3418  3406  3072  3616  3626  4807  3623  4005
## 4     0     0     0     0     0     0     0     0     0     0     1     0     0
## 5     0     0     2     0     0     1     0     2     0     0     1     1     0
## 6    38    27    58    49    70   169   168   165   268   295   184   106    76
##   X2009 X2010 X2011 X2012 X2013 Total
## 1  1746  1758  2203  2635  2004 58639
## 2   716   561   539   620   603 15699
## 3  5393  4752  4325  3774  4331 69439
## 4     0     0     0     0     0     6
## 5     0     0     0     1     1    15
## 6    62    61    39    70    45  2113

Wrangling the table part 1

# Keep only the country name and its all-years total.
Q7DF <- DF[c("Country", "Total")]
# Drop the last two rows ("Unknown" and "Total"), which are aggregates rather
# than countries. head(x, -2) is used instead of a negative positional index
# because -c(nrow, nrow - 1) turns into c(0, 1) on an empty table and would
# select a phantom all-NA row.
Q7DF <- head(Q7DF, -2)

Plot the word cloud

# Draw a word cloud in which each country's size reflects its Total.
# The United Kingdom's full name is too long to be placed at the size its
# Total demands, so wordcloud() drops it with a "could not be fit on page"
# warning. Plot it under a short display label instead; Q7DF is not modified.
uk_full_name <- "United Kingdom of Great Britain and Northern Ireland"
cloud_labels <- as.character(Q7DF$Country)
cloud_labels[cloud_labels %in% uk_full_name] <- "United Kingdom"

# The call's return value is still stored in Q7Word_plot because later code
# refers to that name.
Q7Word_plot <- wordcloud(
  words = cloud_labels, freq = Q7DF$Total, min.freq = 1,
  max.words = 2000, random.order = FALSE, rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)
## Warning in wordcloud(words = Q7DF$Country, freq = Q7DF$Total, min.freq = 1, :
## United Kingdom of Great Britain and Northern Ireland could not be fit on page.
## It will not be plotted.

# wordcloud() was called for its drawing side effect; the value it returned,
# stored in Q7Word_plot, is NULL, so this line only echoes NULL.
Q7Word_plot
## NULL

Wrangling the table part 2

# Rank countries by total immigrants, largest first, and keep the top 8.
# head() is used instead of [1:8, ] so that a table with fewer than 8 rows is
# returned as-is rather than padded with all-NA rows.
Q7DF2 <- arrange(Q7DF, desc(Total))
Q7DF2 <- head(Q7DF2, 8)
# Integer-divide so that one waffle square stands for 10,000 immigrants; the
# raw totals are too large to draw one square per person.
Q7DF2$Total <- Q7DF2$Total %/% 10000
Q7DF2
##                                                Country Total
## 1                                                India    69
## 2                                                China    65
## 3 United Kingdom of Great Britain and Northern Ireland    55
## 4                                          Philippines    51
## 5                                             Pakistan    24
## 6                             United States of America    24
## 7                           Iran (Islamic Republic of)    17
## 8                                            Sri Lanka    14
# waffle() is given a named numeric vector: names become the legend labels and
# values the number of squares. Build it directly with setNames() so the
# entries keep the descending order of Q7DF2; split() grouped by a factor of
# Country, which re-sorted the countries alphabetically.
waffle_list <- setNames(Q7DF2$Total, as.character(Q7DF2$Country))

Plot the waffle chart

# Render the top-8 waffle chart; each square is one unit of the scaled totals.
waffle(parts = waffle_list)

Selecting individual countries for the waffle chart

# Countries to compare in the second waffle chart.
chosen_countries <- c("India", "China", "Sudan") # TODO: automate the selection
# Keep only the chosen countries whose total exceeds 100 immigrants.
Q7DF3 <- filter(Q7DF, Total > 100, Country %in% chosen_countries)
# Fail early with a clear message: on an empty selection min() would return
# Inf (with a warning) and the scaling below would be meaningless.
if (nrow(Q7DF3) == 0) {
  stop("None of the chosen countries has more than 100 immigrants.",
       call. = FALSE)
}
# Express every total as a whole multiple of the smallest one, so the smallest
# country gets exactly one square and the others scale relative to it.
Q7DF3$Total <- Q7DF3$Total %/% min(Q7DF3$Total)
# Named numeric vector for waffle(): names are the countries, values are the
# scaled totals. Entries follow the row order of Q7DF3.
waffle_list2 <- setNames(Q7DF3$Total, as.character(Q7DF3$Country))

Plot the waffle chart for three chosen countries

# Build the waffle chart for the chosen countries, keep it in Q7waffle, and
# display it.
Q7waffle <- waffle(parts = waffle_list2)
print(Q7waffle)