Descriptive Analysis Question

Q3 - How many immigrants came to Canada from each country between 1980 and 2013? The answer is visualized with a line chart, a bar chart, and a box plot.

Import the necessary libraries

library(tidyr) #For data manipulation and ggplot
library(dplyr) #For data manipulation
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggpubr) #For sub plotting
## Loading required package: ggplot2

Import the Dataset

# Load the immigration table from the CSV export into DF, then preview
# its first six rows to confirm the column layout.
DF <- read.csv(file = '/Users/salahkaf/Desktop/UpdatedDF.csv')
head(DF, n = 6L)
##          Country Continent          Region            DevName X1980 X1981 X1982
## 1    Afghanistan      Asia   Southern Asia Developing regions    16    39    39
## 2        Albania    Europe Southern Europe  Developed regions     1     0     0
## 3        Algeria    Africa Northern Africa Developing regions    80    67    71
## 4 American Samoa   Oceania       Polynesia Developing regions     0     1     0
## 5        Andorra    Europe Southern Europe  Developed regions     0     0     0
## 6         Angola    Africa   Middle Africa Developing regions     1     3     6
##   X1983 X1984 X1985 X1986 X1987 X1988 X1989 X1990 X1991 X1992 X1993 X1994 X1995
## 1    47    71   340   496   741   828  1076  1028  1378  1170   713   858  1537
## 2     0     0     0     1     2     2     3     3    21    56    96    71    63
## 3    69    63    44    69   132   242   434   491   872   795   717   595  1106
## 4     0     0     0     0     1     0     1     2     0     0     0     0     0
## 5     0     0     0     2     0     0     0     3     0     1     0     0     0
## 6     6     4     3     5     5    11     6     8    23    26    22     8    26
##   X1996 X1997 X1998 X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008
## 1  2212  2555  1999  2395  3326  4067  3697  3479  2978  3436  3009  2652  2111
## 2   113   307   574  1264  1816  1602  1021   853  1450  1223   856   702   560
## 3  2054  1842  2292  2389  2867  3418  3406  3072  3616  3626  4807  3623  4005
## 4     0     0     0     0     0     0     0     0     0     0     1     0     0
## 5     0     0     2     0     0     1     0     2     0     0     1     1     0
## 6    38    27    58    49    70   169   168   165   268   295   184   106    76
##   X2009 X2010 X2011 X2012 X2013 Total
## 1  1746  1758  2203  2635  2004 58639
## 2   716   561   539   620   603 15699
## 3  5393  4752  4325  3774  4331 69439
## 4     0     0     0     0     0     6
## 5     0     0     0     1     1    15
## 6    62    61    39    70    45  2113

Wrangling the table part 1

# Build a long (Year, Total) table for a single country.
# "Haiti" is the user input; the purpose is to automate this option.
# The year columns are picked by name (X1980 ... X2013) rather than by
# position, so the metadata columns and the trailing "Total" column are
# excluded even if the column order changes.
Q3DF <- DF[
  DF$Country %in% "Haiti",
  grepl("^X[0-9]{4}$", names(DF)),
  drop = FALSE
]
# Fail loudly on a misspelled or duplicated country instead of silently
# producing an empty or mixed table.
stopifnot(nrow(Q3DF) == 1L)
# Reshape wide -> long; names_prefix strips the "X" that read.csv put in
# front of the numeric column names. Year is kept as character because
# the plots treat it as a discrete axis.
Q3DF <- pivot_longer(
  Q3DF,
  cols = everything(),
  names_to = "Year",
  names_prefix = "X",
  values_to = "Total"
)
Q3DF <- as.data.frame(Q3DF) # Plain data.frame so all 34 rows print
Q3DF # Final DF ready to be plotted
##    Year Total
## 1  1980  1666
## 2  1981  3692
## 3  1982  3498
## 4  1983  2860
## 5  1984  1418
## 6  1985  1321
## 7  1986  1753
## 8  1987  2132
## 9  1988  1829
## 10 1989  2377
## 11 1990  2379
## 12 1991  2829
## 13 1992  2399
## 14 1993  3655
## 15 1994  2100
## 16 1995  2014
## 17 1996  1955
## 18 1997  1645
## 19 1998  1295
## 20 1999  1439
## 21 2000  1631
## 22 2001  2433
## 23 2002  2174
## 24 2003  1930
## 25 2004  1652
## 26 2005  1682
## 27 2006  1619
## 28 2007  1598
## 29 2008  2491
## 30 2009  2080
## 31 2010  4744
## 32 2011  6503
## 33 2012  5868
## 34 2013  4152

Plot the line graph

# Line chart of yearly immigrant counts taken from Q3DF.
# group = 1 puts every row in one series so geom_line() connects the
# points across the discrete Year axis.
Q3plot_line <- ggplot(Q3DF, aes(x = Year, y = Total, group = 1)) +
  geom_line(color = 'darkred') + # The trend line
  geom_point(color = "Blue") + # One marker per year, drawn over the line
  # Only label every fifth year to keep the axis readable
  scale_x_discrete(breaks = seq(from = 1980, to = 2013, by = 5)) +
  labs(
    title = "Number of Immigrants per Year",
    x = "Years",
    y = "Number of Immigrants"
  ) +
  theme(plot.title = element_text(hjust = 0.5)) # Centre the title
Q3plot_line

Plot the bar graph

# Bar chart of yearly immigrant counts taken from Q3DF; bar heights are
# the Total values themselves (no counting or binning).
Q3plot_bar <- ggplot(Q3DF, aes(x = Year, y = Total)) +
  geom_col(fill = "black", color = "white") +
  # Only label every fifth year to keep the axis readable
  scale_x_discrete(breaks = seq(from = 1980, to = 2013, by = 5)) +
  labs(
    title = "Number of Immigrants per Year",
    x = "Years",
    y = "Number of Immigrants"
  ) +
  theme(plot.title = element_text(hjust = 0.5)) # Centre the title
Q3plot_bar

Wrangling the table part 2

# Yearly immigrant counts for one chosen country, as a one-column data
# frame (column `one_country`) ready for the box plot.
# "Chad" must be automated (User input).
# Note the comma in the subset: rows are filtered by country and columns
# by name. Without it, the logical vector indexes COLUMNS and returns a
# single year's values for every country instead of the chosen country.
one_country <- DF[
  DF$Country %in% "Chad",
  grepl("^X[0-9]{4}$", names(DF)),
  drop = FALSE
]
# Fail loudly on a misspelled or duplicated country instead of silently
# plotting nothing.
stopifnot(nrow(one_country) == 1L)
# Selecting only the X<year> columns already leaves out "Total", so no
# trailing element has to be dropped. Flatten the single row into a
# vector of yearly values.
one_country <- data.frame(
  one_country = unlist(one_country, use.names = FALSE)
)
one_country
##     one_country
## 1          1758
## 2           561
## 3          4752
## 4             0
## 5             0
## 6            61
## 7            27
## 8           459
## 9           252
## 10          933
## 11          124
## 12          209
## 13           25
## 14           28
## 15         4721
## 16          121
## 17          438
## 18          363
## 19           39
## 20          290
## 21         1464
## 22          180
## 23          168
## 24           42
## 25         2598
## 26           12
## 27          556
## 28          186
## 29          529
## 30            3
## 31          200
## 32         1800
## 33            0
## 34           26
## 35           98
## 36          340
## 37        30391
## 38          623
## 39           21
## 40         5218
## 41           22
## 42           75
## 43          190
## 44         1066
## 45           91
## 46          961
## 47           18
## 48          168
## 49           45
## 50         1239
## 51           92
## 52          123
## 53           43
## 54          479
## 55          353
## 56         5982
## 57          787
## 58            4
## 59          931
## 60           28
## 61         1865
## 62          398
## 63           63
## 64         4646
## 65           64
## 66           37
## 67          126
## 68         2956
## 69          802
## 70          101
## 71          208
## 72          266
## 73          395
## 74            2
## 75          953
## 76         4744
## 77          386
## 78          354
## 79           30
## 80        34235
## 81          712
## 82         7477
## 83         5941
## 84          547
## 85         2755
## 86          434
## 87         2321
## 88         1168
## 89         1831
## 90          377
## 91          507
## 92            0
## 93           67
## 94          157
## 95           54
## 96           70
## 97         3432
## 98            7
## 99          162
## 100         384
## 101           0
## 102          57
## 103           8
## 104         178
## 105          28
## 106         802
## 107           4
## 108         170
## 109          17
## 110           0
## 111          74
## 112        1455
## 113        3865
## 114           4
## 115         169
## 116          14
## 117        6242
## 118           7
## 119         556
## 120          16
## 121           0
## 122        1392
## 123         759
## 124           0
## 125         490
## 126          94
## 127          80
## 128        3906
## 129          46
## 130          14
## 131        6811
## 132           0
## 133          65
## 134           3
## 135          89
## 136        1283
## 137       38617
## 138         795
## 139         629
## 140          18
## 141        5537
## 142        1988
## 143        1922
## 144        2288
## 145         396
## 146          22
## 147         249
## 148         434
## 149           5
## 150           1
## 151           2
## 152         330
## 153         765
## 154         271
## 155          10
## 156          63
## 157         805
## 158         135
## 159          12
## 160        1528
## 161        1238
## 162         174
## 163        4422
## 164         654
## 165         612
## 166          13
## 167           3
## 168         159
## 169         285
## 170        1039
## 171          52
## 172         499
## 173         188
## 174         354
## 175           5
## 176         915
## 177        1299
## 178        1492
## 179          30
## 180           0
## 181         216
## 182        3159
## 183          86
## 184        8724
## 185         200
## 186        8142
## 187          93
## 188         289
## 189           0
## 190         998
## 191        1942
## 192           0
## 193         211
## 194         102
## 195         494
## 196        3731

Plot the boxplot graph

# Box plot of the values held in the `one_country` column of the
# `one_country` data frame; x is a constant so a single box is drawn.
Q3plot_box <- ggplot(one_country, aes(x = "", y = one_country)) +
  geom_boxplot(fill = "white") +
  # Zoom the y axis to 0-10000 so the plot looks better; coord_cartesian
  # only changes the visible window and does not drop observations.
  coord_cartesian(ylim = c(0, 10000)) +
  labs(
    title = "Number of Immigrants distributed by a box plot",
    x = "chosen country",
    y = "Number of Immigrants"
  ) +
  theme(plot.title = element_text(hjust = 0.5)) # Centre the title
Q3plot_box