Descriptive Analysis Question
Q3 - How many immigrants came to Canada from each country between 1980 and 2013? The answer is visualized with a line chart, a bar chart, and a box plot.
Import the necessary libraries
library(tidyr) #For data manipulation and ggplot
library(dplyr) #For data manipulation
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggpubr) #For sub plotting
## Loading required package: ggplot2
Import the Dataset
# Load the immigration table: one row per country, with descriptive columns
# followed by one X<year> column per year and a Total column.
DF <- read.csv(file = "/Users/salahkaf/Desktop/UpdatedDF.csv")
# Preview the first six rows to confirm the columns were parsed as expected.
head(DF, n = 6L)
## Country Continent Region DevName X1980 X1981 X1982
## 1 Afghanistan Asia Southern Asia Developing regions 16 39 39
## 2 Albania Europe Southern Europe Developed regions 1 0 0
## 3 Algeria Africa Northern Africa Developing regions 80 67 71
## 4 American Samoa Oceania Polynesia Developing regions 0 1 0
## 5 Andorra Europe Southern Europe Developed regions 0 0 0
## 6 Angola Africa Middle Africa Developing regions 1 3 6
## X1983 X1984 X1985 X1986 X1987 X1988 X1989 X1990 X1991 X1992 X1993 X1994 X1995
## 1 47 71 340 496 741 828 1076 1028 1378 1170 713 858 1537
## 2 0 0 0 1 2 2 3 3 21 56 96 71 63
## 3 69 63 44 69 132 242 434 491 872 795 717 595 1106
## 4 0 0 0 0 1 0 1 2 0 0 0 0 0
## 5 0 0 0 2 0 0 0 3 0 1 0 0 0
## 6 6 4 3 5 5 11 6 8 23 26 22 8 26
## X1996 X1997 X1998 X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008
## 1 2212 2555 1999 2395 3326 4067 3697 3479 2978 3436 3009 2652 2111
## 2 113 307 574 1264 1816 1602 1021 853 1450 1223 856 702 560
## 3 2054 1842 2292 2389 2867 3418 3406 3072 3616 3626 4807 3623 4005
## 4 0 0 0 0 0 0 0 0 0 0 1 0 0
## 5 0 0 2 0 0 1 0 2 0 0 1 1 0
## 6 38 27 58 49 70 169 168 165 268 295 184 106 76
## X2009 X2010 X2011 X2012 X2013 Total
## 1 1746 1758 2203 2635 2004 58639
## 2 716 561 539 620 603 15699
## 3 5393 4752 4325 3774 4331 69439
## 4 0 0 0 0 0 6
## 5 0 0 0 1 1 15
## 6 62 61 39 70 45 2113
Wrangling the table part 1
# Build the long-format (Year, Total) table for one country; it feeds the line
# and bar charts below.
#
# Country to plot. TODO: replace this literal with real user input once the
# report is automated.
q3_country <- "Haiti"
# Year columns are the ones named X<4 digits> (e.g. X1980). Selecting them by
# name instead of by position (5:38) keeps this working if columns are added
# or reordered, and excludes the precomputed Total column.
q3_year_cols <- grep("^X[0-9]{4}$", names(DF), value = TRUE)
Q3DF <- DF[DF$Country == q3_country, q3_year_cols, drop = FALSE]
# Fail loudly on a misspelled/unknown country instead of plotting nothing.
if (nrow(Q3DF) == 0L) {
  stop("No rows found for country '", q3_country, "'", call. = FALSE)
}
# Wide -> long: one row per year, with the yearly count in Total.
Q3DF <- pivot_longer(
  Q3DF,
  cols = all_of(q3_year_cols),
  names_to = "Year",
  values_to = "Total"
)
# Keep a plain data.frame so the full table is printed, not a tibble preview.
Q3DF <- as.data.frame(Q3DF)
# Strip the leading "X" that read.csv prepends to numeric column names.
# Year stays a character column because the plots use a discrete x scale.
Q3DF$Year <- sub("^X", "", Q3DF$Year)
Q3DF # Final DF ready to be plotted
## Year Total
## 1 1980 1666
## 2 1981 3692
## 3 1982 3498
## 4 1983 2860
## 5 1984 1418
## 6 1985 1321
## 7 1986 1753
## 8 1987 2132
## 9 1988 1829
## 10 1989 2377
## 11 1990 2379
## 12 1991 2829
## 13 1992 2399
## 14 1993 3655
## 15 1994 2100
## 16 1995 2014
## 17 1996 1955
## 18 1997 1645
## 19 1998 1295
## 20 1999 1439
## 21 2000 1631
## 22 2001 2433
## 23 2002 2174
## 24 2003 1930
## 25 2004 1652
## 26 2005 1682
## 27 2006 1619
## 28 2007 1598
## 29 2008 2491
## 30 2009 2080
## 31 2010 4744
## 32 2011 6503
## 33 2012 5868
## 34 2013 4152
Plot the line graph
# Line chart of the selected country's yearly immigrant counts.
# Year is a character (discrete) axis, so group = 1 puts every point in a
# single group and lets geom_line connect them.
Q3plot_line <- ggplot(Q3DF, aes(x = Year, y = Total, group = 1)) +
  geom_line(color = "darkred") + # connecting line
  geom_point(color = "Blue") + # one marker per year
  # Label only every fifth year to keep the axis readable.
  scale_x_discrete(breaks = seq(from = 1980, to = 2013, by = 5)) +
  labs(
    title = "Number of Immigrants per Year",
    x = "Years",
    y = "Number of Immigrants"
  ) +
  theme(plot.title = element_text(hjust = 0.5)) # centre the title
Q3plot_line

Plot the bar graph
# Bar chart of the same yearly counts: one bar per year, bar height = Total.
Q3plot_bar <- ggplot(Q3DF, aes(x = Year, y = Total)) +
  # geom_col() draws bars whose heights are the y values as given.
  geom_col(fill = "black", color = "white") +
  # Label only every fifth year to keep the axis readable.
  scale_x_discrete(breaks = seq(from = 1980, to = 2013, by = 5)) +
  labs(
    title = "Number of Immigrants per Year",
    x = "Years",
    y = "Number of Immigrants"
  ) +
  theme(plot.title = element_text(hjust = 0.5)) # centre the title
Q3plot_bar

Wrangling the table part 2
# Build the single-column table of one country's yearly immigrant counts; it
# feeds the box plot below.
#
# Country to plot. TODO: replace this literal with real user input once the
# report is automated.
box_country <- "Chad"
# Year columns are the ones named X<4 digits> (e.g. X1980); this skips the
# descriptive columns and the precomputed Total column.
box_year_cols <- grep("^X[0-9]{4}$", names(DF), value = TRUE)
# The comma matters: DF[mask] without it treats the row mask as a COLUMN
# selector and returns a single year column for every country instead of the
# chosen country's row.
box_row <- DF[DF$Country == box_country, box_year_cols, drop = FALSE]
# Fail loudly on an unknown or duplicated country instead of plotting the
# wrong data.
if (nrow(box_row) != 1L) {
  stop(
    "Expected exactly one row for country '", box_country,
    "', found ", nrow(box_row),
    call. = FALSE
  )
}
# One value per year. The column is named `one_country` because the box plot
# maps its y aesthetic to that name.
one_country <- data.frame(one_country = unlist(box_row, use.names = FALSE))
one_country
## one_country
## 1 1758
## 2 561
## 3 4752
## 4 0
## 5 0
## 6 61
## 7 27
## 8 459
## 9 252
## 10 933
## 11 124
## 12 209
## 13 25
## 14 28
## 15 4721
## 16 121
## 17 438
## 18 363
## 19 39
## 20 290
## 21 1464
## 22 180
## 23 168
## 24 42
## 25 2598
## 26 12
## 27 556
## 28 186
## 29 529
## 30 3
## 31 200
## 32 1800
## 33 0
## 34 26
## 35 98
## 36 340
## 37 30391
## 38 623
## 39 21
## 40 5218
## 41 22
## 42 75
## 43 190
## 44 1066
## 45 91
## 46 961
## 47 18
## 48 168
## 49 45
## 50 1239
## 51 92
## 52 123
## 53 43
## 54 479
## 55 353
## 56 5982
## 57 787
## 58 4
## 59 931
## 60 28
## 61 1865
## 62 398
## 63 63
## 64 4646
## 65 64
## 66 37
## 67 126
## 68 2956
## 69 802
## 70 101
## 71 208
## 72 266
## 73 395
## 74 2
## 75 953
## 76 4744
## 77 386
## 78 354
## 79 30
## 80 34235
## 81 712
## 82 7477
## 83 5941
## 84 547
## 85 2755
## 86 434
## 87 2321
## 88 1168
## 89 1831
## 90 377
## 91 507
## 92 0
## 93 67
## 94 157
## 95 54
## 96 70
## 97 3432
## 98 7
## 99 162
## 100 384
## 101 0
## 102 57
## 103 8
## 104 178
## 105 28
## 106 802
## 107 4
## 108 170
## 109 17
## 110 0
## 111 74
## 112 1455
## 113 3865
## 114 4
## 115 169
## 116 14
## 117 6242
## 118 7
## 119 556
## 120 16
## 121 0
## 122 1392
## 123 759
## 124 0
## 125 490
## 126 94
## 127 80
## 128 3906
## 129 46
## 130 14
## 131 6811
## 132 0
## 133 65
## 134 3
## 135 89
## 136 1283
## 137 38617
## 138 795
## 139 629
## 140 18
## 141 5537
## 142 1988
## 143 1922
## 144 2288
## 145 396
## 146 22
## 147 249
## 148 434
## 149 5
## 150 1
## 151 2
## 152 330
## 153 765
## 154 271
## 155 10
## 156 63
## 157 805
## 158 135
## 159 12
## 160 1528
## 161 1238
## 162 174
## 163 4422
## 164 654
## 165 612
## 166 13
## 167 3
## 168 159
## 169 285
## 170 1039
## 171 52
## 172 499
## 173 188
## 174 354
## 175 5
## 176 915
## 177 1299
## 178 1492
## 179 30
## 180 0
## 181 216
## 182 3159
## 183 86
## 184 8724
## 185 200
## 186 8142
## 187 93
## 188 289
## 189 0
## 190 998
## 191 1942
## 192 0
## 193 211
## 194 102
## 195 494
## 196 3731
Plot the boxplot graph
# Box plot of the values in the `one_country` column. x is a constant empty
# string, so all values fall into a single box.
Q3plot_box <- ggplot(one_country, aes(x = "", y = one_country)) +
  geom_boxplot(fill = "white") +
  # Restrict the visible y range to 0-10000; coord_cartesian only changes the
  # view, so the box statistics are still computed from every value.
  coord_cartesian(ylim = c(0, 10000)) +
  labs(
    title = "Number of Immigrants distributed by a box plot",
    x = "chosen country",
    y = "Number of Immigrants"
  ) +
  theme(plot.title = element_text(hjust = 0.5)) # centre the title
Q3plot_box
