Descriptive Analysis Question

Q1 - How many immigrants in total came to Canada each year from 1980 to 2013? The answer is visualized with a line chart and a bar chart.

Import the necessary libraries

library(tidyverse) #For data manipulation and ggplot
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.1.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr) #For data manipulation
library(ggpubr) #For sub plotting

Import the Dataset

# Load the immigration table and preview its first six rows.
# NOTE: the absolute path is specific to the author's machine.
DF <- read.csv(file = "/Users/salahkaf/Desktop/UpdatedDF.csv")
head(DF, n = 6L)
##          Country Continent          Region            DevName X1980 X1981 X1982
## 1    Afghanistan      Asia   Southern Asia Developing regions    16    39    39
## 2        Albania    Europe Southern Europe  Developed regions     1     0     0
## 3        Algeria    Africa Northern Africa Developing regions    80    67    71
## 4 American Samoa   Oceania       Polynesia Developing regions     0     1     0
## 5        Andorra    Europe Southern Europe  Developed regions     0     0     0
## 6         Angola    Africa   Middle Africa Developing regions     1     3     6
##   X1983 X1984 X1985 X1986 X1987 X1988 X1989 X1990 X1991 X1992 X1993 X1994 X1995
## 1    47    71   340   496   741   828  1076  1028  1378  1170   713   858  1537
## 2     0     0     0     1     2     2     3     3    21    56    96    71    63
## 3    69    63    44    69   132   242   434   491   872   795   717   595  1106
## 4     0     0     0     0     1     0     1     2     0     0     0     0     0
## 5     0     0     0     2     0     0     0     3     0     1     0     0     0
## 6     6     4     3     5     5    11     6     8    23    26    22     8    26
##   X1996 X1997 X1998 X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008
## 1  2212  2555  1999  2395  3326  4067  3697  3479  2978  3436  3009  2652  2111
## 2   113   307   574  1264  1816  1602  1021   853  1450  1223   856   702   560
## 3  2054  1842  2292  2389  2867  3418  3406  3072  3616  3626  4807  3623  4005
## 4     0     0     0     0     0     0     0     0     0     0     1     0     0
## 5     0     0     2     0     0     1     0     2     0     0     1     1     0
## 6    38    27    58    49    70   169   168   165   268   295   184   106    76
##   X2009 X2010 X2011 X2012 X2013 Total
## 1  1746  1758  2203  2635  2004 58639
## 2   716   561   539   620   603 15699
## 3  5393  4752  4325  3774  4331 69439
## 4     0     0     0     0     0     6
## 5     0     0     0     1     1    15
## 6    62    61    39    70    45  2113

Select a “country” or “total” as an input option

# Country (or the aggregate "Total" row) to analyse. "Total" stands in for a
# user-supplied choice such as "China" or "India".
selected_country <- "Total"
# Pick the yearly columns (X1980 ... X2013) by name rather than by position, so
# the subset survives columns being added or reordered in the CSV.
year_cols <- grep("^X[0-9]{4}$", names(DF), value = TRUE)
# which() drops NA comparisons, so a missing Country cannot add an all-NA row.
Q1DF <- DF[which(DF$Country == selected_country), year_cols, drop = FALSE]
Q1DF
##      X1980  X1981  X1982 X1983 X1984 X1985 X1986  X1987  X1988  X1989  X1990
## 197 143137 128641 121175 89185 88272 84346 99351 152075 161585 191550 216451
##      X1991  X1992  X1993  X1994  X1995  X1996  X1997  X1998  X1999  X2000
## 197 232802 254787 256638 224382 212864 226071 216036 174195 189950 227455
##      X2001  X2002  X2003  X2004  X2005  X2006  X2007  X2008  X2009  X2010
## 197 250636 229049 221349 235822 262242 251640 236753 247244 252170 280687
##      X2011  X2012  X2013
## 197 248748 257903 259021

Transposing the subsetted dataset

# Reshape the one-row wide table into long form: one row per year, with the
# year label in `Year` and the immigrant count in `Total`.
# pivot_longer() replaces the superseded gather(); names_prefix strips only the
# leading "X" from the column names (X1980 -> 1980).
Q1DF <- pivot_longer(
  Q1DF,
  cols = everything(),
  names_to = "Year",
  names_prefix = "X",
  values_to = "Total"
)
# Convert back to a plain data.frame so it prints as before. Year stays a
# character column because the plots use a discrete x scale.
Q1DF <- as.data.frame(Q1DF)
Q1DF # Final DF ready to be plotted
##    Year  Total
## 1  1980 143137
## 2  1981 128641
## 3  1982 121175
## 4  1983  89185
## 5  1984  88272
## 6  1985  84346
## 7  1986  99351
## 8  1987 152075
## 9  1988 161585
## 10 1989 191550
## 11 1990 216451
## 12 1991 232802
## 13 1992 254787
## 14 1993 256638
## 15 1994 224382
## 16 1995 212864
## 17 1996 226071
## 18 1997 216036
## 19 1998 174195
## 20 1999 189950
## 21 2000 227455
## 22 2001 250636
## 23 2002 229049
## 24 2003 221349
## 25 2004 235822
## 26 2005 262242
## 27 2006 251640
## 28 2007 236753
## 29 2008 247244
## 30 2009 252170
## 31 2010 280687
## 32 2011 248748
## 33 2012 257903
## 34 2013 259021

Plotting Code - Line plot

# Line chart of the yearly totals. Year is a character (discrete) column, so
# group = 1 is needed to join all points into a single line.
Q1plot_line <- ggplot(Q1DF, aes(x = Year, y = Total, group = 1)) +
  geom_line(colour = "darkred") +
  geom_point() +
  # Label every fifth year on the x axis.
  scale_x_discrete(breaks = seq(from = 1980, to = 2013, by = 5)) +
  # Tick the y axis every 10,000 immigrants.
  scale_y_continuous(breaks = seq(from = 10000, to = 300000, by = 10000)) +
  labs(
    title = "Number of Immigrants per Year",
    x = "Years",
    y = "Number of Immigrants"
  ) +
  # Centre the title.
  theme(plot.title = element_text(hjust = 0.5))
Q1plot_line

Plotting Code - Bar plot

# Bar chart of the yearly totals. stat = "identity" uses Total as the bar
# height instead of counting rows.
Q1plot_bar <- ggplot(Q1DF, aes(x = Year, y = Total)) +
  geom_bar(stat = "identity", fill = "#56B4E9") +
  # Label every fifth year on the x axis.
  scale_x_discrete(breaks = seq(from = 1980, to = 2013, by = 5)) +
  # Tick the y axis every 10,000 immigrants.
  scale_y_continuous(breaks = seq(from = 10000, to = 300000, by = 10000)) +
  labs(
    title = "Number of Immigrants per Year",
    x = "Years",
    y = "Number of Immigrants"
  ) +
  # Centre the title.
  theme(plot.title = element_text(hjust = 0.5))
Q1plot_bar

For combining them in one plot (subplotting)

 # Place the line chart (panel "A") and the bar chart (panel "B") side by side
 # in a single row.
 A <- ggarrange(
   plotlist = list(Q1plot_line, Q1plot_bar),
   nrow = 1,
   ncol = 2,
   labels = c("A", "B")
 )
A

To save the plot as PDF

# pdf("ggplot.pdf")
# print(A)  
# dev.off()