Import the necessary libraries
library(tidyverse) #For data manipulation and ggplot
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.1.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr) #For Data manipulation
library(ggpubr) #For sub plotting
Wrangling the table part 1
# Drop rows 196 and 197 (per the original note, they have no continent) so
# they are not part of the per-continent aggregation below.
DF <- DF[setdiff(seq_len(nrow(DF)), c(196, 197)), ]
# Sum the Total column within each continent and turn the resulting named
# array into a two-column data frame (Var1 = continent, Freq = sum).
Q2DF1 <- as.data.frame.table(with(DF, tapply(Total, Continent, sum)))
Q2DF1
## Var1 Freq
## 1 Africa 618948
## 2 Asia 3317794
## 3 Europe 1410947
## 4 Latin America and the Caribbean 765148
## 5 Northern America 241142
## 6 Oceania 55174
Wrangling the table part 2
# Build the pie-chart table: one row per continent holding its total and a
# legend label of the form "<continent> <share>%".
# The continent names are read from the aggregated table (column Var1) rather
# than retyped, so each label is guaranteed to sit on the row of its own total.
# NOTE: the variable names `names`, `percentage` and `lebals` are kept as they
# were in case code further down the file refers to them.
names <- as.character(Q2DF1$Var1)
# Share of each continent in the overall total, in percent with 2 decimals.
percentage <- round(Q2DF1$Freq / sum(Q2DF1$Freq) * 100, 2)
lebals <- paste0(names, " ", percentage, "%") # e.g. "Africa 9.66%"
# Replace the Var1/Freq table by the final Total/Continent table in one step.
Q2DF1 <- data.frame(
  Total = Q2DF1$Freq,
  Continent = lebals,
  stringsAsFactors = FALSE
)
Q2DF1 # Final DF for part 1
## Total Continent
## 1 618948 Africa 9.66%
## 2 3317794 Asia 51.77%
## 3 1410947 Europe 22.01%
## 4 765148 Latin America and the Caribbean 11.94%
## 5 241142 Northern America 3.76%
## 6 55174 Oceania 0.86%
Plot the Pie Chart
# Pie chart of each continent's share of the total: a single stacked bar
# (x = "") whose y axis is mapped onto the polar angle.
Q2plot_Pie <- ggplot(Q2DF1, aes(x = "", y = Total, fill = Continent)) +
  geom_bar(stat = "identity", width = 1, color = "White") + # white slice borders
  coord_polar("y", start = 0) +
  ggtitle("Percentage of Immigrants per Continent [1980-2013]") +
  theme_void() + # remove background, grid, numeric labels
  # theme_void() is a complete theme and replaces all theme settings added
  # before it, so the title centring must come after it to take effect.
  theme(plot.title = element_text(hjust = 0.5))
Q2plot_Pie

Line plot of [Total number of immigrants per continent from 1980 to 2013]
Creating a suitable DF for the line plot - Part A
# Skeleton table for the line plot: a single Continent column, one row per
# continent; the yearly totals are added to it as further columns afterwards.
Continent <- c(
  "Africa",
  "Asia",
  "Europe",
  "Latin America and the Caribbean",
  "Northern America",
  "Oceania"
)
Q2DF2 <- data.frame(Continent = Continent)
Q2DF2
## Continent
## 1 Africa
## 2 Asia
## 3 Europe
## 4 Latin America and the Caribbean
## 5 Northern America
## 6 Oceania
# Year columns are the ones named "X<4-digit year>" (X1980 ... X2013).
# Selecting them by name instead of by position keeps this working if the
# non-year columns are reordered or new ones are added.
years <- grep("^X[0-9]{4}$", names(DF), value = TRUE)
# For every year, sum the number of immigrants per continent and store the
# result as a new column of Q2DF2.
for (col in years) {
  # [[ ]] always yields a plain vector, for data frames and tibbles alike.
  summation <- tapply(DF[[col]], DF$Continent, sum)
  # Look the sums up by continent name so each value lands on the matching
  # row of Q2DF2 regardless of the order tapply() returns them in.
  Q2DF2[[col]] <- as.vector(summation[as.character(Q2DF2$Continent)])
}
Q2DF2
## Continent X1980 X1981 X1982 X1983 X1984 X1985 X1986
## 1 Africa 3951 4363 3819 2671 2639 2650 3782
## 2 Asia 31025 34314 30214 24696 27274 23850 28739
## 3 Europe 39760 44802 42720 24638 22287 20844 24370
## 4 Latin America and the Caribbean 13081 15215 16769 15427 13678 15171 21179
## 5 Northern America 9378 10030 9074 7100 6661 6543 7074
## 6 Oceania 1942 1839 1675 1018 878 920 904
## X1987 X1988 X1989 X1990 X1991 X1992 X1993 X1994 X1995 X1996 X1997 X1998
## 1 7494 7552 9894 11012 14228 19242 16531 13072 14806 15700 14471 13791
## 2 43203 47454 60256 72829 89964 90752 98737 88852 90424 103030 106383 90929
## 3 46698 54726 60893 68301 57938 64123 62937 62531 55764 55642 48841 36719
## 4 28471 21924 25060 27942 36827 37853 33840 21341 20262 18645 17174 13830
## 5 7705 6469 6790 5895 6057 6846 7438 5902 4891 5516 4753 4437
## 6 1200 1181 1539 2075 2495 2871 2566 1967 1565 1552 1263 1021
## X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
## 1 15996 20346 24292 22710 23366 28192 27523 29188 28284 29890 34534
## 2 106844 134544 148083 137653 131769 134850 159253 149054 133459 139894 141434
## 3 35639 38215 42779 36798 34556 38082 35955 33053 33495 34692 35078
## 4 15088 16898 20067 19317 20263 22181 24747 24676 26011 26547 26867
## 5 5196 5433 5604 4948 5543 6990 8394 9613 9463 10190 8995
## 6 1055 1276 1818 1685 1800 1788 1585 1473 1693 1834 1860
## X2010 X2011 X2012 X2013
## 1 40892 35441 38083 38543
## 2 163845 146894 152218 155075
## 3 33425 26778 29177 28691
## 4 28818 27856 27173 24950
## 5 8142 7677 7892 8503
## 6 1834 1548 1679 1775
Creating a suitable DF for the line plot - Part B
# Keep only the yearly totals of one continent for the line plot.
# "Africa" stands in for the user input; changing this single value is all
# that is needed to plot another continent.
selected_continent <- "Africa"
# Select every column except Continent by name rather than by the fixed
# range 2:35, and keep the result a data frame even for a single column.
Q2DF2 <- Q2DF2[
  Q2DF2$Continent == selected_continent,
  names(Q2DF2) != "Continent",
  drop = FALSE
]
Q2DF2
## X1980 X1981 X1982 X1983 X1984 X1985 X1986 X1987 X1988 X1989 X1990 X1991 X1992
## 1 3951 4363 3819 2671 2639 2650 3782 7494 7552 9894 11012 14228 19242
## X1993 X1994 X1995 X1996 X1997 X1998 X1999 X2000 X2001 X2002 X2003 X2004 X2005
## 1 16531 13072 14806 15700 14471 13791 15996 20346 24292 22710 23366 28192 27523
## X2006 X2007 X2008 X2009 X2010 X2011 X2012 X2013
## 1 29188 28284 29890 34534 40892 35441 38083 38543
The line-plot steps are the same as those used in Q1
Transposing the subsetted dataset
# Reshape the one-row wide table (one column per year) into long format with
# one row per year: Year holds the former column name, Total its value.
# pivot_longer() replaces the superseded gather(); everything() avoids the
# hard-coded 1:34 column range. as.data.frame() keeps the plain data-frame
# class that gather() returned.
Q2DF2 <- as.data.frame(
  pivot_longer(
    Q2DF2,
    cols = everything(),
    names_to = "Year",
    values_to = "Total"
  )
)
# Strip the leading "X" that R prepended to the numeric column names;
# Year remains a character vector.
Q2DF2$Year <- sub("^X", "", as.character(Q2DF2$Year))
Q2DF2 # Final DF ready to be plotted
## Year Total
## 1 1980 3951
## 2 1981 4363
## 3 1982 3819
## 4 1983 2671
## 5 1984 2639
## 6 1985 2650
## 7 1986 3782
## 8 1987 7494
## 9 1988 7552
## 10 1989 9894
## 11 1990 11012
## 12 1991 14228
## 13 1992 19242
## 14 1993 16531
## 15 1994 13072
## 16 1995 14806
## 17 1996 15700
## 18 1997 14471
## 19 1998 13791
## 20 1999 15996
## 21 2000 20346
## 22 2001 24292
## 23 2002 22710
## 24 2003 23366
## 25 2004 28192
## 26 2005 27523
## 27 2006 29188
## 28 2007 28284
## 29 2008 29890
## 30 2009 34534
## 31 2010 40892
## 32 2011 35441
## 33 2012 38083
## 34 2013 38543
#### Plotting Code - Line plot
# Line plot of the yearly totals. Year is a character column, so the x axis
# is discrete; group = 1 puts all rows into a single group so geom_line()
# joins them into one line.
Q2plot_line <- ggplot(Q2DF2, aes(Year, Total, group = 1)) +
  geom_line(colour = "Green") + # the trend line
  geom_point() + # one marker per year
  scale_x_discrete(breaks = seq(1980, 2013, by = 5)) + # label every 5th year
  scale_y_continuous(breaks = seq(10000, 50000, by = 5000)) +
  labs(
    title = "Number of Immigrants per Year",
    x = "Years",
    y = "Number of Immigrants"
  ) +
  theme(plot.title = element_text(hjust = 0.5)) # centre the title
Q2plot_line
