1 Data

In this report, we use data from the famous http://www.gapminder.org website. Let’s look at some random rows of this dataframe:

library(gapminder)
data(gapminder)
# Render ten randomly sampled rows of the dataset as a table.
kable(sample_n(gapminder, 10))
country continent year lifeExp pop gdpPercap
Czech Republic Europe 1987 72 10311597 16310
Sweden Europe 1982 76 8325260 20667
Japan Asia 1992 79 124329269 26825
Cote d’Ivoire Africa 1957 42 3300000 1501
Malaysia Asia 2007 74 24821286 12452
Burundi Africa 1977 46 3834415 556
United States Americas 1982 75 232187835 25010
Netherlands Europe 1972 74 13329874 18795
Yemen, Rep. Asia 2007 63 22211743 2281
Serbia Europe 1952 58 6860147 3581

Here we see how many different values we have per column:

# Number of distinct values in every column of gapminder.
# vapply() enforces exactly one integer per column (sapply() would silently
# change its return type on unexpected input); as.matrix() turns the named
# vector into a one-column matrix whose row names are the column names.
df1 <- gapminder %>%
  vapply(n_distinct, integer(1)) %>%
  as.matrix()

# Two-column table: variable name and its number of distinct values.
data.frame(
  Variable = row.names(df1),
  Values = df1[, 1],
  row.names = NULL
) %>%
  kable()
Variable Values
country 142
continent 5
year 12
lifeExp 1626
pop 1704
gdpPercap 1704

Here is how many countries we have per continent:

# One row per continent with the count of distinct countries it contains.
gapminder %>%
  group_by(continent) %>%
  summarise(`Number of Countries` = n_distinct(country)) %>%
  kable()
continent Number of Countries
Africa 52
Americas 25
Asia 33
Europe 30
Oceania 2

In Oceania, we only have Australia and New Zealand.

2 Population Growth

First, we are going to look at the evolution of the total population per continent:

library(highcharter)

# Total population per continent and year.
# pop is converted with as.numeric() before summing -- presumably to avoid
# integer overflow on the continent totals.
PG <- gapminder %>%
  group_by(continent, year) %>%
  summarise(pop = sum(as.numeric(pop))) %>%
  ungroup() # drop the continent grouping that summarise() leaves behind

# One line per continent; the shared tooltip lists all continents for a year.
hchart(PG, "line", hcaes(x = year, y = pop, group = continent)) %>%
  hc_tooltip(shared = TRUE, table = TRUE)
# Alternative rendering with plotly (disabled):
# library(plotly)
# p <- ggplot(PG, aes(x=year, y=pop, color=continent)) +
#    geom_line(size=1)
# ggplotly(p)

The population growth in Asia is striking! Africa’s recent growth and Europe’s stagnation are also visible.

We can also display the stacked values in order to see the total population:

# Fix the order of the continent levels (Asia first, Oceania last); this is
# the order in which the series are grouped for the stacked chart.
PG$continent <- factor(
  PG$continent,
  levels = c("Asia", "Africa", "Americas", "Europe", "Oceania")
)

# Stacked area chart: the top edge of the stack is the total population.
hchart(PG, "area", hcaes(x = year, y = pop, group = continent)) %>%
  hc_tooltip(shared = TRUE, table = TRUE, crosshairs = TRUE) %>%
  hc_plotOptions(area = list(stacking = "normal"))
# Alternative rendering with streamgraph (disabled):
# library(streamgraph)
# streamgraph(PG, "continent", "pop", "year", offset="zero") %>%
#   sg_fill_brewer("Blues")

3 Life Expectancy

First, let’s look at life expectancy. The ribbons in the following figure span the 25th to the 75th percentile for each continent over time.

# Life expectancy per continent and year: mean plus the 25th and 75th
# percentiles (columns low / high, which the arearange series draws as a band).
LE <- gapminder %>%
  group_by(continent, year) %>%
  summarise(
    LE = mean(lifeExp),
    low = quantile(lifeExp, 0.25),
    high = quantile(lifeExp, 0.75)
  ) %>%
  ungroup() # drop the continent grouping that summarise() leaves behind

# No x is mapped, so points are placed by position within each continent;
# the axis categories supply the year labels. unique() passes each year once
# instead of repeating the full list for every continent.
hchart(LE, "arearange", hcaes(group = continent)) %>%
  hc_xAxis(categories = unique(LE$year)) %>%
  hc_tooltip(shared = TRUE, crosshairs = TRUE)
# Alternative rendering with ggplot2 (disabled):
# ggplot(LE, aes(x=year, y=LE, fill=continent)) +
#   geom_line(aes(color=continent), size=1) +
#   geom_ribbon(aes(ymin=low, ymax=high), alpha=0.5) +
#   scale_fill_brewer(palette = 'Spectral') +
#   scale_color_brewer(palette = 'Spectral')

Note that the ribbon for Oceania is very narrow because there are only two countries in this category. We notice that life expectancy in Asia increased a lot, while life expectancy in Africa started decreasing in the mid-1980s.

4 GDP per capita

Now we are going to investigate the GDP per capita:

# Exclude Kuwait, whose values prior to the 1970s are very odd.
# One line per country, coloured by continent; the legend is hidden.
gapminder %>%
  filter(!(country %in% "Kuwait")) %>%
  hchart(
    "line",
    hcaes(x = year, y = gdpPercap, group = country, color = continent)
  ) %>%
  hc_yAxis(title = list(text = "GDP per capita")) %>%
  hc_legend(enabled = FALSE)

This is too crowded, so let’s look at the data grouped by continent:

# GDP per capita per continent and year: mean plus the rounded 25th and 75th
# percentiles (columns low / high, drawn as a band by the arearange series).
GDP <- gapminder %>%
  group_by(continent, year) %>%
  summarise(
    gdp = mean(gdpPercap),
    low = quantile(gdpPercap, 0.25) %>% round(),
    high = quantile(gdpPercap, 0.75) %>% round()
  ) %>%
  ungroup() # drop the continent grouping that summarise() leaves behind

# Points are placed by position within each continent; the categories supply
# the year labels, passed once each rather than repeated per continent.
hchart(GDP, "arearange", hcaes(group = continent)) %>%
  hc_xAxis(categories = unique(GDP$year)) %>%
  hc_tooltip(shared = TRUE, crosshairs = TRUE)

From 1987 onwards, the ranges for Europe and Asia in particular become very wide. Let’s split each of these two continents into two groups.

# Average GDP per capita after 1985 for every Asian and European country,
# sorted ascending. g labels a country "low" (average below 10,000) or "high";
# group combines continent and label, e.g. "Asia low".
GDP1 <- gapminder %>%
  filter(year > 1985, continent %in% c("Asia", "Europe")) %>%
  group_by(continent, country) %>%
  summarize(avg = mean(gdpPercap)) %>%
  ungroup() %>%
  arrange(avg) %>%
  mutate(
    g = ifelse(avg < 1e4, "low", "high"),
    group = paste(continent, g)
  )

# Group label and country name (as character) for every country.
g4 <- GDP1 %>%
  select(group, country) %>%
  mutate(country = as.character(country))

# Named list: one character vector of country names per group. The factor
# levels keep the groups in order of first appearance (split() would otherwise
# sort them alphabetically); countries stay in ascending-average order.
x <- split(g4$country, factor(g4$group, levels = unique(g4$group)))
x
## $`Asia low`
##  [1] "Myanmar"            "Afghanistan"        "Cambodia"          
##  [4] "Nepal"              "Bangladesh"         "Vietnam"           
##  [7] "India"              "Pakistan"           "Yemen, Rep."       
## [10] "Mongolia"           "Korea, Dem. Rep."   "Philippines"       
## [13] "China"              "Indonesia"          "Sri Lanka"         
## [16] "Syria"              "Jordan"             "West Bank and Gaza"
## [19] "Thailand"           "Iraq"               "Lebanon"           
## [22] "Iran"               "Malaysia"          
## 
## $`Europe low`
## [1] "Albania"                "Bosnia and Herzegovina"
## [3] "Turkey"                 "Bulgaria"              
## [5] "Montenegro"             "Romania"               
## 
## $`Europe high`
##  [1] "Serbia"          "Poland"          "Croatia"        
##  [4] "Slovak Republic" "Hungary"         "Czech Republic" 
##  [7] "Portugal"        "Slovenia"        "Greece"         
## [10] "Spain"           "Italy"           "Finland"        
## [13] "Ireland"         "France"          "United Kingdom" 
## [16] "Sweden"          "Belgium"         "Germany"        
## [19] "Iceland"         "Austria"         "Denmark"        
## [22] "Netherlands"     "Switzerland"     "Norway"         
## 
## $`Asia high`
##  [1] "Korea, Rep."      "Taiwan"           "Oman"            
##  [4] "Israel"           "Saudi Arabia"     "Bahrain"         
##  [7] "Japan"            "Hong Kong, China" "Singapore"       
## [10] "Kuwait"
# Yearly GDP-per-capita summary (mean and rounded 25th / 75th percentiles) per
# group, for the GDP1 countries whose label g equals `level`.
# The join brings back all years of gapminder for those countries.
summarise_gdp_level <- function(level) {
  GDP1 %>%
    filter(g == level) %>%
    select(country, group) %>%
    inner_join(gapminder, by = "country") %>%
    group_by(group, year) %>%
    summarise(
      gdp = mean(gdpPercap),
      low = quantile(gdpPercap, 0.25) %>% round(),
      high = quantile(gdpPercap, 0.75) %>% round()
    ) %>%
    ungroup()
}

g1 <- summarise_gdp_level("low")
g2 <- summarise_gdp_level("high")

# Low groups first, then high groups.
g3 <- bind_rows(g1, g2)

# One band per group; the categories supply the year labels, passed once each.
g3 %>%
  hchart("arearange", hcaes(group = group)) %>%
  hc_xAxis(categories = unique(g3$year))

We can clearly see that there are two categories of countries in both Europe and Asia. It is striking that the GDP per capita of the “Europe low” group fell between 1987 and 1992. Might this have something to do with the fall of the communist regimes?

5 worst-of

Now, let’s investigate some countries which suffered a decrease in GDP per capita. Between 1987 and 1992, 43% of the countries suffered a decrease in GDP per capita.

# Percentage change in GDP per capita from 1987 to 1992 per country, sorted
# from the largest decrease to the largest increase.
# The two values are selected by year rather than with first()/last(), so the
# result does not depend on the row order of gapminder.
gdp <- gapminder %>%
  filter(year %in% c(1987, 1992)) %>%
  group_by(continent, country) %>%
  summarize(
    diff = 100 * (gdpPercap[year == 1992] - gdpPercap[year == 1987]) /
      gdpPercap[year == 1987]
  ) %>%
  ungroup() %>%
  arrange(diff)

# The 20 largest decreases; the sign is flipped so a decrease is a positive bar.
hchart(head(gdp, 20), "column", hcaes(x = country, y = round(-diff))) %>%
  hc_yAxis(title = list(text = "GDP decrease in %"))

Iraq in particular was hit very hard (see the Gulf War of 1990/91).

The following figure shows the 10 countries with the worst GDP performance between 1987 and 1992:

# GDP-per-capita series from 1987 onwards for the ten countries with the
# largest relative decrease (the first ten rows of gdp).
# The join key is stated explicitly instead of relying on the natural join.
gdp2 <- gdp %>%
  ungroup() %>%
  head(10) %>%
  select(country) %>%
  inner_join(gapminder, by = "country") %>%
  filter(year > 1986)

hchart(gdp2, "line", hcaes(x = year, y = gdpPercap, group = country))

6 Factors of GDP Increase

Here, we are going to focus on the 26 African and 13 Asian countries which had a GDP per capita below 1000 USD in the 1950s.

# Average GDP per capita over 1952 and 1957 per country, sorted ascending.
gdp3 <- gapminder %>%
  filter(year %in% c(1952, 1957)) %>%
  group_by(country) %>%
  summarise(avg = mean(gdpPercap)) %>%
  arrange(avg)
# Checks used to pick the cut-off of 39 countries (disabled):
# gdp3$avg[1:39]
# gdp3[1:39,"country"] %>% inner_join(gapminder %>% filter(year==1987) %>% select(country,continent)) %>% select(continent) %>% table

# Full GDP-per-capita series of the 39 countries with the lowest 1950s
# average; the join key is stated explicitly.
gdp3 %>%
  head(39) %>%
  select(country) %>%
  inner_join(gapminder, by = "country") %>%
  hchart("line", hcaes(x = year, y = gdpPercap, group = country)) %>%
  hc_legend(enabled = FALSE)

Some of these countries have developed impressively! It is up to you to check the individual cases and find the reasons behind these promising examples.