1 Data

In this report, we use data from the famous http://www.gapminder.org website. Let’s look at some random rows of this dataframe:

# Attach the gapminder package and load its `gapminder` dataset by name.
library(gapminder)
data(gapminder)

# Show ten randomly drawn rows as a table. The sample is not seeded, so a
# different set of rows is drawn on every run.
kable(sample_n(gapminder, 10))
country continent year lifeExp pop gdpPercap
Central African Republic Africa 1967 41 1733638 1136
Benin Africa 1987 52 4243788 1226
Algeria Africa 1952 43 9279525 2449
Egypt Africa 2007 71 80264543 5581
Oman Asia 1997 72 2283635 19702
Uruguay Americas 2007 76 3447496 10611
Singapore Asia 2002 79 4197776 36023
Gabon Africa 2007 57 1454867 13206
Tunisia Africa 1962 50 4286552 1660
Ecuador Americas 1962 55 4681707 4086

Here we see how many different values we have per column:

# Number of distinct values in each column of `gapminder`.
# vapply() guarantees exactly one integer per column (sapply() would silently
# change its return type on unexpected input). as.matrix() turns the named
# vector into a one-column matrix whose row names are the column names,
# replacing the earlier double transpose.
df1 <- as.matrix(vapply(gapminder, n_distinct, integer(1)))

# Two-column table: variable name and its number of distinct values.
data.frame(
  Variable = rownames(df1),
  Values = df1[, 1],
  row.names = NULL
) %>%
  kable()
Variable Values
country 142
continent 5
year 12
lifeExp 1626
pop 1704
gdpPercap 1704

Here is how many countries we have per continent:

# One row per continent, with the number of distinct countries it contains.
kable(
  summarise(
    group_by(gapminder, continent),
    "Number of Countries" = n_distinct(country)
  )
)
continent Number of Countries
Africa 52
Americas 25
Asia 33
Europe 30
Oceania 2

In Oceania, we only have Australia and New Zealand.

2 Population Growth

First, we are going to look at the Evolution of Total Population per continent:

library(highcharter)

# Total population per continent and year.
# `pop` is converted with as.numeric() before summing (presumably to avoid
# integer overflow in the continent totals -- confirm against the column type).
# The result is explicitly ungrouped so that later steps do not inherit the
# leftover `continent` grouping that summarise() would otherwise keep.
PG <- gapminder %>%
  group_by(continent, year) %>%
  summarise(pop = sum(as.numeric(pop))) %>%
  ungroup()

# One line per continent; the shared tooltip lists all continents for a year.
hchart(PG, "line", hcaes(x = year, y = pop, group = continent)) %>%
  hc_tooltip(shared = TRUE, table = TRUE)
# library(plotly)
# p <- ggplot(PG, aes(x=year, y=pop, color=continent)) +
#    geom_line(size=1)
# ggplotly(p)

The population growth in Asia is striking! Africa’s recent growth and Europe’s stagnation are also visible.

We can also display the stacked values in order to see the total population:

# Re-level `continent` so the series are drawn in a fixed order
# (Asia, Africa, Americas, Europe, Oceania).
# The data is ungrouped first: `continent` may still be a grouping column from
# the summarise() that built PG, and overwriting a grouping column in place
# with `$<-` is fragile.
PG <- PG %>%
  ungroup() %>%
  mutate(
    continent = factor(
      continent,
      levels = rev(c("Oceania", "Europe", "Americas", "Africa", "Asia"))
    )
  )

# Stacked area chart: the top edge of the stack is the total population.
hchart(PG, "area", hcaes(x = year, y = pop, group = continent)) %>%
  hc_tooltip(shared = TRUE, table = TRUE, crosshairs = TRUE) %>%
  hc_plotOptions(area = list(stacking = "normal"))
# library(streamgraph)
# streamgraph(PG, "continent", "pop", "year", offset="zero") %>%
#   sg_fill_brewer("Blues")

3 Life Expectancy

First, let’s look at life expectancy. The ribbons in the following figure span the 25th to the 75th percentile of each continent over time.

# Life expectancy per continent and year: the mean plus the 25th and 75th
# percentiles (`low` / `high`). Ungrouped explicitly so the `continent`
# grouping left by summarise() does not leak into later steps.
LE <- gapminder %>%
  group_by(continent, year) %>%
  summarise(
    LE = mean(lifeExp),
    low = quantile(lifeExp, 0.25),
    high = quantile(lifeExp, 0.75)
  ) %>%
  ungroup()

# One ribbon per continent. No x/low/high mapping is given in hcaes();
# NOTE(review): the chart presumably picks up the `low` and `high` columns by
# name and places points by their position within each series -- confirm.
# The axis labels are therefore the distinct years, once each, instead of the
# full `year` column (which repeats every year for every continent and only
# lined up because of the row order).
hchart(LE, "arearange", hcaes(group = continent)) %>%
  hc_xAxis(categories = unique(LE$year)) %>%
  hc_tooltip(shared = TRUE, crosshairs = TRUE)
# ggplot(LE, aes(x=year, y=LE, fill=continent)) +
#   # geom_line(aes(color=continent), size=1) +
#   geom_line(data=gapminder, aes(x=year,y=lifeExp,group=country,color=continent), size=1.5, alpha=.2) +
#   geom_ribbon(aes(ymin=low, ymax=high), alpha=0.8) +
#   scale_fill_brewer(palette = 'Spectral') +
#   scale_color_brewer(palette = 'Spectral')

What is that outlier? In 1992, Rwanda had a life expectancy of 23.6 years …

Note that the ribbon for Oceania is very narrow because there are only two countries in this category. We also notice that life expectancy (LE) in Asia increased a lot, while LE in Africa started decreasing in the mid-1980s.

4 GDP per capita

Now we are going to investigate the GDP per capita.

# Kuwait is excluded because its values before the 1970s are very odd.
# One line per country, coloured by continent; the legend is switched off.
hchart(
  filter(gapminder, !country %in% "Kuwait"),
  "line",
  hcaes(x = year, y = gdpPercap, group = country, color = continent)
) %>%
  hc_legend(enabled = FALSE) %>%
  hc_yAxis(title = list(text = "GDP per capita"))

Too crowded, let’s look at the grouped data:

# GDP per capita per continent and year: the mean plus the rounded 25th and
# 75th percentiles (`low` / `high`). Ungrouped explicitly so the `continent`
# grouping left by summarise() does not leak into later steps.
GDP <- gapminder %>%
  group_by(continent, year) %>%
  summarise(
    gdp = mean(gdpPercap),
    low = quantile(gdpPercap, 0.25) %>% round(),
    high = quantile(gdpPercap, 0.75) %>% round()
  ) %>%
  ungroup()

# One ribbon per continent. The axis labels are the distinct years, once each,
# instead of the full `year` column (which repeats every year for every
# continent and only lined up because of the row order).
hchart(GDP, "arearange", hcaes(group = continent)) %>%
  hc_xAxis(categories = unique(GDP$year)) %>%
  hc_tooltip(shared = TRUE, crosshairs = TRUE)

From 1987 onwards, the ribbons for Europe and Asia in particular become very wide. Let’s split each of these two continents into a low-GDP and a high-GDP group.

# Average GDP per capita after 1985 for every Asian and European country,
# sorted ascending. Each country is labelled "low" (average below 1e4) or
# "high", and `group` combines continent and label, e.g. "Asia low".
GDP1 <- gapminder %>%
  filter(year > 1985, continent %in% c("Asia", "Europe")) %>%
  group_by(continent, country) %>%
  summarise(avg = mean(gdpPercap)) %>%
  ungroup() %>%
  arrange(avg) %>%
  mutate(
    g = ifelse(avg < 1e4, "low", "high"),
    group = paste(continent, g)
  )

# Group label and country name (as character) for every country.
# Both columns are selected explicitly instead of relying on select()
# re-adding a grouping column.
g4 <- GDP1 %>%
  select(group, country) %>%
  mutate(country = as.character(country))

# Named list: the distinct country names of each group. The factor levels keep
# the groups in order of first appearance in `g4` rather than alphabetical.
x <- split(g4$country, factor(g4$group, levels = unique(g4$group)))
x <- lapply(x, unique)
x
## $`Asia low`
##  [1] "Myanmar"            "Afghanistan"        "Cambodia"          
##  [4] "Nepal"              "Bangladesh"         "Vietnam"           
##  [7] "India"              "Pakistan"           "Yemen, Rep."       
## [10] "Mongolia"           "Korea, Dem. Rep."   "Philippines"       
## [13] "China"              "Indonesia"          "Sri Lanka"         
## [16] "Syria"              "Jordan"             "West Bank and Gaza"
## [19] "Thailand"           "Iraq"               "Lebanon"           
## [22] "Iran"               "Malaysia"          
## 
## $`Europe low`
## [1] "Albania"                "Bosnia and Herzegovina"
## [3] "Turkey"                 "Bulgaria"              
## [5] "Montenegro"             "Romania"               
## 
## $`Europe high`
##  [1] "Serbia"          "Poland"          "Croatia"        
##  [4] "Slovak Republic" "Hungary"         "Czech Republic" 
##  [7] "Portugal"        "Slovenia"        "Greece"         
## [10] "Spain"           "Italy"           "Finland"        
## [13] "Ireland"         "France"          "United Kingdom" 
## [16] "Sweden"          "Belgium"         "Germany"        
## [19] "Iceland"         "Austria"         "Denmark"        
## [22] "Netherlands"     "Switzerland"     "Norway"         
## 
## $`Asia high`
##  [1] "Korea, Rep."      "Taiwan"           "Oman"            
##  [4] "Israel"           "Saudi Arabia"     "Bahrain"         
##  [7] "Japan"            "Hong Kong, China" "Singapore"       
## [10] "Kuwait"
# Yearly GDP-per-capita summary (mean plus rounded 25th / 75th percentiles)
# for the countries in `groups` whose label `g` equals `level`.
#
# groups: data frame with columns `country`, `group` and `g` (here: GDP1).
# level:  label to keep, "low" or "high".
# Returns one row per `group` and `year` with columns `gdp`, `low`, `high`.
summarise_gdp_band <- function(groups, level) {
  groups %>%
    filter(g == level) %>%
    select(country, group) %>%
    # Explicit key: do not depend on whichever columns happen to be shared.
    inner_join(gapminder, by = "country") %>%
    group_by(group, year) %>%
    summarise(
      gdp = mean(gdpPercap),
      low = quantile(gdpPercap, 0.25) %>% round(),
      high = quantile(gdpPercap, 0.75) %>% round()
    )
}

g1 <- summarise_gdp_band(GDP1, "low")
g2 <- summarise_gdp_band(GDP1, "high")

# Low groups first, then high groups; this fixes the order of the series.
g3 <- bind_rows(g1, g2)

# One ribbon per group. The axis labels are the distinct years, once each,
# instead of the full `year` column, which repeats every year for every group.
g3 %>%
  ungroup() %>%
  hchart("arearange", hcaes(group = group)) %>%
  hc_xAxis(categories = unique(g3$year))

We can clearly see that there are two categories of countries in both continents Europe and Asia. It’s striking that the GDP per capita in the Europe low group fell between 1987 and 1992. Might this have something to do with the fall of the communist regimes?

5 worst-of

Now, let’s investigate some countries which suffered a decrease in GDP per capita. Between 1987 and 1992, 43% of the countries suffered a decrease in GDP per capita.

# Relative change in GDP per capita (in percent) from 1987 to 1992 for every
# country, sorted from the largest decrease to the largest increase.
gdp <- gapminder %>%
  filter(year %in% c(1987, 1992)) %>%
  # Sort by year so that first() / last() below are the 1987 / 1992 values
  # regardless of the row order of the input.
  arrange(year) %>%
  group_by(continent, country) %>%
  summarise(
    diff = 100 * (last(gdpPercap) - first(gdpPercap)) / first(gdpPercap)
  ) %>%
  ungroup() %>%
  arrange(diff)

# The 20 countries with the lowest change, plotted as a positive "decrease".
# head() never pads with NA rows, unlike indexing with 1:20.
hchart(head(gdp, 20), "column", hcaes(x = country, y = round(-diff))) %>%
  hc_yAxis(title = list(text = "GDP decrease in %"))

Iraq in particular was hit very hard (see the Gulf War of 1990/91).

The following figure shows the 10 countries with the worst GDP performance between 1987 and 1992:

# All observations after 1986 for the first ten countries in `gdp`
# (`gdp` is sorted by its `diff` column, lowest change first).
# The join key is given explicitly so the join cannot silently pick up any
# other shared column.
gdp2 <- gdp %>%
  ungroup() %>%
  head(10) %>%
  select(country) %>%
  inner_join(gapminder, by = "country") %>%
  filter(year > 1986)

# One GDP-per-capita line per country.
hchart(gdp2, "line", hcaes(x = year, y = gdpPercap, group = country))

6 Factors of GDP Increase

Here, we are going to focus on the 26 African and 13 Asian countries which had a GDP per capita below 1000 USD in the 1950s.

# Average GDP per capita over 1952 and 1957 for every country, sorted from
# the lowest to the highest average.
gdp3 <- gapminder %>%
  filter(year %in% c(1952, 1957)) %>%
  group_by(country) %>%
  summarise(avg = mean(gdpPercap)) %>%
  arrange(avg)
# gdp3$avg[1:39]
# gdp3[1:39,"country"] %>% inner_join(gapminder %>% filter(year==1987) %>% select(country,continent)) %>% select(continent) %>% table

# Full GDP-per-capita history of the 39 countries with the lowest average
# (the commented-out checks above inspect these same 39 rows).
# The join key is given explicitly, and head() never pads with NA rows.
gdp3 %>%
  head(39) %>%
  select(country) %>%
  inner_join(gapminder, by = "country") %>%
  hchart("line", hcaes(x = year, y = gdpPercap, group = country)) %>%
  hc_legend(enabled = FALSE)

Now, some of them had an impressive development! Up to you to check the individual cases and find the reasons behind these promising examples.

7 Animate

library(plotly)

# Life expectancy against GDP per capita (log10 x axis), coloured by
# continent. Point size follows population; `frame` and `ids` are passed
# through for the animation (one frame per year, points keyed by country).
p <- ggplot(gapminder, aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point(aes(size = pop, frame = year, ids = country)) +
  scale_x_log10()

# Convert to an animated plotly widget with the given animation options.
animation_opts(
  ggplotly(p),
  frame = 1000,
  easing = "bounce",
  redraw = FALSE
)