Summary

This short analysis was done for self-training purposes. The dataset was found on Gapminder; the data originate from the WHO.

Data source: https://www.gapminder.org/data/
Data file: https://docs.google.com/spreadsheets/d/1UTd3D_D89fViE_ZN5wnIiemv2dyhtTS-92qIv_lJhcI/pub?gid=0

The indicator represents the recorded and estimated average alcohol consumption per capita among adults (15+), in litres of pure alcohol.

library(ggplot2)
library(dplyr)
library(tidyr)
library(countrycode)
library(maps)
library(forcats)
# Load the raw Gapminder export (semicolon-separated, with a header row).
# Strings are kept as character so country names can be edited later on.
data.source <- read.csv("C:/Users/marc/Desktop/Data/160905_alcohol/indicator_alcohol_consumption_20100830.csv", sep = ";", header = TRUE, stringsAsFactors = FALSE)
# as_tibble() (re-exported by dplyr) replaces tbl_df(), which has been
# deprecated since dplyr 1.0.0.
data.source <- as_tibble(data.source)

Data cleaning

We will look at the 2008 data only, so we can remove the other years.

colnames(data.source)
##  [1] "X"     "X1985" "X1986" "X1987" "X1988" "X1989" "X1990" "X1991"
##  [9] "X1992" "X1993" "X1994" "X1995" "X1996" "X1997" "X1998" "X1999"
## [17] "X2000" "X2001" "X2002" "X2003" "X2004" "X2005" "X2006" "X2007"
## [25] "X2008"
# Keep only the country column (X) and the 2008 values, renaming both
# columns while selecting them.
data.cleaned <- select(
  data.source,
  region = X,
  alcohol.consumption = X2008
)

Prepare data for the chart

We need to map countries from the dataset to their related longitude/latitude coordinates. We will use the map_data() function for this.

# Map polygon data for the whole world, as returned by map_data().
world <- map_data("world")
# TRUE for every dataset country whose name also appears in the map data.
check <- data.cleaned$region %in% world$region
# A few countries don't match, so we are going to adjust their names manually.
data.cleaned$region[!check]
##  [1] "Antigua and Barbuda"              "Congo [DRC]"                     
##  [3] "Congo [Republic]"                 "Côte d'Ivoire"                   
##  [5] "Macedonia [FYROM]"                "Myanmar [Burma]"                 
##  [7] "Saint Kitts and Nevis"            "Saint Vincent and the Grenadines"
##  [9] "São Tomé and Príncipe"            "Trinidad and Tobago"             
## [11] "Tuvalu"                           "United Kingdom"                  
## [13] "United States"
# 1. Countries we can replace: recode the dataset names (right-hand side)
#    to the names used in world$region (left-hand side).
data.cleaned$region <- fct_recode(
  as.factor(data.cleaned$region),
  "USA" = "United States",
  "Republic of Congo" = "Congo [Republic]",
  "Democratic Republic of the Congo" = "Congo [DRC]",
  "Ivory Coast" = "Côte d'Ivoire",
  "Macedonia" = "Macedonia [FYROM]",
  "Myanmar" = "Myanmar [Burma]",
  "Sao Tome and Principe" = "São Tomé and Príncipe",
  "UK" = "United Kingdom"
)

# 2. Country to remove: I couldn't find Tuvalu in the country list.
# 3. Countries to remove: some countries such as "Antigua and Barbuda" are
#    considered as two separate countries/regions in world$region (in this
#    case "Antigua" and "Barbuda"). As I don't have more detailed data, I
#    will remove them.
data.cleaned <- filter(
  data.cleaned,
  region != "Tuvalu",
  region != "Antigua and Barbuda",
  region != "Trinidad and Tobago",
  region != "Saint Vincent and the Grenadines",
  region != "Saint Kitts and Nevis"
)

Chart

# Attach the consumption value to every map point of the matching region,
# then sort the points by the map data's `order` column, since
# merge(sort = FALSE) leaves the row order unspecified.
data.chart <- arrange(
  merge(world, data.cleaned, by = "region", sort = FALSE),
  order
)

# Palette was found on http://www.colorhunt.co/
my.palette <- c("#94ED88", "#3BB873", "#406661", "#384137")

# World map with each region filled according to its alcohol consumption;
# axes, lines and panel border are hidden.
ggplot(data.chart, aes(x = long, y = lat)) +
  geom_polygon(aes(group = group, fill = alcohol.consumption)) +
  theme_classic() +
  theme(
    panel.border = element_blank(),
    axis.text = element_blank(),
    line = element_blank(),
    axis.title = element_blank()
  ) +
  coord_map(xlim = c(-180, 180)) +
  scale_fill_gradientn(
    colours = my.palette,
    breaks = c(5, 10, 15, 20)
  ) +
  ggtitle("Alcohol consumption per adult (15+) in 2008 in litres")