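Data source: The World Bank. For the computation, the following indicators were used: foreign direct investment, net (BN.KLT.DINV.CD); consumer price index (FP.CPI.TOTL); gross domestic product (NY.GDP.MKTP.CD); and unemployment rate (SL.UEM.TOTL.ZS).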

# Read the four indicator tables from CSV.
# skip = 4 ignores the first four lines of every file, so the column header is
# taken from the fifth line.
gdp <- read_csv(file = "./data/csv/gdp.csv", skip = 4)
unemployment <- read_csv(file = "./data/csv/unemployment.csv", skip = 4)
cpi <- read_csv(file = "./data/csv/cpi.csv", skip = 4)
fdi <- read_csv(file = "./data/csv/fdi.csv", skip = 4)

Extracting last values of indicators, concatenation of the dataset

Here I created a custom function that discards all unnecessary data and keeps only the most recent value of each indicator. The function searches for the first non-empty field starting from the 'tail' (right-hand) side of each row. For countries that have no value for any year, it picks up an improper string from the column preceding the indicator values; this string is replaced with NA (for clarity). The function returns a table with the columns: country name, country code, and the indicator value. Then the tables are combined into econ_data (to have all indicators available at once) and rows containing NAs are removed.

# Reduce an indicator table to the most recent non-empty value per country.
#
# df             - original data frame; must contain the columns "Country Name"
#                  and "Country Code", every other column is melted as a value
# indicatorAbbrv - string, abbreviated name of economic indicator, to be used
#                  as a column header, e.g. GDP, CPI
# nullableStr    - string that should be replaced with null (NA) when it turns
#                  out to be the last non-empty cell of a row
#
# Returns a table with the columns CountryName, CountryCode and a numeric
# column named after indicatorAbbrv.

lastValAndFormatting <- function(df, indicatorAbbrv, nullableStr) {

  # Row names are deliberately not kept: as an extra `rn` column they would be
  # melted like any other measure, and for a row without any other non-empty
  # cell the row index would be returned as if it were the indicator value.
  df_long <- melt(
    as.data.table(df),
    id.vars = c("Country Name", "Country Code")
  )

  # Drop the empty cells, then take the last remaining value of every country.
  # The molten rows follow the column order of df, so the last row of a group
  # is the right-most non-empty cell of the original row.
  df_lastVal <- na.omit(df_long)[
    , value[.N], by = c("Country Name", "Country Code")
  ]
  colnames(df_lastVal) <- c("CountryName", "CountryCode", "IndicatorValue")

  # Where the last non-empty cell is the placeholder string, make the value an
  # explicit NA before the column is converted to numeric.
  df_lastVal <- replace_with_na(
    df_lastVal,
    replace = list(IndicatorValue = nullableStr)
  )
  df_lastVal$IndicatorValue <- as.numeric(df_lastVal$IndicatorValue)

  colnames(df_lastVal) <- c("CountryName", "CountryCode", indicatorAbbrv)
  df_lastVal

}


# Most recent value of every indicator, one table per indicator. The third
# argument is the placeholder string that is turned into NA.
fdi_last <- lastValAndFormatting(fdi, "FDI", "BN.KLT.DINV.CD")
cpi_last <- lastValAndFormatting(cpi, "CPI", "FP.CPI.TOTL")
gdp_last <- lastValAndFormatting(gdp, "GDP", "NY.GDP.MKTP.CD")
ur_last <- lastValAndFormatting(unemployment, "UR", "SL.UEM.TOTL.ZS")

# Join on the country keys instead of gluing the columns together by position,
# so that a different row order or country set in one of the files cannot
# attach a value to the wrong country. sort = FALSE keeps the row order of
# gdp_last; the resulting columns are CountryName, CountryCode, GDP, CPI, FDI, UR.
country_keys <- c("CountryName", "CountryCode")
econ_data <- Reduce(
  function(x, y) merge(x, y, by = country_keys, sort = FALSE),
  list(gdp_last, cpi_last, fdi_last, ur_last)
)

# Keep only the countries for which all four indicators are available.
econ_data <- na.omit(econ_data)

Clustering countries

Figure out the proper number of clusters

From the plots produced with the fviz_nbclust() function, I conclude that the optimal number of clusters is 5, as indicated by the elbow method (this is also close to the result of the average silhouette width method).

# Both diagnostics run on the standardized indicator columns
# (columns 3 to 6 of econ_data).
indicators_scaled <- scale(econ_data[, 3:6])

# Elbow method; the dashed vertical line marks k = 5
fviz_nbclust(indicators_scaled, FUNcluster = kmeans, method = "wss") +
  geom_vline(xintercept = 5, linetype = 2)

# Silhouette method
fviz_nbclust(indicators_scaled, FUNcluster = kmeans, method = "silhouette")

Perform clustering for k=5

Seed is set to make the result reproducible.

# Fix the RNG state so that the random starts of kmeans() are repeatable.
set.seed(1234)

# k-means with 5 centers on the standardized indicator columns (3 to 6),
# keeping the best of 100 random starts.
indicators_scaled <- scale(econ_data[, 3:6])
kmeans_k5 <- kmeans(x = indicators_scaled, centers = 5, nstart = 100)

# plot the clusters
fviz_cluster(
  object = kmeans_k5,
  data = indicators_scaled,
  geom = c("point"),
  ellipse.type = "euclid"
)

Briefly summarize clusters

Below I check how many countries fall into each cluster and what the mean values of the economic indicators are within each cluster.

# Number of countries assigned to each cluster
econ_data %>% count(kmeans_k5$cluster)
##    kmeans_k5$cluster   n
## 1:                 1   2
## 2:                 2   6
## 3:                 3   2
## 4:                 4 128
## 5:                 5  37
# Mean of every indicator (columns 3 to 6 of econ_data) within each cluster.
# The function is passed directly: the funs() wrapper is deprecated in dplyr
# and only triggers a warning, the resulting table is the same.
econ_data[, 3:6] %>%
  group_by(kmeans_k5$cluster) %>%
  summarize_all(mean)
## # A tibble: 5 × 5
##   `kmeans_k5$cluster`     GDP    CPI      FDI    UR
##                 <int>   <dbl>  <dbl>    <dbl> <dbl>
## 1                   1 2.32e10 19408. -2.33e 8 16.9 
## 2                   2 2.32e12   116.  9.92e10  4.46
## 3                   3 2.05e13   127. -1.16e11  5.14
## 4                   4 2.64e11   238. -1.75e 9  5.81
## 5                   5 1.55e11   164. -3.17e 9 18.1

Create interactive map with leaflet

# Polygons of countries: http://thematicmapping.org/downloads/TM_WORLD_BORDERS_SIMPL-0.3.zip

# Read the country borders from the shape file
world_polygon <- readOGR(
  dsn = "./data/map_polygons/TM_WORLD_BORDERS_SIMPL-0.3",
  layer = "TM_WORLD_BORDERS_SIMPL-0.3",
  verbose = FALSE
)

# Initial centre of the map view.
# NOTE(review): element 1 is passed as longitude and element 2 as latitude in
# setView() below - confirm that this is the intended order.
world_coordinates <- c(38.02213230455896, 18.457031704152197)

# Attach the cluster assignment to the polygons via the ISO3 country code.
# NOTE(review): econ_data_clustered is not defined in the visible part of this
# file; presumably it is econ_data with an added `cluster` column taken from
# kmeans_k5 - confirm that it is created before this point.
merged_data <- merge(
  world_polygon,
  econ_data_clustered,
  by.x = "ISO3",
  by.y = "CountryCode"
)

merged_data@data$cluster <- as.numeric(merged_data@data$cluster)

# Labels for pop-ups: one HTML snippet per polygon
label_text <- paste0(
  "<b> Country: </b>", merged_data@data$NAME, "<br>",
  "<b> Cluster: </b>", merged_data@data$cluster, "<br>"
)
labels <- lapply(label_text, htmltools::HTML)

# One colour per cluster value
factpal <- colorFactor(heat.colors(5), merged_data@data$cluster)

# creating choropleth map using Leaflet, one layer at a time
cluster_map <- leaflet(merged_data)
cluster_map <- addProviderTiles(cluster_map, "OpenStreetMap.Mapnik")
cluster_map <- setView(
  cluster_map,
  lng = world_coordinates[1],
  lat = world_coordinates[2],
  zoom = 3
)
cluster_map <- addPolygons(
  cluster_map,
  stroke = FALSE,
  fillOpacity = 0.5,
  color = ~factpal(cluster),
  label = labels
)
cluster_map <- addLegend(
  cluster_map,
  pal = factpal,
  values = ~cluster,
  position = "bottomleft"
)

# Printing the widget at top level renders the map.
cluster_map