Formating dataset

Importing the dataset from the xls source file. Use a library readxl

library(readxl)
inequalityIndex <- read_excel("data 2022/Coefficient of human inequality until 2019.xlsx", na = "..", skip = 4)
lifeExpect <- read_excel("data 2022/Inequality in life expectancy (%).xlsx", na = "..", skip = 4)
income <- read_excel("data 2022/Inequality in income (%).xlsx", na = "..", skip = 4)
education <- read_excel("data 2022/Inequality in education (%).xlsx", na = "..", skip = 4)
countryCodesRegions <- read_excel("data 2022/Country codes and regions.xlsx")

Creating a map color coded by sub-regions

library(ggplot2)
library(dplyr)

# Get map and remove antarctica
latLong <- map_data('world') %>% filter(region!= 'Antarctica')

latLong$subregion <- NULL
names(latLong)[5]<-"Country_Name"

# Add regions and sub-regions to map coordinate. Data from https://github.com/lukes/ISO-3166-Countries-with-Regional-Codes/blob/master/all/all.csv
latLong <- left_join(latLong,countryCodesRegions, by = c('Country_Name'= 'name'))

#Rename Cols
names(latLong)[c(6,7)]<-c("ISO2", "ISO3")

# create map
map <- ggplot(latLong, aes(x=long, y=lat, group = group))
map <- map + geom_polygon(aes(fill =`sub-region`))

# The customized palette :
cPalette <- c("#c8522c","#4bafd0","#d34459","#64b948","#a35ac7","#b4b335","#6a75c8","#d24699","#5dc18a","#a04a6d","#598233","#d389c3","#388864","#d07a6c","#b5a861","#886a2c","#db923b")

map <- map + scale_fill_manual(values = cPalette)

map <- map + labs(title = "Sub-regions of the world") + theme(legend.position="bottom")
map

Adding HDI data to the map

# consolidate datasets
globeMap <- left_join(latLong,inequalityIndex, by = c('ISO3'= 'ISO3'))

# create map
map <- ggplot(globeMap, aes(x=long, y=lat, group = group))
map <- map + geom_polygon(aes(fill =`2019`))
map <- map + scale_fill_continuous(type = 'viridis')
map <- map + labs(title = "Human Development Index 2019", caption = "Data from United Nations report of Human Development. 2022")
map

Exporting data in CSV format

Wide to Long: Merging years into a single column

library(reshape2)

# Add the names of the columns you want to preserve. Any other column will be "melted" down.
merged <- melt(inequalityIndex, id=(c("HDI Rank","Country" ,"ISO3")))

# Life expectancy
mergedLE <- melt(lifeExpect, id=(c("HDI Rank","Country" ,"ISO3")))

# income
mergedInc <- melt(income, id=(c("HDI Rank","Country" ,"ISO3")))

# education
mergedEd <- melt(education, id=(c("HDI Rank","Country" ,"ISO3")))

# Save files
write.csv2(merged, "output_2022/Coefficient_of_human_inequality_2010-2019.csv")
write.csv2(mergedLE, "output_2022/Inequality_Life_Expectancy_2010-2019.csv")
write.csv2(mergedInc, "output_2022/Inequality_Income_2010-2019.csv")
write.csv2(mergedEd, "output_2022/Inequality_Education_2010-2019.csv")
write.csv2(latLong, "output_2022/map.csv")