Introduction

This report analyzes the Global Multidimensional Poverty Index (MPI) data for various countries and regions around the world.

Data Loading and Preparation

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
library(knitr)
library(scales)

Load the dataset

# Read the raw poverty-rate extract; text fields stay character, not factor.
data <- read.csv(
  file = "hdx_hapi_poverty_rate_global.csv",
  stringsAsFactors = FALSE
)

Clean the dataset

# Drop the first data row, then assign the analysis column names by position.
# NOTE(review): row 1 is presumably a secondary tag/header row in the CSV --
# confirm against the raw file.
data_cleaned <- data[-1, ]
names(data_cleaned) <- c(
  "location_code",
  "has_hrp",
  "in_gho",
  "provider_admin1_name",
  "admin1_code",
  "admin1_name",
  "mpi",
  "headcount_ratio",
  "intensity_of_deprivation",
  "vulnerable_to_poverty",
  "in_severe_poverty",
  "reference_period_start",
  "reference_period_end"
)

Convert numeric columns to appropriate data types

# Names of the poverty-indicator columns that must be numeric for analysis.
numeric_columns <- c(
  "mpi",
  "headcount_ratio",
  "intensity_of_deprivation",
  "vulnerable_to_poverty",
  "in_severe_poverty"
)
# Coerce each indicator column to numeric; the converted columns are written
# back positionally, in the same order as `numeric_columns`.
data_cleaned[numeric_columns] <- lapply(
  numeric_columns,
  function(column_name) as.numeric(data_cleaned[[column_name]])
)

Convert date columns to Date format

# Parse both reference-period bounds into Date values.
data_cleaned[["reference_period_start"]] <- as.Date(
  data_cleaned[["reference_period_start"]]
)
data_cleaned[["reference_period_end"]] <- as.Date(
  data_cleaned[["reference_period_end"]]
)

Analysis

Summary Statistics

# Per-indicator summary (min, quartiles, median, mean, max), shown as a table.
summary_statistics <- data_cleaned[numeric_columns] %>%
  summary()
kable(x = summary_statistics)
mpi headcount_ratio intensity_of_deprivation vulnerable_to_poverty in_severe_poverty
Min. :0.0000 Min. : 0.000 Min. : 0.00 Min. : 0.000 Min. : 0.0000
1st Qu.:0.0244 1st Qu.: 6.168 1st Qu.:39.60 1st Qu.: 7.924 1st Qu.: 0.8403
Median :0.1114 Median :25.771 Median :44.26 Median :15.203 Median : 7.2929
Mean :0.1772 Mean :34.124 Mean :45.45 Mean :15.284 Mean :17.9962
3rd Qu.:0.3021 3rd Qu.:60.373 3rd Qu.:50.65 3rd Qu.:21.586 3rd Qu.:30.3959
Max. :0.7384 Max. :99.492 Max. :75.89 Max. :50.182 Max. :94.1871

Top 10 Countries with Highest MPI

# Average every poverty indicator per location code, ignoring missing values.
# na.rm is supplied through an anonymous function because forwarding extra
# arguments via across()'s `...` is deprecated as of dplyr 1.1.0.
country_summary <- data_cleaned %>%
  group_by(location_code) %>%
  summarise(
    across(all_of(numeric_columns), function(x) mean(x, na.rm = TRUE))
  )
## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `across(all_of(numeric_columns), mean, na.rm = TRUE)`.
## ℹ In group 1: `location_code = "AFG"`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
## 
##   # Previously
##   across(a:b, mean, na.rm = TRUE)
## 
##   # Now
##   across(a:b, \(x) mean(x, na.rm = TRUE))
# Sort locations by mean MPI (highest first) and keep the first ten rows,
# then render them as a table.
top_10_mpi <- head(arrange(country_summary, desc(mpi)), n = 10)
kable(x = top_10_mpi)
location_code mpi headcount_ratio intensity_of_deprivation vulnerable_to_poverty in_severe_poverty
TCD 0.6016106 91.17772 65.43287 6.021026 75.62268
NER 0.5840333 86.73578 66.26881 6.285494 73.88559
CAF 0.4812875 81.79876 57.81771 12.028612 56.96986
BFA 0.4523393 77.59735 57.07156 10.758389 54.13713
MLI 0.4380071 76.17523 56.35877 11.521079 54.06448
MDG 0.4156371 73.61946 55.69504 13.203784 51.15671
BDI 0.3993667 73.53615 53.53338 16.032307 44.61298
GIN 0.3915037 68.89243 55.52337 14.826467 46.19869
COD 0.3903879 73.83552 52.37478 16.288131 45.14210
MOZ 0.3785306 66.33908 54.61580 14.225853 44.76680
# Horizontal bar chart of the ten highest-MPI locations, ordered by MPI.
ggplot(
  data = top_10_mpi,
  mapping = aes(x = reorder(location_code, mpi), y = mpi)
) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Top 10 Countries with Highest MPI",
    x = "Country Code",
    y = "Multidimensional Poverty Index (MPI)"
  ) +
  theme_minimal()

Correlation Analysis

# Pairwise correlations between the indicators, computed only on rows where
# every indicator is present (use = "complete.obs"); shown to 3 decimals.
correlation_matrix <- data_cleaned[numeric_columns] %>%
  cor(use = "complete.obs")
kable(x = correlation_matrix, digits = 3)
mpi headcount_ratio intensity_of_deprivation vulnerable_to_poverty in_severe_poverty
mpi 1.000 0.986 0.894 0.129 0.985
headcount_ratio 0.986 1.000 0.870 0.244 0.948
intensity_of_deprivation 0.894 0.870 1.000 0.160 0.886
vulnerable_to_poverty 0.129 0.244 0.160 1.000 -0.008
in_severe_poverty 0.985 0.948 0.886 -0.008 1.000
# Heatmap of the indicator correlation matrix, with each coefficient printed
# (rounded to 2 decimals) inside its tile.
# The matrix is reshaped to long form (Var1, Var2, value) with base R so this
# chunk no longer depends on reshape2, which the report never loads.
correlation_long <- as.data.frame(
  as.table(correlation_matrix),
  responseName = "value"
)
ggplot(data = correlation_long, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  geom_text(aes(label = round(value, 2)), color = "white", size = 3) +
  # Diverging palette centred on zero: blue = negative, red = positive.
  scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0) +
  theme_minimal() +
  # Rotate x-axis labels so the long indicator names do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Correlation Heatmap of Poverty Indicators")

Time Series Analysis

# Average every poverty indicator per reference-period start date, ignoring
# missing values. na.rm is supplied through an anonymous function because
# forwarding extra arguments via across()'s `...` is deprecated as of
# dplyr 1.1.0.
time_series_data <- data_cleaned %>%
  group_by(reference_period_start) %>%
  summarise(
    across(all_of(numeric_columns), function(x) mean(x, na.rm = TRUE))
  )
# Line-and-point chart of the mean MPI for each reference-period start date.
ggplot(
  data = time_series_data,
  mapping = aes(x = reference_period_start, y = mpi)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Global MPI Trend Over Time",
    x = "Year",
    y = "Average Multidimensional Poverty Index (MPI)"
  ) +
  theme_minimal()

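Conclusion

This report provides an overview of the Global Multidimensional Poverty Index data. Chad (TCD) and Niger (NER) have the highest average MPI values in the dataset. MPI is very strongly correlated with both the headcount ratio (0.986) and the share of people in severe poverty (0.985), but only weakly correlated with the share of people vulnerable to poverty (0.129).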