Introduction

An analysis of the Global Multidimensional Poverty Index (MPI) data for various countries and regions around the world.

Data Loading and Preparation

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
library(knitr)
library(scales)
library(sf)
## Linking to GEOS 3.11.2, GDAL 3.8.2, PROJ 9.3.1; sf_use_s2() is TRUE
library(cluster)

Load the dataset

data <- read.csv('hdx_hapi_poverty_rate_global.csv', stringsAsFactors = FALSE)

Clean the dataset

data_cleaned <- data[-1, ]
colnames(data_cleaned) <- c('location_code', 'has_hrp', 'in_gho', 'provider_admin1_name', 'admin1_code', 'admin1_name', 'mpi', 'headcount_ratio', 'intensity_of_deprivation', 'vulnerable_to_poverty', 'in_severe_poverty', 'reference_period_start', 'reference_period_end')

Convert numeric columns to appropriate data types

numeric_columns <- c('mpi', 'headcount_ratio', 'intensity_of_deprivation', 'vulnerable_to_poverty', 'in_severe_poverty')
data_cleaned[numeric_columns] <- lapply(data_cleaned[numeric_columns], as.numeric)

Convert date columns to Date format

data_cleaned$reference_period_start <- as.Date(data_cleaned$reference_period_start)
data_cleaned$reference_period_end <- as.Date(data_cleaned$reference_period_end)

Analysis

Summary Statistics

summary_statistics <- summary(data_cleaned[numeric_columns])
kable(summary_statistics)
mpi headcount_ratio intensity_of_deprivation vulnerable_to_poverty in_severe_poverty
Min. :0.0000 Min. : 0.000 Min. : 0.00 Min. : 0.000 Min. : 0.0000
1st Qu.:0.0244 1st Qu.: 6.168 1st Qu.:39.60 1st Qu.: 7.924 1st Qu.: 0.8403
Median :0.1114 Median :25.771 Median :44.26 Median :15.203 Median : 7.2929
Mean :0.1772 Mean :34.124 Mean :45.45 Mean :15.284 Mean :17.9962
3rd Qu.:0.3021 3rd Qu.:60.373 3rd Qu.:50.65 3rd Qu.:21.586 3rd Qu.:30.3959
Max. :0.7384 Max. :99.492 Max. :75.89 Max. :50.182 Max. :94.1871

Top 10 Countries with Highest MPI

country_summary <- data_cleaned %>%
group_by(location_code) %>%
summarise(across(all_of(numeric_columns), mean, na.rm = TRUE))
## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `across(all_of(numeric_columns), mean, na.rm = TRUE)`.
## ℹ In group 1: `location_code = "AFG"`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
## 
##   # Previously
##   across(a:b, mean, na.rm = TRUE)
## 
##   # Now
##   across(a:b, \(x) mean(x, na.rm = TRUE))
top_10_mpi <- country_summary %>%
arrange(desc(mpi)) %>%
head(10)
kable(top_10_mpi)
location_code mpi headcount_ratio intensity_of_deprivation vulnerable_to_poverty in_severe_poverty
TCD 0.6016106 91.17772 65.43287 6.021026 75.62268
NER 0.5840333 86.73578 66.26881 6.285494 73.88559
CAF 0.4812875 81.79876 57.81771 12.028612 56.96986
BFA 0.4523393 77.59735 57.07156 10.758389 54.13713
MLI 0.4380071 76.17523 56.35877 11.521079 54.06448
MDG 0.4156371 73.61946 55.69504 13.203784 51.15671
BDI 0.3993667 73.53615 53.53338 16.032307 44.61298
GIN 0.3915037 68.89243 55.52337 14.826467 46.19869
COD 0.3903879 73.83552 52.37478 16.288131 45.14210
MOZ 0.3785306 66.33908 54.61580 14.225853 44.76680
ggplot(top_10_mpi, aes(x = reorder(location_code, mpi), y = mpi)) +
geom_bar(stat = "identity", fill = "steelblue") +
coord_flip() +
labs(title = "Top 10 Countries with Highest MPI",
x = "Country Code",
y = "Multidimensional Poverty Index (MPI)") +
theme_minimal()

Correlation Analysis

correlation_matrix <- cor(data_cleaned[numeric_columns], use = "complete.obs")
kable(correlation_matrix)
mpi headcount_ratio intensity_of_deprivation vulnerable_to_poverty in_severe_poverty
mpi 1.0000000 0.9863885 0.8938514 0.1293102 0.9851414
headcount_ratio 0.9863885 1.0000000 0.8704221 0.2439347 0.9481129
intensity_of_deprivation 0.8938514 0.8704221 1.0000000 0.1601427 0.8860146
vulnerable_to_poverty 0.1293102 0.2439347 0.1601427 1.0000000 -0.0077697
in_severe_poverty 0.9851414 0.9481129 0.8860146 -0.0077697 1.0000000
ggplot(data = reshape2::melt(correlation_matrix), aes(x = Var1, y = Var2, fill = value)) +
geom_tile() +
geom_text(aes(label = round(value, 2)), color = "white", size = 3) +
scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "Correlation Heatmap of Poverty Indicators")

Time Series Analysis

time_series_data <- data_cleaned %>%
group_by(reference_period_start) %>%
summarise(across(all_of(numeric_columns), mean, na.rm = TRUE))
ggplot(time_series_data, aes(x = reference_period_start, y = mpi)) +
geom_line() +
geom_point() +
labs(title = "Global MPI Trend Over Time",
x = "Year",
y = "Average Multidimensional Poverty Index (MPI)") +
theme_minimal()

Regional Analysis

regional_summary <- data_cleaned %>%
group_by(admin1_name) %>%
summarise(across(all_of(numeric_columns), mean, na.rm = TRUE)) %>%
arrange(desc(mpi))
top_20_regions <- head(regional_summary, 20)
kable(top_20_regions)
admin1_name mpi headcount_ratio intensity_of_deprivation vulnerable_to_poverty in_severe_poverty
Lac 0.7065000 97.53510 72.42900 1.882833 89.57247
Sila 0.6988333 97.66173 71.55697 1.989833 91.64073
Wadi Fira 0.6970667 97.54923 71.45373 1.486067 92.58220
Kanem 0.6898000 98.49097 70.02897 1.197033 89.57427
Salamat 0.6888000 96.94567 71.00207 2.318067 86.88500
Hadjer-Lamis 0.6810333 97.21193 70.03547 2.024000 87.17007
Batha 0.6776333 96.22830 70.41970 2.549567 87.64880
Tillaberi 0.6738000 94.91290 70.86310 3.797850 84.53650
Maradi 0.6686500 95.32000 70.13090 2.834650 85.78585
Tahoua 0.6652000 95.09980 69.90930 3.682850 84.77675
Ouaddai 0.6634000 94.30803 70.32827 2.835467 86.95670
Dosso 0.6608000 95.26330 69.30980 2.964050 83.78345
Ennedi Est 0.6598000 98.12420 67.23630 1.340800 90.38860
Zinder 0.6569500 94.51395 69.47955 3.782800 84.22380
Barh-El-Gazel 0.6525000 96.80120 67.41683 2.260833 85.61973
Chari-Baguirmi 0.6507000 96.39190 67.50610 2.862033 82.71860
Guera 0.6451000 94.25793 68.38773 4.521300 81.49673
Sahel 0.6356500 92.65400 68.65210 3.987200 83.63785
Diffa 0.6079000 94.05595 64.57360 3.268150 80.85905
Ennedi Ouest 0.6024000 93.43470 64.46930 4.597100 80.63540
ggplot(top_20_regions, aes(x = reorder(admin1_name, mpi), y = mpi)) +
geom_bar(stat = "identity", fill = "darkgreen") +
coord_flip() +
labs(title = "Top 20 Regions with Highest MPI",
x = "Region",
y = "Multidimensional Poverty Index (MPI)") +
theme_minimal() +
theme(axis.text.y = element_text(size = 8))

Additional Analyses

Regional Disparities Analysis

regional_summary <- data_cleaned %>%
group_by(admin1_name) %>%
summarise(
mean_mpi = mean(mpi, na.rm = TRUE),
sd_mpi = sd(mpi, na.rm = TRUE),
min_mpi = min(mpi, na.rm = TRUE),
max_mpi = max(mpi, na.rm = TRUE)
)
anova_result <- aov(mpi ~ admin1_name, data = data_cleaned)
summary(anova_result)
##               Df Sum Sq Mean Sq F value Pr(>F)    
## admin1_name 1000  68.05 0.06805   5.099 <2e-16 ***
## Residuals   2164  28.88 0.01335                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Temporal Trend Analysis

temporal_trends <- data_cleaned %>%
group_by(location_code, admin1_name) %>%
arrange(reference_period_start) %>%
summarise(
start_mpi = first(mpi),
end_mpi = last(mpi),
years = as.numeric(difftime(last(reference_period_end), first(reference_period_start), units = "days")) / 365,
avg_annual_change = (end_mpi - start_mpi) / years
)
## `summarise()` has grouped output by 'location_code'. You can override using the
## `.groups` argument.
ggplot(data_cleaned, aes(x = reference_period_start, y = mpi, color = admin1_name)) +
geom_line() +
theme_minimal() +
labs(title = "MPI Trends Over Time", x = "Year", y = "MPI", color = "Region")

Decomposition Analysis

indicators <- c("mpi", "headcount_ratio", "intensity_of_deprivation")
data_cleaned %>%
gather(key = "indicator", value = "value", indicators) %>%
group_by(admin1_name, indicator) %>%
summarise(mean_value = mean(value, na.rm = TRUE)) %>%
ggplot(aes(x = admin1_name, y = mean_value, fill = indicator)) +
geom_bar(stat = "identity", position = "stack") +
theme_minimal() +
labs(title = "MPI Composition by Region", x = "Region", y = "Contribution to MPI")
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(indicators)
## 
##   # Now:
##   data %>% select(all_of(indicators))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'admin1_name'. You can override using the
## `.groups` argument.

Cluster Analysis

set.seed(123)
kmeans_result <- kmeans(data_cleaned[c("mpi", "headcount_ratio", "intensity_of_deprivation")], centers = 5)
data_cleaned$cluster <- kmeans_result$cluster
ggplot(data_cleaned, aes(x = headcount_ratio, y = intensity_of_deprivation, color = factor(cluster))) +
geom_point() +
theme_minimal() +
labs(title = "Cluster Analysis of Poverty Profiles", color = "Cluster")

Inequality Analysis

gini_coefficients <- data_cleaned %>%
group_by(admin1_name) %>%
summarise(gini_mpi = ineq::Gini(mpi))
ggplot(data_cleaned, aes(x = cumsum(mpi) / sum(mpi), y = seq_along(mpi) / length(mpi))) +
geom_line() +
geom_abline(linetype = "dashed") +
theme_minimal() +
labs(title = "Lorenz Curve of MPI", x = "Cumulative Share of MPI", y = "Cumulative Share of Population")

Conclusion

This is an analysis of the Global Multidimensional Poverty Index data revealing variations in poverty levels across countries and regions, as well as over time..