Global multidimensional poverty index

Introduction

An analysis of the Global Multidimensional Poverty Index (MPI) data for various countries and regions around the world.

Data Loading and Preparation

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
library(tidyr)
library(knitr)
library(scales)
library(sf)

## Linking to GEOS 3.11.2, GDAL 3.8.2, PROJ 9.3.1; sf_use_s2() is TRUE

library(cluster)

Load the dataset

data <- read.csv('hdx_hapi_poverty_rate_global.csv', stringsAsFactors = FALSE)

Clean the dataset

data_cleaned <- data[-1, ]
colnames(data_cleaned) <- c('location_code', 'has_hrp', 'in_gho', 'provider_admin1_name', 'admin1_code', 'admin1_name', 'mpi', 'headcount_ratio', 'intensity_of_deprivation', 'vulnerable_to_poverty', 'in_severe_poverty', 'reference_period_start', 'reference_period_end')

Convert numeric columns to appropriate data types

numeric_columns <- c('mpi', 'headcount_ratio', 'intensity_of_deprivation', 'vulnerable_to_poverty', 'in_severe_poverty')
data_cleaned[numeric_columns] <- lapply(data_cleaned[numeric_columns], as.numeric)

Convert date columns to Date format

data_cleaned$reference_period_start <- as.Date(data_cleaned$reference_period_start)
data_cleaned$reference_period_end <- as.Date(data_cleaned$reference_period_end)

Analysis

Summary Statistics

summary_statistics <- summary(data_cleaned[numeric_columns])
kable(summary_statistics)

mpi	headcount_ratio	intensity_of_deprivation	vulnerable_to_poverty	in_severe_poverty
Min. :0.0000	Min. : 0.000	Min. : 0.00	Min. : 0.000	Min. : 0.0000
1st Qu.:0.0244	1st Qu.: 6.168	1st Qu.:39.60	1st Qu.: 7.924	1st Qu.: 0.8403
Median :0.1114	Median :25.771	Median :44.26	Median :15.203	Median : 7.2929
Mean :0.1772	Mean :34.124	Mean :45.45	Mean :15.284	Mean :17.9962
3rd Qu.:0.3021	3rd Qu.:60.373	3rd Qu.:50.65	3rd Qu.:21.586	3rd Qu.:30.3959
Max. :0.7384	Max. :99.492	Max. :75.89	Max. :50.182	Max. :94.1871

Top 10 Countries with Highest MPI

country_summary <- data_cleaned %>%
group_by(location_code) %>%
summarise(across(all_of(numeric_columns), mean, na.rm = TRUE))

## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `across(all_of(numeric_columns), mean, na.rm = TRUE)`.
## ℹ In group 1: `location_code = "AFG"`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
## 
##   # Previously
##   across(a:b, mean, na.rm = TRUE)
## 
##   # Now
##   across(a:b, \(x) mean(x, na.rm = TRUE))

top_10_mpi <- country_summary %>%
arrange(desc(mpi)) %>%
head(10)
kable(top_10_mpi)

location_code	mpi	headcount_ratio	intensity_of_deprivation	vulnerable_to_poverty	in_severe_poverty
TCD	0.6016106	91.17772	65.43287	6.021026	75.62268
NER	0.5840333	86.73578	66.26881	6.285494	73.88559
CAF	0.4812875	81.79876	57.81771	12.028612	56.96986
BFA	0.4523393	77.59735	57.07156	10.758389	54.13713
MLI	0.4380071	76.17523	56.35877	11.521079	54.06448
MDG	0.4156371	73.61946	55.69504	13.203784	51.15671
BDI	0.3993667	73.53615	53.53338	16.032307	44.61298
GIN	0.3915037	68.89243	55.52337	14.826467	46.19869
COD	0.3903879	73.83552	52.37478	16.288131	45.14210
MOZ	0.3785306	66.33908	54.61580	14.225853	44.76680

ggplot(top_10_mpi, aes(x = reorder(location_code, mpi), y = mpi)) +
geom_bar(stat = "identity", fill = "steelblue") +
coord_flip() +
labs(title = "Top 10 Countries with Highest MPI",
x = "Country Code",
y = "Multidimensional Poverty Index (MPI)") +
theme_minimal()

Correlation Analysis

correlation_matrix <- cor(data_cleaned[numeric_columns], use = "complete.obs")
kable(correlation_matrix)

	mpi	headcount_ratio	intensity_of_deprivation	vulnerable_to_poverty	in_severe_poverty
mpi	1.0000000	0.9863885	0.8938514	0.1293102	0.9851414
headcount_ratio	0.9863885	1.0000000	0.8704221	0.2439347	0.9481129
intensity_of_deprivation	0.8938514	0.8704221	1.0000000	0.1601427	0.8860146
vulnerable_to_poverty	0.1293102	0.2439347	0.1601427	1.0000000	-0.0077697
in_severe_poverty	0.9851414	0.9481129	0.8860146	-0.0077697	1.0000000

ggplot(data = reshape2::melt(correlation_matrix), aes(x = Var1, y = Var2, fill = value)) +
geom_tile() +
geom_text(aes(label = round(value, 2)), color = "white", size = 3) +
scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "Correlation Heatmap of Poverty Indicators")

Time Series Analysis

time_series_data <- data_cleaned %>%
group_by(reference_period_start) %>%
summarise(across(all_of(numeric_columns), mean, na.rm = TRUE))
ggplot(time_series_data, aes(x = reference_period_start, y = mpi)) +
geom_line() +
geom_point() +
labs(title = "Global MPI Trend Over Time",
x = "Year",
y = "Average Multidimensional Poverty Index (MPI)") +
theme_minimal()

Regional Analysis

regional_summary <- data_cleaned %>%
group_by(admin1_name) %>%
summarise(across(all_of(numeric_columns), mean, na.rm = TRUE)) %>%
arrange(desc(mpi))
top_20_regions <- head(regional_summary, 20)
kable(top_20_regions)

admin1_name	mpi	headcount_ratio	intensity_of_deprivation	vulnerable_to_poverty	in_severe_poverty
Lac	0.7065000	97.53510	72.42900	1.882833	89.57247
Sila	0.6988333	97.66173	71.55697	1.989833	91.64073
Wadi Fira	0.6970667	97.54923	71.45373	1.486067	92.58220
Kanem	0.6898000	98.49097	70.02897	1.197033	89.57427
Salamat	0.6888000	96.94567	71.00207	2.318067	86.88500
Hadjer-Lamis	0.6810333	97.21193	70.03547	2.024000	87.17007
Batha	0.6776333	96.22830	70.41970	2.549567	87.64880
Tillaberi	0.6738000	94.91290	70.86310	3.797850	84.53650
Maradi	0.6686500	95.32000	70.13090	2.834650	85.78585
Tahoua	0.6652000	95.09980	69.90930	3.682850	84.77675
Ouaddai	0.6634000	94.30803	70.32827	2.835467	86.95670
Dosso	0.6608000	95.26330	69.30980	2.964050	83.78345
Ennedi Est	0.6598000	98.12420	67.23630	1.340800	90.38860
Zinder	0.6569500	94.51395	69.47955	3.782800	84.22380
Barh-El-Gazel	0.6525000	96.80120	67.41683	2.260833	85.61973
Chari-Baguirmi	0.6507000	96.39190	67.50610	2.862033	82.71860
Guera	0.6451000	94.25793	68.38773	4.521300	81.49673
Sahel	0.6356500	92.65400	68.65210	3.987200	83.63785
Diffa	0.6079000	94.05595	64.57360	3.268150	80.85905
Ennedi Ouest	0.6024000	93.43470	64.46930	4.597100	80.63540

ggplot(top_20_regions, aes(x = reorder(admin1_name, mpi), y = mpi)) +
geom_bar(stat = "identity", fill = "darkgreen") +
coord_flip() +
labs(title = "Top 20 Regions with Highest MPI",
x = "Region",
y = "Multidimensional Poverty Index (MPI)") +
theme_minimal() +
theme(axis.text.y = element_text(size = 8))

Additional Analyses

Regional Disparities Analysis

regional_summary <- data_cleaned %>%
group_by(admin1_name) %>%
summarise(
mean_mpi = mean(mpi, na.rm = TRUE),
sd_mpi = sd(mpi, na.rm = TRUE),
min_mpi = min(mpi, na.rm = TRUE),
max_mpi = max(mpi, na.rm = TRUE)
)
anova_result <- aov(mpi ~ admin1_name, data = data_cleaned)
summary(anova_result)

##               Df Sum Sq Mean Sq F value Pr(>F)    
## admin1_name 1000  68.05 0.06805   5.099 <2e-16 ***
## Residuals   2164  28.88 0.01335                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Temporal Trend Analysis

temporal_trends <- data_cleaned %>%
group_by(location_code, admin1_name) %>%
arrange(reference_period_start) %>%
summarise(
start_mpi = first(mpi),
end_mpi = last(mpi),
years = as.numeric(difftime(last(reference_period_end), first(reference_period_start), units = "days")) / 365,
avg_annual_change = (end_mpi - start_mpi) / years
)

## `summarise()` has grouped output by 'location_code'. You can override using the
## `.groups` argument.

ggplot(data_cleaned, aes(x = reference_period_start, y = mpi, color = admin1_name)) +
geom_line() +
theme_minimal() +
labs(title = "MPI Trends Over Time", x = "Year", y = "MPI", color = "Region")

Decomposition Analysis

indicators <- c("mpi", "headcount_ratio", "intensity_of_deprivation")
data_cleaned %>%
gather(key = "indicator", value = "value", indicators) %>%
group_by(admin1_name, indicator) %>%
summarise(mean_value = mean(value, na.rm = TRUE)) %>%
ggplot(aes(x = admin1_name, y = mean_value, fill = indicator)) +
geom_bar(stat = "identity", position = "stack") +
theme_minimal() +
labs(title = "MPI Composition by Region", x = "Region", y = "Contribution to MPI")

## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(indicators)
## 
##   # Now:
##   data %>% select(all_of(indicators))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## `summarise()` has grouped output by 'admin1_name'. You can override using the
## `.groups` argument.

Cluster Analysis

set.seed(123)
kmeans_result <- kmeans(data_cleaned[c("mpi", "headcount_ratio", "intensity_of_deprivation")], centers = 5)
data_cleaned$cluster <- kmeans_result$cluster
ggplot(data_cleaned, aes(x = headcount_ratio, y = intensity_of_deprivation, color = factor(cluster))) +
geom_point() +
theme_minimal() +
labs(title = "Cluster Analysis of Poverty Profiles", color = "Cluster")

Inequality Analysis

gini_coefficients <- data_cleaned %>%
group_by(admin1_name) %>%
summarise(gini_mpi = ineq::Gini(mpi))
ggplot(data_cleaned, aes(x = cumsum(mpi) / sum(mpi), y = seq_along(mpi) / length(mpi))) +
geom_line() +
geom_abline(linetype = "dashed") +
theme_minimal() +
labs(title = "Lorenz Curve of MPI", x = "Cumulative Share of MPI", y = "Cumulative Share of Population")

Conclusion

This is an analysis of the Global Multidimensional Poverty Index data revealing variations in poverty levels across countries and regions, as well as over time..