This report analyzes the Global Multidimensional Poverty Index (MPI) data for various countries and regions around the world.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
library(knitr)
library(scales)
Load the dataset
# Read the raw poverty-rate CSV (path is relative to the working directory),
# keeping text columns as character vectors rather than factors.
data <- read.csv(
  file = "hdx_hapi_poverty_rate_global.csv",
  stringsAsFactors = FALSE
)
Clean the dataset
# Drop the first row of the raw data (presumably a non-data tag/metadata row
# -- TODO confirm against the CSV).
data_cleaned <- data[-1L, ]

# Overwrite the column names with a fixed, ordered set of 13 names.
# NOTE(review): assumes the CSV has exactly these columns in this order.
names(data_cleaned) <- c(
  "location_code",
  "has_hrp",
  "in_gho",
  "provider_admin1_name",
  "admin1_code",
  "admin1_name",
  "mpi",
  "headcount_ratio",
  "intensity_of_deprivation",
  "vulnerable_to_poverty",
  "in_severe_poverty",
  "reference_period_start",
  "reference_period_end"
)
Convert numeric columns to appropriate data types
# Names of the indicator columns that are treated as numeric throughout the
# rest of the analysis.
numeric_columns <- c(
  "mpi",
  "headcount_ratio",
  "intensity_of_deprivation",
  "vulnerable_to_poverty",
  "in_severe_poverty"
)

# Coerce each indicator column to numeric in place; values that cannot be
# parsed become NA (with a coercion warning).
data_cleaned[numeric_columns] <- lapply(
  X = data_cleaned[numeric_columns],
  FUN = as.numeric
)
Convert date columns to Date format
# Parse the two reference-period columns into Date objects.
data_cleaned[["reference_period_start"]] <- as.Date(
  data_cleaned[["reference_period_start"]]
)
data_cleaned[["reference_period_end"]] <- as.Date(
  data_cleaned[["reference_period_end"]]
)
# Per-column summary (min, quartiles, mean, max) of the numeric indicators,
# rendered as a table.
summary_statistics <- data_cleaned[numeric_columns] %>%
  summary()
kable(summary_statistics)
mpi | headcount_ratio | intensity_of_deprivation | vulnerable_to_poverty | in_severe_poverty | |
---|---|---|---|---|---|
Min. :0.0000 | Min. : 0.000 | Min. : 0.00 | Min. : 0.000 | Min. : 0.0000 | |
1st Qu.:0.0244 | 1st Qu.: 6.168 | 1st Qu.:39.60 | 1st Qu.: 7.924 | 1st Qu.: 0.8403 | |
Median :0.1114 | Median :25.771 | Median :44.26 | Median :15.203 | Median : 7.2929 | |
Mean :0.1772 | Mean :34.124 | Mean :45.45 | Mean :15.284 | Mean :17.9962 | |
3rd Qu.:0.3021 | 3rd Qu.:60.373 | 3rd Qu.:50.65 | 3rd Qu.:21.586 | 3rd Qu.:30.3959 | |
Max. :0.7384 | Max. :99.492 | Max. :75.89 | Max. :50.182 | Max. :94.1871 |
# Average every numeric indicator per location code, ignoring missing values.
# `na.rm = TRUE` is supplied through an anonymous function because passing
# extra arguments via the `...` of across() is deprecated as of dplyr 1.1.0.
country_summary <- data_cleaned %>%
  group_by(location_code) %>%
  summarise(
    across(all_of(numeric_columns), function(x) mean(x, na.rm = TRUE))
  )
## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `across(all_of(numeric_columns), mean, na.rm = TRUE)`.
## ℹ In group 1: `location_code = "AFG"`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
##
## # Previously
## across(a:b, mean, na.rm = TRUE)
##
## # Now
## across(a:b, \(x) mean(x, na.rm = TRUE))
# Keep the ten locations with the highest average MPI, sorted from highest
# to lowest, and render them as a table.
top_10_mpi <- country_summary %>%
  arrange(desc(mpi)) %>%
  head(n = 10)

kable(top_10_mpi)
location_code | mpi | headcount_ratio | intensity_of_deprivation | vulnerable_to_poverty | in_severe_poverty |
---|---|---|---|---|---|
TCD | 0.6016106 | 91.17772 | 65.43287 | 6.021026 | 75.62268 |
NER | 0.5840333 | 86.73578 | 66.26881 | 6.285494 | 73.88559 |
CAF | 0.4812875 | 81.79876 | 57.81771 | 12.028612 | 56.96986 |
BFA | 0.4523393 | 77.59735 | 57.07156 | 10.758389 | 54.13713 |
MLI | 0.4380071 | 76.17523 | 56.35877 | 11.521079 | 54.06448 |
MDG | 0.4156371 | 73.61946 | 55.69504 | 13.203784 | 51.15671 |
BDI | 0.3993667 | 73.53615 | 53.53338 | 16.032307 | 44.61298 |
GIN | 0.3915037 | 68.89243 | 55.52337 | 14.826467 | 46.19869 |
COD | 0.3903879 | 73.83552 | 52.37478 | 16.288131 | 45.14210 |
MOZ | 0.3785306 | 66.33908 | 54.61580 | 14.225853 | 44.76680 |
# Horizontal bar chart of the ten highest average MPI values; bars are
# ordered by MPI so that the largest value appears at the top after the flip.
ggplot(
  data = top_10_mpi,
  mapping = aes(x = reorder(location_code, mpi), y = mpi)
) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Top 10 Countries with Highest MPI",
    x = "Country Code",
    y = "Multidimensional Poverty Index (MPI)"
  ) +
  theme_minimal()
# Pairwise correlations between the numeric indicators, computed only on rows
# where every indicator is present; displayed rounded to three decimals.
correlation_matrix <- cor(
  x = data_cleaned[numeric_columns],
  use = "complete.obs"
)
kable(correlation_matrix, digits = 3)
mpi | headcount_ratio | intensity_of_deprivation | vulnerable_to_poverty | in_severe_poverty | |
---|---|---|---|---|---|
mpi | 1.000 | 0.986 | 0.894 | 0.129 | 0.985 |
headcount_ratio | 0.986 | 1.000 | 0.870 | 0.244 | 0.948 |
intensity_of_deprivation | 0.894 | 0.870 | 1.000 | 0.160 | 0.886 |
vulnerable_to_poverty | 0.129 | 0.244 | 0.160 | 1.000 | -0.008 |
in_severe_poverty | 0.985 | 0.948 | 0.886 | -0.008 | 1.000 |
# Heatmap of the correlation matrix: the matrix is melted to long form
# (Var1, Var2, value), drawn as tiles on a diverging blue-white-red scale
# centred on zero, and each tile is labelled with its rounded correlation.
reshape2::melt(correlation_matrix) %>%
  ggplot(aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  geom_text(aes(label = round(value, 2)), color = "white", size = 3) +
  scale_fill_gradient2(
    low = "blue",
    mid = "white",
    high = "red",
    midpoint = 0
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Correlation Heatmap of Poverty Indicators")
# Average every numeric indicator per reference-period start date, ignoring
# missing values. `na.rm = TRUE` is supplied through an anonymous function
# because passing extra arguments via the `...` of across() is deprecated as
# of dplyr 1.1.0.
time_series_data <- data_cleaned %>%
  group_by(reference_period_start) %>%
  summarise(
    across(all_of(numeric_columns), function(x) mean(x, na.rm = TRUE))
  )
# Line-and-point chart of the average MPI for each reference-period start
# date.
ggplot(
  data = time_series_data,
  mapping = aes(x = reference_period_start, y = mpi)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Global MPI Trend Over Time",
    x = "Year",
    y = "Average Multidimensional Poverty Index (MPI)"
  ) +
  theme_minimal()
## Conclusion
This report provides an overview of the Global Multidimensional Poverty Index data.