This report explores global poverty metrics, focusing on the Multidimensional Poverty Index (MPI), headcount ratio, intensity of deprivation, vulnerability to poverty, and severe poverty levels. It uses data cleaning, descriptive statistics, and visualization to reveal trends, regional disparities, and correlations among these indicators.
# Clean data
# The first row holds header text rather than an observation, so it is
# dropped. It is not copied into the column names first: the names are set
# explicitly right below, which would overwrite it anyway.
data <- data[-1, ]

# Fail early if the table does not have the 13 columns named below, instead
# of silently mislabelling columns.
stopifnot(is.data.frame(data), ncol(data) == 13L)
colnames(data) <- c(
  "location_code", "has_hrp", "in_gho", "provider_admin1_name",
  "admin1_code", "admin1_name", "mpi", "headcount_ratio",
  "intensity_of_deprivation", "vulnerable_to_poverty",
  "in_severe_poverty", "reference_period_start", "reference_period_end"
)

# Indicator columns that are converted to numeric; values that cannot be
# parsed as numbers become NA (with a warning from as.numeric()).
numeric_cols <- c(
  "mpi", "headcount_ratio", "intensity_of_deprivation",
  "vulnerable_to_poverty", "in_severe_poverty"
)
data[numeric_cols] <- lapply(data[numeric_cols], as.numeric)

# Display structure
str(data)
## 'data.frame': 629 obs. of 13 variables:
## $ location_code : chr "AFG" "AFG" "AFG" "AFG" ...
## $ has_hrp : chr "True" "True" "True" "True" ...
## $ in_gho : chr "True" "True" "True" "True" ...
## $ provider_admin1_name : chr "Central" "Central" "Central Highlands" "Central Highlands" ...
## $ admin1_code : chr "AFG-XXX" "AFG-XXX" "AFG-XXX" "AFG-XXX" ...
## $ admin1_name : chr "UNSPECIFIED" "UNSPECIFIED" "UNSPECIFIED" "UNSPECIFIED" ...
## $ mpi : num 0.296 0.199 0.459 0.36 0.419 ...
## $ headcount_ratio : num 54.6 39.6 77.9 67.8 75.2 ...
## $ intensity_of_deprivation: num 54.3 50.3 59 53 55.7 ...
## $ vulnerable_to_poverty : num 16.6 18.2 14.7 22.3 12.4 ...
## $ in_severe_poverty : num 31 31 53.5 53.5 46.2 ...
## $ reference_period_start : chr "2010-01-01" "2015-01-01" "2010-01-01" "2015-01-01" ...
## $ reference_period_end : chr "2011-12-31" "2016-12-31" "2011-12-31" "2016-12-31" ...
# Summary statistics
# Per-column summary of the indicator columns listed in numeric_cols.
summary(data[, numeric_cols])
## mpi headcount_ratio intensity_of_deprivation
## Min. :0.0002508 Min. : 0.05816 Min. :33.33
## 1st Qu.:0.0763920 1st Qu.:17.50151 1st Qu.:43.85
## Median :0.2568674 Median :50.51932 Median :50.22
## Mean :0.2828486 Mean :49.81679 Mean :51.25
## 3rd Qu.:0.4598473 3rd Qu.:80.35057 3rd Qu.:58.08
## Max. :0.7406410 Max. :99.67902 Max. :75.89
## vulnerable_to_poverty in_severe_poverty
## Min. : 0.183 Min. : 0.000
## 1st Qu.: 8.291 1st Qu.: 6.274
## Median :14.365 Median :28.545
## Mean :14.876 Mean :34.744
## 3rd Qu.:20.834 3rd Qu.:61.458
## Max. :41.093 Max. :94.187
library(ggplot2)
# Histogram of MPI
# Bins are 0.05 wide on the MPI scale.
ggplot(data = data, mapping = aes(x = mpi)) +
  geom_histogram(fill = "blue", alpha = 0.7, binwidth = 0.05) +
  theme_minimal() +
  ggtitle("Distribution of MPI") +
  xlab("MPI") +
  ylab("Frequency")
# Convert dates to Date type
# Both reference-period columns are converted in one step with as.Date().
data[c("reference_period_start", "reference_period_end")] <- lapply(
  data[c("reference_period_start", "reference_period_end")],
  as.Date
)
# Line plot of MPI over time
# Without an explicit group, geom_line() connects every observation that
# shares the same admin1_name. That column reads "UNSPECIFIED" in the rows
# shown by str(data), so unrelated regions would be joined into one zigzag
# line. Grouping by country code plus the provider's region name draws one
# line per subnational region; the colour mapping is left unchanged.
ggplot(
  data,
  aes(
    x = reference_period_start,
    y = mpi,
    color = admin1_name,
    group = interaction(location_code, provider_admin1_name)
  )
) +
  geom_line() +
  theme_minimal() +
  labs(title = "MPI Trends Over Time", x = "Year", y = "MPI")
# Scatter plot of MPI vs. Headcount Ratio
# Points are semi-transparent so overlapping observations stay visible.
ggplot(data) +
  geom_point(aes(x = mpi, y = headcount_ratio), alpha = 0.7) +
  theme_minimal() +
  ggtitle("MPI vs Headcount Ratio") +
  xlab("MPI") +
  ylab("Headcount Ratio")
Compare poverty indicators (e.g., MPI, headcount ratio) across different regions (admin1_name).
# Average MPI by region
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Mean MPI per admin1_name (missing MPI values ignored), highest mean first.
# NOTE(review): admin1_name is "UNSPECIFIED" in every row shown in the
# rendered output of this report; if that holds for the whole table this
# yields a single group. Confirm whether provider_admin1_name was intended.
regional_mpi <- arrange(
  summarize(
    group_by(data, admin1_name),
    avg_mpi = mean(mpi, na.rm = TRUE)
  ),
  desc(avg_mpi)
)
# Bar plot
# One bar per region, ordered from highest to lowest average MPI; x-axis
# labels are rotated 45 degrees.
ggplot(
  data = regional_mpi,
  mapping = aes(x = reorder(admin1_name, -avg_mpi), y = avg_mpi)
) +
  geom_col(fill = "skyblue") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Average MPI by Region", x = "Region", y = "Average MPI")
Analyze how poverty indicators (e.g., mpi, headcount_ratio) evolve over time.
# Line plot of headcount ratio over time
# Without an explicit group, geom_line() connects every observation that
# shares the same admin1_name. That column reads "UNSPECIFIED" in the rows
# shown by str(data), so unrelated regions would be joined into one zigzag
# line. Grouping by country code plus the provider's region name draws one
# line per subnational region; the colour mapping is left unchanged.
ggplot(
  data,
  aes(
    x = reference_period_start,
    y = headcount_ratio,
    color = admin1_name,
    group = interaction(location_code, provider_admin1_name)
  )
) +
  geom_line() +
  theme_minimal() +
  labs(
    title = "Headcount Ratio Trends Over Time",
    x = "Year",
    y = "Headcount Ratio"
  )
Explore the relationship between poverty indicators, such as mpi, headcount_ratio, and intensity_of_deprivation.
# Correlation matrix of numeric variables
library(corrplot)
## corrplot 0.95 loaded
# Pairwise correlations between the indicator columns; use = "complete.obs"
# restricts the computation to rows with no missing value in any of them.
numeric_data <- data[, numeric_cols]
cor_matrix <- cor(x = numeric_data, use = "complete.obs")
# Correlation plot
# Upper triangle only, drawn as circles, with slightly reduced label text.
corrplot(corr = cor_matrix, method = "circle", type = "upper", tl.cex = 0.8)
Identify regions or time periods with severe poverty levels.
Highlight Regions with Severe Poverty
# Filter data for severe poverty
# Keeps rows whose in_severe_poverty value exceeds 50 (example threshold).
severe_poverty <- filter(data, in_severe_poverty > 50)
# Table of regions with severe poverty
# Sorted from the highest in_severe_poverty value downwards.
# NOTE(review): admin1_name prints as "UNSPECIFIED" on every row of the
# rendered table, so rows cannot be told apart by region; consider also
# selecting location_code and provider_admin1_name.
arrange(
  select(
    severe_poverty,
    admin1_name, reference_period_start, in_severe_poverty
  ),
  desc(in_severe_poverty)
)
## admin1_name reference_period_start in_severe_poverty
## 1 UNSPECIFIED 2019-01-01 94.18713
## 2 UNSPECIFIED 2010-01-01 92.76336
## 3 UNSPECIFIED 2014-01-01 92.76336
## 4 UNSPECIFIED 2006-01-01 92.67624
## 5 UNSPECIFIED 2012-01-01 92.67624
## 6 UNSPECIFIED 2019-01-01 91.99853
## 7 UNSPECIFIED 2010-01-01 91.99088
## 8 UNSPECIFIED 2014-01-01 91.99088
## 9 UNSPECIFIED 2019-01-01 91.94398
## 10 UNSPECIFIED 2010-01-01 91.22666
## 11 UNSPECIFIED 2014-01-01 91.22666
## 12 UNSPECIFIED 2019-01-01 90.37550
## 13 UNSPECIFIED 2006-01-01 89.96588
## 14 UNSPECIFIED 2012-01-01 89.96588
## 15 UNSPECIFIED 2019-01-01 89.76591
## 16 UNSPECIFIED 2010-01-01 89.66761
## 17 UNSPECIFIED 2014-01-01 89.66761
## 18 UNSPECIFIED 2019-01-01 89.45431
## 19 UNSPECIFIED 2006-01-01 88.90052
## 20 UNSPECIFIED 2012-01-01 88.90052
## 21 UNSPECIFIED 2006-01-01 88.70196
## 22 UNSPECIFIED 2012-01-01 88.70196
## 23 UNSPECIFIED 2006-01-01 88.59160
## 24 UNSPECIFIED 2012-01-01 88.59160
## 25 UNSPECIFIED 2010-01-01 88.49968
## 26 UNSPECIFIED 2019-01-01 88.31121
## 27 UNSPECIFIED 2010-01-01 87.54323
## 28 UNSPECIFIED 2014-01-01 87.54323
## 29 UNSPECIFIED 2010-01-01 86.78815
## 30 UNSPECIFIED 2014-01-01 86.78815
## 31 UNSPECIFIED 2006-01-01 86.64694
## 32 UNSPECIFIED 2012-01-01 86.64694
## 33 UNSPECIFIED 2019-01-01 86.62932
## 34 UNSPECIFIED 2010-01-01 86.07037
## 35 UNSPECIFIED 2014-01-01 86.07037
## 36 UNSPECIFIED 2010-01-01 86.03351
## 37 UNSPECIFIED 2014-01-01 86.03351
## 38 UNSPECIFIED 2010-01-01 85.60341
## 39 UNSPECIFIED 2014-01-01 85.60341
## 40 UNSPECIFIED 2019-01-01 85.57519
## 41 UNSPECIFIED 2019-01-01 85.00178
## 42 UNSPECIFIED 2010-01-01 84.45917
## 43 UNSPECIFIED 2014-01-01 84.45917
## 44 UNSPECIFIED 2010-01-01 83.75741
## 45 UNSPECIFIED 2010-01-01 82.69446
## 46 UNSPECIFIED 2014-01-01 82.69446
## 47 UNSPECIFIED 2010-01-01 82.35354
## 48 UNSPECIFIED 2014-01-01 82.35354
## 49 UNSPECIFIED 2000-01-01 82.31025
## 50 UNSPECIFIED 2010-01-01 82.31025
## 51 UNSPECIFIED 2019-01-01 81.78364
## 52 UNSPECIFIED 2003-01-01 81.48199
## 53 UNSPECIFIED 2011-01-01 81.48199
## 54 UNSPECIFIED 2006-01-01 80.63445
## 55 UNSPECIFIED 2015-01-01 80.63445
## 56 UNSPECIFIED 2000-01-01 80.11763
## 57 UNSPECIFIED 2010-01-01 80.11763
## 58 UNSPECIFIED 2003-01-01 79.32308
## 59 UNSPECIFIED 2011-01-01 79.32308
## 60 UNSPECIFIED 2010-01-01 78.64556
## 61 UNSPECIFIED 2014-01-01 78.64556
## 62 UNSPECIFIED 2019-01-01 78.63210
## 63 UNSPECIFIED 2000-01-01 77.95217
## 64 UNSPECIFIED 2010-01-01 77.95217
## 65 UNSPECIFIED 2000-01-01 77.80555
## 66 UNSPECIFIED 2010-01-01 77.80555
## 67 UNSPECIFIED 2010-01-01 77.56249
## 68 UNSPECIFIED 2014-01-01 77.56249
## 69 UNSPECIFIED 2000-01-01 77.52427
## 70 UNSPECIFIED 2010-01-01 77.52427
## 71 UNSPECIFIED 2003-01-01 77.41421
## 72 UNSPECIFIED 2011-01-01 77.41421
## 73 UNSPECIFIED 2003-01-01 77.20384
## 74 UNSPECIFIED 2011-01-01 77.20384
## 75 UNSPECIFIED 2010-01-01 76.77138
## 76 UNSPECIFIED 2010-01-01 76.17450
## 77 UNSPECIFIED 2014-01-01 76.17450
## 78 UNSPECIFIED 2018-01-01 75.61750
## 79 UNSPECIFIED 2003-01-01 74.58991
## 80 UNSPECIFIED 2011-01-01 74.58991
## 81 UNSPECIFIED 2010-01-01 72.95418
## 82 UNSPECIFIED 2011-01-01 72.94791
## 83 UNSPECIFIED 2016-01-01 72.94791
## 84 UNSPECIFIED 2010-01-01 72.84281
## 85 UNSPECIFIED 2018-01-01 72.64809
## 86 UNSPECIFIED 2006-01-01 72.61611
## 87 UNSPECIFIED 2015-01-01 72.61611
## 88 UNSPECIFIED 2010-01-01 72.23191
## 89 UNSPECIFIED 2010-01-01 71.79485
## 90 UNSPECIFIED 2014-01-01 71.79485
## 91 UNSPECIFIED 2011-01-01 71.66309
## 92 UNSPECIFIED 2016-01-01 71.66309
## 93 UNSPECIFIED 2010-01-01 71.48906
## 94 UNSPECIFIED 2007-01-01 71.47957
## 95 UNSPECIFIED 2013-01-01 71.47957
## 96 UNSPECIFIED 2019-01-01 71.32949
## 97 UNSPECIFIED 2018-01-01 71.19439
## 98 UNSPECIFIED 2006-01-01 70.91844
## 99 UNSPECIFIED 2015-01-01 70.91844
## 100 UNSPECIFIED 2006-01-01 70.70520
## 101 UNSPECIFIED 2015-01-01 70.70520
## 102 UNSPECIFIED 2018-01-01 70.68489
## 103 UNSPECIFIED 2000-01-01 70.61557
## 104 UNSPECIFIED 2010-01-01 70.61557
## 105 UNSPECIFIED 2018-01-01 70.40144
## 106 UNSPECIFIED 2019-01-01 69.62147
## 107 UNSPECIFIED 2011-01-01 69.44511
## 108 UNSPECIFIED 2016-01-01 69.44511
## 109 UNSPECIFIED 2010-01-01 68.55777
## 110 UNSPECIFIED 2014-01-01 68.55777
## 111 UNSPECIFIED 2007-01-01 68.53120
## 112 UNSPECIFIED 2013-01-01 68.53120
## 113 UNSPECIFIED 2013-01-01 68.48426
## 114 UNSPECIFIED 2016-01-01 68.48426
## 115 UNSPECIFIED 2011-01-01 68.23656
## 116 UNSPECIFIED 2016-01-01 68.23656
## 117 UNSPECIFIED 2010-01-01 68.18000
## 118 UNSPECIFIED 2015-01-01 68.18000
## 119 UNSPECIFIED 2010-01-01 68.15199
## 120 UNSPECIFIED 2014-01-01 68.15199
## 121 UNSPECIFIED 2019-01-01 67.87388
## 122 UNSPECIFIED 2019-01-01 67.80797
## 123 UNSPECIFIED 2010-01-01 66.85831
## 124 UNSPECIFIED 2011-01-01 66.76200
## 125 UNSPECIFIED 2014-01-01 66.76200
## 126 UNSPECIFIED 2010-01-01 66.69556
## 127 UNSPECIFIED 2006-01-01 66.63097
## 128 UNSPECIFIED 2015-01-01 66.63097
## 129 UNSPECIFIED 2010-01-01 65.66816
## 130 UNSPECIFIED 2003-01-01 65.47248
## 131 UNSPECIFIED 2011-01-01 65.47248
## 132 UNSPECIFIED 2003-01-01 65.27379
## 133 UNSPECIFIED 2011-01-01 65.27379
## 134 UNSPECIFIED 2011-01-01 64.92234
## 135 UNSPECIFIED 2016-01-01 64.92234
## 136 UNSPECIFIED 2019-01-01 64.51530
## 137 UNSPECIFIED 2018-01-01 64.34212
## 138 UNSPECIFIED 2018-01-01 64.30396
## 139 UNSPECIFIED 2007-01-01 64.26486
## 140 UNSPECIFIED 2013-01-01 64.26486
## 141 UNSPECIFIED 2006-01-01 64.15202
## 142 UNSPECIFIED 2015-01-01 64.15202
## 143 UNSPECIFIED 2007-01-01 63.44816
## 144 UNSPECIFIED 2013-01-01 63.44816
## 145 UNSPECIFIED 2010-01-01 63.41221
## 146 UNSPECIFIED 2014-01-01 63.41221
## 147 UNSPECIFIED 2011-01-01 63.24015
## 148 UNSPECIFIED 2016-01-01 63.24015
## 149 UNSPECIFIED 2010-01-01 63.07241
## 150 UNSPECIFIED 2014-01-01 62.86707
## 151 UNSPECIFIED 2003-01-01 62.48232
## 152 UNSPECIFIED 2011-01-01 62.48232
## 153 UNSPECIFIED 2007-01-01 62.31233
## 154 UNSPECIFIED 2013-01-01 62.31233
## 155 UNSPECIFIED 2019-01-01 62.03584
## 156 UNSPECIFIED 2006-01-01 61.56455
## 157 UNSPECIFIED 2015-01-01 61.56455
## 158 UNSPECIFIED 2003-01-01 61.45777
## 159 UNSPECIFIED 2011-01-01 61.45777
## 160 UNSPECIFIED 2018-01-01 61.21812
## 161 UNSPECIFIED 2006-01-01 61.07919
## 162 UNSPECIFIED 2012-01-01 61.07919
## 163 UNSPECIFIED 2013-01-01 60.19625
## 164 UNSPECIFIED 2016-01-01 60.19625
## 165 UNSPECIFIED 2021-01-01 59.93430
## 166 UNSPECIFIED 2011-01-01 59.49878
## 167 UNSPECIFIED 2016-01-01 59.49878
## 168 UNSPECIFIED 2021-01-01 59.08237
## 169 UNSPECIFIED 2018-01-01 58.93027
## 170 UNSPECIFIED 2019-01-01 58.85009
## 171 UNSPECIFIED 2018-01-01 58.33613
## 172 UNSPECIFIED 2013-01-01 57.75579
## 173 UNSPECIFIED 2016-01-01 57.75579
## 174 UNSPECIFIED 2019-01-01 57.59431
## 175 UNSPECIFIED 2010-01-01 56.41344
## 176 UNSPECIFIED 2007-01-01 55.69084
## 177 UNSPECIFIED 2013-01-01 55.69084
## 178 UNSPECIFIED 2019-01-01 55.66616
## 179 UNSPECIFIED 2019-01-01 55.63552
## 180 UNSPECIFIED 2010-01-01 55.55790
## 181 UNSPECIFIED 2015-01-01 55.55790
## 182 UNSPECIFIED 2014-01-01 55.41747
## 183 UNSPECIFIED 2017-01-01 54.91716
## 184 UNSPECIFIED 2017-01-01 54.33861
## 185 UNSPECIFIED 2014-01-01 54.30882
## 186 UNSPECIFIED 2018-01-01 54.29043
## 187 UNSPECIFIED 2014-01-01 54.05859
## 188 UNSPECIFIED 2011-01-01 53.99309
## 189 UNSPECIFIED 2014-01-01 53.99309
## 190 UNSPECIFIED 2021-01-01 53.97297
## 191 UNSPECIFIED 2013-01-01 53.73095
## 192 UNSPECIFIED 2010-01-01 53.68111
## 193 UNSPECIFIED 2015-01-01 53.68111
## 194 UNSPECIFIED 2007-01-01 53.61741
## 195 UNSPECIFIED 2013-01-01 53.61741
## 196 UNSPECIFIED 2010-01-01 53.53306
## 197 UNSPECIFIED 2015-01-01 53.53306
## 198 UNSPECIFIED 2019-01-01 53.24220
## 199 UNSPECIFIED 2010-01-01 52.61413
## 200 UNSPECIFIED 2015-01-01 52.61413
## 201 UNSPECIFIED 2013-01-01 52.20366
## 202 UNSPECIFIED 2016-01-01 52.20366
## 203 UNSPECIFIED 2013-01-01 51.95842
## 204 UNSPECIFIED 2016-01-01 51.95842
## 205 UNSPECIFIED 2018-01-01 51.24005
## 206 UNSPECIFIED 2014-01-01 51.22190
## 207 UNSPECIFIED 2005-01-01 50.98148
## 208 UNSPECIFIED 2011-01-01 50.98148
## 209 UNSPECIFIED 2013-01-01 50.79729
## 210 UNSPECIFIED 2016-01-01 50.79729
## 211 UNSPECIFIED 2018-01-01 50.52719
## 212 UNSPECIFIED 2018-01-01 50.43653
## 213 UNSPECIFIED 2018-01-01 50.20177
Analyze the percentage of people vulnerable to poverty (vulnerable_to_poverty).
Vulnerability to Poverty by Region
# Box plot for vulnerable populations by region
# One box per admin1_name value; x-axis labels are rotated 45 degrees.
ggplot(
  data = data,
  mapping = aes(x = admin1_name, y = vulnerable_to_poverty)
) +
  geom_boxplot(alpha = 0.7, fill = "lightgreen") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Vulnerability to Poverty by Region",
    x = "Region",
    y = "Vulnerability (%)"
  )
Detect extreme values for poverty metrics.
# Boxplot for MPI to detect outliers
# A constant empty string on the x axis produces a single box for all rows.
ggplot(data = data, mapping = aes(x = "", y = mpi)) +
  geom_boxplot(alpha = 0.7, fill = "orange") +
  theme_minimal() +
  ggtitle("Outliers in MPI") +
  xlab("") +
  ylab("MPI")
Generate an integrated dashboard with multiple visualizations using the flexdashboard or shiny package.
# Example: Shiny Dashboard
library(shiny)
library(ggplot2)
# Dashboard layout: a sidebar drop-down for choosing one of the indicator
# columns in numeric_cols, and a main panel that shows the "trendPlot" output.
ui <- fluidPage(
  titlePanel(title = "Poverty Indicators Dashboard"),
  sidebarLayout(
    sidebarPanel = sidebarPanel(
      selectInput(
        inputId = "indicator",
        label = "Select Indicator",
        choices = numeric_cols
      )
    ),
    mainPanel = mainPanel(
      plotOutput(outputId = "trendPlot")
    )
  )
)
# Server logic for the dashboard.
#
# Renders "trendPlot": a line plot of the indicator chosen in the
# "indicator" input against reference_period_start.
#
# input:  Shiny input object; reads input$indicator (a column name of data).
# output: Shiny output object; writes output$trendPlot.
server <- function(input, output) {
  output$trendPlot <- renderPlot({
    ggplot(
      data,
      aes(
        x = reference_period_start,
        # Look the selected column up by its string name through the .data
        # pronoun rather than building a symbol and unquoting it.
        y = .data[[input$indicator]],
        color = admin1_name,
        # One line per subnational region. Grouping only by admin1_name
        # (the colour variable, "UNSPECIFIED" in the rows shown by
        # str(data)) would join unrelated regions into a single line.
        group = interaction(location_code, provider_admin1_name)
      )
    ) +
      geom_line() +
      theme_minimal() +
      labs(
        title = paste(input$indicator, "Trends Over Time"),
        x = "Year",
        y = input$indicator
      )
  })
}
# Build the Shiny app from the UI definition and server function above.
shinyApp(ui, server)