Suggested citation: > Miranti, Ragdad Cani.(2020). Regional Poverty Convergence across Districts in Indonesia: A Distribution Dynamics Approach https://rpubs.com/canimiranti/distribution_dynamics_poverty514districts
This work is licensed under the Creative Commons Attribution-Non Commercial-Share Alike 4.0 International License.
Data of Headcount Index (Poverty Rate) are derived from the Indonesia Central Bureau of Statistics (Badan Pusat Statistik Republik of Indonesia). https://www.bps.go.id/
Acknowledgment:
Material adapted from multiple sources, in particular from Magrini (2007).
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(skimr)
library(kableExtra) # html tables
library(pdfCluster) # density based clusters
library(hdrcde) # conditional density estimation
library(plotly)
library(intoo)
library(barsurf)
library(bivariate)
library(np)
library(quantreg)
library(basetheme)
basetheme("minimal")
library(viridis)
library(ggpointdensity)
library(isoband)
#library(MASS)
library(KernSmooth)
# Change the presentation of decimal numbers to 4 and avoid scientific notation
options(prompt="R> ", digits=4, scipen=7)Study the dynamics of univariate densities
Compute the bandwidth of a density
Study mobility plots
Study bi-variate densities
Study density-based clustering methods
Study conditional bi-variate densities
Since the data is in relative terms, let us rename the variables and add new variables.
dat <- dat %>%
mutate(
rel_pov2010 = pov2010/mean(pov2010),
rel_pov2018 = pov2010/mean(pov2018)
)
dat| Name | dat |
| Number of rows | 514 |
| Number of columns | 13 |
| _______________________ | |
| Column type frequency: | |
| character | 1 |
| numeric | 12 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| District | 0 | 1 | 4 | 26 | 0 | 514 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| Code | 0 | 1 | 4558.30 | 2678.26 | 911.00 | 1803.25 | 3550.00 | 7173.75 | 9471.00 | ▇▆▂▇▃ |
| pov2010 | 0 | 1 | 15.62 | 9.42 | 1.67 | 9.01 | 13.34 | 19.56 | 49.58 | ▇▇▂▁▁ |
| pov2011 | 0 | 1 | 14.64 | 8.92 | 1.50 | 8.14 | 12.44 | 18.91 | 47.44 | ▇▇▂▁▁ |
| pov2012 | 0 | 1 | 13.92 | 8.55 | 1.33 | 7.79 | 11.77 | 17.78 | 45.92 | ▇▇▂▁▁ |
| pov2013 | 0 | 1 | 13.83 | 8.59 | 1.75 | 7.76 | 11.74 | 17.41 | 47.52 | ▇▆▂▁▁ |
| pov2014 | 0 | 1 | 13.12 | 7.93 | 1.68 | 7.38 | 11.18 | 16.60 | 44.49 | ▇▆▂▁▁ |
| pov2015 | 0 | 1 | 13.51 | 8.25 | 1.69 | 7.59 | 11.58 | 16.87 | 45.74 | ▇▇▂▁▁ |
| pov2016 | 0 | 1 | 13.18 | 8.20 | 1.67 | 7.40 | 11.25 | 16.29 | 45.11 | ▇▇▂▁▁ |
| pov2017 | 0 | 1 | 12.97 | 7.98 | 1.76 | 7.35 | 11.14 | 16.04 | 43.63 | ▇▇▂▁▁ |
| pov2018 | 0 | 1 | 12.34 | 7.84 | 1.68 | 6.99 | 10.21 | 15.08 | 43.49 | ▇▆▂▁▁ |
| rel_pov2010 | 0 | 1 | 1.00 | 0.60 | 0.11 | 0.58 | 0.85 | 1.25 | 3.18 | ▇▇▂▁▁ |
| rel_pov2018 | 0 | 1 | 1.27 | 0.76 | 0.14 | 0.73 | 1.08 | 1.59 | 4.02 | ▇▇▂▁▁ |
Select bandwidth based on function dpik from the package KernSmooth
## [1] 0.1026
## [1] 0.1299
dis_rel_pov2010 <- bkde(dat$rel_pov2010, bandwidth = h_rel_pov2010)
dis_rel_pov2010 <- as.data.frame(dis_rel_pov2010)
ggplot(dis_rel_pov2010, aes(x, y)) + geom_line() +
theme_minimal() dis_rel_pov2018 <- bkde(dat$rel_pov2018, bandwidth = h_rel_pov2018)
dis_rel_pov2018 <- as.data.frame(dis_rel_pov2018)
ggplot(dis_rel_pov2018, aes(x, y)) + geom_line() +
theme_minimal() There are two methods for plot both densities,i.e. Kernsmooth and bandwith default of ggplot. I prefer using the gglot ( Method 2).
Using the bandwidth default of ggplot Manual labels are not yet implemented in the ggplotly function
rel_pov2010 <- dat %>%
select(rel_pov2010) %>%
rename(rel_var = rel_pov2010) %>%
mutate(year = 2010)rel_pov2018 <- dat %>%
select(rel_pov2018) %>%
rename(rel_var = rel_pov2018) %>%
mutate(year = 2018)rel_pov2010pov2018 <- rel_pov2010pov2018 %>%
mutate(year = as.factor(year))
head(rel_pov2010pov2018)dis_rel_pov2010pov2018 <- ggplot(rel_pov2010pov2018,aes(x=rel_var, color=year)) +
geom_density() +
theme_minimal()
dis_rel_pov2010pov2018Using plotly
dat %>%
ggplot(aes(x = rel_pov2010, y = rel_pov2018)) +
geom_point(alpha=0.5) +
geom_abline(aes(intercept = 0, slope = 1)) +
geom_hline(yintercept = 1, linetype="dashed") +
geom_vline(xintercept = 1, linetype="dashed") +
theme_minimal() +
labs(subtitle = "Relative Pov2018",
x = "Relative Pov2010",
y = "") +
theme(text=element_text(family="Palatino")) Fit a non-linear function
dat %>%
ggplot(aes(x = rel_pov2010, y = rel_pov2018)) +
geom_point(alpha=0.5) +
geom_smooth() +
geom_abline(aes(intercept = 0, slope = 1)) +
geom_hline(yintercept = 1, linetype="dashed") +
geom_vline(xintercept = 1, linetype="dashed") +
theme_minimal() +
labs(subtitle = "Relative pov2018",
x = "Relative pov2010",
y = "") +
theme(text=element_text(family="Palatino")) ## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Not that the nonlinear fit crosses the 45-degree line two times from above.
plot(bivariate,
xlab="Relative Poverty 2010",
ylab="Relative Poverty 2018")
abline(a=0, b=1)
abline(h=1, v=1)dat %>%
ggplot(aes(x = rel_pov2010, y = rel_pov2018)) +
geom_point(color = "lightgray") +
geom_smooth() +
#geom_smooth(method=lm, se=FALSE) +
stat_density_2d() +
geom_abline(aes(intercept = 0, slope = 1)) +
geom_hline(yintercept = 1, linetype="dashed") +
geom_vline(xintercept = 1, linetype="dashed") +
theme_minimal() +
labs(subtitle = "Relative pov2018",
x = "Relative pov2010",
y = "") +
theme(text=element_text(family="Palatino")) ## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
dat %>%
ggplot(aes(x = rel_pov2010, y = rel_pov2018)) +
stat_density_2d(aes(fill = stat(nlevel)), geom = "polygon") +
scale_fill_viridis_c() +
geom_abline(aes(intercept = 0, slope = 1)) +
geom_hline(yintercept = 1, linetype="dashed") +
geom_vline(xintercept = 1, linetype="dashed") +
theme_minimal() +
labs(x = "Relative Poverty Rate 2010",
y = "Relative Poverty Rate 2018") +
theme(text=element_text(size=8, family="Palatino"))There are two ways of analyzing the use of stochastic kernel package: 1. Contour Plots and 2. Surface Plots
hdrcde packageIncrease the number of intervals to 60
High density regions
np packageCompute adaptive bandwith based on cross-validation
##
Multistart 1 of 2 |
Multistart 1 of 2 |
Multistart 1 of 2 |
Multistart 1 of 2 /
Multistart 1 of 2 |
Multistart 1 of 2 |
Multistart 2 of 2 |
Multistart 2 of 2 |
Multistart 2 of 2 /
Multistart 2 of 2 -
Multistart 2 of 2 |
Multistart 2 of 2 |
##
## Conditional density data (514 observations, 2 variable(s))
## (1 dependent variable(s), and 1 explanatory variable(s))
##
## Bandwidth Selection Method: Maximum Likelihood Cross-Validation
## Formula: dat$rel_pov2018 ~ dat$rel_pov2010
## Bandwidth Type: Adaptive Nearest Neighbour
## Objective Function Value: 2166 (achieved on multistart 1)
##
## Exp. Var. Name: dat$rel_pov2010 Bandwidth: 1
##
## Dep. Var. Name: dat$rel_pov2018 Bandwidth: 1
##
## Continuous Kernel Type: Second-Order Gaussian
## No. Continuous Explanatory Vars.: 1
## No. Continuous Dependent Vars.: 1
## Estimation Time: 13.48 seconds
Compute conditional density object
##
## Conditional Density Data: 514 training points, in 2 variable(s)
## (1 dependent variable(s), and 1 explanatory variable(s))
##
## dat$rel_pov2018
## Dep. Var. Bandwidth(s): 1
## dat$rel_pov2010
## Exp. Var. Bandwidth(s): 1
##
## Bandwidth Type: Adaptive Nearest Neighbour
## Log Likelihood: 2534
##
## Continuous Kernel Type: Second-Order Gaussian
## No. Continuous Explanatory Vars.: 1
## No. Continuous Dependent Vars.: 1
## An S4 object of class "pdfCluster"
##
## Call: pdfCluster(x = pov)
##
## Initial groupings:
## label 1 2 3 4 NA
## count 458 15 7 4 30
##
## Final groupings:
## label 1 2 3 4
## count 461 28 16 9
##
## Groups tree (here 'h' denotes 'height'):
## --[dendrogram w/ 1 branches and 4 members at h = 1]
## `--[dendrogram w/ 3 branches and 4 members at h = 0.955]
## |--[dendrogram w/ 2 branches and 2 members at h = 0.927]
## | |--leaf "1 "
## | `--leaf "2 " (h= 0.882 )
## |--leaf "4 " (h= 0.945 )
## `--leaf "3 " (h= 0.918 )
fig1 <- cde(dat$pov2010, dat$pov2018,
x.name="Poverty Rate in 2010",
y.name="Poverty Rate in 2018")
plot(fig1)## null device
## 1
plot_sc_pov <- plot(cl.pov,
stage = 0,
which = 3,
frame = FALSE)
# 1. Create the plot
plot(cl.pov,
stage = 0,
which = 3,
frame = FALSE)## null device
## 1
## null device
## 1
Mendez C. (2020). Classical sigma and beta convergence analysis in R: Using the REAT 2.1 Package. R Studio/RPubs. Available at https://rpubs.com/quarcs-lab/classical-convergence-reat21
Mendez C. (2020). Univariate distribution dynamics in R: Using the ggridges package. R Studio/RPubs. Available at https://rpubs.com/quarcs-lab/univariate-distribution-dynamics
Mendez, C. (2019). Overall efficiency, pure technical efficiency, and scale efficiency across provinces in Indonesia 1990 and 2010. R Studio/RPubs. Available at https://rpubs.com/quarcs-lab/efficiency-clusters-indonesia-1990-2010
END