Stata and R PIP clients

Accessing the World Bank’s estimates of poverty and inequality

The pip Stata client (wrapper)

Installation

SSC

ssc install pip


Github

net install github, from("https://haghish.github.io/github/")  
github install worldbank/pip

Installation

SSC

ssc install pip


Github

net install github, from("https://haghish.github.io/github/")  
github install worldbank/pip, version(0.9.5.9003)

Country-level estimates

/* To query poverty at $2.15-a-day poverty line for 
all countries in all survey years*/

pip, clear country(all) year(all) povline(2.15)

* Default 
pip, clear

Variables

Country-level estimates

/* To query poverty at $2.15-a-day poverty line 
for Morocco in 2013 */

pip, country(mar) year(2013) clear

Country-level estimates

/* For extrapolated and interpolated data that 
underpin the global and regional poverty numbers, use 
`fillgaps` option */

pip, country(mar) year(2019) clear fillgaps

Global and regional poverty estimates

pip wb, clear

Global and regional poverty estimates

pip wb, clear

/* Query a particular region or global values with `region()` */

pip wb, clear region(wld)
pip wb, clear region(lac)

Poverty lines

/*Query poverty at other thresholds*/

pip, country(mar) year(2019) clear ///
fillgaps povline(6.85)

Poverty lines

/*Or multiple thresholds*/

pip, country(mar) year(2019) clear ///
fillgaps povline(2.15 3.65 6.85 10) 

Data availability

pip info, clear

Data availability

pip, country(mar) year(2019) clear

Replicability

pip version, clear

Data for different PPP years

pip, country(COL) ppp_year(2017) clear
pip, country(COL) ppp_year(2011) clear

Towards distributional analysis

pip, country(mar) year(2019) clear fillgaps popshare(0.5)

Towards distributional analysis

numlist "0.01(.01)1"
pip, country(mar) year(2019) clear fillgaps ///
    popshare(`r(numlist)') n2disp(6)

PIP Percentiles data

Auxiliary Data

pip tables, clear

Alternatively

pip tables, table(cpi) clear

pip tables, table(interpolated_means) clear

Help file

R Client (wrapper)

Installation

# install.packages("devtools")
devtools::install_github("worldbank/pipr")

Webpage

https://worldbank.github.io/pipr/

Country-level estimates

pipr::get_stats() |> 
  select(c(country_code, year, poverty_line, headcount))
# A tibble: 2,320 × 4
   country_code  year poverty_line headcount
   <chr>        <dbl>        <dbl>     <dbl>
 1 AGO           2000         2.15   0.214  
 2 AGO           2008         2.15   0.146  
 3 AGO           2018         2.15   0.311  
 4 ALB           1996         2.15   0.00535
 5 ALB           2002         2.15   0.0109 
 6 ALB           2005         2.15   0.00591
 7 ALB           2008         2.15   0.00200
 8 ALB           2012         2.15   0.00621
 9 ALB           2014         2.15   0.0102 
10 ALB           2015         2.15   0.00121
# ℹ 2,310 more rows

Country-level estimates

pipr::get_stats(country = "MAR") |> 
  select(c(country_code, year, poverty_line, headcount))
# A tibble: 6 × 4
  country_code  year poverty_line headcount
  <chr>        <dbl>        <dbl>     <dbl>
1 MAR           1984         2.15    0.132 
2 MAR           1990         2.15    0.0473
3 MAR           1998         2.15    0.0838
4 MAR           2000         2.15    0.0793
5 MAR           2006         2.15    0.0424
6 MAR           2013         2.15    0.0144

Country-level estimates

pipr::get_stats(country = c("COL", "MAR")) |> 
  arrange(year) |> 
  select(c(country_code, year, poverty_line, headcount))
# A tibble: 33 × 4
   country_code  year poverty_line headcount
   <chr>        <dbl>        <dbl>     <dbl>
 1 COL           1980         2.15    0.208 
 2 MAR           1984         2.15    0.132 
 3 COL           1988         2.15    0.152 
 4 COL           1989         2.15    0.142 
 5 MAR           1990         2.15    0.0473
 6 COL           1991         2.15    0.134 
 7 COL           1992         2.15    0.0956
 8 COL           1996         2.15    0.176 
 9 MAR           1998         2.15    0.0838
10 COL           1999         2.15    0.214 
# ℹ 23 more rows

Country-level estimates

pipr::get_stats(country = "COL", year = c(2012, 2018))  |> 
  select(c(country_code, year, poverty_line, headcount))
# A tibble: 2 × 4
  country_code  year poverty_line headcount
  <chr>        <dbl>        <dbl>     <dbl>
1 COL           2012         2.15    0.0672
2 COL           2018         2.15    0.0450

Interpolated and Extrapolated values

pipr::get_stats(country = "HTI")  |> 
  select(c(country_code, year, poverty_line, headcount))
# A tibble: 3 × 4
  country_code  year poverty_line headcount
  <chr>        <dbl>        <dbl>     <dbl>
1 HTI           2001         2.15     0.678
2 HTI           2012         2.15     0.292
3 HTI           2012         2.15     0.579

Interpolated and Extrapolated values

pipr::get_stats(country = "HTI", fill_gaps = TRUE)   |> 
  select(c(country_code, year, poverty_line, headcount))
# A tibble: 41 × 4
   country_code  year poverty_line headcount
   <chr>        <dbl>        <dbl>     <dbl>
 1 HTI           1981         2.15     0.513
 2 HTI           1982         2.15     0.532
 3 HTI           1983         2.15     0.538
 4 HTI           1984         2.15     0.545
 5 HTI           1985         2.15     0.550
 6 HTI           1986         2.15     0.563
 7 HTI           1987         2.15     0.572
 8 HTI           1988         2.15     0.763
 9 HTI           1989         2.15     0.778
10 HTI           1990         2.15     0.775
# ℹ 31 more rows

Poverty line

pipr::get_stats(country = "COL", povline = 3)  |> 
  select(c(country_code, year, poverty_line, headcount))
# A tibble: 27 × 4
   country_code  year poverty_line headcount
   <chr>        <dbl>        <dbl>     <dbl>
 1 COL           1980            3     0.299
 2 COL           1988            3     0.234
 3 COL           1989            3     0.231
 4 COL           1991            3     0.214
 5 COL           1992            3     0.175
 6 COL           1996            3     0.270
 7 COL           1999            3     0.309
 8 COL           2000            3     0.258
 9 COL           2001            3     0.319
10 COL           2002            3     0.231
# ℹ 17 more rows

Multiple Poverty lines

# Query several poverty lines for one country-year and stack the results
# into a single data frame (one get_stats() call per poverty line).
# `povline` is passed by name so each value cannot be silently matched to a
# different argument of get_stats().
povlines <- c(2, 4, 6, 10)
map_df(
  .x = povlines,
  .f = \(pl) pipr::get_stats(country = "COL", year = 2012, povline = pl)
) |>
  select(c(country_code, year, poverty_line, headcount))
# A tibble: 4 × 4
  country_code  year poverty_line headcount
  <chr>        <dbl>        <dbl>     <dbl>
1 COL           2012            2    0.0588
2 COL           2012            4    0.194 
3 COL           2012            6    0.344 
4 COL           2012           10    0.563 

Global estimates

pipr::get_wb()  |> 
  arrange(year) |> 
  select(c(region_code, year, poverty_line, headcount))
# A tibble: 347 × 4
   region_code  year poverty_line headcount
   <chr>       <dbl>        <dbl>     <dbl>
 1 OHI          1981          1.9   0.00518
 2 LAC          1981          1.9   0.126  
 3 SAS          1981          1.9   0.487  
 4 EAP          1981          1.9   0.787  
 5 WLD          1981          1.9   0.393  
 6 OHI          1982          1.9   0.00564
 7 LAC          1982          1.9   0.132  
 8 SAS          1982          1.9   0.483  
 9 EAP          1982          1.9   0.763  
10 WLD          1982          1.9   0.388  
# ℹ 337 more rows

Many more functions

# Global poverty trends 1990-2019
#
# Fetches the regional/global aggregates, keeps the "WLD" series for years
# after 1989, and plots the headcount rate (left axis) together with the
# number of poor in millions (right axis).
df <- pipr::get_wb() |>
  filter(year > 1989, region_code == "WLD") |>
  mutate(
    pop_in_poverty = round(pop_in_poverty / 1e6), # persons -> millions
    headcount = round(headcount, 3)
  )

headcount_col <- "#E69F00"

# Both series share the primary (rate) axis: the number of poor is divided by
# this factor for drawing, and the secondary axis multiplies it back.
poor_per_axis_unit <- 5000

gr <- ggplot(df, aes(x = year)) +
  # Value labels: rate (as a percentage) below its line, count above its line.
  geom_text(
    aes(y = headcount, label = headcount * 100),
    vjust = 1,
    nudge_y = -0.02,
    color = headcount_col
  ) +
  geom_text(
    aes(y = pop_in_poverty / poor_per_axis_unit, label = pop_in_poverty),
    vjust = 0,
    nudge_y = 0.02
  ) +
  geom_line(aes(y = headcount), color = headcount_col) +
  geom_line(aes(y = pop_in_poverty / poor_per_axis_unit)) +
  geom_point(aes(y = headcount), color = headcount_col) +
  geom_point(aes(y = pop_in_poverty / poor_per_axis_unit)) +
  scale_y_continuous(
    labels = scales::percent,
    limits = c(0, 0.5),
    breaks = c(0, 0.1, 0.2, 0.3, 0.4),
    sec.axis = sec_axis(
      ~ . * poor_per_axis_unit,
      name = "Number of poor (million)",
      breaks = c(0, 500, 1000, 1500, 2000)
    )
  ) +
  labs(
    title = "Global poverty trends 1990-2019",
    y = "Poverty rate (%)",
    x = ""
  ) +
  theme_classic()

# Number of poor by region
#
# Stacked area chart of the number of poor (millions) per region, with the
# world total overlaid as a dashed line.
# Keeps years strictly between 1989 and 2019, i.e. 1990 through 2018.
df <- pipr::get_wb() |>
  filter(year > 1989 & year < 2019) |>
  mutate(
    pop_in_poverty = round(pop_in_poverty / 1000000, 0), # persons -> millions
    headcount = round(headcount, 3)
  )

# Regional series only: drop the world aggregate and the AFE/AFW codes
# (presumably sub-aggregates that would double count; confirm against the
# PIP region list). fct_relevel() moves the listed names to the front of the
# factor levels in this order, which fixes the fill/legend ordering.
regions <- df |>
  filter(!region_code %in% c("WLD", "AFE", "AFW")) |>
  mutate(
    region_name = fct_relevel(region_name,
                               c("Other high Income",
                                 "Europe and Central Asia",
                                 "Middle East and North Africa",
                                 "Latin America and the Caribbean",
                                 "East Asia and Pacific",
                                 "South Asia",
                                 "Sub-Saharan Africa"
                                 ))
  )

# World aggregate, drawn on top of the stacked regions.
world <- df |>
  filter(region_code == "WLD")

gr2 <- ggplot(regions, aes(y = pop_in_poverty, 
                           x = year, 
                           fill = region_name)) +
  geom_area() +
  scale_y_continuous(
    limits = c(0, 2000),
    breaks = c(0, 500, 1000, 1500, 2000)
  ) +
  scale_fill_tableau(palette = "Tableau 10") +
  labs(
    title = "Number of poor by region",
    y = "Number of poor (million)",
    x = ""
  ) +
  theme_classic() +
  theme(
    legend.position = "bottom"
  ) +
  # `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0); using
  # `size` here triggers a deprecation warning.
  geom_line(data = world, 
            linewidth = rel(1.5), 
            alpha =.5, 
            linetype = "longdash")

Thanks.