Notes

Two Stata files in the working directory:

It computes the three Foster–Greer–Thorbecke (FGT) poverty measures (headcount P0, poverty gap P1, squared poverty gap P2) at:

For two poverty lines: povline and food_povline contained in the datasets.

It uses household sampling weights (hhweight) and household size (hhsize) to compute population (person-level) measures.


1. Helper functions

fgt_measures <- function(cons, hhweight, hhsize, z){
  cons <- as.numeric(cons)
  hhweight <- as.numeric(hhweight)
  hhsize <- as.numeric(hhsize)
  
  pw <- hhweight * hhsize
  N_pop <- sum(pw, na.rm = TRUE)
  
  poor <- pmax(0, z - cons)
  
  P0 <- sum(pw * (cons < z), na.rm = TRUE) / N_pop
  P1 <- sum(pw * (poor / z), na.rm = TRUE) / N_pop
  P2 <- sum(pw * ((poor / z)^2), na.rm = TRUE) / N_pop
  
  tibble(P0 = P0, P1 = P1, P2 = P2, N_pop = N_pop)
}

compute_grouped_fgt <- function(df, group_var, z_var){
  df %>%
    group_by(across(all_of(group_var))) %>%
    summarise(res = list(fgt_measures(cons = cons, hhweight = hhweight, hhsize = hhsize, z = unique(!!sym(z_var)))),
              .groups = "drop") %>%
    unnest(res)
}

2. Read data

d12 <- read_dta("Tanzania_2012.dta") %>% clean_names()
d18 <- read_dta("Tanzania_2018.dta") %>% clean_names()

glimpse(d12)
## Rows: 10,186
## Columns: 16
## $ stratum           <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ hhweight          <dbl> 1898.306, 1898.306, 1898.306, 1898.306, 1898.306, 18…
## $ region            <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ hhsize            <dbl> 2, 4, 4, 6, 3, 4, 4, 9, 4, 8, 3, 5, 2, 4, 4, 6, 5, 8…
## $ ch14              <dbl> 0, 1, 2, 4, 1, 2, 2, 4, 2, 6, 1, 3, 0, 2, 1, 4, 3, 3…
## $ adult             <dbl> 2, 3, 2, 2, 2, 2, 2, 5, 2, 2, 2, 2, 2, 2, 3, 2, 2, 5…
## $ elderly           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0…
## $ sex_hhh           <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, …
## $ cons              <dbl> 45471.95, 42183.56, 59783.75, 34738.25, 52342.97, 33…
## $ year              <dbl> 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012…
## $ educlevel_hhh     <dbl+lbl> 4, 2, 3, 3, 1, 2, 2, 3, 3, 3, 2, 3, 1, 1, 1, 3, …
## $ hhid              <dbl> 11091011008, 11091011019, 11091011031, 11091011042, …
## $ cluster           <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ hhh_main_activity <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ povline           <dbl> 36482, 36482, 36482, 36482, 36482, 36482, 36482, 364…
## $ food_povline      <dbl> 26085, 26085, 26085, 26085, 26085, 26085, 26085, 260…
glimpse(d18)
## Rows: 9,463
## Columns: 16
## $ stratum       <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ hhweight      <dbl> 2654.378, 2654.378, 2654.378, 2654.378, 2654.378, 2654.3…
## $ region        <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ hhsize        <dbl> 7, 1, 3, 8, 7, 1, 6, 2, 4, 1, 2, 1, 6, 5, 2, 5, 4, 5, 2,…
## $ ch14          <dbl> 4, 0, 0, 5, 5, 0, 4, 1, 2, 0, 1, 0, 1, 2, 0, 3, 0, 1, 0,…
## $ adult         <dbl> 3, 1, 3, 3, 2, 1, 2, 1, 2, 1, 1, 1, 5, 3, 2, 2, 4, 4, 2,…
## $ elderly       <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 2, 0, 0, 0, 0,…
## $ sex_hhh       <dbl+lbl> 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2…
## $ cons          <dbl> 68496.26, 94985.55, 81810.64, 47930.10, 66822.25, 55410.…
## $ hhid          <dbl> 1.01021e+13, 1.01021e+13, 1.01021e+13, 1.01021e+13, 1.01…
## $ year          <dbl> 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 20…
## $ educlevel_hhh <dbl+lbl> 3, 3, 1, 3, 3, 1, 3, 1, 3, 2, 2, 2, 3, 2, 1, 3, 3, 3…
## $ povline       <dbl> 49320, 49320, 49320, 49320, 49320, 49320, 49320, 49320, …
## $ food_povline  <dbl> 33748, 33748, 33748, 33748, 33748, 33748, 33748, 33748, …
## $ cluster       <dbl> 101021, 101021, 101021, 101021, 101021, 101021, 101021, …
## $ merge         <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…

3. Prepare datasets

keep_vars <- c("stratum","hhweight","region","hhsize","cons","povline","food_povline","year","hhid","cluster")

d12 <- d12 %>% select(any_of(keep_vars)) %>% mutate(year = 2012)
d18 <- d18 %>% select(any_of(keep_vars)) %>% mutate(year = 2018)

dall <- bind_rows(d12, d18)

4. National-level FGT

compute_national <- function(df, z_var){
  z <- df[[z_var]]
  z_val <- unique(na.omit(z))[1]
  fgt_measures(cons = df$cons, hhweight = df$hhweight, hhsize = df$hhsize, z = z_val) %>%
    mutate(z_var = z_var, z = z_val)
}

nat12_pov <- compute_national(d12, "povline") %>% mutate(year = 2012, line = "povline")
nat12_food <- compute_national(d12, "food_povline") %>% mutate(year = 2012, line = "food_povline")
nat18_pov <- compute_national(d18, "povline") %>% mutate(year = 2018, line = "povline")
nat18_food <- compute_national(d18, "food_povline") %>% mutate(year = 2018, line = "food_povline")

national_results <- bind_rows(nat12_pov, nat12_food, nat18_pov, nat18_food)
national_results %>% knitr::kable(digits = 4)
P0 P1 P2 N_pop z_var z year line
0.2817 0.0670 0.0233 42270137 povline 36482 2012 povline
0.0974 0.0185 0.0057 42270137 food_povline 26085 2012 food_povline
0.2639 0.0616 0.0212 52679833 povline 49320 2018 povline
0.0801 0.0139 0.0039 52679833 food_povline 33748 2018 food_povline

5. FGT by stratum and region

# by stratum
stratum_results <- dall %>%
  group_by(year, stratum) %>%
  summarise(P0 = mean(cons < povline), .groups = "drop")

# by region (can extend similar to above)
region_results <- dall %>%
  group_by(year, region) %>%
  summarise(P0 = mean(cons < povline), .groups = "drop")


stratum_results
region_results