Two Stata files in the working directory:
Tanzania_2012.dtaTanzania_2018.dtaIt computes the three Foster–Greer–Thorbecke (FGT) poverty measures (headcount P0, poverty gap P1, squared poverty gap P2) at:
For two poverty lines: povline and
food_povline contained in the datasets.
It uses household sampling weights (hhweight) and
household size (hhsize) to compute population
(person-level) measures.
fgt_measures <- function(cons, hhweight, hhsize, z){
cons <- as.numeric(cons)
hhweight <- as.numeric(hhweight)
hhsize <- as.numeric(hhsize)
pw <- hhweight * hhsize
N_pop <- sum(pw, na.rm = TRUE)
poor <- pmax(0, z - cons)
P0 <- sum(pw * (cons < z), na.rm = TRUE) / N_pop
P1 <- sum(pw * (poor / z), na.rm = TRUE) / N_pop
P2 <- sum(pw * ((poor / z)^2), na.rm = TRUE) / N_pop
tibble(P0 = P0, P1 = P1, P2 = P2, N_pop = N_pop)
}
compute_grouped_fgt <- function(df, group_var, z_var){
df %>%
group_by(across(all_of(group_var))) %>%
summarise(res = list(fgt_measures(cons = cons, hhweight = hhweight, hhsize = hhsize, z = unique(!!sym(z_var)))),
.groups = "drop") %>%
unnest(res)
}
d12 <- read_dta("Tanzania_2012.dta") %>% clean_names()
d18 <- read_dta("Tanzania_2018.dta") %>% clean_names()
glimpse(d12)
## Rows: 10,186
## Columns: 16
## $ stratum <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ hhweight <dbl> 1898.306, 1898.306, 1898.306, 1898.306, 1898.306, 18…
## $ region <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ hhsize <dbl> 2, 4, 4, 6, 3, 4, 4, 9, 4, 8, 3, 5, 2, 4, 4, 6, 5, 8…
## $ ch14 <dbl> 0, 1, 2, 4, 1, 2, 2, 4, 2, 6, 1, 3, 0, 2, 1, 4, 3, 3…
## $ adult <dbl> 2, 3, 2, 2, 2, 2, 2, 5, 2, 2, 2, 2, 2, 2, 3, 2, 2, 5…
## $ elderly <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0…
## $ sex_hhh <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, …
## $ cons <dbl> 45471.95, 42183.56, 59783.75, 34738.25, 52342.97, 33…
## $ year <dbl> 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012…
## $ educlevel_hhh <dbl+lbl> 4, 2, 3, 3, 1, 2, 2, 3, 3, 3, 2, 3, 1, 1, 1, 3, …
## $ hhid <dbl> 11091011008, 11091011019, 11091011031, 11091011042, …
## $ cluster <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ hhh_main_activity <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ povline <dbl> 36482, 36482, 36482, 36482, 36482, 36482, 36482, 364…
## $ food_povline <dbl> 26085, 26085, 26085, 26085, 26085, 26085, 26085, 260…
glimpse(d18)
## Rows: 9,463
## Columns: 16
## $ stratum <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ hhweight <dbl> 2654.378, 2654.378, 2654.378, 2654.378, 2654.378, 2654.3…
## $ region <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ hhsize <dbl> 7, 1, 3, 8, 7, 1, 6, 2, 4, 1, 2, 1, 6, 5, 2, 5, 4, 5, 2,…
## $ ch14 <dbl> 4, 0, 0, 5, 5, 0, 4, 1, 2, 0, 1, 0, 1, 2, 0, 3, 0, 1, 0,…
## $ adult <dbl> 3, 1, 3, 3, 2, 1, 2, 1, 2, 1, 1, 1, 5, 3, 2, 2, 4, 4, 2,…
## $ elderly <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 2, 0, 0, 0, 0,…
## $ sex_hhh <dbl+lbl> 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2…
## $ cons <dbl> 68496.26, 94985.55, 81810.64, 47930.10, 66822.25, 55410.…
## $ hhid <dbl> 1.01021e+13, 1.01021e+13, 1.01021e+13, 1.01021e+13, 1.01…
## $ year <dbl> 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 20…
## $ educlevel_hhh <dbl+lbl> 3, 3, 1, 3, 3, 1, 3, 1, 3, 2, 2, 2, 3, 2, 1, 3, 3, 3…
## $ povline <dbl> 49320, 49320, 49320, 49320, 49320, 49320, 49320, 49320, …
## $ food_povline <dbl> 33748, 33748, 33748, 33748, 33748, 33748, 33748, 33748, …
## $ cluster <dbl> 101021, 101021, 101021, 101021, 101021, 101021, 101021, …
## $ merge <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
keep_vars <- c("stratum","hhweight","region","hhsize","cons","povline","food_povline","year","hhid","cluster")
d12 <- d12 %>% select(any_of(keep_vars)) %>% mutate(year = 2012)
d18 <- d18 %>% select(any_of(keep_vars)) %>% mutate(year = 2018)
dall <- bind_rows(d12, d18)
compute_national <- function(df, z_var){
z <- df[[z_var]]
z_val <- unique(na.omit(z))[1]
fgt_measures(cons = df$cons, hhweight = df$hhweight, hhsize = df$hhsize, z = z_val) %>%
mutate(z_var = z_var, z = z_val)
}
nat12_pov <- compute_national(d12, "povline") %>% mutate(year = 2012, line = "povline")
nat12_food <- compute_national(d12, "food_povline") %>% mutate(year = 2012, line = "food_povline")
nat18_pov <- compute_national(d18, "povline") %>% mutate(year = 2018, line = "povline")
nat18_food <- compute_national(d18, "food_povline") %>% mutate(year = 2018, line = "food_povline")
national_results <- bind_rows(nat12_pov, nat12_food, nat18_pov, nat18_food)
national_results %>% knitr::kable(digits = 4)
| P0 | P1 | P2 | N_pop | z_var | z | year | line |
|---|---|---|---|---|---|---|---|
| 0.2817 | 0.0670 | 0.0233 | 42270137 | povline | 36482 | 2012 | povline |
| 0.0974 | 0.0185 | 0.0057 | 42270137 | food_povline | 26085 | 2012 | food_povline |
| 0.2639 | 0.0616 | 0.0212 | 52679833 | povline | 49320 | 2018 | povline |
| 0.0801 | 0.0139 | 0.0039 | 52679833 | food_povline | 33748 | 2018 | food_povline |
# by stratum
stratum_results <- dall %>%
group_by(year, stratum) %>%
summarise(P0 = mean(cons < povline), .groups = "drop")
# by region (can extend similar to above)
region_results <- dall %>%
group_by(year, region) %>%
summarise(P0 = mean(cons < povline), .groups = "drop")
stratum_results
region_results