used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
Ncells 613043 32.8 1386174 74.1 NA 715640 38.3
Vcells 1146983 8.8 8388608 64.0 16384 2010507 15.4
if(!require(tidyverse)) install.packages("tidyverse")
Loading required package: tidyverse
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.2.0 ✔ readr 2.1.6
✔ forcats 1.0.1 ✔ stringr 1.6.0
✔ ggplot2 4.0.1 ✔ tibble 3.3.1
✔ lubridate 1.9.5 ✔ tidyr 1.3.2
✔ purrr 1.2.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
if(!require(lmtest)) install.packages("lmtest")
Loading required package: lmtest
Loading required package: zoo
Attaching package: 'zoo'
The following objects are masked from 'package:base':
as.Date, as.Date.numeric
if(!require(sandwich)) install.packages("sandwich")
Loading required package: sandwich
if(!require(car)) install.packages("car")
Loading required package: car
Loading required package: carData
Attaching package: 'car'
The following object is masked from 'package:dplyr':
recode
The following object is masked from 'package:purrr':
some
if(!require(readxl)) install.packages("readxl")
Loading required package: readxl
if(!require(stargazer)) install.packages("stargazer")
Loading required package: stargazer
Please cite as:
Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
if(!require(ggplot2)) install.packages("ggplot2")
if(!require(patchwork)) install.packages("patchwork")
Loading required package: patchwork
if(!require(dplyr)) install.packages("dplyr")
if(!require(kableExtra)) install.packages("kableExtra")
Loading required package: kableExtra
Attaching package: 'kableExtra'
The following object is masked from 'package:dplyr':
group_rows
library(tidyverse)
library(lmtest)
library(sandwich)
library(car)
library(readxl)
library(stargazer)
library(ggplot2)
library(patchwork)
library(dplyr)
library(kableExtra)
#----------------------------------------------------------------------------------#
# DATA PREPARATION
apis_data <- read_excel("APIS.xlsx", sheet=1)
head(apis_data)
# A tibble: 6 × 275
REGN ID RFACT FSIZE PUFQ1 PUFQ2 PUFQ4 PUFQ4A PUFQ4B PUFQ4C PUFQ4D
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 12 1000100000000 2064. 3 2 3 222222 2 2 2 2
2 12 1000200000000 2064. 11 2 3 222222 2 2 2 2
3 12 1000300000000 2064. 12 2 3 222222 2 2 2 2
4 12 1000400000000 2064. 8 2 3 111111 1 1 1 1
5 12 1000500000000 2064. 3 2 3 111111 1 1 1 1
6 12 1000600000000 2064. 5 2 3 111111 1 1 1 1
# ℹ 264 more variables: PUFQ4E <dbl>, PUFQ4F <dbl>, PUFQ3 <dbl>, PUFQ3A <dbl>,
# PUFQ3B <dbl>, PUFQ3C <dbl>, PUFQ3D <dbl>, PUFEQ1 <dbl>, PUFEQ2 <dbl>,
# PUFEQ3 <dbl>, PUFEQ4A <dbl>, PUFEQ4B <dbl>, PUFEQ5 <dbl>, PUFEQ6A <dbl>,
# PUFEQ6B <dbl>, PUFEQ6C <dbl>, PUFEQ6D <dbl>, PUFEQ6E <dbl>, PUFEQ6F <dbl>,
# PUFEQ6G <dbl>, PUFEQ6H <dbl>, PUFEQ6I <dbl>, PUFEQ6J <dbl>, PUFEQ6K <dbl>,
# PUFEQ6L <dbl>, PUFEQ6M <dbl>, PUFEQ6N <dbl>, PUFEQ6O <dbl>, PUFFQ1 <dbl>,
# PUFFQ1A <dbl>, PUFFQ2 <dbl>, PUFFQ2A <dbl>, PUFFQ2B <dbl>, PUFGQ1 <dbl>, …
# checking for missing values:
# dependent variable
apis_data %>% count(is.na(FOOD))
# A tibble: 1 × 2
`is.na(FOOD)` n
<lgl> <int>
1 FALSE 10332
apis_data %>% count(is.na(MEAT))
# A tibble: 1 × 2
`is.na(MEAT)` n
<lgl> <int>
1 FALSE 10332
apis_data %>% count(is.na(FISH))
# A tibble: 1 × 2
`is.na(FISH)` n
<lgl> <int>
1 FALSE 10332
# independent variables
apis_data %>% count(is.na(PUFFQ1))
# A tibble: 1 × 2
`is.na(PUFFQ1)` n
<lgl> <int>
1 FALSE 10332
apis_data %>% count(is.na(TOINC))
# A tibble: 1 × 2
`is.na(TOINC)` n
<lgl> <int>
1 FALSE 10332
# controls: socioeconomic characteristics
apis_data %>% count(is.na(EDUCATION))
# A tibble: 1 × 2
`is.na(EDUCATION)` n
<lgl> <int>
1 FALSE 10332
apis_data %>% count(is.na(PUFH05_AGE))
# A tibble: 1 × 2
`is.na(PUFH05_AGE)` n
<lgl> <int>
1 FALSE 10332
apis_data %>% count(is.na(FSIZE))
# A tibble: 1 × 2
`is.na(FSIZE)` n
<lgl> <int>
1 FALSE 10332
apis_data %>% count(is.na(PUFCHLD_6_11))
# A tibble: 1 × 2
`is.na(PUFCHLD_6_11)` n
<lgl> <int>
1 FALSE 10332
# controls: time poverty and gendered labor
apis_data %>% count(is.na(PUFWRK_HEAD))
# A tibble: 1 × 2
`is.na(PUFWRK_HEAD)` n
<lgl> <int>
1 FALSE 10332
apis_data %>% count(is.na(PUFH04_SEX))
# A tibble: 1 × 2
`is.na(PUFH04_SEX)` n
<lgl> <int>
1 FALSE 10332
# controls: household infrastructure
# a. food storage
apis_data %>% count(is.na(PUFEQ6G))
# A tibble: 1 × 2
`is.na(PUFEQ6G)` n
<lgl> <int>
1 FALSE 10332
apis_data %>% count(is.na(PUFEQ5))
# A tibble: 1 × 2
`is.na(PUFEQ5)` n
<lgl> <int>
1 FALSE 10332
# b. housing quality
apis_data %>% count(is.na(PUFEQ2))
# A tibble: 1 × 2
`is.na(PUFEQ2)` n
<lgl> <int>
1 FALSE 10332
apis_data %>% count(is.na(PUFEQ3))
# A tibble: 1 × 2
`is.na(PUFEQ3)` n
<lgl> <int>
1 FALSE 10332
apis_data %>% count(is.na(PUFEQ4B))
# A tibble: 1 × 2
`is.na(PUFEQ4B)` n
<lgl> <int>
1 FALSE 10332
# c. sanitation infrastructure
apis_data %>% count(is.na(PUFFQ2))
# A tibble: 1 × 2
`is.na(PUFFQ2)` n
<lgl> <int>
1 FALSE 10332
# d. asset-based urban proxies
apis_data %>% count(is.na(PUFEQ6D))
# A tibble: 1 × 2
`is.na(PUFEQ6D)` n
<lgl> <int>
1 FALSE 10332
apis_data %>% count(is.na(PUFEQ6J))
# A tibble: 1 × 2
`is.na(PUFEQ6J)` n
<lgl> <int>
1 FALSE 10332
apis_data %>% count(is.na(PUFEQ6H))
# A tibble: 1 × 2
`is.na(PUFEQ6H)` n
<lgl> <int>
1 FALSE 10332
# controls: market access
apis_data %>% count(is.na(TRANSPORT))
# A tibble: 1 × 2
`is.na(TRANSPORT)` n
<lgl> <int>
1 FALSE 10332
apis_data %>% count(is.na(FOOD_OUTSIDE))
# A tibble: 1 × 2
`is.na(FOOD_OUTSIDE)` n
<lgl> <int>
1 FALSE 10332
# fixed effects
apis_data %>% count(is.na(REGN))
# A tibble: 1 × 2
`is.na(REGN)` n
<lgl> <int>
1 FALSE 10332
apis_data %>% count(is.na(PUFEQ1))
# A tibble: 1 × 2
`is.na(PUFEQ1)` n
<lgl> <int>
1 FALSE 10332
# other variables
apis_data %>% count(is.na(BREAD))
# A tibble: 1 × 2
`is.na(BREAD)` n
<lgl> <int>
1 FALSE 10332
#----------------------------------------------------------------------------------#
# DATA CLEANING
# categorizing the highest grade completed (HGC) variable
# we convert HGC into a ratio-level data: the number of years completed in school
map_educ_full <- function(hgc) {
h <- as.numeric(hgc)
# no schooling / preschool / SPED
if (h == 0 || h == 1 || h == 2 || h == 10 || (h >= 191 && h <= 192)) return(0)
# elementary (old curriculum and k-12)
if (h >= 110 && h <= 170) return((h - 100) / 10)
if (h >= 410 && h <= 460) return((h - 400) / 10)
# secondary / high school:
# old curriculum HS (grade 7)
if (h == 180) return(7)
# old curriculum HS (1st year-4th year)
if (h >= 210 && h <= 250) return(6 + (h - 200) / 10)
# K-12 HS
if (h >= 470 && h <= 520) return((h - 400) / 10)
# post-secondary / vocational
if ((h >= 310 && h <= 320) || (h >= 601 && h <= 689)) return(14)
# college undergrad
if (h >= 710 && h <= 760) return(12 + (h - 700) / 10)
# college graduate
if (h >= 801 && h <= 889) return(16)
# graduate school - masters
if (h >= 910 && h <= 920) return(18)
# graduate school - doctorate
if (h >= 930 && h <= 940) return(20)
}
v_map_educ <- Vectorize(map_educ_full)
#----------------------------------------------------------------------------------#
# creating the needed variables
# we filter only for protein-consuming households
apis_final <- apis_data %>%
filter(FOOD>0 & MEAT>0 & FISH>0) %>%
mutate(
# mean-center transformations for the interactions
log_income_c = log(TOINC) - mean(log(TOINC)),
family_size_c = FSIZE - mean(FSIZE),
# dependent variable
protein_share = (MEAT+FISH)/FOOD,
# main regressors
water_source = ifelse(PUFFQ1==1,1,0),
water_income_interac = water_source * log_income_c,
water_family_interac = water_source * family_size_c,
log_income = log(TOINC),
family_size = FSIZE,
# vector of controls (socioeconomic characteristics)
educ_head = v_map_educ(PUFH12_HGC),
age_head = PUFH05_AGE,
has_kids = ifelse(PUFCHLD_6_11 == 1,1,0),
# vector of controls (time constraints and gender roles)
head_works = ifelse(PUFWRK_HEAD == 1, 1, 0),
female_head = ifelse(PUFH04_SEX == 2,1,0),
work_female_interac = ifelse(head_works == 1 & female_head == 1, 1, 0),
# vector of controls (household infrastructure characteristics)
# a. food storage
has_fridge = ifelse(PUFEQ6G > 0, 1, 0),
has_electricity = ifelse(PUFEQ5 == 1, 1, 0),
# b. housing quality
roof = ifelse(PUFEQ2 == 1 | PUFEQ2 == 4, 1, 0),
wall = ifelse(PUFEQ3 == 1 | PUFEQ3 == 4, 1, 0),
floor = PUFEQ4B,
# c. sanitation infrastructure
toilet = ifelse(PUFFQ2 == 1, 1, 0),
# d. asset-based urban proxies
aircon = ifelse(PUFEQ6D > 0, 1, 0),
telephone = ifelse(PUFEQ6J > 0, 1, 0),
computer = ifelse(PUFEQ6H > 0, 1, 0),
# vector of controls (market access)
transportation = TRANSPORT,
food_outside = ifelse(FOOD_OUTSIDE > median(FOOD_OUTSIDE, na.rm=T), 1, 0),
# fixed effects
region = as.factor(REGN),
building = as.factor(PUFEQ1),
# falsification variable
bread_share = BREAD/FOOD,
)
# checking the final number of observations
str(apis_final$water_source)
num [1:9994] 0 0 1 0 0 0 0 1 1 1 ...
# MAIN ANALYSIS
# ols model (protein share)
bennett_ols_protein <- lm(protein_share ~
water_source +
water_income_interac +
water_family_interac +
log_income_c +
family_size_c +
# control vec. 1
educ_head +
age_head +
I(age_head^2) +
has_kids +
# control vec. 2
head_works +
female_head +
work_female_interac +
# control vec. 3
has_fridge +
has_electricity +
roof +
wall +
floor +
toilet +
aircon +
telephone +
computer +
# control vec. 4
transportation +
food_outside +
# fixed effects
region +
building,
data = apis_final
)
summary(bennett_ols_protein)
Call:
lm(formula = protein_share ~ water_source + water_income_interac +
water_family_interac + log_income_c + family_size_c + educ_head +
age_head + I(age_head^2) + has_kids + head_works + female_head +
work_female_interac + has_fridge + has_electricity + roof +
wall + floor + toilet + aircon + telephone + computer + transportation +
food_outside + region + building, data = apis_final)
Residuals:
Min 1Q Median 3Q Max
-0.28461 -0.06364 -0.00649 0.05693 0.50263
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.357e-01 1.158e-02 20.356 < 2e-16 ***
water_source 8.891e-03 2.314e-03 3.842 0.000123 ***
water_income_interac -1.185e-02 3.059e-03 -3.874 0.000108 ***
water_family_interac 8.295e-03 9.127e-04 9.088 < 2e-16 ***
log_income_c 2.847e-02 2.487e-03 11.446 < 2e-16 ***
family_size_c -8.118e-03 6.851e-04 -11.850 < 2e-16 ***
educ_head 2.564e-04 2.686e-04 0.955 0.339718
age_head 1.840e-03 3.804e-04 4.837 1.34e-06 ***
I(age_head^2) -1.690e-05 3.809e-06 -4.438 9.19e-06 ***
has_kids 3.740e-03 2.120e-03 1.765 0.077667 .
head_works 7.276e-03 3.623e-03 2.008 0.044640 *
female_head 5.399e-04 4.492e-03 0.120 0.904327
work_female_interac -1.524e-02 5.335e-03 -2.856 0.004300 **
has_fridge 1.328e-02 2.539e-03 5.230 1.73e-07 ***
has_electricity -8.626e-03 3.620e-03 -2.383 0.017195 *
roof 2.763e-03 3.178e-03 0.869 0.384690
wall 1.288e-03 2.727e-03 0.472 0.636773
floor 9.111e-05 2.518e-05 3.618 0.000299 ***
toilet 7.996e-03 2.960e-03 2.702 0.006911 **
aircon -5.669e-03 3.890e-03 -1.457 0.145109
telephone -3.685e-03 4.280e-03 -0.861 0.389214
computer 2.033e-03 2.905e-03 0.700 0.484133
transportation -3.259e-07 1.036e-07 -3.147 0.001656 **
food_outside -5.112e-02 2.303e-03 -22.196 < 2e-16 ***
region2 7.411e-04 6.455e-03 0.115 0.908598
region3 9.082e-03 5.705e-03 1.592 0.111468
region4 -1.565e-02 6.255e-03 -2.501 0.012393 *
region5 -1.973e-02 6.196e-03 -3.185 0.001452 **
region6 -2.125e-02 5.884e-03 -3.612 0.000305 ***
region7 -2.415e-02 6.175e-03 -3.912 9.23e-05 ***
region8 -2.882e-02 5.993e-03 -4.810 1.53e-06 ***
region9 -4.084e-02 6.717e-03 -6.081 1.24e-09 ***
region10 -6.027e-02 6.037e-03 -9.984 < 2e-16 ***
region11 1.220e-02 6.204e-03 1.967 0.049226 *
region12 -4.685e-02 6.487e-03 -7.223 5.48e-13 ***
region13 -5.046e-02 5.587e-03 -9.031 < 2e-16 ***
region14 -5.015e-02 6.044e-03 -8.298 < 2e-16 ***
region15 -5.497e-02 6.634e-03 -8.286 < 2e-16 ***
region16 -3.241e-02 6.270e-03 -5.169 2.40e-07 ***
region17 -2.224e-02 6.191e-03 -3.593 0.000329 ***
building2 -5.064e-03 6.148e-03 -0.824 0.410106
building3 -9.761e-03 4.564e-03 -2.139 0.032482 *
building4 8.837e-05 1.632e-02 0.005 0.995679
building5 -2.005e-02 6.396e-02 -0.314 0.753892
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.09016 on 9950 degrees of freedom
Multiple R-squared: 0.1404, Adjusted R-squared: 0.1367
F-statistic: 37.79 on 43 and 9950 DF, p-value: < 2.2e-16
print(coeftest(bennett_ols_protein,
vcov = vcovCL(bennett_ols_protein, cluster = ~region)))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.3575e-01 1.2206e-02 19.3147 < 2.2e-16 ***
water_source 8.8914e-03 3.2795e-03 2.7112 0.0067152 **
water_income_interac -1.1850e-02 6.5465e-03 -1.8101 0.0703041 .
water_family_interac 8.2948e-03 1.8867e-03 4.3965 1.111e-05 ***
log_income_c 2.8467e-02 4.3136e-03 6.5994 4.339e-11 ***
family_size_c -8.1179e-03 9.2493e-04 -8.7768 < 2.2e-16 ***
educ_head 2.5645e-04 4.1849e-04 0.6128 0.5400295
age_head 1.8399e-03 4.4920e-04 4.0959 4.238e-05 ***
I(age_head^2) -1.6903e-05 4.1802e-06 -4.0435 5.306e-05 ***
has_kids 3.7402e-03 2.2181e-03 1.6862 0.0917850 .
head_works 7.2758e-03 4.6171e-03 1.5759 0.1150911
female_head 5.3991e-04 4.4117e-03 0.1224 0.9025989
work_female_interac -1.5237e-02 6.1205e-03 -2.4895 0.0128082 *
has_fridge 1.3279e-02 2.1215e-03 6.2595 4.022e-10 ***
has_electricity -8.6256e-03 5.8751e-03 -1.4681 0.1420965
roof 2.7626e-03 4.7025e-03 0.5875 0.5568942
wall 1.2876e-03 4.0648e-03 0.3168 0.7514225
floor 9.1109e-05 4.8823e-05 1.8661 0.0620542 .
toilet 7.9957e-03 6.7984e-03 1.1761 0.2395771
aircon -5.6687e-03 5.5658e-03 -1.0185 0.3084741
telephone -3.6854e-03 3.1953e-03 -1.1534 0.2487815
computer 2.0328e-03 3.4419e-03 0.5906 0.5547881
transportation -3.2593e-07 8.8200e-08 -3.6953 0.0002208 ***
food_outside -5.1121e-02 3.7971e-03 -13.4633 < 2.2e-16 ***
region2 7.4107e-04 1.4721e-03 0.5034 0.6146894
region3 9.0818e-03 1.9468e-03 4.6651 3.125e-06 ***
region4 -1.5646e-02 2.4244e-03 -6.4537 1.143e-10 ***
region5 -1.9735e-02 2.4769e-03 -7.9677 1.794e-15 ***
region6 -2.1255e-02 2.0265e-03 -10.4886 < 2.2e-16 ***
region7 -2.4154e-02 2.6786e-03 -9.0173 < 2.2e-16 ***
region8 -2.8822e-02 2.2326e-03 -12.9099 < 2.2e-16 ***
region9 -4.0843e-02 2.6208e-03 -15.5838 < 2.2e-16 ***
region10 -6.0266e-02 2.2135e-03 -27.2268 < 2.2e-16 ***
region11 1.2203e-02 3.2525e-03 3.7518 0.0001765 ***
region12 -4.6850e-02 2.8371e-03 -16.5135 < 2.2e-16 ***
region13 -5.0458e-02 4.6498e-03 -10.8517 < 2.2e-16 ***
region14 -5.0152e-02 1.9416e-03 -25.8300 < 2.2e-16 ***
region15 -5.4972e-02 5.8262e-03 -9.4354 < 2.2e-16 ***
region16 -3.2410e-02 2.4215e-03 -13.3843 < 2.2e-16 ***
region17 -2.2244e-02 2.6988e-03 -8.2422 < 2.2e-16 ***
building2 -5.0645e-03 5.7400e-03 -0.8823 0.3776263
building3 -9.7613e-03 3.0800e-03 -3.1693 0.0015326 **
building4 8.8367e-05 2.3391e-02 0.0038 0.9969859
building5 -2.0052e-02 1.5769e-01 -0.1272 0.8988117
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# assumption diagnostics
vif(bennett_ols_protein)
GVIF Df GVIF^(1/(2*Df))
water_source 1.626700 1 1.275422
water_income_interac 3.221646 1 1.794895
water_family_interac 2.006816 1 1.416621
log_income_c 4.511720 1 2.124081
family_size_c 2.588251 1 1.608804
educ_head 1.694844 1 1.301862
age_head 38.545977 1 6.208541
I(age_head^2) 40.176003 1 6.338454
has_kids 1.345954 1 1.160153
head_works 2.204835 1 1.484869
female_head 3.972155 1 1.993027
work_female_interac 3.726701 1 1.930467
has_fridge 1.779105 1 1.333831
has_electricity 1.347734 1 1.160919
roof 1.621014 1 1.273191
wall 1.718226 1 1.310811
floor 1.383571 1 1.176253
toilet 1.547047 1 1.243804
aircon 1.648638 1 1.283993
telephone 1.345900 1 1.160129
computer 1.770477 1 1.330593
transportation 1.427557 1 1.194804
food_outside 1.630232 1 1.276805
region 2.743580 16 1.032042
building 1.209854 4 1.024098
bgtest(bennett_ols_protein)
Breusch-Godfrey test for serial correlation of order up to 1
data: bennett_ols_protein
LM test = 707.67, df = 1, p-value < 2.2e-16
bptest(bennett_ols_protein)
studentized Breusch-Pagan test
data: bennett_ols_protein
BP = 292.63, df = 43, p-value < 2.2e-16
resettest(bennett_ols_protein,
power = 2:3,
type = "fitted",
vcov = function(x) vcovCL(x, cluster = ~apis_final$region))
RESET test
data: bennett_ols_protein
RESET = 0.45302, df1 = 2, df2 = 9950, p-value = 0.6357
# we initially used JB test for normality of residuals
# but this failed due to the sensitivity of large samples,
# wherein even minor, economically insignificant deviations from normality
# lead to a rejection of the null.
tseries::jarque.bera.test(residuals(bennett_ols_protein))
Registered S3 method overwritten by 'quantmod':
method from
as.zoo.data.frame zoo
Jarque Bera Test
data: residuals(bennett_ols_protein)
X-squared = 369.64, df = 2, p-value < 2.2e-16
# we turn to visual diagnostics
hist(residuals(bennett_ols_protein), main="Histogram of Residuals", breaks=50)
#----------------------------------------------------------------------------------#
# ROBUSTNESS TABLE
# 1. define the 5 robustness models (Same as before)
m1 <- lm(protein_share ~ water_source + region + building, data = apis_final)
m2 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac +
log_income_c + family_size_c +
educ_head + age_head + I(age_head^2) + has_kids +
region + building, data = apis_final)
m3 <- lm(protein_share ~ water_source + log_income_c + family_size_c +
water_income_interac + water_family_interac +
educ_head + age_head + I(age_head^2) + has_kids +
head_works + female_head + work_female_interac +
region + building, data = apis_final)
m4 <- lm(protein_share ~ water_source + log_income_c + family_size_c +
water_income_interac + water_family_interac +
educ_head + age_head + I(age_head^2) + has_kids +
head_works + female_head + work_female_interac +
transportation + food_outside +
has_fridge + has_electricity + roof + wall + floor + toilet +
region + building, data = apis_final)
m5 <- lm(protein_share ~ water_source + log_income_c + family_size_c +
water_income_interac + water_family_interac +
educ_head + age_head + I(age_head^2) + has_kids +
head_works + female_head + work_female_interac +
transportation + food_outside +
has_fridge + has_electricity + roof + wall + floor + toilet +
aircon + telephone + computer +
region + building, data = apis_final)
# 2. extract clustered standard errors
se_1 <- coeftest(m1, vcov = vcovCL(m1, cluster = ~region))[, 2]
se_2 <- coeftest(m2, vcov = vcovCL(m2, cluster = ~region))[, 2]
se_3 <- coeftest(m3, vcov = vcovCL(m3, cluster = ~region))[, 2]
se_4 <- coeftest(m4, vcov = vcovCL(m4, cluster = ~region))[, 2]
se_5 <- coeftest(m5, vcov = vcovCL(m5, cluster = ~region))[, 2]
# 3. create the final table for robustness
stargazer(m1, m2, m3, m4, m5,
type = "text",
se = list(se_1, se_2, se_3, se_4, se_5),
intercept.bottom = FALSE,
digits = 4,
no.space = TRUE,
order = c("Constant", "water_source", "water_income_interac",
"water_family_interac", "log_income_c", "family_size_c"),
omit = c("region", "building"),
column.labels = c("Bivariate", "Socioecon", "Time Poverty", "Infra", "Full"),
add.lines = list(
c("Region Fixed Effects", "Yes", "Yes", "Yes", "Yes", "Yes"),
c("Building Fixed Effects", "Yes", "Yes", "Yes", "Yes", "Yes")
),
dep.var.caption = "Dependent variable: Protein share (protein-consuming households)",
dep.var.labels.include = FALSE,
model.numbers = TRUE,
star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
star.char = c("+", "*", "**", "***"),
notes = c("Clustered standard errors (at Region level) reported in parentheses.",
"Significance: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001",
"(1): Bivariate",
"(2): + Main interactions and socioeconomic indicators",
"(3): + Time poverty indicators",
"(4): + Household infrastructure and market access",
"(5): + Urban indicator proxies (full model)"),
notes.append = FALSE
)
=============================================================================================================================================================
Dependent variable: Protein share (protein-consuming households)
--------------------------------------------------------------------------------------------------------------------------------------
Bivariate Socioecon Time Poverty Infra Full
(1) (2) (3) (4) (5)
-------------------------------------------------------------------------------------------------------------------------------------------------------------
Constant 0.2697*** 0.2179*** 0.2119*** 0.2358*** 0.2357***
(0.0010) (0.0119) (0.0111) (0.0120) (0.0122)
water_source 0.0186*** 0.0082* 0.0089* 0.0088** 0.0089**
(0.0037) (0.0038) (0.0037) (0.0033) (0.0033)
water_income_interac -0.0102 -0.0099 -0.0131+ -0.0119+
(0.0064) (0.0065) (0.0073) (0.0065)
water_family_interac 0.0081*** 0.0079*** 0.0084*** 0.0083***
(0.0020) (0.0020) (0.0020) (0.0019)
log_income_c 0.0204*** 0.0202*** 0.0285*** 0.0285***
(0.0037) (0.0037) (0.0040) (0.0043)
family_size_c -0.0089*** -0.0092*** -0.0081*** -0.0081***
(0.0010) (0.0010) (0.0009) (0.0009)
educ_head 0.0002 0.0003 0.0002 0.0003
(0.0004) (0.0004) (0.0004) (0.0004)
age_head 0.0019*** 0.0019*** 0.0019*** 0.0018***
(0.0004) (0.0005) (0.0004) (0.0004)
I(age_head2) -0.00002*** -0.00002*** -0.00002*** -0.00002***
(0.000003) (0.000004) (0.000004) (0.000004)
has_kids 0.0026 0.0030 0.0036+ 0.0037+
(0.0023) (0.0023) (0.0022) (0.0022)
head_works 0.0060 0.0074 0.0073
(0.0049) (0.0046) (0.0046)
female_head -0.0012 0.0004 0.0005
(0.0044) (0.0044) (0.0044)
work_female_interac -0.0148* -0.0151* -0.0152*
(0.0065) (0.0062) (0.0061)
transportation -0.000000*** -0.000000***
(0.000000) (0.000000)
food_outside -0.0509*** -0.0511***
(0.0039) (0.0038)
has_fridge 0.0131*** 0.0133***
(0.0025) (0.0021)
has_electricity -0.0085 -0.0086
(0.0058) (0.0059)
roof 0.0028 0.0028
(0.0047) (0.0047)
wall 0.0013 0.0013
(0.0041) (0.0041)
floor 0.0001+ 0.0001+
(0.00005) (0.00005)
toilet 0.0080 0.0080
(0.0068) (0.0068)
aircon -0.0057
(0.0056)
telephone -0.0037
(0.0032)
computer 0.0020
(0.0034)
-------------------------------------------------------------------------------------------------------------------------------------------------------------
Region Fixed Effects Yes Yes Yes Yes Yes
Building Fixed Effects Yes Yes Yes Yes Yes
Observations 9,994 9,994 9,994 9,994 9,994
R2 0.0636 0.0888 0.0915 0.1401 0.1404
Adjusted R2 0.0616 0.0861 0.0886 0.1366 0.1367
Residual Std. Error 0.0940 (df = 9972) 0.0928 (df = 9964) 0.0926 (df = 9961) 0.0902 (df = 9953) 0.0902 (df = 9950)
F Statistic 32.2460*** (df = 21; 9972) 33.4644*** (df = 29; 9964) 31.3577*** (df = 32; 9961) 40.5330*** (df = 40; 9953) 37.7897*** (df = 43; 9950)
=============================================================================================================================================================
Note: Clustered standard errors (at Region level) reported in parentheses.
Significance: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001
(1): Bivariate
(2): + Main interactions and socioeconomic indicators
(3): + Time poverty indicators
(4): + Household infrastructure and market access
(5): + Urban indicator proxies (full model)
#----------------------------------------------------------------------------------#
# formal oster test: comparing m4 vs m5
# 1. extract stats for the transition from m3 -> m4
beta_3 <- coef(m3)["water_source"]
beta_4 <- coef(m4)["water_source"]
r2_3 <- summary(m3)$r.squared
r2_4 <- summary(m4)$r.squared
# 2. extract stats for the transition from m4 -> m5
beta_5 <- coef(m5)["water_source"]
r2_5 <- summary(m5)$r.squared
# 3. calculate the "oster efficiency" (movement in beta relative to r^2 gain)
# a high ratio in m4 means the controls were informative.
# a tiny ratio in m5 means the controls are somewhat redundant.
gain_m4 <- (r2_4 - r2_3)
gain_m5 <- (r2_5 - r2_4)
shift_m4 <- abs(beta_4 - beta_3)
shift_m5 <- abs(beta_5 - beta_4)
cat("--- oster informational gain comparison ---\n")
--- oster informational gain comparison ---
cat("m3 to m4: r2 gain =", round(gain_m4, 5),
"| beta shift =", round(shift_m4, 5), "\n")
m3 to m4: r2 gain = 0.04856 | beta shift = 4e-05
cat("m4 to m5: r2 gain =", round(gain_m5, 5),
"| beta shift =", round(shift_m5, 5), "\n\n")
m4 to m5: r2 gain = 0.00031 | beta shift = 8e-05
# 4. the formal delta check for m4
r2_max <- 1.3 * r2_4
delta_m4 <- (beta_4 * (r2_4 - r2_3)) / ((beta_3 - beta_4) * (r2_max - r2_4))
cat("formal oster delta for model 4:", round(delta_m4, 4), "\n")
formal oster delta for model 4: 263.5139
cat("interpretation: because delta is already << -1 (or >> 1), \n")
interpretation: because delta is already << -1 (or >> 1),
cat("selection on unobservables is already statistically accounted for.\n")
selection on unobservables is already statistically accounted for.
# we use m4 as the final model
# ols model (bread share)
bennett_ols_bread <- lm(bread_share ~
water_source +
water_income_interac +
water_family_interac +
log_income_c +
family_size_c +
# control vec. 1
educ_head +
age_head +
I(age_head^2) +
has_kids +
# control vec. 2
head_works +
female_head +
work_female_interac +
# control vec. 3
has_fridge +
has_electricity +
roof +
wall +
floor +
toilet +
# control vec. 4
transportation +
food_outside +
# fixed effects
region +
building,
data = apis_final
)
summary(bennett_ols_bread)
Call:
lm(formula = bread_share ~ water_source + water_income_interac +
water_family_interac + log_income_c + family_size_c + educ_head +
age_head + I(age_head^2) + has_kids + head_works + female_head +
work_female_interac + has_fridge + has_electricity + roof +
wall + floor + toilet + transportation + food_outside + region +
building, data = apis_final)
Residuals:
Min 1Q Median 3Q Max
-0.49076 -0.05828 -0.00439 0.05566 0.41737
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.822e-01 1.158e-02 24.369 < 2e-16 ***
water_source -9.813e-03 2.312e-03 -4.244 2.22e-05 ***
water_income_interac 1.044e-03 2.938e-03 0.355 0.722332
water_family_interac -5.625e-03 9.096e-04 -6.184 6.51e-10 ***
log_income_c -5.109e-02 2.465e-03 -20.725 < 2e-16 ***
family_size_c 2.059e-02 6.848e-04 30.069 < 2e-16 ***
educ_head -5.451e-04 2.667e-04 -2.044 0.040973 *
age_head 2.391e-03 3.804e-04 6.286 3.40e-10 ***
I(age_head^2) -1.826e-05 3.808e-06 -4.794 1.66e-06 ***
has_kids 3.260e-03 2.119e-03 1.538 0.123976
head_works 2.896e-03 3.618e-03 0.801 0.423425
female_head -4.986e-03 4.492e-03 -1.110 0.266985
work_female_interac 2.339e-03 5.336e-03 0.438 0.661164
has_fridge -1.058e-02 2.476e-03 -4.273 1.95e-05 ***
has_electricity 9.212e-03 3.618e-03 2.546 0.010914 *
roof 2.250e-03 3.178e-03 0.708 0.478931
wall -7.449e-03 2.727e-03 -2.732 0.006314 **
floor -5.381e-05 2.491e-05 -2.160 0.030811 *
toilet -2.366e-02 2.960e-03 -7.994 1.45e-15 ***
transportation -4.522e-07 1.033e-07 -4.378 1.21e-05 ***
food_outside -6.004e-02 2.300e-03 -26.105 < 2e-16 ***
region2 4.699e-03 6.454e-03 0.728 0.466558
region3 -1.069e-02 5.702e-03 -1.874 0.060951 .
region4 3.881e-03 6.253e-03 0.621 0.534801
region5 1.644e-02 6.197e-03 2.652 0.008003 **
region6 2.529e-02 5.881e-03 4.301 1.72e-05 ***
region7 3.822e-02 6.171e-03 6.194 6.10e-10 ***
region8 5.714e-02 5.992e-03 9.535 < 2e-16 ***
region9 6.801e-02 6.718e-03 10.124 < 2e-16 ***
region10 8.751e-02 6.036e-03 14.498 < 2e-16 ***
region11 3.013e-02 6.203e-03 4.857 1.21e-06 ***
region12 6.341e-02 6.487e-03 9.775 < 2e-16 ***
region13 -2.156e-02 5.569e-03 -3.871 0.000109 ***
region14 6.263e-02 6.041e-03 10.368 < 2e-16 ***
region15 3.207e-02 6.634e-03 4.834 1.36e-06 ***
region16 6.525e-02 6.264e-03 10.418 < 2e-16 ***
region17 5.407e-02 6.193e-03 8.731 < 2e-16 ***
building2 -4.139e-03 6.147e-03 -0.673 0.500772
building3 -9.932e-03 4.563e-03 -2.177 0.029521 *
building4 -4.412e-03 1.632e-02 -0.270 0.786924
building5 1.429e-02 6.397e-02 0.223 0.823187
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.09019 on 9953 degrees of freedom
Multiple R-squared: 0.5291, Adjusted R-squared: 0.5272
F-statistic: 279.6 on 40 and 9953 DF, p-value: < 2.2e-16
print(coeftest(bennett_ols_bread,
vcov = vcovCL(bennett_ols_bread, cluster = ~region)))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.8216e-01 1.4268e-02 19.7762 < 2.2e-16 ***
water_source -9.8135e-03 3.2318e-03 -3.0365 0.0023993 **
water_income_interac 1.0440e-03 5.3254e-03 0.1960 0.8445778
water_family_interac -5.6247e-03 2.2533e-03 -2.4962 0.0125686 *
log_income_c -5.1086e-02 3.2948e-03 -15.5050 < 2.2e-16 ***
family_size_c 2.0590e-02 1.0817e-03 19.0347 < 2.2e-16 ***
educ_head -5.4508e-04 4.6742e-04 -1.1661 0.2435906
age_head 2.3909e-03 5.4225e-04 4.4092 1.048e-05 ***
I(age_head^2) -1.8256e-05 5.1373e-06 -3.5535 0.0003818 ***
has_kids 3.2604e-03 2.2367e-03 1.4577 0.1449621
head_works 2.8965e-03 4.9977e-03 0.5796 0.5622215
female_head -4.9863e-03 5.2253e-03 -0.9543 0.3399722
work_female_interac 2.3390e-03 7.3363e-03 0.3188 0.7498697
has_fridge -1.0579e-02 2.9678e-03 -3.5647 0.0003660 ***
has_electricity 9.2122e-03 8.8057e-03 1.0462 0.2955129
roof 2.2503e-03 5.0809e-03 0.4429 0.6578480
wall -7.4492e-03 4.3712e-03 -1.7042 0.0883796 .
floor -5.3808e-05 4.2536e-05 -1.2650 0.2059062
toilet -2.3658e-02 7.1976e-03 -3.2869 0.0010165 **
transportation -4.5215e-07 1.2429e-07 -3.6380 0.0002762 ***
food_outside -6.0040e-02 3.7013e-03 -16.2214 < 2.2e-16 ***
region2 4.6994e-03 1.4268e-03 3.2936 0.0009927 ***
region3 -1.0686e-02 1.1549e-03 -9.2525 < 2.2e-16 ***
region4 3.8812e-03 1.6991e-03 2.2842 0.0223799 *
region5 1.6438e-02 2.5158e-03 6.5339 6.721e-11 ***
region6 2.5294e-02 1.7635e-03 14.3433 < 2.2e-16 ***
region7 3.8221e-02 1.6418e-03 23.2802 < 2.2e-16 ***
region8 5.7136e-02 2.0991e-03 27.2197 < 2.2e-16 ***
region9 6.8009e-02 2.6757e-03 25.4173 < 2.2e-16 ***
region10 8.7514e-02 2.0537e-03 42.6121 < 2.2e-16 ***
region11 3.0126e-02 2.6721e-03 11.2744 < 2.2e-16 ***
region12 6.3410e-02 2.1592e-03 29.3675 < 2.2e-16 ***
region13 -2.1555e-02 2.7686e-03 -7.7856 7.627e-15 ***
region14 6.2632e-02 1.9128e-03 32.7427 < 2.2e-16 ***
region15 3.2071e-02 5.8804e-03 5.4539 5.046e-08 ***
region16 6.5253e-02 2.1996e-03 29.6660 < 2.2e-16 ***
region17 5.4067e-02 2.8559e-03 18.9320 < 2.2e-16 ***
building2 -4.1390e-03 5.7112e-03 -0.7247 0.4686383
building3 -9.9325e-03 4.9848e-03 -1.9926 0.0463358 *
building4 -4.4115e-03 1.0931e-02 -0.4036 0.6865180
building5 1.4294e-02 7.7223e-02 0.1851 0.8531524
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# assumption diagnostics
vif(bennett_ols_bread)
GVIF Df GVIF^(1/(2*Df))
water_source 1.623243 1 1.274065
water_income_interac 2.969923 1 1.723347
water_family_interac 1.991939 1 1.411361
log_income_c 4.429597 1 2.104661
family_size_c 2.584313 1 1.607580
educ_head 1.669494 1 1.292089
age_head 38.521056 1 6.206533
I(age_head^2) 40.128468 1 6.334703
has_kids 1.344721 1 1.159621
head_works 2.197742 1 1.482478
female_head 3.969691 1 1.992408
work_female_interac 3.725754 1 1.930221
has_fridge 1.690478 1 1.300184
has_electricity 1.345908 1 1.160133
roof 1.620328 1 1.272921
wall 1.717634 1 1.310585
floor 1.353128 1 1.163240
toilet 1.546075 1 1.243413
transportation 1.418134 1 1.190854
food_outside 1.624596 1 1.274596
region 2.625678 16 1.030626
building 1.206677 4 1.023762
bgtest(bennett_ols_bread)
Breusch-Godfrey test for serial correlation of order up to 1
data: bennett_ols_bread
LM test = 571.12, df = 1, p-value < 2.2e-16
bptest(bennett_ols_bread)
studentized Breusch-Pagan test
data: bennett_ols_bread
BP = 651.39, df = 40, p-value < 2.2e-16
resettest(bennett_ols_bread,
power = 2:3,
type = "fitted",
vcov = function(x) vcovCL(x, cluster = ~apis_final$region))
RESET test
data: bennett_ols_bread
RESET = 34.925, df1 = 2, df2 = 9953, p-value = 7.678e-16
hist(residuals(bennett_ols_bread), main="Histogram of Residuals", breaks=50)
#----------------------------------------------------------------------------------#
# creating a robustness table
# define the 4 models for bread_share
b1 <- lm(bread_share ~ water_source + region + building, data = apis_final)
b2 <- lm(bread_share ~ water_source + water_income_interac + water_family_interac +
log_income_c + family_size_c +
educ_head + age_head + I(age_head^2) + has_kids +
region + building, data = apis_final)
b3 <- lm(bread_share ~ water_source + log_income_c + family_size_c +
water_income_interac + water_family_interac +
educ_head + age_head + I(age_head^2) + has_kids +
head_works + female_head + work_female_interac +
region + building, data = apis_final)
b4 <- lm(bread_share ~ water_source + log_income_c + family_size_c +
water_income_interac + water_family_interac +
educ_head + age_head + I(age_head^2) + has_kids +
head_works + female_head + work_female_interac +
transportation + food_outside +
has_fridge + has_electricity + roof + wall + floor + toilet +
region + building, data = apis_final)
# extract clustered standard errors (at region level)
seb_1 <- coeftest(b1, vcov = vcovCL(b1, cluster = ~region))[, 2]
seb_2 <- coeftest(b2, vcov = vcovCL(b2, cluster = ~region))[, 2]
seb_3 <- coeftest(b3, vcov = vcovCL(b3, cluster = ~region))[, 2]
seb_4 <- coeftest(b4, vcov = vcovCL(b4, cluster = ~region))[, 2]
# create the final table
stargazer(b1, b2, b3, b4,
type = "text",
se = list(seb_1, seb_2, seb_3, seb_4),
intercept.bottom = FALSE,
digits = 4,
no.space = TRUE,
order = c("Constant", "water_source", "water_income_interac",
"water_family_interac", "log_income_c", "family_size_c"),
omit = c("region", "building"),
column.labels = c("Bivariate", "Socioecon", "Time Poverty", "Full (Infra)"),
add.lines = list(
c("Region Fixed Effects", "Yes", "Yes", "Yes", "Yes"),
c("Building Fixed Effects", "Yes", "Yes", "Yes", "Yes")
),
dep.var.caption = "Dependent variable: Bread expenditure share",
dep.var.labels.include = FALSE,
model.numbers = TRUE,
star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
star.char = c("+", "*", "**", "***"),
notes = c("Clustered standard errors (at Region level) reported in parentheses.",
"Significance: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001",
"This table serves as a falsification test for the preparation-barrier mechanism."),
notes.append = FALSE
)
======================================================================================================================================
Dependent variable: Bread expenditure share
---------------------------------------------------------------------------------------------------------------
Bivariate Socioecon Time Poverty Full (Infra)
(1) (2) (3) (4)
--------------------------------------------------------------------------------------------------------------------------------------
Constant 0.2835*** 0.2312*** 0.2301*** 0.2822***
(0.0014) (0.0116) (0.0124) (0.0143)
water_source -0.0647*** -0.0185*** -0.0179*** -0.0098**
(0.0053) (0.0029) (0.0029) (0.0032)
water_income_interac 0.0050 0.0052 0.0010
(0.0066) (0.0065) (0.0053)
water_family_interac -0.0066** -0.0067** -0.0056*
(0.0022) (0.0022) (0.0023)
log_income_c -0.0746*** -0.0746*** -0.0511***
(0.0033) (0.0033) (0.0033)
family_size_c 0.0209*** 0.0208*** 0.0206***
(0.0013) (0.0013) (0.0011)
educ_head -0.0013*** -0.0013*** -0.0005
(0.0004) (0.0004) (0.0005)
age_head 0.0023*** 0.0021*** 0.0024***
(0.0005) (0.0005) (0.0005)
I(age_head2) -0.00002*** -0.00002** -0.00002***
(0.000005) (0.000005) (0.00001)
has_kids 0.0013 0.0017 0.0033
(0.0026) (0.0026) (0.0022)
head_works 0.0046 0.0029
(0.0052) (0.0050)
female_head -0.0076 -0.0050
(0.0054) (0.0052)
work_female_interac 0.0020 0.0023
(0.0074) (0.0073)
transportation -0.000000***
(0.000000)
food_outside -0.0600***
(0.0037)
has_fridge -0.0106***
(0.0030)
has_electricity 0.0092
(0.0088)
roof 0.0023
(0.0051)
wall -0.0074+
(0.0044)
floor -0.0001
(0.00004)
toilet -0.0237**
(0.0072)
--------------------------------------------------------------------------------------------------------------------------------------
Region Fixed Effects Yes Yes Yes Yes
Building Fixed Effects Yes Yes Yes Yes
Observations 9,994 9,994 9,994 9,994
R2 0.3202 0.4887 0.4893 0.5291
Adjusted R2 0.3188 0.4872 0.4877 0.5272
Residual Std. Error 0.1083 (df = 9972) 0.0939 (df = 9964) 0.0939 (df = 9961) 0.0902 (df = 9953)
F Statistic 223.7175*** (df = 21; 9972) 328.3981*** (df = 29; 9964) 298.2398*** (df = 32; 9961) 279.5627*** (df = 40; 9953)
======================================================================================================================================
Note: Clustered standard errors (at Region level) reported in parentheses.
Significance: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001
This table serves as a falsification test for the preparation-barrier mechanism.
# female heads
# in the original model, we include age_head^2.
# however, the RESET test is specifying that the model is misspecified because of this.
# so we only maintain age_head.
apis_subsample_female <- apis_final %>%
filter(female_head == 1)
subsample_female <- lm(protein_share ~
water_source +
water_income_interac +
water_family_interac +
log_income_c +
family_size_c +
# control vec. 1
educ_head +
age_head +
has_kids +
# control vec. 2, exc. female_head & work_female_interac
head_works +
# control vec. 3
has_fridge +
has_electricity +
roof +
wall +
floor +
toilet +
# control vec. 4
transportation +
food_outside +
# fixed effects
region +
building,
data = apis_subsample_female
)
summary(subsample_female)
Call:
lm(formula = protein_share ~ water_source + water_income_interac +
water_family_interac + log_income_c + family_size_c + educ_head +
age_head + has_kids + head_works + has_fridge + has_electricity +
roof + wall + floor + toilet + transportation + food_outside +
region + building, data = apis_subsample_female)
Residuals:
Min 1Q Median 3Q Max
-0.241371 -0.061832 -0.005229 0.056111 0.303060
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.534e-01 1.998e-02 12.685 < 2e-16 ***
water_source 2.069e-03 5.254e-03 0.394 0.693774
water_income_interac -1.496e-02 6.247e-03 -2.395 0.016723 *
water_family_interac 9.343e-03 2.186e-03 4.274 2.01e-05 ***
log_income_c 2.529e-02 5.590e-03 4.523 6.45e-06 ***
family_size_c -4.722e-03 1.803e-03 -2.619 0.008879 **
educ_head 6.837e-04 6.022e-04 1.135 0.256365
age_head 4.922e-05 1.501e-04 0.328 0.743008
has_kids 6.459e-03 4.862e-03 1.329 0.184162
head_works -4.757e-03 4.365e-03 -1.090 0.276011
has_fridge 2.226e-02 5.227e-03 4.258 2.16e-05 ***
has_electricity 2.309e-03 9.375e-03 0.246 0.805461
roof 2.123e-02 8.493e-03 2.500 0.012517 *
wall -2.735e-03 6.763e-03 -0.404 0.685964
floor 2.858e-05 4.857e-05 0.589 0.556240
toilet 6.280e-03 7.559e-03 0.831 0.406151
transportation -2.717e-08 2.827e-07 -0.096 0.923434
food_outside -5.635e-02 5.120e-03 -11.005 < 2e-16 ***
region2 -6.398e-03 1.565e-02 -0.409 0.682811
region3 1.434e-02 1.275e-02 1.125 0.260575
region4 -4.008e-03 1.411e-02 -0.284 0.776376
region5 -2.100e-02 1.406e-02 -1.494 0.135267
region6 -2.591e-02 1.288e-02 -2.012 0.044370 *
region7 -1.945e-02 1.381e-02 -1.408 0.159216
region8 -2.219e-02 1.372e-02 -1.617 0.105999
region9 -2.771e-02 1.546e-02 -1.792 0.073287 .
region10 -3.455e-02 1.410e-02 -2.451 0.014350 *
region11 1.731e-02 1.484e-02 1.167 0.243423
region12 -4.390e-02 1.598e-02 -2.748 0.006053 **
region13 -4.940e-02 1.235e-02 -4.001 6.55e-05 ***
region14 -4.225e-02 1.362e-02 -3.103 0.001943 **
region15 -6.458e-02 1.763e-02 -3.664 0.000255 ***
region16 -3.712e-02 1.400e-02 -2.652 0.008066 **
region17 -2.861e-03 1.442e-02 -0.198 0.842802
building2 -7.848e-03 1.454e-02 -0.540 0.589549
building3 -1.421e-02 8.637e-03 -1.645 0.100065
building4 -2.333e-02 4.520e-02 -0.516 0.605857
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.08975 on 1964 degrees of freedom
Multiple R-squared: 0.1582, Adjusted R-squared: 0.1427
F-statistic: 10.25 on 36 and 1964 DF, p-value: < 2.2e-16
print(coeftest(subsample_female,
vcov = vcovCL(subsample_female, cluster = ~region)))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.5344e-01 1.8019e-02 14.0653 < 2.2e-16 ***
water_source 2.0689e-03 7.5985e-03 0.2723 0.7854366
water_income_interac -1.4960e-02 8.9841e-03 -1.6651 0.0960478 .
water_family_interac 9.3431e-03 2.2616e-03 4.1312 3.759e-05 ***
log_income_c 2.5286e-02 7.3784e-03 3.4270 0.0006228 ***
family_size_c -4.7217e-03 1.7408e-03 -2.7125 0.0067369 **
educ_head 6.8372e-04 6.1053e-04 1.1199 0.2629041
age_head 4.9221e-05 1.2641e-04 0.3894 0.6970294
has_kids 6.4588e-03 4.3056e-03 1.5001 0.1337538
head_works -4.7565e-03 4.1961e-03 -1.1336 0.2571141
has_fridge 2.2257e-02 3.8156e-03 5.8331 6.346e-09 ***
has_electricity 2.3093e-03 1.0417e-02 0.2217 0.8245837
roof 2.1229e-02 6.4856e-03 3.2733 0.0010813 **
wall -2.7349e-03 7.6266e-03 -0.3586 0.7199298
floor 2.8583e-05 3.2941e-05 0.8677 0.3856561
toilet 6.2801e-03 1.0803e-02 0.5813 0.5610715
transportation -2.7168e-08 3.1804e-07 -0.0854 0.9319333
food_outside -5.6348e-02 5.0518e-03 -11.1540 < 2.2e-16 ***
region2 -6.3978e-03 1.1461e-03 -5.5821 2.707e-08 ***
region3 1.4344e-02 4.2135e-03 3.4043 0.0006766 ***
region4 -4.0079e-03 5.0782e-03 -0.7892 0.4300686
region5 -2.1005e-02 3.9373e-03 -5.3348 1.067e-07 ***
region6 -2.5907e-02 2.2391e-03 -11.5703 < 2.2e-16 ***
region7 -1.9446e-02 4.4568e-03 -4.3632 1.348e-05 ***
region8 -2.2185e-02 2.4533e-03 -9.0431 < 2.2e-16 ***
region9 -2.7713e-02 4.3162e-03 -6.4208 1.695e-10 ***
region10 -3.4550e-02 3.3180e-03 -10.4129 < 2.2e-16 ***
region11 1.7315e-02 3.7431e-03 4.6258 3.975e-06 ***
region12 -4.3904e-02 3.0971e-03 -14.1761 < 2.2e-16 ***
region13 -4.9400e-02 7.3834e-03 -6.6906 2.889e-11 ***
region14 -4.2249e-02 1.6386e-03 -25.7833 < 2.2e-16 ***
region15 -6.4581e-02 8.1496e-03 -7.9245 3.805e-15 ***
region16 -3.7121e-02 3.7454e-03 -9.9110 < 2.2e-16 ***
region17 -2.8606e-03 3.3608e-03 -0.8512 0.3947738
building2 -7.8475e-03 7.7762e-03 -1.0092 0.3130152
building3 -1.4210e-02 9.7025e-03 -1.4646 0.1431867
building4 -2.3328e-02 4.2663e-02 -0.5468 0.5845804
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
GVIF Df GVIF^(1/(2*Df))
water_source 1.691807 1 1.300695
water_income_interac 3.554955 1 1.885459
water_family_interac 2.942803 1 1.715460
log_income_c 5.582345 1 2.362699
family_size_c 3.380519 1 1.838619
educ_head 1.834611 1 1.354478
age_head 1.495928 1 1.223082
has_kids 1.340753 1 1.157909
head_works 1.131093 1 1.063528
has_fridge 1.673408 1 1.293603
has_electricity 1.345387 1 1.159908
roof 1.612160 1 1.269709
wall 1.658201 1 1.287712
floor 1.378329 1 1.174022
toilet 1.410944 1 1.187832
transportation 1.600071 1 1.264939
food_outside 1.609005 1 1.268466
region 2.499325 16 1.029039
building 1.223044 3 1.034127
Breusch-Godfrey test for serial correlation of order up to 1
data: subsample_female
LM test = 60.681, df = 1, p-value = 6.711e-15
studentized Breusch-Pagan test
data: subsample_female
BP = 71.792, df = 36, p-value = 0.0003603
resettest(subsample_female,
power = 2:3,
type = "fitted",
vcov = function(x) vcovCL(x, cluster = ~apis_subsample_female$region))
RESET test
data: subsample_female
RESET = 2.8156, df1 = 2, df2 = 1964, p-value = 0.06011
hist(residuals(subsample_female), main="Histogram of Residuals", breaks=50)
#----------------------------------------------------------------------------------#
# male heads
apis_subsample_male <- apis_final %>%
filter(female_head == 0)
subsample_male <- lm(protein_share ~
water_source +
water_income_interac +
water_family_interac +
log_income_c +
family_size_c +
# control vec. 1
educ_head +
age_head +
I(age_head^2) +
has_kids +
# control vec. 2, exc. female_head & work_female_interac
head_works +
# control vec. 3
has_fridge +
has_electricity +
roof +
wall +
floor +
toilet +
# control vec. 4
transportation +
food_outside +
# fixed effects
region +
building,
data = apis_subsample_male
)
summary(subsample_male)
Call:
lm(formula = protein_share ~ water_source + water_income_interac +
water_family_interac + log_income_c + family_size_c + educ_head +
age_head + I(age_head^2) + has_kids + head_works + has_fridge +
has_electricity + roof + wall + floor + toilet + transportation +
food_outside + region + building, data = apis_subsample_male)
Residuals:
Min 1Q Median 3Q Max
-0.27704 -0.06367 -0.00667 0.05626 0.50464
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.443e-01 1.292e-02 18.905 < 2e-16 ***
water_source 1.090e-02 2.601e-03 4.190 2.81e-05 ***
water_income_interac -1.204e-02 3.360e-03 -3.585 0.000339 ***
water_family_interac 7.345e-03 1.029e-03 7.139 1.02e-12 ***
log_income_c 2.881e-02 2.767e-03 10.410 < 2e-16 ***
family_size_c -8.389e-03 7.453e-04 -11.255 < 2e-16 ***
educ_head 1.229e-04 2.983e-04 0.412 0.680295
age_head 1.559e-03 4.601e-04 3.389 0.000706 ***
I(age_head^2) -1.383e-05 4.754e-06 -2.909 0.003640 **
has_kids 3.128e-03 2.362e-03 1.324 0.185441
head_works 9.161e-03 3.802e-03 2.410 0.015995 *
has_fridge 1.057e-02 2.812e-03 3.758 0.000173 ***
has_electricity -9.869e-03 3.929e-03 -2.512 0.012019 *
roof 9.105e-05 3.431e-03 0.027 0.978827
wall 2.338e-03 2.987e-03 0.783 0.433758
floor 1.029e-04 2.908e-05 3.536 0.000408 ***
toilet 7.824e-03 3.225e-03 2.426 0.015275 *
transportation -3.873e-07 1.113e-07 -3.480 0.000504 ***
food_outside -4.964e-02 2.583e-03 -19.215 < 2e-16 ***
region2 2.098e-03 7.101e-03 0.295 0.767677
region3 7.500e-03 6.386e-03 1.174 0.240235
region4 -1.833e-02 6.980e-03 -2.626 0.008661 **
region5 -1.937e-02 6.912e-03 -2.803 0.005081 **
region6 -2.038e-02 6.637e-03 -3.071 0.002144 **
region7 -2.590e-02 6.910e-03 -3.748 0.000179 ***
region8 -3.072e-02 6.671e-03 -4.605 4.20e-06 ***
region9 -4.334e-02 7.465e-03 -5.805 6.68e-09 ***
region10 -6.580e-02 6.685e-03 -9.842 < 2e-16 ***
region11 1.084e-02 6.841e-03 1.585 0.113109
region12 -4.763e-02 7.127e-03 -6.683 2.50e-11 ***
region13 -5.116e-02 6.263e-03 -8.168 3.61e-16 ***
region14 -5.148e-02 6.746e-03 -7.631 2.60e-14 ***
region15 -5.550e-02 7.228e-03 -7.678 1.81e-14 ***
region16 -3.164e-02 7.011e-03 -4.513 6.50e-06 ***
region17 -2.681e-02 6.871e-03 -3.902 9.62e-05 ***
building2 -4.495e-03 6.788e-03 -0.662 0.507896
building3 -8.190e-03 5.375e-03 -1.524 0.127647
building4 1.470e-03 1.754e-02 0.084 0.933209
building5 -2.515e-02 6.404e-02 -0.393 0.694532
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.09022 on 7954 degrees of freedom
Multiple R-squared: 0.1395, Adjusted R-squared: 0.1354
F-statistic: 33.94 on 38 and 7954 DF, p-value: < 2.2e-16
print(coeftest(subsample_male,
vcov = vcovCL(subsample_male, cluster = ~region)))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.4425e-01 1.3963e-02 17.4928 < 2.2e-16 ***
water_source 1.0901e-02 2.8347e-03 3.8455 0.0001213 ***
water_income_interac -1.2045e-02 7.6061e-03 -1.5836 0.1133325
water_family_interac 7.3448e-03 2.1785e-03 3.3715 0.0007511 ***
log_income_c 2.8810e-02 4.5170e-03 6.3782 1.892e-10 ***
family_size_c -8.3885e-03 1.0091e-03 -8.3133 < 2.2e-16 ***
educ_head 1.2291e-04 4.7468e-04 0.2589 0.7956874
age_head 1.5592e-03 6.5410e-04 2.3837 0.0171616 *
I(age_head^2) -1.3829e-05 6.3574e-06 -2.1753 0.0296363 *
has_kids 3.1276e-03 2.2011e-03 1.4210 0.1553703
head_works 9.1611e-03 4.8549e-03 1.8870 0.0592023 .
has_fridge 1.0566e-02 3.1275e-03 3.3786 0.0007321 ***
has_electricity -9.8691e-03 6.4025e-03 -1.5414 0.1232504
roof 9.1055e-05 5.2338e-03 0.0174 0.9861200
wall 2.3381e-03 4.2286e-03 0.5529 0.5803299
floor 1.0285e-04 5.7519e-05 1.7881 0.0737968 .
toilet 7.8241e-03 6.5610e-03 1.1925 0.2330892
transportation -3.8727e-07 9.7684e-08 -3.9646 7.417e-05 ***
food_outside -4.9638e-02 4.5713e-03 -10.8585 < 2.2e-16 ***
region2 2.0979e-03 1.8229e-03 1.1509 0.2498223
region3 7.5000e-03 1.4335e-03 5.2318 1.721e-07 ***
region4 -1.8328e-02 2.3294e-03 -7.8681 4.074e-15 ***
region5 -1.9373e-02 2.8391e-03 -6.8235 9.538e-12 ***
region6 -2.0380e-02 2.4225e-03 -8.4129 < 2.2e-16 ***
region7 -2.5900e-02 2.5960e-03 -9.9771 < 2.2e-16 ***
region8 -3.0719e-02 2.6959e-03 -11.3948 < 2.2e-16 ***
region9 -4.3336e-02 3.1096e-03 -13.9362 < 2.2e-16 ***
region10 -6.5797e-02 2.4399e-03 -26.9669 < 2.2e-16 ***
region11 1.0840e-02 3.7239e-03 2.9109 0.0036139 **
region12 -4.7632e-02 3.2918e-03 -14.4700 < 2.2e-16 ***
region13 -5.1155e-02 4.5936e-03 -11.1363 < 2.2e-16 ***
region14 -5.1480e-02 2.3829e-03 -21.6041 < 2.2e-16 ***
region15 -5.5497e-02 6.1054e-03 -9.0898 < 2.2e-16 ***
region16 -3.1637e-02 2.5545e-03 -12.3850 < 2.2e-16 ***
region17 -2.6812e-02 3.1667e-03 -8.4670 < 2.2e-16 ***
building2 -4.4948e-03 6.3007e-03 -0.7134 0.4756361
building3 -8.1896e-03 1.9141e-03 -4.2786 1.903e-05 ***
building4 1.4698e-03 2.2411e-02 0.0656 0.9477091
building5 -2.5148e-02 1.5636e-01 -0.1608 0.8722273
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
GVIF Df GVIF^(1/(2*Df))
water_source 1.616794 1 1.271532
water_income_interac 2.852839 1 1.689035
water_family_interac 1.866466 1 1.366187
log_income_c 4.214313 1 2.052879
family_size_c 2.412280 1 1.553152
educ_head 1.631705 1 1.277382
age_head 40.039873 1 6.327707
I(age_head^2) 41.610239 1 6.450600
has_kids 1.347899 1 1.160991
head_works 1.337867 1 1.156662
has_fridge 1.674965 1 1.294204
has_electricity 1.348411 1 1.161211
roof 1.615900 1 1.271181
wall 1.724114 1 1.313055
floor 1.345329 1 1.159883
toilet 1.567900 1 1.252158
transportation 1.400192 1 1.183297
food_outside 1.635357 1 1.278811
region 2.738622 16 1.031984
building 1.211424 4 1.024264
Breusch-Godfrey test for serial correlation of order up to 1
data: subsample_male
LM test = 573, df = 1, p-value < 2.2e-16
studentized Breusch-Pagan test
data: subsample_male
BP = 248.9, df = 38, p-value < 2.2e-16
resettest(subsample_male,
power = 2:3,
type = "fitted",
vcov = function(x) vcovCL(x, cluster = ~apis_subsample_male$region))
RESET test
data: subsample_male
RESET = 0.71522, df1 = 2, df2 = 7954, p-value = 0.4891
hist(residuals(subsample_male), main="Histogram of Residuals", breaks=50)
#----------------------------------------------------------------------------------#
# presenting the comparison, male vs. female, in table format
# define subsample models
# female model: linear age
subsample_female <- lm(protein_share ~
water_source + water_income_interac + water_family_interac +
log_income_c + family_size_c + educ_head + age_head + has_kids +
head_works + has_fridge + has_electricity + roof + wall + floor +
toilet + transportation + food_outside +
region + building,
data = apis_subsample_female)
# male model: quadratic age
subsample_male <- lm(protein_share ~
water_source + water_income_interac + water_family_interac +
log_income_c + family_size_c + educ_head + age_head + I(age_head^2) + has_kids +
head_works + has_fridge + has_electricity + roof + wall + floor +
toilet + transportation + food_outside +
region + building,
data = apis_subsample_male)
# extract clustered standard errors
se_f <- coeftest(subsample_female, vcov = vcovCL(subsample_female, cluster = ~region))[, 2]
se_m <- coeftest(subsample_male, vcov = vcovCL(subsample_male, cluster = ~region))[, 2]
stargazer(subsample_female, subsample_male,
type = "text",
se = list(se_f, se_m),
intercept.bottom = FALSE,
digits = 4,
no.space = TRUE,
order = c("Constant", "water_source", "water_income_interac", "water_family_interac"),
omit = c("region", "building"),
column.labels = c("Female Headed", "Male Headed"),
add.lines = list(
c("Region Fixed Effects", "Yes", "Yes"),
c("Building Fixed Effects", "Yes", "Yes")
),
dep.var.labels.include = FALSE,
model.numbers = TRUE,
# Star/Plus Configuration
star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
star.char = c("+", "*", "**", "***"),
notes = c("Clustered standard errors (at Region level) reported in parentheses.",
"Significance: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001",
"Age specification: Linear for Female, Quadratic for Male."),
notes.append = FALSE)
============================================================================================
Dependent variable:
---------------------------------------------------------------------
Female Headed Male Headed
(1) (2)
--------------------------------------------------------------------------------------------
Constant 0.2534*** 0.2443***
(0.0180) (0.0140)
water_source 0.0021 0.0109***
(0.0076) (0.0028)
water_income_interac -0.0150+ -0.0120
(0.0090) (0.0076)
water_family_interac 0.0093*** 0.0073***
(0.0023) (0.0022)
log_income_c 0.0253*** 0.0288***
(0.0074) (0.0045)
family_size_c -0.0047** -0.0084***
(0.0017) (0.0010)
educ_head 0.0007 0.0001
(0.0006) (0.0005)
age_head 0.00005 0.0016*
(0.0001) (0.0007)
I(age_head2) -0.00001*
(0.00001)
has_kids 0.0065 0.0031
(0.0043) (0.0022)
head_works -0.0048 0.0092+
(0.0042) (0.0049)
has_fridge 0.0223*** 0.0106***
(0.0038) (0.0031)
has_electricity 0.0023 -0.0099
(0.0104) (0.0064)
roof 0.0212** 0.0001
(0.0065) (0.0052)
wall -0.0027 0.0023
(0.0076) (0.0042)
floor 0.00003 0.0001+
(0.00003) (0.0001)
toilet 0.0063 0.0078
(0.0108) (0.0066)
transportation -0.000000 -0.000000***
(0.000000) (0.000000)
food_outside -0.0563*** -0.0496***
(0.0051) (0.0046)
--------------------------------------------------------------------------------------------
Region Fixed Effects Yes Yes
Building Fixed Effects Yes Yes
Observations 2,001 7,993
R2 0.1582 0.1395
Adjusted R2 0.1427 0.1354
Residual Std. Error 0.0897 (df = 1964) 0.0902 (df = 7954)
F Statistic 10.2493*** (df = 36; 1964) 33.9386*** (df = 38; 7954)
============================================================================================
Note: Clustered standard errors (at Region level) reported in parentheses.
Significance: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001
Age specification: Linear for Female, Quadratic for Male.
#----------------------------------------------------------------------------------#
# robustness test for each subsample analysis (included in the appendix)
# female robustness table
f1 <- lm(protein_share ~ water_source + region + building,
data = apis_subsample_female)
f2 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac +
log_income_c + family_size_c + educ_head + age_head + has_kids +
region + building, data = apis_subsample_female)
f3 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac +
log_income_c + family_size_c + educ_head + age_head + has_kids +
head_works + transportation + food_outside + has_fridge + has_electricity +
region + building, data = apis_subsample_female)
f4 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac +
log_income_c + family_size_c + educ_head + age_head + has_kids +
head_works + transportation + food_outside +
has_fridge + has_electricity + roof + wall + floor + toilet +
region + building, data = apis_subsample_female)
se_f1 <- coeftest(f1, vcov = vcovCL(f1, cluster = ~region))[, 2]
se_f2 <- coeftest(f2, vcov = vcovCL(f2, cluster = ~region))[, 2]
se_f3 <- coeftest(f3, vcov = vcovCL(f3, cluster = ~region))[, 2]
se_f4 <- coeftest(f4, vcov = vcovCL(f4, cluster = ~region))[, 2]
stargazer(f1, f2, f3, f4,
type = "text",
se = list(se_f1, se_f2, se_f3, se_f4),
intercept.bottom = FALSE,
digits = 4,
no.space = TRUE,
order = c("Constant", "water_source", "water_income_interac",
"water_family_interac", "log_income_c", "family_size_c"),
omit = c("region", "building"),
column.labels = c("Bivariate", "Socioecon", "Employment", "Full"),
add.lines = list(
c("Region Fixed Effects", "Yes", "Yes", "Yes", "Yes"),
c("Building Fixed Effects", "Yes", "Yes", "Yes", "Yes")
),
title = "Robustness Check: Female-Headed Households",
dep.var.caption = "Dependent variable: Protein share (protein-consuming female-headed households)",
dep.var.labels.include = FALSE,
model.numbers = TRUE,
# Updated for the plus sign
star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
star.char = c("+", "*", "**", "***"),
notes = c("Clustered standard errors (at Region level) reported in parentheses.",
"Significance: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001",
"(1): Bivariate",
"(2): + Main interactions and socioeconomic indicators",
"(3): + Employment indicators",
"(4): + Household infrastructure"),
notes.append = FALSE)
Robustness Check: Female-Headed Households
================================================================================================================================
Dependent variable: Protein share (protein-consuming female-headed households)
---------------------------------------------------------------------------------------------------------
Bivariate Socioecon Employment Full
(1) (2) (3) (4)
--------------------------------------------------------------------------------------------------------------------------------
Constant 0.2735*** 0.2417*** 0.2714*** 0.2534***
(0.0022) (0.0100) (0.0164) (0.0180)
water_source 0.0123+ 0.0056 0.0042 0.0021
(0.0071) (0.0079) (0.0074) (0.0076)
water_income_interac -0.0137+ -0.0170+ -0.0150+
(0.0079) (0.0087) (0.0090)
water_family_interac 0.0094*** 0.0096*** 0.0093***
(0.0025) (0.0023) (0.0023)
log_income_c 0.0233*** 0.0284*** 0.0253***
(0.0060) (0.0070) (0.0074)
family_size_c -0.0063*** -0.0050** -0.0047**
(0.0016) (0.0018) (0.0017)
educ_head 0.0008 0.0007 0.0007
(0.0007) (0.0006) (0.0006)
age_head 0.0003* 0.0001 0.00005
(0.0001) (0.0001) (0.0001)
has_kids 0.0064 0.0062 0.0065
(0.0044) (0.0043) (0.0043)
head_works -0.0051 -0.0048
(0.0042) (0.0042)
transportation -0.000000 -0.000000
(0.000000) (0.000000)
food_outside -0.0554*** -0.0563***
(0.0049) (0.0051)
has_fridge 0.0227*** 0.0223***
(0.0040) (0.0038)
has_electricity 0.0084 0.0023
(0.0090) (0.0104)
roof 0.0212**
(0.0065)
wall -0.0027
(0.0076)
floor 0.00003
(0.00003)
toilet 0.0063
(0.0108)
--------------------------------------------------------------------------------------------------------------------------------
Region Fixed Effects Yes Yes Yes Yes
Building Fixed Effects Yes Yes Yes Yes
Observations 2,001 2,001 2,001 2,001
R2 0.0685 0.0940 0.1543 0.1582
Adjusted R2 0.0591 0.0816 0.1406 0.1427
Residual Std. Error 0.0940 (df = 1980) 0.0929 (df = 1973) 0.0899 (df = 1968) 0.0897 (df = 1964)
F Statistic 7.2792*** (df = 20; 1980) 7.5804*** (df = 27; 1973) 11.2247*** (df = 32; 1968) 10.2493*** (df = 36; 1964)
================================================================================================================================
Note: Clustered standard errors (at Region level) reported in parentheses.
Significance: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001
(1): Bivariate
(2): + Main interactions and socioeconomic indicators
(3): + Employment indicators
(4): + Household infrastructure
# male robustness table
m1 <- lm(protein_share ~ water_source + region + building,
data = apis_subsample_male)
m2 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac +
log_income_c + family_size_c + educ_head + age_head + I(age_head^2) + has_kids +
region + building, data = apis_subsample_male)
m3 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac +
log_income_c + family_size_c + educ_head + age_head + I(age_head^2) + has_kids +
head_works + transportation + food_outside + has_fridge + has_electricity +
region + building, data = apis_subsample_male)
m4 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac +
log_income_c + family_size_c + educ_head + age_head + I(age_head^2) + has_kids +
head_works + transportation + food_outside +
has_fridge + has_electricity + roof + wall + floor + toilet +
region + building, data = apis_subsample_male)
se_m1 <- coeftest(m1, vcov = vcovCL(m1, cluster = ~region))[, 2]
se_m2 <- coeftest(m2, vcov = vcovCL(m2, cluster = ~region))[, 2]
se_m3 <- coeftest(m3, vcov = vcovCL(m3, cluster = ~region))[, 2]
se_m4 <- coeftest(m4, vcov = vcovCL(m4, cluster = ~region))[, 2]
stargazer(m1, m2, m3, m4,
type = "text",
se = list(se_m1, se_m2, se_m3, se_m4),
intercept.bottom = FALSE,
digits = 4,
no.space = TRUE,
order = c("Constant", "water_source", "water_income_interac",
"water_family_interac", "log_income_c", "family_size_c"),
omit = c("region", "building"),
column.labels = c("Bivariate", "Socioecon", "Employment", "Full"),
add.lines = list(
c("Region Fixed Effects", "Yes", "Yes", "Yes", "Yes"),
c("Building Fixed Effects", "Yes", "Yes", "Yes", "Yes")
),
title = "Robustness Check: Male-Headed Households",
dep.var.caption = "Dependent variable: Protein share (protein-consuming male-headed households)",
dep.var.labels.include = FALSE,
model.numbers = TRUE,
# Updated for the plus sign
star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
star.char = c("+", "*", "**", "***"),
notes = c("Clustered standard errors (at Region level) reported in parentheses.",
"Significance: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001",
"(1): Bivariate",
"(2): + Main interactions and socioeconomic indicators",
"(3): + Employment indicators",
"(4): + Household infrastructure"),
notes.append = FALSE)
Robustness Check: Male-Headed Households
==================================================================================================================================
Dependent variable: Protein share (protein-consuming male-headed households)
-----------------------------------------------------------------------------------------------------------
Bivariate Socioecon Employment Full
(1) (2) (3) (4)
----------------------------------------------------------------------------------------------------------------------------------
Constant 0.2687*** 0.2258*** 0.2548*** 0.2443***
(0.0009) (0.0160) (0.0135) (0.0140)
water_source 0.0210*** 0.0101** 0.0123*** 0.0109***
(0.0035) (0.0036) (0.0029) (0.0028)
water_income_interac -0.0089 -0.0126+ -0.0120
(0.0069) (0.0075) (0.0076)
water_family_interac 0.0070** 0.0074*** 0.0073***
(0.0023) (0.0022) (0.0022)
log_income_c 0.0194*** 0.0312*** 0.0288***
(0.0045) (0.0050) (0.0045)
family_size_c -0.0095*** -0.0086*** -0.0084***
(0.0011) (0.0011) (0.0010)
educ_head 0.0001 0.0003 0.0001
(0.0004) (0.0004) (0.0005)
age_head 0.0016** 0.0015* 0.0016*
(0.0006) (0.0006) (0.0007)
I(age_head2) -0.00001* -0.00001* -0.00001*
(0.00001) (0.00001) (0.00001)
has_kids 0.0027 0.0032 0.0031
(0.0024) (0.0023) (0.0022)
head_works 0.0083+ 0.0092+
(0.0050) (0.0049)
transportation -0.000000*** -0.000000***
(0.000000) (0.000000)
food_outside -0.0494*** -0.0496***
(0.0048) (0.0046)
has_fridge 0.0119*** 0.0106***
(0.0030) (0.0031)
has_electricity -0.0073 -0.0099
(0.0070) (0.0064)
roof 0.0001
(0.0052)
wall 0.0023
(0.0042)
floor 0.0001+
(0.0001)
toilet 0.0078
(0.0066)
----------------------------------------------------------------------------------------------------------------------------------
Region Fixed Effects Yes Yes Yes Yes
Building Fixed Effects Yes Yes Yes Yes
Observations 7,993 7,993 7,993 7,993
R2 0.0656 0.0928 0.1373 0.1395
Adjusted R2 0.0632 0.0895 0.1336 0.1354
Residual Std. Error 0.0939 (df = 7971) 0.0926 (df = 7963) 0.0903 (df = 7958) 0.0902 (df = 7954)
F Statistic 26.6695*** (df = 21; 7971) 28.0951*** (df = 29; 7963) 37.2587*** (df = 34; 7958) 33.9386*** (df = 38; 7954)
==================================================================================================================================
Note: Clustered standard errors (at Region level) reported in parentheses.
Significance: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001
(1): Bivariate
(2): + Main interactions and socioeconomic indicators
(3): + Employment indicators
(4): + Household infrastructure
# early-stage (below 35)
apis_subsample_below_35 <- apis_final %>%
filter(age_head < 35)
subsample_below_35 <- lm(protein_share ~
water_source +
water_income_interac +
water_family_interac +
log_income_c +
family_size_c +
# control vec. 1
educ_head +
age_head +
I(age_head^2) +
has_kids +
# control vec. 2
head_works +
female_head +
work_female_interac +
# control vec. 3
has_fridge +
has_electricity +
roof +
wall +
floor +
toilet +
# control vec. 4
transportation +
food_outside +
# fixed effects
region +
building,
data = apis_subsample_below_35
)
summary(subsample_below_35)
Call:
lm(formula = protein_share ~ water_source + water_income_interac +
water_family_interac + log_income_c + family_size_c + educ_head +
age_head + I(age_head^2) + has_kids + head_works + female_head +
work_female_interac + has_fridge + has_electricity + roof +
wall + floor + toilet + transportation + food_outside + region +
building, data = apis_subsample_below_35)
Residuals:
Min 1Q Median 3Q Max
-0.26544 -0.06349 -0.00706 0.05502 0.34949
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.536e-01 8.607e-02 2.946 0.003255 **
water_source 1.340e-02 5.530e-03 2.423 0.015469 *
water_income_interac -1.679e-02 7.440e-03 -2.257 0.024122 *
water_family_interac 1.336e-02 2.616e-03 5.106 3.59e-07 ***
log_income_c 3.404e-02 5.972e-03 5.700 1.37e-08 ***
family_size_c -9.595e-03 1.950e-03 -4.921 9.31e-07 ***
educ_head -5.776e-04 6.390e-04 -0.904 0.366121
age_head 2.982e-03 6.270e-03 0.476 0.634392
I(age_head^2) -3.269e-05 1.137e-04 -0.287 0.773793
has_kids 6.375e-03 5.081e-03 1.255 0.209760
head_works 9.740e-03 1.183e-02 0.824 0.410266
female_head 7.176e-03 1.482e-02 0.484 0.628292
work_female_interac -2.399e-02 1.676e-02 -1.432 0.152434
has_fridge 2.053e-03 6.083e-03 0.337 0.735805
has_electricity -1.075e-02 7.114e-03 -1.511 0.130956
roof 2.864e-03 6.500e-03 0.441 0.659468
wall -6.788e-03 5.723e-03 -1.186 0.235746
floor 2.411e-04 7.496e-05 3.216 0.001320 **
toilet 4.330e-03 6.167e-03 0.702 0.482694
transportation -3.943e-07 3.898e-07 -1.012 0.311845
food_outside -6.372e-02 5.310e-03 -11.999 < 2e-16 ***
region2 4.232e-03 1.648e-02 0.257 0.797338
region3 -1.680e-03 1.417e-02 -0.119 0.905610
region4 -3.028e-02 1.525e-02 -1.986 0.047160 *
region5 -4.254e-02 1.525e-02 -2.790 0.005324 **
region6 -4.331e-02 1.471e-02 -2.943 0.003287 **
region7 -5.565e-02 1.495e-02 -3.722 0.000203 ***
region8 -4.508e-02 1.485e-02 -3.036 0.002427 **
region9 -6.127e-02 1.701e-02 -3.602 0.000323 ***
region10 -8.605e-02 1.467e-02 -5.867 5.16e-09 ***
region11 -1.727e-02 1.506e-02 -1.147 0.251523
region12 -6.079e-02 1.522e-02 -3.993 6.75e-05 ***
region13 -7.822e-02 1.388e-02 -5.637 1.97e-08 ***
region14 -5.986e-02 1.493e-02 -4.010 6.28e-05 ***
region15 -7.988e-02 1.504e-02 -5.313 1.20e-07 ***
region16 -4.855e-02 1.531e-02 -3.171 0.001544 **
region17 -5.186e-02 1.492e-02 -3.475 0.000521 ***
building2 -2.563e-03 1.316e-02 -0.195 0.845577
building3 -2.245e-03 8.713e-03 -0.258 0.796729
building4 2.329e-02 2.398e-02 0.971 0.331517
building5 2.116e-01 9.200e-02 2.301 0.021513 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.09073 on 2047 degrees of freedom
Multiple R-squared: 0.1805, Adjusted R-squared: 0.1645
F-statistic: 11.27 on 40 and 2047 DF, p-value: < 2.2e-16
print(coeftest(subsample_below_35,
vcov = vcovCL(subsample_below_35, cluster = ~region)))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.5357e-01 6.2805e-02 4.0374 5.604e-05 ***
water_source 1.3401e-02 6.6454e-03 2.0166 0.043867 *
water_income_interac -1.6790e-02 1.3378e-02 -1.2551 0.209590
water_family_interac 1.3359e-02 5.1064e-03 2.6160 0.008961 **
log_income_c 3.4044e-02 7.5266e-03 4.5232 6.441e-06 ***
family_size_c -9.5949e-03 2.1906e-03 -4.3801 1.246e-05 ***
educ_head -5.7761e-04 9.2645e-04 -0.6235 0.533048
age_head 2.9820e-03 4.6737e-03 0.6380 0.523518
I(age_head^2) -3.2686e-05 8.3832e-05 -0.3899 0.696649
has_kids 6.3748e-03 5.1297e-03 1.2427 0.214115
head_works 9.7401e-03 1.2216e-02 0.7973 0.425354
female_head 7.1764e-03 1.6342e-02 0.4391 0.660613
work_female_interac -2.3990e-02 1.8810e-02 -1.2754 0.202316
has_fridge 2.0527e-03 4.1549e-03 0.4940 0.621326
has_electricity -1.0749e-02 1.2215e-02 -0.8800 0.378987
roof 2.8645e-03 7.6139e-03 0.3762 0.706798
wall -6.7881e-03 5.6716e-03 -1.1969 0.231500
floor 2.4108e-04 1.0396e-04 2.3190 0.020491 *
toilet 4.3299e-03 8.2460e-03 0.5251 0.599573
transportation -3.9431e-07 3.4215e-07 -1.1525 0.249264
food_outside -6.3721e-02 7.1690e-03 -8.8885 < 2.2e-16 ***
region2 4.2323e-03 2.9946e-03 1.4133 0.157721
region3 -1.6799e-03 3.4480e-03 -0.4872 0.626170
region4 -3.0278e-02 5.6537e-03 -5.3555 9.490e-08 ***
region5 -4.2543e-02 5.7344e-03 -7.4189 1.721e-13 ***
region6 -4.3305e-02 4.1176e-03 -10.5170 < 2.2e-16 ***
region7 -5.5646e-02 4.7320e-03 -11.7596 < 2.2e-16 ***
region8 -4.5083e-02 4.8592e-03 -9.2778 < 2.2e-16 ***
region9 -6.1272e-02 5.3895e-03 -11.3687 < 2.2e-16 ***
region10 -8.6047e-02 4.2935e-03 -20.0411 < 2.2e-16 ***
region11 -1.7271e-02 5.4201e-03 -3.1864 0.001462 **
region12 -6.0794e-02 4.6562e-03 -13.0566 < 2.2e-16 ***
region13 -7.8222e-02 8.6820e-03 -9.0097 < 2.2e-16 ***
region14 -5.9856e-02 4.1735e-03 -14.3420 < 2.2e-16 ***
region15 -7.9875e-02 7.9477e-03 -10.0501 < 2.2e-16 ***
region16 -4.8546e-02 4.7669e-03 -10.1841 < 2.2e-16 ***
region17 -5.1865e-02 5.6716e-03 -9.1447 < 2.2e-16 ***
building2 -2.5634e-03 1.5174e-02 -0.1689 0.865866
building3 -2.2445e-03 5.3903e-03 -0.4164 0.677163
building4 2.3288e-02 2.5712e-02 0.9057 0.365197
building5 2.1165e-01 2.4970e-02 8.4759 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
GVIF Df GVIF^(1/(2*Df))
water_source 1.863179 1 1.364983
water_income_interac 2.336852 1 1.528677
water_family_interac 2.149937 1 1.466266
log_income_c 3.840384 1 1.959690
family_size_c 2.470122 1 1.571662
educ_head 1.675782 1 1.294520
age_head 161.895849 1 12.723830
I(age_head^2) 161.903175 1 12.724118
has_kids 1.600608 1 1.265151
head_works 2.510244 1 1.584375
female_head 6.193916 1 2.488758
work_female_interac 5.384839 1 2.320526
has_fridge 1.561013 1 1.249405
has_electricity 1.394828 1 1.181028
roof 1.817525 1 1.348156
wall 1.835716 1 1.354886
floor 1.321589 1 1.149604
toilet 1.664863 1 1.290296
transportation 1.521619 1 1.233539
food_outside 1.766256 1 1.329006
region 3.741587 16 1.042097
building 1.366710 4 1.039823
bgtest(subsample_below_35)
Breusch-Godfrey test for serial correlation of order up to 1
data: subsample_below_35
LM test = 101.46, df = 1, p-value < 2.2e-16
bptest(subsample_below_35)
studentized Breusch-Pagan test
data: subsample_below_35
BP = 86.833, df = 40, p-value = 2.586e-05
resettest(subsample_below_35,
power = 2:3,
type = "fitted",
vcov = function(x) vcovCL(x, cluster = ~apis_subsample_below_35$region))
RESET test
data: subsample_below_35
RESET = 1.1849, df1 = 2, df2 = 2047, p-value = 0.306
hist(residuals(subsample_below_35), main="Histogram of Residuals", breaks=50)
#----------------------------------------------------------------------------------#
# mid-stage (35-64)
apis_subsample_35_64 <- apis_final %>%
filter(age_head >= 35 & age_head < 65)
subsample_35_64 <- lm(protein_share ~
water_source +
water_income_interac +
water_family_interac +
log_income_c +
family_size_c +
# control vec. 1
educ_head +
age_head +
I(age_head^2) +
has_kids +
# control vec. 2
head_works +
female_head +
work_female_interac +
# control vec. 3
has_fridge +
has_electricity +
roof +
wall +
floor +
toilet +
# control vec. 4
transportation +
food_outside +
# fixed effects
region +
building,
data = apis_subsample_35_64
)
summary(subsample_35_64)
Call:
lm(formula = protein_share ~ water_source + water_income_interac +
water_family_interac + log_income_c + family_size_c + educ_head +
age_head + I(age_head^2) + has_kids + head_works + female_head +
work_female_interac + has_fridge + has_electricity + roof +
wall + floor + toilet + transportation + food_outside + region +
building, data = apis_subsample_35_64)
Residuals:
Min 1Q Median 3Q Max
-0.28561 -0.06356 -0.00664 0.05634 0.43232
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.479e-01 4.189e-02 5.917 3.44e-09 ***
water_source 7.923e-03 2.860e-03 2.770 0.005618 **
water_income_interac -1.397e-02 3.604e-03 -3.875 0.000108 ***
water_family_interac 7.675e-03 1.077e-03 7.125 1.15e-12 ***
log_income_c 2.962e-02 3.063e-03 9.669 < 2e-16 ***
family_size_c -8.480e-03 7.932e-04 -10.691 < 2e-16 ***
educ_head 3.029e-04 3.294e-04 0.919 0.357899
age_head 1.123e-03 1.699e-03 0.661 0.508714
I(age_head^2) -1.018e-05 1.725e-05 -0.590 0.555172
has_kids 2.343e-03 2.603e-03 0.900 0.368006
head_works 4.780e-03 5.003e-03 0.955 0.339378
female_head 2.264e-03 6.762e-03 0.335 0.737748
work_female_interac -1.689e-02 7.575e-03 -2.230 0.025793 *
has_fridge 1.465e-02 2.978e-03 4.919 8.90e-07 ***
has_electricity -8.357e-03 4.627e-03 -1.806 0.070938 .
roof 1.420e-03 3.967e-03 0.358 0.720360
wall 5.637e-03 3.358e-03 1.679 0.093259 .
floor 8.621e-05 2.995e-05 2.878 0.004011 **
toilet 8.785e-03 3.688e-03 2.382 0.017251 *
transportation -4.074e-07 1.130e-07 -3.604 0.000316 ***
food_outside -4.818e-02 2.818e-03 -17.098 < 2e-16 ***
region2 3.384e-03 7.866e-03 0.430 0.667085
region3 1.070e-02 6.950e-03 1.539 0.123781
region4 -1.297e-02 7.600e-03 -1.706 0.087979 .
region5 -1.320e-02 7.655e-03 -1.724 0.084802 .
region6 -1.465e-02 7.238e-03 -2.024 0.043037 *
region7 -1.578e-02 7.622e-03 -2.071 0.038413 *
region8 -2.250e-02 7.362e-03 -3.057 0.002248 **
region9 -3.866e-02 8.135e-03 -4.751 2.06e-06 ***
region10 -5.755e-02 7.460e-03 -7.714 1.40e-14 ***
region11 2.126e-02 7.610e-03 2.794 0.005227 **
region12 -4.044e-02 7.994e-03 -5.059 4.34e-07 ***
region13 -4.277e-02 6.802e-03 -6.288 3.42e-10 ***
region14 -4.597e-02 7.449e-03 -6.171 7.18e-10 ***
region15 -4.718e-02 8.244e-03 -5.723 1.10e-08 ***
region16 -2.802e-02 7.703e-03 -3.638 0.000277 ***
region17 -1.353e-02 7.644e-03 -1.770 0.076761 .
building2 -7.751e-03 7.338e-03 -1.056 0.290867
building3 -1.375e-02 5.699e-03 -2.413 0.015848 *
building4 6.663e-03 2.405e-02 0.277 0.781732
building5 -2.365e-01 8.991e-02 -2.630 0.008553 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.08958 on 6484 degrees of freedom
Multiple R-squared: 0.1454, Adjusted R-squared: 0.1401
F-statistic: 27.57 on 40 and 6484 DF, p-value: < 2.2e-16
print(coeftest(subsample_35_64,
vcov = vcovCL(subsample_35_64, cluster = ~region)))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.4790e-01 4.9371e-02 5.0212 5.273e-07 ***
water_source 7.9232e-03 3.9073e-03 2.0278 0.0426202 *
water_income_interac -1.3967e-02 6.9531e-03 -2.0087 0.0446104 *
water_family_interac 7.6752e-03 1.5224e-03 5.0415 4.745e-07 ***
log_income_c 2.9617e-02 4.6358e-03 6.3889 1.787e-10 ***
family_size_c -8.4802e-03 9.5006e-04 -8.9259 < 2.2e-16 ***
educ_head 3.0290e-04 3.8263e-04 0.7916 0.4286090
age_head 1.1228e-03 2.1748e-03 0.5163 0.6056927
I(age_head^2) -1.0180e-05 2.2115e-05 -0.4603 0.6453020
has_kids 2.3431e-03 2.5216e-03 0.9292 0.3528187
head_works 4.7800e-03 5.7448e-03 0.8320 0.4054151
female_head 2.2642e-03 6.0811e-03 0.3723 0.7096509
work_female_interac -1.6890e-02 7.4528e-03 -2.2663 0.0234670 *
has_fridge 1.4651e-02 3.0189e-03 4.8531 1.244e-06 ***
has_electricity -8.3573e-03 5.1536e-03 -1.6216 0.1049326
roof 1.4203e-03 5.3031e-03 0.2678 0.7888412
wall 5.6368e-03 5.0084e-03 1.1255 0.2604297
floor 8.6209e-05 4.8245e-05 1.7869 0.0740008 .
toilet 8.7847e-03 5.8027e-03 1.5139 0.1301016
transportation -4.0736e-07 9.6683e-08 -4.2133 2.551e-05 ***
food_outside -4.8182e-02 4.7817e-03 -10.0763 < 2.2e-16 ***
region2 3.3839e-03 1.6563e-03 2.0431 0.0410861 *
region3 1.0698e-02 1.6009e-03 6.6827 2.540e-11 ***
region4 -1.2969e-02 2.1169e-03 -6.1263 9.525e-10 ***
region5 -1.3196e-02 2.5173e-03 -5.2420 1.638e-07 ***
region6 -1.4648e-02 2.1986e-03 -6.6622 2.919e-11 ***
region7 -1.5784e-02 2.4427e-03 -6.4620 1.108e-10 ***
region8 -2.2502e-02 2.9134e-03 -7.7236 1.302e-14 ***
region9 -3.8656e-02 3.2147e-03 -12.0245 < 2.2e-16 ***
region10 -5.7548e-02 2.5786e-03 -22.3176 < 2.2e-16 ***
region11 2.1260e-02 3.5261e-03 6.0294 1.737e-09 ***
region12 -4.0437e-02 3.3320e-03 -12.1360 < 2.2e-16 ***
region13 -4.2771e-02 4.5153e-03 -9.4725 < 2.2e-16 ***
region14 -4.5969e-02 2.3280e-03 -19.7458 < 2.2e-16 ***
region15 -4.7180e-02 6.0381e-03 -7.8136 6.439e-15 ***
region16 -2.8020e-02 2.6334e-03 -10.6400 < 2.2e-16 ***
region17 -1.3530e-02 2.9926e-03 -4.5211 6.261e-06 ***
building2 -7.7508e-03 6.4706e-03 -1.1978 0.2310236
building3 -1.3751e-02 4.1676e-03 -3.2995 0.0009737 ***
building4 6.6626e-03 3.3526e-02 0.1987 0.8424815
building5 -2.3648e-01 5.4317e-03 -43.5370 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
GVIF Df GVIF^(1/(2*Df))
water_source 1.650600 1 1.284757
water_income_interac 3.123177 1 1.767251
water_family_interac 1.933504 1 1.390505
log_income_c 4.393101 1 2.095973
family_size_c 2.424382 1 1.557043
educ_head 1.650063 1 1.284548
age_head 167.045181 1 12.924596
I(age_head^2) 166.778062 1 12.914258
has_kids 1.369089 1 1.170081
head_works 1.987893 1 1.409927
female_head 5.521773 1 2.349845
work_female_interac 5.253559 1 2.292064
has_fridge 1.683295 1 1.297419
has_electricity 1.319160 1 1.148547
roof 1.544416 1 1.242745
wall 1.667012 1 1.291128
floor 1.311287 1 1.145115
toilet 1.504700 1 1.226662
transportation 1.400562 1 1.183453
food_outside 1.608977 1 1.268455
region 2.662254 16 1.031072
building 1.195262 4 1.022546
Breusch-Godfrey test for serial correlation of order up to 1
data: subsample_35_64
LM test = 416.84, df = 1, p-value < 2.2e-16
studentized Breusch-Pagan test
data: subsample_35_64
BP = 207.86, df = 40, p-value < 2.2e-16
resettest(subsample_35_64,
power = 2:3,
type = "fitted",
vcov = function(x) vcovCL(x, cluster = ~apis_subsample_35_64$region))
RESET test
data: subsample_35_64
RESET = 0.49144, df1 = 2, df2 = 6484, p-value = 0.6118
hist(residuals(subsample_35_64), main="Histogram of Residuals", breaks=50)
#----------------------------------------------------------------------------------#
# elderly/retired (65+)
apis_subsample_65_or_above <- apis_final %>%
filter(age_head >= 65)
subsample_65_or_above <- lm(protein_share ~
water_source +
water_income_interac +
water_family_interac +
log_income_c +
family_size_c +
# control vec. 1
educ_head +
age_head +
I(age_head^2) +
has_kids +
# control vec. 2
head_works +
female_head +
work_female_interac +
# control vec. 3
has_fridge +
has_electricity +
roof +
wall +
floor +
toilet +
# control vec. 4
transportation +
food_outside +
# fixed effects
region +
building,
data = apis_subsample_65_or_above
)
summary(subsample_65_or_above)
Call:
lm(formula = protein_share ~ water_source + water_income_interac +
water_family_interac + log_income_c + family_size_c + educ_head +
age_head + I(age_head^2) + has_kids + head_works + female_head +
work_female_interac + has_fridge + has_electricity + roof +
wall + floor + toilet + transportation + food_outside + region +
building, data = apis_subsample_65_or_above)
Residuals:
Min 1Q Median 3Q Max
-0.21940 -0.06541 -0.00416 0.05660 0.47955
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.919e-01 2.663e-01 2.222 0.026423 *
water_source 9.785e-03 6.480e-03 1.510 0.131293
water_income_interac -1.040e-02 7.574e-03 -1.373 0.169989
water_family_interac 4.569e-03 2.643e-03 1.729 0.084124 .
log_income_c 1.549e-02 6.280e-03 2.467 0.013754 *
family_size_c -4.033e-03 2.166e-03 -1.862 0.062860 .
educ_head 6.435e-04 6.644e-04 0.969 0.332889
age_head -8.889e-03 7.026e-03 -1.265 0.206054
I(age_head^2) 5.863e-05 4.631e-05 1.266 0.205705
has_kids 3.958e-03 7.025e-03 0.563 0.573247
head_works 1.214e-02 6.774e-03 1.792 0.073429 .
female_head -5.012e-03 6.771e-03 -0.740 0.459280
work_female_interac -5.166e-03 1.044e-02 -0.495 0.620660
has_fridge 1.229e-02 6.664e-03 1.844 0.065454 .
has_electricity -4.003e-03 1.028e-02 -0.389 0.697062
roof 1.737e-02 9.487e-03 1.831 0.067295 .
wall -4.708e-03 8.303e-03 -0.567 0.570815
floor -1.404e-06 5.710e-05 -0.025 0.980385
toilet 9.125e-03 8.604e-03 1.061 0.289064
transportation 2.874e-07 3.422e-07 0.840 0.401069
food_outside -3.994e-02 6.362e-03 -6.278 4.61e-10 ***
region2 -8.223e-03 1.555e-02 -0.529 0.596914
region3 1.084e-02 1.438e-02 0.754 0.451063
region4 -5.359e-03 1.645e-02 -0.326 0.744710
region5 -2.005e-02 1.473e-02 -1.361 0.173833
region6 -2.542e-02 1.392e-02 -1.827 0.067966 .
region7 -1.941e-02 1.516e-02 -1.280 0.200593
region8 -3.641e-02 1.448e-02 -2.515 0.012022 *
region9 -2.286e-02 1.679e-02 -1.361 0.173588
region10 -4.285e-02 1.466e-02 -2.924 0.003518 **
region11 1.166e-02 1.567e-02 0.744 0.456772
region12 -6.472e-02 1.726e-02 -3.750 0.000184 ***
region13 -4.828e-02 1.416e-02 -3.410 0.000670 ***
region14 -5.468e-02 1.444e-02 -3.787 0.000160 ***
region15 -6.454e-02 1.908e-02 -3.382 0.000742 ***
region16 -3.486e-02 1.538e-02 -2.267 0.023543 *
region17 -2.343e-02 1.540e-02 -1.521 0.128447
building2 1.183e-04 2.201e-02 0.005 0.995712
building3 2.035e-02 1.689e-02 1.205 0.228608
building4 -7.277e-02 6.519e-02 -1.116 0.264468
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.09058 on 1341 degrees of freedom
Multiple R-squared: 0.1091, Adjusted R-squared: 0.08323
F-statistic: 4.213 on 39 and 1341 DF, p-value: 4.163e-16
print(coeftest(subsample_65_or_above,
vcov = vcovCL(subsample_65_or_above, cluster = ~region)))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.9187e-01 2.7055e-01 2.1876 0.0288701 *
water_source 9.7850e-03 7.7467e-03 1.2631 0.2067646
water_income_interac -1.0399e-02 8.9397e-03 -1.1632 0.2449607
water_family_interac 4.5685e-03 2.2825e-03 2.0016 0.0455294 *
log_income_c 1.5493e-02 7.2973e-03 2.1230 0.0339325 *
family_size_c -4.0330e-03 2.2572e-03 -1.7867 0.0742081 .
educ_head 6.4355e-04 7.5856e-04 0.8484 0.3963756
age_head -8.8886e-03 7.2268e-03 -1.2299 0.2189324
I(age_head^2) 5.8630e-05 4.8630e-05 1.2056 0.2281661
has_kids 3.9580e-03 7.7031e-03 0.5138 0.6074664
head_works 1.2136e-02 7.1676e-03 1.6931 0.0906653 .
female_head -5.0122e-03 6.5274e-03 -0.7679 0.4426924
work_female_interac -5.1658e-03 1.0782e-02 -0.4791 0.6319287
has_fridge 1.2285e-02 5.3567e-03 2.2934 0.0219784 *
has_electricity -4.0034e-03 1.2866e-02 -0.3112 0.7557182
roof 1.7373e-02 8.7047e-03 1.9958 0.0461552 *
wall -4.7077e-03 7.3088e-03 -0.6441 0.5196108
floor -1.4042e-06 5.1158e-05 -0.0274 0.9781070
toilet 9.1252e-03 1.3853e-02 0.6587 0.5101970
transportation 2.8743e-07 3.6614e-07 0.7850 0.4325857
food_outside -3.9944e-02 5.4976e-03 -7.2658 6.272e-13 ***
region2 -8.2235e-03 1.8245e-03 -4.5071 7.143e-06 ***
region3 1.0841e-02 2.8535e-03 3.7993 0.0001516 ***
region4 -5.3592e-03 3.8540e-03 -1.3905 0.1645948
region5 -2.0049e-02 2.7075e-03 -7.4048 2.312e-13 ***
region6 -2.5422e-02 2.8282e-03 -8.9889 < 2.2e-16 ***
region7 -1.9410e-02 3.4054e-03 -5.6998 1.473e-08 ***
region8 -3.6414e-02 2.5644e-03 -14.2002 < 2.2e-16 ***
region9 -2.2857e-02 3.5647e-03 -6.4121 1.985e-10 ***
region10 -4.2850e-02 2.5577e-03 -16.7533 < 2.2e-16 ***
region11 1.1664e-02 4.3090e-03 2.7069 0.0068773 **
region12 -6.4724e-02 2.9585e-03 -21.8774 < 2.2e-16 ***
region13 -4.8277e-02 6.0897e-03 -7.9277 4.663e-15 ***
region14 -5.4678e-02 1.9716e-03 -27.7334 < 2.2e-16 ***
region15 -6.4537e-02 9.6921e-03 -6.6587 4.018e-11 ***
region16 -3.4859e-02 2.7297e-03 -12.7701 < 2.2e-16 ***
region17 -2.3433e-02 2.9438e-03 -7.9602 3.632e-15 ***
building2 1.1832e-04 1.9486e-02 0.0061 0.9951562
building3 2.0347e-02 1.9926e-02 1.0212 0.3073666
building4 -7.2771e-02 5.9210e-02 -1.2290 0.2192777
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vif(subsample_65_or_above)
GVIF Df GVIF^(1/(2*Df))
water_source 1.756690 1 1.325402
water_income_interac 3.386435 1 1.840227
water_family_interac 2.784816 1 1.668777
log_income_c 5.413928 1 2.326785
family_size_c 3.589774 1 1.894670
educ_head 1.678960 1 1.295747
age_head 330.786716 1 18.187543
I(age_head^2) 329.522195 1 18.152746
has_kids 1.398923 1 1.182761
head_works 1.913571 1 1.383319
female_head 1.850646 1 1.360385
work_female_interac 2.222844 1 1.490920
has_fridge 1.776258 1 1.332763
has_electricity 1.443764 1 1.201567
roof 1.676710 1 1.294878
wall 1.778699 1 1.333679
floor 1.464843 1 1.210307
toilet 1.637968 1 1.279831
transportation 1.597129 1 1.263776
food_outside 1.641623 1 1.281258
region 2.903506 16 1.033871
building 1.214310 3 1.032892
bgtest(subsample_65_or_above)
Breusch-Godfrey test for serial correlation of order up to 1
data: subsample_65_or_above
LM test = 44.557, df = 1, p-value = 2.471e-11
bptest(subsample_65_or_above)
studentized Breusch-Pagan test
data: subsample_65_or_above
BP = 56.018, df = 39, p-value = 0.03796
resettest(subsample_65_or_above,
power = 2:3,
type = "fitted",
vcov = function(x) vcovCL(x, cluster = ~apis_subsample_65_or_above$region))
RESET test
data: subsample_65_or_above
RESET = 1.6071, df1 = 2, df2 = 1341, p-value = 0.2009
hist(residuals(subsample_65_or_above), main="Histogram of Residuals", breaks=50)
#----------------------------------------------------------------------------------#
# presenting the comparison
# early-stage (<35)
subsample_below_35 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac +
female_head + work_female_interac + log_income_c + educ_head + age_head +
I(age_head^2) + family_size_c + has_kids + head_works + has_fridge +
has_electricity + roof + wall + floor + toilet + transportation +
food_outside + region + building, data = apis_subsample_below_35)
# mid-stage (35-64)
subsample_35_64 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac +
female_head + work_female_interac + log_income_c + educ_head + age_head +
I(age_head^2) + family_size_c + has_kids + head_works + has_fridge +
has_electricity + roof + wall + floor + toilet + transportation +
food_outside + region + building, data = apis_subsample_35_64)
# senior-stage (65+)
subsample_above_65 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac +
female_head + work_female_interac + log_income_c + educ_head + age_head +
I(age_head^2) + family_size_c + has_kids + head_works + has_fridge +
has_electricity + roof + wall + floor + toilet + transportation +
food_outside + region + building, data = apis_subsample_65_or_above)
# extract standard errors (robust/clustered)
se_below_35 <- coeftest(subsample_below_35, vcov = vcovCL(subsample_below_35, cluster = ~region))[, 2]
se_35_64 <- coeftest(subsample_35_64, vcov = vcovCL(subsample_35_64, cluster = ~region))[, 2]
se_above_65 <- coeftest(subsample_65_or_above, vcov = vcovCL(subsample_65_or_above, cluster = ~region))[, 2]
# generate the final table
stargazer(subsample_below_35, subsample_35_64, subsample_65_or_above,
type = "text",
se = list(se_below_35, se_35_64, se_above_65),
intercept.bottom = FALSE,
digits = 4,
no.space = TRUE,
order = c("Constant", "water_source", "water_income_interac", "water_family_interac",
"female_head", "work_female_interac"),
omit = c("region", "building"),
column.labels = c("Early (<35)", "Mid (35-64)", "Senior (65+)"),
add.lines = list(
c("Region Fixed Effects", "Yes", "Yes", "Yes"),
c("Building Fixed Effects", "Yes", "Yes", "Yes")
),
dep.var.caption = "Dependent variable: Protein share",
dep.var.labels.include = FALSE,
model.numbers = TRUE,
star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
star.char = c("+", "*", "**", "***"),
notes = c("Clustered standard errors (at Region level) reported in parentheses.",
"Significance: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001"),
notes.append = FALSE)
======================================================================================================
Dependent variable: Protein share
-------------------------------------------------------------------------------
Early (<35) Mid (35-64) Senior (65+)
(1) (2) (3)
------------------------------------------------------------------------------------------------------
Constant 0.2536*** 0.2479*** 0.5919*
(0.0628) (0.0494) (0.2706)
water_source 0.0134* 0.0079* 0.0098
(0.0066) (0.0039) (0.0077)
water_income_interac -0.0168 -0.0140* -0.0104
(0.0134) (0.0070) (0.0089)
water_family_interac 0.0134** 0.0077*** 0.0046*
(0.0051) (0.0015) (0.0023)
female_head 0.0072 0.0023 -0.0050
(0.0163) (0.0061) (0.0065)
work_female_interac -0.0240 -0.0169* -0.0052
(0.0188) (0.0075) (0.0108)
log_income_c 0.0340*** 0.0296*** 0.0155*
(0.0075) (0.0046) (0.0073)
educ_head -0.0006 0.0003 0.0006
(0.0009) (0.0004) (0.0008)
age_head 0.0030 0.0011 -0.0089
(0.0047) (0.0022) (0.0072)
I(age_head2) -0.00003 -0.00001 0.0001
(0.0001) (0.00002) (0.00005)
family_size_c -0.0096*** -0.0085*** -0.0040+
(0.0022) (0.0010) (0.0023)
has_kids 0.0064 0.0023 0.0040
(0.0051) (0.0025) (0.0077)
head_works 0.0097 0.0048 0.0121+
(0.0122) (0.0057) (0.0072)
has_fridge 0.0021 0.0147*** 0.0123*
(0.0042) (0.0030) (0.0054)
has_electricity -0.0107 -0.0084 -0.0040
(0.0122) (0.0052) (0.0129)
roof 0.0029 0.0014 0.0174*
(0.0076) (0.0053) (0.0087)
wall -0.0068 0.0056 -0.0047
(0.0057) (0.0050) (0.0073)
floor 0.0002* 0.0001+ -0.000001
(0.0001) (0.00005) (0.0001)
toilet 0.0043 0.0088 0.0091
(0.0082) (0.0058) (0.0139)
transportation -0.000000 -0.000000*** 0.000000
(0.000000) (0.000000) (0.000000)
food_outside -0.0637*** -0.0482*** -0.0399***
(0.0072) (0.0048) (0.0055)
------------------------------------------------------------------------------------------------------
Region Fixed Effects Yes Yes Yes
Building Fixed Effects Yes Yes Yes
Observations 2,088 6,525 1,381
R2 0.1805 0.1454 0.1091
Adjusted R2 0.1645 0.1401 0.0832
Residual Std. Error 0.0907 (df = 2047) 0.0896 (df = 6484) 0.0906 (df = 1341)
F Statistic 11.2737*** (df = 40; 2047) 27.5746*** (df = 40; 6484) 4.2126*** (df = 39; 1341)
======================================================================================================
Note: Clustered standard errors (at Region level) reported in parentheses.
Significance: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001
#----------------------------------------------------------------------------------#
# robustness test for early-stage (below 35)
y1 <- lm(protein_share ~ water_source + region + building,
data = apis_subsample_below_35)
y2 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac +
log_income_c + family_size_c + educ_head + age_head + I(age_head^2) + has_kids +
region + building, data = apis_subsample_below_35)
y3 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac +
log_income_c + family_size_c + educ_head + age_head + I(age_head^2) + has_kids +
head_works + female_head + work_female_interac + has_fridge + has_electricity +
region + building, data = apis_subsample_below_35)
y4 <- subsample_below_35 # full model
se_y1 <- coeftest(y1, vcov = vcovCL(y1, cluster = ~region))[, 2]
se_y2 <- coeftest(y2, vcov = vcovCL(y2, cluster = ~region))[, 2]
se_y3 <- coeftest(y3, vcov = vcovCL(y3, cluster = ~region))[, 2]
se_y4 <- se_below_35
stargazer(y1, y2, y3, y4,
type = "text",
se = list(se_y1, se_y2, se_y3, se_y4),
intercept.bottom = FALSE,
digits = 4,
no.space = TRUE,
order = c("Constant", "water_source", "water_income_interac", "water_family_interac",
"log_income_c", "family_size", "educ_head", "age_head", "I(age_head^2)",
"has_kids", "head_works", "female_head", "work_female_interac",
"has_fridge", "has_electricity", "transportation", "food_outside",
"roof", "wall", "floor", "toilet"),
omit = c("region", "building"),
column.labels = c("Bivariate", "Socioecon", "Time Poverty", "Full"),
star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
star.char = c("+", "*", "**", "***"),
dep.var.caption = "Dependent variable: Protein share (head below 35)",
dep.var.labels.include = FALSE,
notes = "+ p<0.1; * p<0.05; ** p<0.01; *** p<0.001",
notes.append = FALSE)
=============================================================================================================================
Dependent variable: Protein share (head below 35)
--------------------------------------------------------------------------------------------------------
Bivariate Socioecon Time Poverty Full
(1) (2) (3) (4)
-----------------------------------------------------------------------------------------------------------------------------
Constant 0.2817*** 0.1457+ 0.1555* 0.2536***
(0.0013) (0.0772) (0.0753) (0.0628)
water_source 0.0074 0.0104 0.0113 0.0134*
(0.0067) (0.0083) (0.0080) (0.0066)
water_income_interac -0.0114 -0.0139 -0.0168
(0.0123) (0.0122) (0.0134)
water_family_interac 0.0146** 0.0141** 0.0134**
(0.0050) (0.0049) (0.0051)
log_income_c 0.0185** 0.0201** 0.0340***
(0.0065) (0.0067) (0.0075)
family_size_c -0.0085*** -0.0089*** -0.0096***
(0.0025) (0.0026) (0.0022)
educ_head -0.0011 -0.0008 -0.0006
(0.0009) (0.0010) (0.0009)
age_head 0.0093+ 0.0085 0.0030
(0.0056) (0.0057) (0.0047)
I(age_head2) -0.0001 -0.0001 -0.00003
(0.0001) (0.0001) (0.0001)
has_kids 0.0023 0.0042 0.0064
(0.0054) (0.0055) (0.0051)
head_works 0.0112 0.0097
(0.0121) (0.0122)
female_head 0.0056 0.0072
(0.0160) (0.0163)
work_female_interac -0.0266 -0.0240
(0.0178) (0.0188)
has_fridge 0.0055 0.0021
(0.0042) (0.0042)
has_electricity -0.0133 -0.0107
(0.0149) (0.0122)
transportation -0.000000
(0.000000)
food_outside -0.0637***
(0.0072)
roof 0.0029
(0.0076)
wall -0.0068
(0.0057)
floor 0.0002*
(0.0001)
toilet 0.0043
(0.0082)
-----------------------------------------------------------------------------------------------------------------------------
Observations 2,088 2,088 2,088 2,088
R2 0.0908 0.1115 0.1167 0.1805
Adjusted R2 0.0816 0.0990 0.1020 0.1645
Residual Std. Error 0.0951 (df = 2066) 0.0942 (df = 2058) 0.0941 (df = 2053) 0.0907 (df = 2047)
F Statistic 9.8303*** (df = 21; 2066) 8.9084*** (df = 29; 2058) 7.9748*** (df = 34; 2053) 11.2737*** (df = 40; 2047)
=============================================================================================================================
Note: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001
# robustness test for mid-stage (35-64)
m1 <- lm(protein_share ~ water_source + region + building,
data = apis_subsample_35_64)
m2 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac +
log_income_c + family_size_c + educ_head + age_head + I(age_head^2) + has_kids +
region + building, data = apis_subsample_35_64)
m3 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac +
log_income_c + family_size_c + educ_head + age_head + I(age_head^2) + has_kids +
head_works + female_head + work_female_interac + has_fridge + has_electricity +
region + building, data = apis_subsample_35_64)
m4 <- subsample_35_64 # full model
se_m1 <- coeftest(m1, vcov = vcovCL(m1, cluster = ~region))[, 2]
se_m2 <- coeftest(m2, vcov = vcovCL(m2, cluster = ~region))[, 2]
se_m3 <- coeftest(m3, vcov = vcovCL(m3, cluster = ~region))[, 2]
se_m4 <- se_35_64
stargazer(m1, m2, m3, m4,
type = "text",
se = list(se_m1, se_m2, se_m3, se_m4),
intercept.bottom = FALSE,
digits = 4,
no.space = TRUE,
order = c("Constant", "water_source", "water_income_interac", "water_family_interac",
"log_income_c", "family_size", "educ_head", "age_head", "I(age_head^2)",
"has_kids", "head_works", "female_head", "work_female_interac",
"has_fridge", "has_electricity", "transportation", "food_outside",
"roof", "wall", "floor", "toilet"),
omit = c("region", "building"),
column.labels = c("Bivariate", "Socioecon", "Time Poverty", "Full"),
star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
star.char = c("+", "*", "**", "***"),
dep.var.caption = "Dependent variable: Protein share (head 35-64)",
dep.var.labels.include = FALSE,
notes = "+ p<0.1; * p<0.05; ** p<0.01; *** p<0.001",
notes.append = FALSE)
================================================================================================================================
Dependent variable: Protein share (head 35-64)
-----------------------------------------------------------------------------------------------------------
Bivariate Socioecon Time Poverty Full
(1) (2) (3) (4)
--------------------------------------------------------------------------------------------------------------------------------
Constant 0.2667*** 0.2476*** 0.2413*** 0.2479***
(0.0013) (0.0515) (0.0518) (0.0494)
water_source 0.0224*** 0.0092* 0.0077+ 0.0079*
(0.0046) (0.0045) (0.0043) (0.0039)
water_income_interac -0.0119+ -0.0132+ -0.0140*
(0.0062) (0.0069) (0.0070)
water_family_interac 0.0074*** 0.0073*** 0.0077***
(0.0016) (0.0016) (0.0015)
log_income_c 0.0223*** 0.0187*** 0.0296***
(0.0041) (0.0052) (0.0046)
family_size_c -0.0096*** -0.0094*** -0.0085***
(0.0010) (0.0010) (0.0010)
educ_head 0.0004 0.0003 0.0003
(0.0004) (0.0004) (0.0004)
age_head 0.0004 0.0005 0.0011
(0.0022) (0.0021) (0.0022)
I(age_head2) -0.000001 -0.000002 -0.00001
(0.00002) (0.00002) (0.00002)
has_kids 0.0021 0.0018 0.0023
(0.0026) (0.0026) (0.0025)
head_works 0.0051 0.0048
(0.0057) (0.0057)
female_head 0.0035 0.0023
(0.0061) (0.0061)
work_female_interac -0.0194* -0.0169*
(0.0077) (0.0075)
has_fridge 0.0153*** 0.0147***
(0.0030) (0.0030)
has_electricity -0.0050 -0.0084
(0.0042) (0.0052)
transportation -0.000000***
(0.000000)
food_outside -0.0482***
(0.0048)
roof 0.0014
(0.0053)
wall 0.0056
(0.0050)
floor 0.0001+
(0.00005)
toilet 0.0088
(0.0058)
--------------------------------------------------------------------------------------------------------------------------------
Observations 6,525 6,525 6,525 6,525
R2 0.0650 0.0963 0.1026 0.1454
Adjusted R2 0.0619 0.0923 0.0979 0.1401
Residual Std. Error 0.0936 (df = 6503) 0.0920 (df = 6495) 0.0918 (df = 6490) 0.0896 (df = 6484)
F Statistic 21.5154*** (df = 21; 6503) 23.8700*** (df = 29; 6495) 21.8311*** (df = 34; 6490) 27.5746*** (df = 40; 6484)
================================================================================================================================
Note: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001
# robustness test for senior-stage (65+)
s1 <- lm(protein_share ~ water_source + region + building,
data = apis_subsample_65_or_above)
s2 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac +
log_income_c + family_size_c + educ_head + age_head + I(age_head^2) + has_kids +
region + building, data = apis_subsample_65_or_above)
s3 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac +
log_income_c + family_size_c + educ_head + age_head + I(age_head^2) + has_kids +
head_works + female_head + work_female_interac + has_fridge + has_electricity +
region + building, data = apis_subsample_65_or_above)
s4 <- subsample_above_65 # full model
se_s1 <- coeftest(s1, vcov = vcovCL(s1, cluster = ~region))[, 2]
se_s2 <- coeftest(s2, vcov = vcovCL(s2, cluster = ~region))[, 2]
se_s3 <- coeftest(s3, vcov = vcovCL(s3, cluster = ~region))[, 2]
se_s4 <- se_above_65
stargazer(s1, s2, s3, s4,
type = "text",
se = list(se_s1, se_s2, se_s3, se_s4),
intercept.bottom = FALSE,
digits = 4,
no.space = TRUE,
order = c("Constant", "water_source", "water_income_interac", "water_family_interac",
"log_income_c", "family_size", "educ_head", "age_head", "I(age_head^2)",
"has_kids", "head_works", "female_head", "work_female_interac",
"has_fridge", "has_electricity", "transportation", "food_outside",
"roof", "wall", "floor", "toilet"),
omit = c("region", "building"),
column.labels = c("Bivariate", "Socioecon", "Time Poverty", "Full"),
star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
star.char = c("+", "*", "**", "***"),
dep.var.caption = "Dependent variable: Protein share (head 65+)",
dep.var.labels.include = FALSE,
notes = "+ p<0.1; * p<0.05; ** p<0.01; *** p<0.001",
notes.append = FALSE)
============================================================================================================================
Dependent variable: Protein share (head 65+)
-------------------------------------------------------------------------------------------------------
Bivariate Socioecon Time Poverty Full
(1) (2) (3) (4)
----------------------------------------------------------------------------------------------------------------------------
Constant 0.2694*** 0.6413* 0.5733* 0.5919*
(0.0016) (0.2823) (0.2719) (0.2706)
water_source 0.0130+ 0.0060 0.0065 0.0098
(0.0067) (0.0077) (0.0079) (0.0077)
water_income_interac -0.0101 -0.0103 -0.0104
(0.0081) (0.0076) (0.0089)
water_family_interac 0.0034 0.0035 0.0046*
(0.0022) (0.0022) (0.0023)
log_income_c 0.0155** 0.0112+ 0.0155*
(0.0058) (0.0063) (0.0073)
family_size_c -0.0045* -0.0046* -0.0040+
(0.0021) (0.0022) (0.0023)
educ_head 0.0006 0.0006 0.0006
(0.0008) (0.0008) (0.0008)
age_head -0.0097 -0.0086 -0.0089
(0.0075) (0.0072) (0.0072)
I(age_head2) 0.0001 0.0001 0.0001
(0.0001) (0.00005) (0.00005)
has_kids 0.00005 0.0021 0.0040
(0.0071) (0.0070) (0.0077)
head_works 0.0118 0.0121+
(0.0074) (0.0072)
female_head -0.0079 -0.0050
(0.0065) (0.0065)
work_female_interac -0.0020 -0.0052
(0.0110) (0.0108)
has_fridge 0.0130* 0.0123*
(0.0056) (0.0054)
has_electricity 0.0043 -0.0040
(0.0120) (0.0129)
transportation 0.000000
(0.000000)
food_outside -0.0399***
(0.0055)
roof 0.0174*
(0.0087)
wall -0.0047
(0.0073)
floor -0.000001
(0.0001)
toilet 0.0091
(0.0139)
----------------------------------------------------------------------------------------------------------------------------
Observations 1,381 1,381 1,381 1,381
R2 0.0610 0.0726 0.0800 0.1091
Adjusted R2 0.0472 0.0534 0.0574 0.0832
Residual Std. Error 0.0923 (df = 1360) 0.0920 (df = 1352) 0.0918 (df = 1347) 0.0906 (df = 1341)
F Statistic 4.4200*** (df = 20; 1360) 3.7793*** (df = 28; 1352) 3.5488*** (df = 33; 1347) 4.2126*** (df = 39; 1341)
============================================================================================================================
Note: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001
# primary analysis
vif(bennett_ols_protein)
GVIF Df GVIF^(1/(2*Df))
water_source 1.626700 1 1.275422
water_income_interac 3.221646 1 1.794895
water_family_interac 2.006816 1 1.416621
log_income_c 4.511720 1 2.124081
family_size_c 2.588251 1 1.608804
educ_head 1.694844 1 1.301862
age_head 38.545977 1 6.208541
I(age_head^2) 40.176003 1 6.338454
has_kids 1.345954 1 1.160153
head_works 2.204835 1 1.484869
female_head 3.972155 1 1.993027
work_female_interac 3.726701 1 1.930467
has_fridge 1.779105 1 1.333831
has_electricity 1.347734 1 1.160919
roof 1.621014 1 1.273191
wall 1.718226 1 1.310811
floor 1.383571 1 1.176253
toilet 1.547047 1 1.243804
aircon 1.648638 1 1.283993
telephone 1.345900 1 1.160129
computer 1.770477 1 1.330593
transportation 1.427557 1 1.194804
food_outside 1.630232 1 1.276805
region 2.743580 16 1.032042
building 1.209854 4 1.024098
bgtest(bennett_ols_protein)
Breusch-Godfrey test for serial correlation of order up to 1
data: bennett_ols_protein
LM test = 707.67, df = 1, p-value < 2.2e-16
bptest(bennett_ols_protein)
studentized Breusch-Pagan test
data: bennett_ols_protein
BP = 292.63, df = 43, p-value < 2.2e-16
resettest(bennett_ols_protein,
power = 2:3,
type = "fitted",
vcov = function(x) vcovCL(x, cluster = ~apis_final$region))
RESET test
data: bennett_ols_protein
RESET = 0.45302, df1 = 2, df2 = 9950, p-value = 0.6357
hist(residuals(bennett_ols_protein), main="Histogram of Residuals", breaks=50)
# falsification test
vif(bennett_ols_bread)
GVIF Df GVIF^(1/(2*Df))
water_source 1.623243 1 1.274065
water_income_interac 2.969923 1 1.723347
water_family_interac 1.991939 1 1.411361
log_income_c 4.429597 1 2.104661
family_size_c 2.584313 1 1.607580
educ_head 1.669494 1 1.292089
age_head 38.521056 1 6.206533
I(age_head^2) 40.128468 1 6.334703
has_kids 1.344721 1 1.159621
head_works 2.197742 1 1.482478
female_head 3.969691 1 1.992408
work_female_interac 3.725754 1 1.930221
has_fridge 1.690478 1 1.300184
has_electricity 1.345908 1 1.160133
roof 1.620328 1 1.272921
wall 1.717634 1 1.310585
floor 1.353128 1 1.163240
toilet 1.546075 1 1.243413
transportation 1.418134 1 1.190854
food_outside 1.624596 1 1.274596
region 2.625678 16 1.030626
building 1.206677 4 1.023762
bgtest(bennett_ols_bread)
Breusch-Godfrey test for serial correlation of order up to 1
data: bennett_ols_bread
LM test = 571.12, df = 1, p-value < 2.2e-16
bptest(bennett_ols_bread)
studentized Breusch-Pagan test
data: bennett_ols_bread
BP = 651.39, df = 40, p-value < 2.2e-16
resettest(bennett_ols_bread,
power = 2:3,
type = "fitted",
vcov = function(x) vcovCL(x, cluster = ~apis_final$region))
RESET test
data: bennett_ols_bread
RESET = 34.925, df1 = 2, df2 = 9953, p-value = 7.678e-16
hist(residuals(bennett_ols_bread), main="Histogram of Residuals", breaks=50)
# female subsample
vif(subsample_female)
GVIF Df GVIF^(1/(2*Df))
water_source 1.691807 1 1.300695
water_income_interac 3.554955 1 1.885459
water_family_interac 2.942803 1 1.715460
log_income_c 5.582345 1 2.362699
family_size_c 3.380519 1 1.838619
educ_head 1.834611 1 1.354478
age_head 1.495928 1 1.223082
has_kids 1.340753 1 1.157909
head_works 1.131093 1 1.063528
has_fridge 1.673408 1 1.293603
has_electricity 1.345387 1 1.159908
roof 1.612160 1 1.269709
wall 1.658201 1 1.287712
floor 1.378329 1 1.174022
toilet 1.410944 1 1.187832
transportation 1.600071 1 1.264939
food_outside 1.609005 1 1.268466
region 2.499325 16 1.029039
building 1.223044 3 1.034127
Breusch-Godfrey test for serial correlation of order up to 1
data: subsample_female
LM test = 60.681, df = 1, p-value = 6.711e-15
studentized Breusch-Pagan test
data: subsample_female
BP = 71.792, df = 36, p-value = 0.0003603
resettest(subsample_female,
power = 2:3,
type = "fitted",
vcov = function(x) vcovCL(x, cluster = ~apis_subsample_female$region))
RESET test
data: subsample_female
RESET = 2.8156, df1 = 2, df2 = 1964, p-value = 0.06011
hist(residuals(subsample_female), main="Histogram of Residuals", breaks=50)
# male subsample
vif(subsample_male)
GVIF Df GVIF^(1/(2*Df))
water_source 1.616794 1 1.271532
water_income_interac 2.852839 1 1.689035
water_family_interac 1.866466 1 1.366187
log_income_c 4.214313 1 2.052879
family_size_c 2.412280 1 1.553152
educ_head 1.631705 1 1.277382
age_head 40.039873 1 6.327707
I(age_head^2) 41.610239 1 6.450600
has_kids 1.347899 1 1.160991
head_works 1.337867 1 1.156662
has_fridge 1.674965 1 1.294204
has_electricity 1.348411 1 1.161211
roof 1.615900 1 1.271181
wall 1.724114 1 1.313055
floor 1.345329 1 1.159883
toilet 1.567900 1 1.252158
transportation 1.400192 1 1.183297
food_outside 1.635357 1 1.278811
region 2.738622 16 1.031984
building 1.211424 4 1.024264
Breusch-Godfrey test for serial correlation of order up to 1
data: subsample_male
LM test = 573, df = 1, p-value < 2.2e-16
studentized Breusch-Pagan test
data: subsample_male
BP = 248.9, df = 38, p-value < 2.2e-16
resettest(subsample_male,
power = 2:3,
type = "fitted",
vcov = function(x) vcovCL(x, cluster = ~apis_subsample_male$region))
RESET test
data: subsample_male
RESET = 0.71522, df1 = 2, df2 = 7954, p-value = 0.4891
hist(residuals(subsample_male), main="Histogram of Residuals", breaks=50)
# below 35 subsample
vif(subsample_below_35)
GVIF Df GVIF^(1/(2*Df))
water_source 1.863179 1 1.364983
water_income_interac 2.336852 1 1.528677
water_family_interac 2.149937 1 1.466266
female_head 6.193916 1 2.488758
work_female_interac 5.384839 1 2.320526
log_income_c 3.840384 1 1.959690
educ_head 1.675782 1 1.294520
age_head 161.895849 1 12.723830
I(age_head^2) 161.903175 1 12.724118
family_size_c 2.470122 1 1.571662
has_kids 1.600608 1 1.265151
head_works 2.510244 1 1.584375
has_fridge 1.561013 1 1.249405
has_electricity 1.394828 1 1.181028
roof 1.817525 1 1.348156
wall 1.835716 1 1.354886
floor 1.321589 1 1.149604
toilet 1.664863 1 1.290296
transportation 1.521619 1 1.233539
food_outside 1.766256 1 1.329006
region 3.741587 16 1.042097
building 1.366710 4 1.039823
bgtest(subsample_below_35)
Breusch-Godfrey test for serial correlation of order up to 1
data: subsample_below_35
LM test = 101.46, df = 1, p-value < 2.2e-16
bptest(subsample_below_35)
studentized Breusch-Pagan test
data: subsample_below_35
BP = 86.833, df = 40, p-value = 2.586e-05
resettest(subsample_below_35,
power = 2:3,
type = "fitted",
vcov = function(x) vcovCL(x, cluster = ~apis_subsample_below_35$region))
RESET test
data: subsample_below_35
RESET = 1.1849, df1 = 2, df2 = 2047, p-value = 0.306
hist(residuals(subsample_below_35), main="Histogram of Residuals", breaks=50)
# 35-64 subsample
vif(subsample_35_64)
GVIF Df GVIF^(1/(2*Df))
water_source 1.650600 1 1.284757
water_income_interac 3.123177 1 1.767251
water_family_interac 1.933504 1 1.390505
female_head 5.521773 1 2.349845
work_female_interac 5.253559 1 2.292064
log_income_c 4.393101 1 2.095973
educ_head 1.650063 1 1.284548
age_head 167.045181 1 12.924596
I(age_head^2) 166.778062 1 12.914258
family_size_c 2.424382 1 1.557043
has_kids 1.369089 1 1.170081
head_works 1.987893 1 1.409927
has_fridge 1.683295 1 1.297419
has_electricity 1.319160 1 1.148547
roof 1.544416 1 1.242745
wall 1.667012 1 1.291128
floor 1.311287 1 1.145115
toilet 1.504700 1 1.226662
transportation 1.400562 1 1.183453
food_outside 1.608977 1 1.268455
region 2.662254 16 1.031072
building 1.195262 4 1.022546
Breusch-Godfrey test for serial correlation of order up to 1
data: subsample_35_64
LM test = 416.84, df = 1, p-value < 2.2e-16
studentized Breusch-Pagan test
data: subsample_35_64
BP = 207.86, df = 40, p-value < 2.2e-16
resettest(subsample_35_64,
power = 2:3,
type = "fitted",
vcov = function(x) vcovCL(x, cluster = ~apis_subsample_35_64$region))
RESET test
data: subsample_35_64
RESET = 0.49144, df1 = 2, df2 = 6484, p-value = 0.6118
hist(residuals(subsample_35_64), main="Histogram of Residuals", breaks=50)
# 65+ subsample
vif(subsample_65_or_above)
GVIF Df GVIF^(1/(2*Df))
water_source 1.756690 1 1.325402
water_income_interac 3.386435 1 1.840227
water_family_interac 2.784816 1 1.668777
log_income_c 5.413928 1 2.326785
family_size_c 3.589774 1 1.894670
educ_head 1.678960 1 1.295747
age_head 330.786716 1 18.187543
I(age_head^2) 329.522195 1 18.152746
has_kids 1.398923 1 1.182761
head_works 1.913571 1 1.383319
female_head 1.850646 1 1.360385
work_female_interac 2.222844 1 1.490920
has_fridge 1.776258 1 1.332763
has_electricity 1.443764 1 1.201567
roof 1.676710 1 1.294878
wall 1.778699 1 1.333679
floor 1.464843 1 1.210307
toilet 1.637968 1 1.279831
transportation 1.597129 1 1.263776
food_outside 1.641623 1 1.281258
region 2.903506 16 1.033871
building 1.214310 3 1.032892
bgtest(subsample_65_or_above)
Breusch-Godfrey test for serial correlation of order up to 1
data: subsample_65_or_above
LM test = 44.557, df = 1, p-value = 2.471e-11
bptest(subsample_65_or_above)
studentized Breusch-Pagan test
data: subsample_65_or_above
BP = 56.018, df = 39, p-value = 0.03796
resettest(subsample_65_or_above,
power = 2:3,
type = "fitted",
vcov = function(x) vcovCL(x, cluster = ~apis_subsample_65_or_above$region))
RESET test
data: subsample_65_or_above
RESET = 1.6071, df1 = 2, df2 = 1341, p-value = 0.2009
hist(residuals(subsample_65_or_above), main="Histogram of Residuals", breaks=50)
#----------------------------------------------------------------------------------#
# compiling the VIFs of all models in one table
# helper function to extract and format VIFs
get_vif_data <- function(model, model_name) {
v_raw <- car::vif(model)
# Adjust for factors (GVIF) to make it comparable to standard VIF
v_vals <- if(ncol(as.matrix(v_raw)) > 1) v_raw[, ncol(v_raw)]^2 else v_raw
df <- data.frame(Variables = names(v_vals), val = as.numeric(v_vals))
colnames(df)[2] <- model_name
return(df)
}
# extract VIFs for all 7 models
# note: ensure these model objects are in the environment
list_vifs <- list(
get_vif_data(bennett_ols_protein, "Model (1)"),
get_vif_data(bennett_ols_bread, "Model (2)"),
get_vif_data(subsample_female, "Model (3)"),
get_vif_data(subsample_male, "Model (4)"),
get_vif_data(subsample_below_35, "Model (5)"),
get_vif_data(subsample_35_64, "Model (6)"),
get_vif_data(subsample_above_65, "Model (7)")
)
# join all into one master table
vif_table <- Reduce(function(x, y) full_join(x, y, by = "Variables"), list_vifs)
# clean variable names for the table
# this removes things like I() or _c for a cleaner look
vif_table$Variables <- gsub("I\\(age_head\\^2\\)", "age_head_sq", vif_table$Variables)
# generate styled html table and save to file
vif_table %>%
mutate(across(starts_with("Model"), ~ifelse(is.na(.), "-", sprintf("%.3f", .)))) %>%
kable(format = "html",
caption = "Appendix X: Assumption Diagnostics (Multicollinearity)",
align = "lccccccc",
table.attr = 'style="font-family: Times New Roman; width: 100%;"') %>%
kable_classic(full_width = F, html_font = "Times New Roman") %>%
# Adding the thick horizontal rules from your screenshot
row_spec(0, bold = TRUE, extra_css = "border-top: 2px solid black; border-bottom: 1.5px solid black;") %>%
row_spec(nrow(vif_table), extra_css = "border-bottom: 2px solid black;") %>%
footnote(general = "Model (1) Baseline Protein; (2) Bread Falsification; (3) Female-Headed; (4) Male-Headed; (5) <35; (6) 35-64; (7) 65+. VIF values for fixed effects represent squared Generalized VIF. Age-squared omitted in Model 3 and Age cohorts based on RESET results.",
general_title = "Note: ",
footnote_as_chunk = T) %>%
save_kable(file = "Appendix_X_VIF.html") # This creates the file in your working directory
cat("VIF Table has been exported to Appendix_X_VIF.html")
VIF Table has been exported to Appendix_X_VIF.html
#----------------------------------------------------------------------------------#
library(knitr)
library(kableExtra)
library(tidyverse)
# 1. Improved Helper Function (Returns a single row per model)
get_diag_horizontal <- function(model, model_name, cluster_var = NULL) {
bg_p <- bgtest(model)$p.value
bp_p <- bptest(model)$p.value
re_p <- resettest(model, power = 2:3, type = "fitted",
vcov = function(x) vcovCL(x, cluster = cluster_var))$p.value
data.frame(
Model = model_name,
`Breusch-Godfrey (p-val)` = bg_p,
`Breusch-Pagan (p-val)` = bp_p,
`Ramsey RESET (p-val)` = re_p,
check.names = FALSE
)
}
# 2. Compile the rows for all 7 models
horizontal_diags <- bind_rows(
get_diag_horizontal(bennett_ols_protein, "Model 1: Baseline Protein", apis_final$region),
get_diag_horizontal(bennett_ols_bread, "Model 2: Bread Falsification", apis_final$region),
get_diag_horizontal(subsample_female, "Model 3: Female-Headed", apis_subsample_female$region),
get_diag_horizontal(subsample_male, "Model 4: Male-Headed", apis_subsample_male$region),
get_diag_horizontal(subsample_below_35, "Model 5: Head <35", apis_subsample_below_35$region),
get_diag_horizontal(subsample_35_64, "Model 6: Head 35-64", apis_subsample_35_64$region),
get_diag_horizontal(subsample_above_65, "Model 7: Head 65+", apis_subsample_65_or_above$region)
)
# 3. Generate the styled table
horizontal_diags %>%
mutate(across(where(is.numeric), ~sprintf("%.4f", .))) %>%
kable(format = "html",
caption = "Appendix Y: Summary of Diagnostic Tests (p-values)",
align = "lccc",
table.attr = 'style="font-family: Times New Roman; width: 100%;"') %>%
kable_classic(full_width = F, html_font = "Times New Roman") %>%
row_spec(0, bold = TRUE, extra_css = "border-top: 2px solid black; border-bottom: 1.5px solid black;") %>%
row_spec(nrow(horizontal_diags), extra_css = "border-bottom: 2px solid black;") %>%
footnote(general = "Table reports p-values for each diagnostic test. Ramsey RESET utilizes clustered standard errors. Significant p-values (< 0.05) in Breusch-Pagan justify the use of Clustered Robust Standard Errors in the main analysis.",
general_title = "Note: ",
footnote_as_chunk = T) %>%
save_kable(file = "Appendix_Y_Horizontal.html")
cat("Horizontal Diagnostic Table exported to Appendix_Y_Horizontal.html")
Horizontal Diagnostic Table exported to Appendix_Y_Horizontal.html