ECON222 Final paper

Author

Jaber, N.T. and Noriega, V.

Published

April 13, 2026

Access to an In-Dwelling Water Source and Protein Consumption: An Analysis of Philippine Expenditure Patterns Using APIS 2016

rm(list=ls())
gc()
          used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
Ncells  613043 32.8    1386174 74.1         NA   715640 38.3
Vcells 1146983  8.8    8388608 64.0      16384  2010507 15.4
if(!require(tidyverse)) install.packages("tidyverse")
Loading required package: tidyverse
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.2.0     ✔ readr     2.1.6
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.1     ✔ tibble    3.3.1
✔ lubridate 1.9.5     ✔ tidyr     1.3.2
✔ purrr     1.2.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
if(!require(lmtest)) install.packages("lmtest")
Loading required package: lmtest
Loading required package: zoo

Attaching package: 'zoo'

The following objects are masked from 'package:base':

    as.Date, as.Date.numeric
if(!require(sandwich)) install.packages("sandwich")
Loading required package: sandwich
if(!require(car)) install.packages("car")
Loading required package: car
Loading required package: carData

Attaching package: 'car'

The following object is masked from 'package:dplyr':

    recode

The following object is masked from 'package:purrr':

    some
if(!require(readxl)) install.packages("readxl")
Loading required package: readxl
if(!require(stargazer)) install.packages("stargazer")
Loading required package: stargazer

Please cite as: 

 Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
 R package version 5.2.3. https://CRAN.R-project.org/package=stargazer 
if(!require(ggplot2)) install.packages("ggplot2")
if(!require(patchwork)) install.packages("patchwork")
Loading required package: patchwork
if(!require(dplyr)) install.packages("dplyr")
if(!require(kableExtra)) install.packages("kableExtra")
Loading required package: kableExtra

Attaching package: 'kableExtra'

The following object is masked from 'package:dplyr':

    group_rows
library(tidyverse)
library(lmtest)
library(sandwich)
library(car)
library(readxl)
library(stargazer)
library(ggplot2)
library(patchwork)
library(dplyr)
library(kableExtra)

#----------------------------------------------------------------------------------#

# DATA PREPARATION
apis_data <- read_excel("APIS.xlsx", sheet=1)
head(apis_data)
# A tibble: 6 × 275
   REGN            ID RFACT FSIZE PUFQ1 PUFQ2  PUFQ4 PUFQ4A PUFQ4B PUFQ4C PUFQ4D
  <dbl>         <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
1    12 1000100000000 2064.     3     2     3 222222      2      2      2      2
2    12 1000200000000 2064.    11     2     3 222222      2      2      2      2
3    12 1000300000000 2064.    12     2     3 222222      2      2      2      2
4    12 1000400000000 2064.     8     2     3 111111      1      1      1      1
5    12 1000500000000 2064.     3     2     3 111111      1      1      1      1
6    12 1000600000000 2064.     5     2     3 111111      1      1      1      1
# ℹ 264 more variables: PUFQ4E <dbl>, PUFQ4F <dbl>, PUFQ3 <dbl>, PUFQ3A <dbl>,
#   PUFQ3B <dbl>, PUFQ3C <dbl>, PUFQ3D <dbl>, PUFEQ1 <dbl>, PUFEQ2 <dbl>,
#   PUFEQ3 <dbl>, PUFEQ4A <dbl>, PUFEQ4B <dbl>, PUFEQ5 <dbl>, PUFEQ6A <dbl>,
#   PUFEQ6B <dbl>, PUFEQ6C <dbl>, PUFEQ6D <dbl>, PUFEQ6E <dbl>, PUFEQ6F <dbl>,
#   PUFEQ6G <dbl>, PUFEQ6H <dbl>, PUFEQ6I <dbl>, PUFEQ6J <dbl>, PUFEQ6K <dbl>,
#   PUFEQ6L <dbl>, PUFEQ6M <dbl>, PUFEQ6N <dbl>, PUFEQ6O <dbl>, PUFFQ1 <dbl>,
#   PUFFQ1A <dbl>, PUFFQ2 <dbl>, PUFFQ2A <dbl>, PUFFQ2B <dbl>, PUFGQ1 <dbl>, …
# checking for missing values:
# dependent variable
apis_data %>% count(is.na(FOOD))
# A tibble: 1 × 2
  `is.na(FOOD)`     n
  <lgl>         <int>
1 FALSE         10332
apis_data %>% count(is.na(MEAT))
# A tibble: 1 × 2
  `is.na(MEAT)`     n
  <lgl>         <int>
1 FALSE         10332
apis_data %>% count(is.na(FISH))
# A tibble: 1 × 2
  `is.na(FISH)`     n
  <lgl>         <int>
1 FALSE         10332
# independent variables
apis_data %>% count(is.na(PUFFQ1))
# A tibble: 1 × 2
  `is.na(PUFFQ1)`     n
  <lgl>           <int>
1 FALSE           10332
apis_data %>% count(is.na(TOINC))
# A tibble: 1 × 2
  `is.na(TOINC)`     n
  <lgl>          <int>
1 FALSE          10332
# controls: socioeconomic characteristics
apis_data %>% count(is.na(EDUCATION))
# A tibble: 1 × 2
  `is.na(EDUCATION)`     n
  <lgl>              <int>
1 FALSE              10332
apis_data %>% count(is.na(PUFH05_AGE))
# A tibble: 1 × 2
  `is.na(PUFH05_AGE)`     n
  <lgl>               <int>
1 FALSE               10332
apis_data %>% count(is.na(FSIZE))
# A tibble: 1 × 2
  `is.na(FSIZE)`     n
  <lgl>          <int>
1 FALSE          10332
apis_data %>% count(is.na(PUFCHLD_6_11))
# A tibble: 1 × 2
  `is.na(PUFCHLD_6_11)`     n
  <lgl>                 <int>
1 FALSE                 10332
# controls: time poverty and gendered labor
apis_data %>% count(is.na(PUFWRK_HEAD))
# A tibble: 1 × 2
  `is.na(PUFWRK_HEAD)`     n
  <lgl>                <int>
1 FALSE                10332
apis_data %>% count(is.na(PUFH04_SEX))
# A tibble: 1 × 2
  `is.na(PUFH04_SEX)`     n
  <lgl>               <int>
1 FALSE               10332
# controls: household infrastructure

# a. food storage
apis_data %>% count(is.na(PUFEQ6G))
# A tibble: 1 × 2
  `is.na(PUFEQ6G)`     n
  <lgl>            <int>
1 FALSE            10332
apis_data %>% count(is.na(PUFEQ5))
# A tibble: 1 × 2
  `is.na(PUFEQ5)`     n
  <lgl>           <int>
1 FALSE           10332
# b. housing quality
apis_data %>% count(is.na(PUFEQ2))
# A tibble: 1 × 2
  `is.na(PUFEQ2)`     n
  <lgl>           <int>
1 FALSE           10332
apis_data %>% count(is.na(PUFEQ3))
# A tibble: 1 × 2
  `is.na(PUFEQ3)`     n
  <lgl>           <int>
1 FALSE           10332
apis_data %>% count(is.na(PUFEQ4B))
# A tibble: 1 × 2
  `is.na(PUFEQ4B)`     n
  <lgl>            <int>
1 FALSE            10332
# c. sanitation infrastructure
apis_data %>% count(is.na(PUFFQ2))
# A tibble: 1 × 2
  `is.na(PUFFQ2)`     n
  <lgl>           <int>
1 FALSE           10332
# d. asset-based urban proxies
apis_data %>% count(is.na(PUFEQ6D))
# A tibble: 1 × 2
  `is.na(PUFEQ6D)`     n
  <lgl>            <int>
1 FALSE            10332
apis_data %>% count(is.na(PUFEQ6J))
# A tibble: 1 × 2
  `is.na(PUFEQ6J)`     n
  <lgl>            <int>
1 FALSE            10332
apis_data %>% count(is.na(PUFEQ6H))
# A tibble: 1 × 2
  `is.na(PUFEQ6H)`     n
  <lgl>            <int>
1 FALSE            10332
# controls: market access
apis_data %>% count(is.na(TRANSPORT))
# A tibble: 1 × 2
  `is.na(TRANSPORT)`     n
  <lgl>              <int>
1 FALSE              10332
apis_data %>% count(is.na(FOOD_OUTSIDE))
# A tibble: 1 × 2
  `is.na(FOOD_OUTSIDE)`     n
  <lgl>                 <int>
1 FALSE                 10332
# fixed effects
apis_data %>% count(is.na(REGN))
# A tibble: 1 × 2
  `is.na(REGN)`     n
  <lgl>         <int>
1 FALSE         10332
apis_data %>% count(is.na(PUFEQ1))
# A tibble: 1 × 2
  `is.na(PUFEQ1)`     n
  <lgl>           <int>
1 FALSE           10332
# other variables
apis_data %>% count(is.na(BREAD))
# A tibble: 1 × 2
  `is.na(BREAD)`     n
  <lgl>          <int>
1 FALSE          10332
#----------------------------------------------------------------------------------#

# DATA CLEANING

# categorizing the highest grade completed (HGC) variable
# we convert HGC into a ratio-level data: the number of years completed in school
map_educ_full <- function(hgc) {
  h <- as.numeric(hgc)
  
  # no schooling / preschool / SPED
  if (h == 0 || h == 1 || h == 2 || h == 10 || (h >= 191 && h <= 192)) return(0)
  
  # elementary (old curriculum and k-12)
  if (h >= 110 && h <= 170) return((h - 100) / 10)
  if (h >= 410 && h <= 460) return((h - 400) / 10)
  
  # secondary / high school:
  
  # old curriculum HS (grade 7)
  if (h == 180) return(7) 
  # old curriculum HS (1st year-4th year)
  if (h >= 210 && h <= 250) return(6 + (h - 200) / 10) 
  # K-12 HS
  if (h >= 470 && h <= 520) return((h - 400) / 10)
  
  # post-secondary / vocational
  if ((h >= 310 && h <= 320) || (h >= 601 && h <= 689)) return(14)
  
  # college undergrad
  if (h >= 710 && h <= 760) return(12 + (h - 700) / 10)
  
  # college graduate
  if (h >= 801 && h <= 889) return(16)
  
  # graduate school - masters
  if (h >= 910 && h <= 920) return(18)
  
  # graduate school - doctorate
  if (h >= 930 && h <= 940) return(20)
}

v_map_educ <- Vectorize(map_educ_full)

#----------------------------------------------------------------------------------#

# creating the needed variables
# we filter only for protein-consuming households
apis_final <- apis_data %>% 
  filter(FOOD>0 & MEAT>0 & FISH>0) %>% 
  mutate(
    # mean-center transformations for the interactions
    log_income_c = log(TOINC) - mean(log(TOINC)),
    family_size_c = FSIZE - mean(FSIZE),
    
    # dependent variable
    protein_share = (MEAT+FISH)/FOOD,
    
    # main regressors
    water_source = ifelse(PUFFQ1==1,1,0),
    water_income_interac = water_source * log_income_c,
    water_family_interac = water_source * family_size_c,
    log_income = log(TOINC),
    family_size = FSIZE,
   
    # vector of controls (socioeconomic characteristics)
    educ_head = v_map_educ(PUFH12_HGC),
    age_head = PUFH05_AGE,
    has_kids = ifelse(PUFCHLD_6_11 == 1,1,0),
    
    # vector of controls (time constraints and gender roles)
    head_works   = ifelse(PUFWRK_HEAD == 1, 1, 0),
    female_head = ifelse(PUFH04_SEX == 2,1,0),
    work_female_interac = ifelse(head_works == 1 & female_head == 1, 1, 0),
   
    # vector of controls (household infrastructure characteristics)
        # a. food storage
        has_fridge   = ifelse(PUFEQ6G > 0, 1, 0),
        has_electricity  = ifelse(PUFEQ5 == 1, 1, 0),
        # b. housing quality
        roof = ifelse(PUFEQ2 == 1 | PUFEQ2 == 4, 1, 0),
        wall = ifelse(PUFEQ3 == 1 | PUFEQ3 == 4, 1, 0),
        floor = PUFEQ4B,
        # c. sanitation infrastructure
        toilet = ifelse(PUFFQ2 == 1, 1, 0),
        # d. asset-based urban proxies
        aircon = ifelse(PUFEQ6D > 0, 1, 0),
        telephone = ifelse(PUFEQ6J > 0, 1, 0),
        computer = ifelse(PUFEQ6H > 0, 1, 0),
    
    # vector of controls (market access)
    transportation = TRANSPORT,
    food_outside = ifelse(FOOD_OUTSIDE > median(FOOD_OUTSIDE, na.rm=T), 1, 0),
    
    # fixed effects
    region       = as.factor(REGN),
    building     = as.factor(PUFEQ1),
    
    # falsification variable
    bread_share = BREAD/FOOD,
)

# checking the final number of observations
str(apis_final$water_source)
 num [1:9994] 0 0 1 0 0 0 0 1 1 1 ...

PRIMARY ANALYSIS

# MAIN ANALYSIS

# ols model (protein share)
bennett_ols_protein <- lm(protein_share ~ 
    water_source + 
    water_income_interac +
    water_family_interac + 
    log_income_c +
    family_size_c +
    # control vec. 1
    educ_head + 
    age_head +
    I(age_head^2) +
    has_kids +
    # control vec. 2
    head_works + 
    female_head + 
    work_female_interac +
    # control vec. 3
    has_fridge + 
    has_electricity +
    roof + 
    wall + 
    floor +
    toilet + 
    aircon +
    telephone +
    computer + 
    # control vec. 4
    transportation +
    food_outside +
    # fixed effects
    region + 
    building,
  data = apis_final
)

summary(bennett_ols_protein)

Call:
lm(formula = protein_share ~ water_source + water_income_interac + 
    water_family_interac + log_income_c + family_size_c + educ_head + 
    age_head + I(age_head^2) + has_kids + head_works + female_head + 
    work_female_interac + has_fridge + has_electricity + roof + 
    wall + floor + toilet + aircon + telephone + computer + transportation + 
    food_outside + region + building, data = apis_final)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.28461 -0.06364 -0.00649  0.05693  0.50263 

Coefficients:
                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)           2.357e-01  1.158e-02  20.356  < 2e-16 ***
water_source          8.891e-03  2.314e-03   3.842 0.000123 ***
water_income_interac -1.185e-02  3.059e-03  -3.874 0.000108 ***
water_family_interac  8.295e-03  9.127e-04   9.088  < 2e-16 ***
log_income_c          2.847e-02  2.487e-03  11.446  < 2e-16 ***
family_size_c        -8.118e-03  6.851e-04 -11.850  < 2e-16 ***
educ_head             2.564e-04  2.686e-04   0.955 0.339718    
age_head              1.840e-03  3.804e-04   4.837 1.34e-06 ***
I(age_head^2)        -1.690e-05  3.809e-06  -4.438 9.19e-06 ***
has_kids              3.740e-03  2.120e-03   1.765 0.077667 .  
head_works            7.276e-03  3.623e-03   2.008 0.044640 *  
female_head           5.399e-04  4.492e-03   0.120 0.904327    
work_female_interac  -1.524e-02  5.335e-03  -2.856 0.004300 ** 
has_fridge            1.328e-02  2.539e-03   5.230 1.73e-07 ***
has_electricity      -8.626e-03  3.620e-03  -2.383 0.017195 *  
roof                  2.763e-03  3.178e-03   0.869 0.384690    
wall                  1.288e-03  2.727e-03   0.472 0.636773    
floor                 9.111e-05  2.518e-05   3.618 0.000299 ***
toilet                7.996e-03  2.960e-03   2.702 0.006911 ** 
aircon               -5.669e-03  3.890e-03  -1.457 0.145109    
telephone            -3.685e-03  4.280e-03  -0.861 0.389214    
computer              2.033e-03  2.905e-03   0.700 0.484133    
transportation       -3.259e-07  1.036e-07  -3.147 0.001656 ** 
food_outside         -5.112e-02  2.303e-03 -22.196  < 2e-16 ***
region2               7.411e-04  6.455e-03   0.115 0.908598    
region3               9.082e-03  5.705e-03   1.592 0.111468    
region4              -1.565e-02  6.255e-03  -2.501 0.012393 *  
region5              -1.973e-02  6.196e-03  -3.185 0.001452 ** 
region6              -2.125e-02  5.884e-03  -3.612 0.000305 ***
region7              -2.415e-02  6.175e-03  -3.912 9.23e-05 ***
region8              -2.882e-02  5.993e-03  -4.810 1.53e-06 ***
region9              -4.084e-02  6.717e-03  -6.081 1.24e-09 ***
region10             -6.027e-02  6.037e-03  -9.984  < 2e-16 ***
region11              1.220e-02  6.204e-03   1.967 0.049226 *  
region12             -4.685e-02  6.487e-03  -7.223 5.48e-13 ***
region13             -5.046e-02  5.587e-03  -9.031  < 2e-16 ***
region14             -5.015e-02  6.044e-03  -8.298  < 2e-16 ***
region15             -5.497e-02  6.634e-03  -8.286  < 2e-16 ***
region16             -3.241e-02  6.270e-03  -5.169 2.40e-07 ***
region17             -2.224e-02  6.191e-03  -3.593 0.000329 ***
building2            -5.064e-03  6.148e-03  -0.824 0.410106    
building3            -9.761e-03  4.564e-03  -2.139 0.032482 *  
building4             8.837e-05  1.632e-02   0.005 0.995679    
building5            -2.005e-02  6.396e-02  -0.314 0.753892    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.09016 on 9950 degrees of freedom
Multiple R-squared:  0.1404,    Adjusted R-squared:  0.1367 
F-statistic: 37.79 on 43 and 9950 DF,  p-value: < 2.2e-16
print(coeftest(bennett_ols_protein, 
               vcov = vcovCL(bennett_ols_protein, cluster = ~region)))

t test of coefficients:

                        Estimate  Std. Error  t value  Pr(>|t|)    
(Intercept)           2.3575e-01  1.2206e-02  19.3147 < 2.2e-16 ***
water_source          8.8914e-03  3.2795e-03   2.7112 0.0067152 ** 
water_income_interac -1.1850e-02  6.5465e-03  -1.8101 0.0703041 .  
water_family_interac  8.2948e-03  1.8867e-03   4.3965 1.111e-05 ***
log_income_c          2.8467e-02  4.3136e-03   6.5994 4.339e-11 ***
family_size_c        -8.1179e-03  9.2493e-04  -8.7768 < 2.2e-16 ***
educ_head             2.5645e-04  4.1849e-04   0.6128 0.5400295    
age_head              1.8399e-03  4.4920e-04   4.0959 4.238e-05 ***
I(age_head^2)        -1.6903e-05  4.1802e-06  -4.0435 5.306e-05 ***
has_kids              3.7402e-03  2.2181e-03   1.6862 0.0917850 .  
head_works            7.2758e-03  4.6171e-03   1.5759 0.1150911    
female_head           5.3991e-04  4.4117e-03   0.1224 0.9025989    
work_female_interac  -1.5237e-02  6.1205e-03  -2.4895 0.0128082 *  
has_fridge            1.3279e-02  2.1215e-03   6.2595 4.022e-10 ***
has_electricity      -8.6256e-03  5.8751e-03  -1.4681 0.1420965    
roof                  2.7626e-03  4.7025e-03   0.5875 0.5568942    
wall                  1.2876e-03  4.0648e-03   0.3168 0.7514225    
floor                 9.1109e-05  4.8823e-05   1.8661 0.0620542 .  
toilet                7.9957e-03  6.7984e-03   1.1761 0.2395771    
aircon               -5.6687e-03  5.5658e-03  -1.0185 0.3084741    
telephone            -3.6854e-03  3.1953e-03  -1.1534 0.2487815    
computer              2.0328e-03  3.4419e-03   0.5906 0.5547881    
transportation       -3.2593e-07  8.8200e-08  -3.6953 0.0002208 ***
food_outside         -5.1121e-02  3.7971e-03 -13.4633 < 2.2e-16 ***
region2               7.4107e-04  1.4721e-03   0.5034 0.6146894    
region3               9.0818e-03  1.9468e-03   4.6651 3.125e-06 ***
region4              -1.5646e-02  2.4244e-03  -6.4537 1.143e-10 ***
region5              -1.9735e-02  2.4769e-03  -7.9677 1.794e-15 ***
region6              -2.1255e-02  2.0265e-03 -10.4886 < 2.2e-16 ***
region7              -2.4154e-02  2.6786e-03  -9.0173 < 2.2e-16 ***
region8              -2.8822e-02  2.2326e-03 -12.9099 < 2.2e-16 ***
region9              -4.0843e-02  2.6208e-03 -15.5838 < 2.2e-16 ***
region10             -6.0266e-02  2.2135e-03 -27.2268 < 2.2e-16 ***
region11              1.2203e-02  3.2525e-03   3.7518 0.0001765 ***
region12             -4.6850e-02  2.8371e-03 -16.5135 < 2.2e-16 ***
region13             -5.0458e-02  4.6498e-03 -10.8517 < 2.2e-16 ***
region14             -5.0152e-02  1.9416e-03 -25.8300 < 2.2e-16 ***
region15             -5.4972e-02  5.8262e-03  -9.4354 < 2.2e-16 ***
region16             -3.2410e-02  2.4215e-03 -13.3843 < 2.2e-16 ***
region17             -2.2244e-02  2.6988e-03  -8.2422 < 2.2e-16 ***
building2            -5.0645e-03  5.7400e-03  -0.8823 0.3776263    
building3            -9.7613e-03  3.0800e-03  -3.1693 0.0015326 ** 
building4             8.8367e-05  2.3391e-02   0.0038 0.9969859    
building5            -2.0052e-02  1.5769e-01  -0.1272 0.8988117    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# assumption diagnostics
vif(bennett_ols_protein)
                          GVIF Df GVIF^(1/(2*Df))
water_source          1.626700  1        1.275422
water_income_interac  3.221646  1        1.794895
water_family_interac  2.006816  1        1.416621
log_income_c          4.511720  1        2.124081
family_size_c         2.588251  1        1.608804
educ_head             1.694844  1        1.301862
age_head             38.545977  1        6.208541
I(age_head^2)        40.176003  1        6.338454
has_kids              1.345954  1        1.160153
head_works            2.204835  1        1.484869
female_head           3.972155  1        1.993027
work_female_interac   3.726701  1        1.930467
has_fridge            1.779105  1        1.333831
has_electricity       1.347734  1        1.160919
roof                  1.621014  1        1.273191
wall                  1.718226  1        1.310811
floor                 1.383571  1        1.176253
toilet                1.547047  1        1.243804
aircon                1.648638  1        1.283993
telephone             1.345900  1        1.160129
computer              1.770477  1        1.330593
transportation        1.427557  1        1.194804
food_outside          1.630232  1        1.276805
region                2.743580 16        1.032042
building              1.209854  4        1.024098
bgtest(bennett_ols_protein)

    Breusch-Godfrey test for serial correlation of order up to 1

data:  bennett_ols_protein
LM test = 707.67, df = 1, p-value < 2.2e-16
bptest(bennett_ols_protein)

    studentized Breusch-Pagan test

data:  bennett_ols_protein
BP = 292.63, df = 43, p-value < 2.2e-16
resettest(bennett_ols_protein, 
          power = 2:3, 
          type = "fitted", 
          vcov = function(x) vcovCL(x, cluster = ~apis_final$region))

    RESET test

data:  bennett_ols_protein
RESET = 0.45302, df1 = 2, df2 = 9950, p-value = 0.6357
# we initially used JB test for normality of residuals
# but this failed due to the sensitivity of large samples, 
# wherein even minor, economically insignificant deviations from normality 
# lead to a rejection of the null.
tseries::jarque.bera.test(residuals(bennett_ols_protein))
Registered S3 method overwritten by 'quantmod':
  method            from
  as.zoo.data.frame zoo 

    Jarque Bera Test

data:  residuals(bennett_ols_protein)
X-squared = 369.64, df = 2, p-value < 2.2e-16
# we turn to visual diagnostics
hist(residuals(bennett_ols_protein), main="Histogram of Residuals", breaks=50)

#----------------------------------------------------------------------------------#

# ROBUSTNESS TABLE

# 1. define the 5 robustness models (Same as before)
m1 <- lm(protein_share ~ water_source + region + building, data = apis_final)

m2 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac + 
           log_income_c + family_size_c + 
           educ_head + age_head + I(age_head^2) + has_kids +
           region + building, data = apis_final)

m3 <- lm(protein_share ~ water_source + log_income_c + family_size_c + 
           water_income_interac + water_family_interac +
           educ_head + age_head + I(age_head^2) + has_kids +
           head_works + female_head + work_female_interac + 
           region + building, data = apis_final)

m4 <- lm(protein_share ~ water_source + log_income_c + family_size_c + 
           water_income_interac + water_family_interac + 
           educ_head + age_head + I(age_head^2) + has_kids +
           head_works + female_head + work_female_interac + 
           transportation + food_outside +
           has_fridge + has_electricity + roof + wall + floor + toilet +
           region + building, data = apis_final)

m5 <- lm(protein_share ~ water_source + log_income_c + family_size_c + 
           water_income_interac + water_family_interac + 
           educ_head + age_head + I(age_head^2) + has_kids +
           head_works + female_head + work_female_interac + 
           transportation + food_outside +
           has_fridge + has_electricity + roof + wall + floor + toilet +
           aircon + telephone + computer + 
           region + building, data = apis_final)

# 2. extract clustered standard errors
se_1 <- coeftest(m1, vcov = vcovCL(m1, cluster = ~region))[, 2]
se_2 <- coeftest(m2, vcov = vcovCL(m2, cluster = ~region))[, 2]
se_3 <- coeftest(m3, vcov = vcovCL(m3, cluster = ~region))[, 2]
se_4 <- coeftest(m4, vcov = vcovCL(m4, cluster = ~region))[, 2]
se_5 <- coeftest(m5, vcov = vcovCL(m5, cluster = ~region))[, 2]

# 3. create the final table for robustness
stargazer(m1, m2, m3, m4, m5,
          type = "text",
          se = list(se_1, se_2, se_3, se_4, se_5),
          intercept.bottom = FALSE,
          digits = 4,
          no.space = TRUE, 
          order = c("Constant", "water_source", "water_income_interac", 
                    "water_family_interac", "log_income_c", "family_size_c"),
          omit = c("region", "building"), 
          column.labels = c("Bivariate", "Socioecon", "Time Poverty", "Infra", "Full"),
          add.lines = list(
            c("Region Fixed Effects", "Yes", "Yes", "Yes", "Yes", "Yes"),
            c("Building Fixed Effects", "Yes", "Yes", "Yes", "Yes", "Yes")
          ),
          dep.var.caption = "Dependent variable: Protein share (protein-consuming households)",
          dep.var.labels.include = FALSE,
          model.numbers = TRUE,
          star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          star.char = c("+", "*", "**", "***"),
          notes = c("Clustered standard errors (at Region level) reported in parentheses.", 
                    "Significance: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001",
                    "(1): Bivariate", 
                    "(2): + Main interactions and socioeconomic indicators",
                    "(3): + Time poverty indicators",
                    "(4): + Household infrastructure and market access",
                    "(5): + Urban indicator proxies (full model)"),
          notes.append = FALSE
)

=============================================================================================================================================================
                                                          Dependent variable: Protein share (protein-consuming households)                                   
                       --------------------------------------------------------------------------------------------------------------------------------------
                               Bivariate                  Socioecon                 Time Poverty                  Infra                       Full           
                                  (1)                        (2)                        (3)                        (4)                        (5)            
-------------------------------------------------------------------------------------------------------------------------------------------------------------
Constant                       0.2697***                  0.2179***                  0.2119***                  0.2358***                  0.2357***         
                                (0.0010)                   (0.0119)                   (0.0111)                   (0.0120)                   (0.0122)         
water_source                   0.0186***                   0.0082*                    0.0089*                    0.0088**                   0.0089**         
                                (0.0037)                   (0.0038)                   (0.0037)                   (0.0033)                   (0.0033)         
water_income_interac                                       -0.0102                    -0.0099                    -0.0131+                   -0.0119+         
                                                           (0.0064)                   (0.0065)                   (0.0073)                   (0.0065)         
water_family_interac                                      0.0081***                  0.0079***                  0.0084***                  0.0083***         
                                                           (0.0020)                   (0.0020)                   (0.0020)                   (0.0019)         
log_income_c                                              0.0204***                  0.0202***                  0.0285***                  0.0285***         
                                                           (0.0037)                   (0.0037)                   (0.0040)                   (0.0043)         
family_size_c                                             -0.0089***                 -0.0092***                 -0.0081***                 -0.0081***        
                                                           (0.0010)                   (0.0010)                   (0.0009)                   (0.0009)         
educ_head                                                   0.0002                     0.0003                     0.0002                     0.0003          
                                                           (0.0004)                   (0.0004)                   (0.0004)                   (0.0004)         
age_head                                                  0.0019***                  0.0019***                  0.0019***                  0.0018***         
                                                           (0.0004)                   (0.0005)                   (0.0004)                   (0.0004)         
I(age_head2)                                             -0.00002***                -0.00002***                -0.00002***                -0.00002***        
                                                          (0.000003)                 (0.000004)                 (0.000004)                 (0.000004)        
has_kids                                                    0.0026                     0.0030                    0.0036+                    0.0037+          
                                                           (0.0023)                   (0.0023)                   (0.0022)                   (0.0022)         
head_works                                                                             0.0060                     0.0074                     0.0073          
                                                                                      (0.0049)                   (0.0046)                   (0.0046)         
female_head                                                                           -0.0012                     0.0004                     0.0005          
                                                                                      (0.0044)                   (0.0044)                   (0.0044)         
work_female_interac                                                                   -0.0148*                   -0.0151*                   -0.0152*         
                                                                                      (0.0065)                   (0.0062)                   (0.0061)         
transportation                                                                                                 -0.000000***               -0.000000***       
                                                                                                                (0.000000)                 (0.000000)        
food_outside                                                                                                    -0.0509***                 -0.0511***        
                                                                                                                 (0.0039)                   (0.0038)         
has_fridge                                                                                                      0.0131***                  0.0133***         
                                                                                                                 (0.0025)                   (0.0021)         
has_electricity                                                                                                  -0.0085                    -0.0086          
                                                                                                                 (0.0058)                   (0.0059)         
roof                                                                                                              0.0028                     0.0028          
                                                                                                                 (0.0047)                   (0.0047)         
wall                                                                                                              0.0013                     0.0013          
                                                                                                                 (0.0041)                   (0.0041)         
floor                                                                                                            0.0001+                    0.0001+          
                                                                                                                (0.00005)                  (0.00005)         
toilet                                                                                                            0.0080                     0.0080          
                                                                                                                 (0.0068)                   (0.0068)         
aircon                                                                                                                                      -0.0057          
                                                                                                                                            (0.0056)         
telephone                                                                                                                                   -0.0037          
                                                                                                                                            (0.0032)         
computer                                                                                                                                     0.0020          
                                                                                                                                            (0.0034)         
-------------------------------------------------------------------------------------------------------------------------------------------------------------
Region Fixed Effects              Yes                        Yes                        Yes                        Yes                        Yes            
Building Fixed Effects            Yes                        Yes                        Yes                        Yes                        Yes            
Observations                     9,994                      9,994                      9,994                      9,994                      9,994           
R2                               0.0636                     0.0888                     0.0915                     0.1401                     0.1404          
Adjusted R2                      0.0616                     0.0861                     0.0886                     0.1366                     0.1367          
Residual Std. Error        0.0940 (df = 9972)         0.0928 (df = 9964)         0.0926 (df = 9961)         0.0902 (df = 9953)         0.0902 (df = 9950)    
F Statistic            32.2460*** (df = 21; 9972) 33.4644*** (df = 29; 9964) 31.3577*** (df = 32; 9961) 40.5330*** (df = 40; 9953) 37.7897*** (df = 43; 9950)
=============================================================================================================================================================
Note:                                                                                    Clustered standard errors (at Region level) reported in parentheses.
                                                                                                      Significance: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001
                                                                                                                                               (1): Bivariate
                                                                                                        (2): + Main interactions and socioeconomic indicators
                                                                                                                               (3): + Time poverty indicators
                                                                                                            (4): + Household infrastructure and market access
                                                                                                                  (5): + Urban indicator proxies (full model)
#----------------------------------------------------------------------------------#

# formal oster test: comparing m4 vs m5

# 1. extract stats for the transition from m3 -> m4
beta_3 <- coef(m3)["water_source"]
beta_4 <- coef(m4)["water_source"]
r2_3   <- summary(m3)$r.squared
r2_4   <- summary(m4)$r.squared

# 2. extract stats for the transition from m4 -> m5
beta_5 <- coef(m5)["water_source"]
r2_5   <- summary(m5)$r.squared

# 3. calculate the "oster efficiency" (movement in beta relative to r^2 gain)
# a high ratio in m4 means the controls were informative.
# a tiny ratio in m5 means the controls are somewhat redundant.
gain_m4 <- (r2_4 - r2_3)
gain_m5 <- (r2_5 - r2_4)

shift_m4 <- abs(beta_4 - beta_3)
shift_m5 <- abs(beta_5 - beta_4)

cat("--- oster informational gain comparison ---\n")
--- oster informational gain comparison ---
cat("m3 to m4: r2 gain =", round(gain_m4, 5), 
    "| beta shift =", round(shift_m4, 5), "\n")
m3 to m4: r2 gain = 0.04856 | beta shift = 4e-05 
cat("m4 to m5: r2 gain =", round(gain_m5, 5), 
    "| beta shift =", round(shift_m5, 5), "\n\n")
m4 to m5: r2 gain = 0.00031 | beta shift = 8e-05 
# 4. the formal delta check for m4
r2_max <- 1.3 * r2_4
delta_m4 <- (beta_4 * (r2_4 - r2_3)) / ((beta_3 - beta_4) * (r2_max - r2_4))

cat("formal oster delta for model 4:", round(delta_m4, 4), "\n")
formal oster delta for model 4: 263.5139 
cat("interpretation: because delta is already << -1 (or >> 1), \n")
interpretation: because delta is already << -1 (or >> 1), 
cat("selection on unobservables is already statistically accounted for.\n")
selection on unobservables is already statistically accounted for.
# we use m4 as the final model

FALSIFICATION USING BREAD EXPENDITURE SHARES

# ols model (bread share)
bennett_ols_bread <- lm(bread_share ~ 
    water_source + 
    water_income_interac +
    water_family_interac + 
    log_income_c +
    family_size_c +
    # control vec. 1
    educ_head + 
    age_head +
    I(age_head^2) +
    has_kids +
    # control vec. 2
    head_works + 
    female_head + 
    work_female_interac +
    # control vec. 3
    has_fridge + 
    has_electricity +
    roof + 
    wall + 
    floor +
    toilet + 
    # control vec. 4
    transportation +
    food_outside +
    # fixed effects
    region + 
    building,
  data = apis_final
)

summary(bennett_ols_bread)

Call:
lm(formula = bread_share ~ water_source + water_income_interac + 
    water_family_interac + log_income_c + family_size_c + educ_head + 
    age_head + I(age_head^2) + has_kids + head_works + female_head + 
    work_female_interac + has_fridge + has_electricity + roof + 
    wall + floor + toilet + transportation + food_outside + region + 
    building, data = apis_final)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.49076 -0.05828 -0.00439  0.05566  0.41737 

Coefficients:
                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)           2.822e-01  1.158e-02  24.369  < 2e-16 ***
water_source         -9.813e-03  2.312e-03  -4.244 2.22e-05 ***
water_income_interac  1.044e-03  2.938e-03   0.355 0.722332    
water_family_interac -5.625e-03  9.096e-04  -6.184 6.51e-10 ***
log_income_c         -5.109e-02  2.465e-03 -20.725  < 2e-16 ***
family_size_c         2.059e-02  6.848e-04  30.069  < 2e-16 ***
educ_head            -5.451e-04  2.667e-04  -2.044 0.040973 *  
age_head              2.391e-03  3.804e-04   6.286 3.40e-10 ***
I(age_head^2)        -1.826e-05  3.808e-06  -4.794 1.66e-06 ***
has_kids              3.260e-03  2.119e-03   1.538 0.123976    
head_works            2.896e-03  3.618e-03   0.801 0.423425    
female_head          -4.986e-03  4.492e-03  -1.110 0.266985    
work_female_interac   2.339e-03  5.336e-03   0.438 0.661164    
has_fridge           -1.058e-02  2.476e-03  -4.273 1.95e-05 ***
has_electricity       9.212e-03  3.618e-03   2.546 0.010914 *  
roof                  2.250e-03  3.178e-03   0.708 0.478931    
wall                 -7.449e-03  2.727e-03  -2.732 0.006314 ** 
floor                -5.381e-05  2.491e-05  -2.160 0.030811 *  
toilet               -2.366e-02  2.960e-03  -7.994 1.45e-15 ***
transportation       -4.522e-07  1.033e-07  -4.378 1.21e-05 ***
food_outside         -6.004e-02  2.300e-03 -26.105  < 2e-16 ***
region2               4.699e-03  6.454e-03   0.728 0.466558    
region3              -1.069e-02  5.702e-03  -1.874 0.060951 .  
region4               3.881e-03  6.253e-03   0.621 0.534801    
region5               1.644e-02  6.197e-03   2.652 0.008003 ** 
region6               2.529e-02  5.881e-03   4.301 1.72e-05 ***
region7               3.822e-02  6.171e-03   6.194 6.10e-10 ***
region8               5.714e-02  5.992e-03   9.535  < 2e-16 ***
region9               6.801e-02  6.718e-03  10.124  < 2e-16 ***
region10              8.751e-02  6.036e-03  14.498  < 2e-16 ***
region11              3.013e-02  6.203e-03   4.857 1.21e-06 ***
region12              6.341e-02  6.487e-03   9.775  < 2e-16 ***
region13             -2.156e-02  5.569e-03  -3.871 0.000109 ***
region14              6.263e-02  6.041e-03  10.368  < 2e-16 ***
region15              3.207e-02  6.634e-03   4.834 1.36e-06 ***
region16              6.525e-02  6.264e-03  10.418  < 2e-16 ***
region17              5.407e-02  6.193e-03   8.731  < 2e-16 ***
building2            -4.139e-03  6.147e-03  -0.673 0.500772    
building3            -9.932e-03  4.563e-03  -2.177 0.029521 *  
building4            -4.412e-03  1.632e-02  -0.270 0.786924    
building5             1.429e-02  6.397e-02   0.223 0.823187    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.09019 on 9953 degrees of freedom
Multiple R-squared:  0.5291,    Adjusted R-squared:  0.5272 
F-statistic: 279.6 on 40 and 9953 DF,  p-value: < 2.2e-16
print(coeftest(bennett_ols_bread, 
               vcov = vcovCL(bennett_ols_bread, cluster = ~region)))

t test of coefficients:

                        Estimate  Std. Error  t value  Pr(>|t|)    
(Intercept)           2.8216e-01  1.4268e-02  19.7762 < 2.2e-16 ***
water_source         -9.8135e-03  3.2318e-03  -3.0365 0.0023993 ** 
water_income_interac  1.0440e-03  5.3254e-03   0.1960 0.8445778    
water_family_interac -5.6247e-03  2.2533e-03  -2.4962 0.0125686 *  
log_income_c         -5.1086e-02  3.2948e-03 -15.5050 < 2.2e-16 ***
family_size_c         2.0590e-02  1.0817e-03  19.0347 < 2.2e-16 ***
educ_head            -5.4508e-04  4.6742e-04  -1.1661 0.2435906    
age_head              2.3909e-03  5.4225e-04   4.4092 1.048e-05 ***
I(age_head^2)        -1.8256e-05  5.1373e-06  -3.5535 0.0003818 ***
has_kids              3.2604e-03  2.2367e-03   1.4577 0.1449621    
head_works            2.8965e-03  4.9977e-03   0.5796 0.5622215    
female_head          -4.9863e-03  5.2253e-03  -0.9543 0.3399722    
work_female_interac   2.3390e-03  7.3363e-03   0.3188 0.7498697    
has_fridge           -1.0579e-02  2.9678e-03  -3.5647 0.0003660 ***
has_electricity       9.2122e-03  8.8057e-03   1.0462 0.2955129    
roof                  2.2503e-03  5.0809e-03   0.4429 0.6578480    
wall                 -7.4492e-03  4.3712e-03  -1.7042 0.0883796 .  
floor                -5.3808e-05  4.2536e-05  -1.2650 0.2059062    
toilet               -2.3658e-02  7.1976e-03  -3.2869 0.0010165 ** 
transportation       -4.5215e-07  1.2429e-07  -3.6380 0.0002762 ***
food_outside         -6.0040e-02  3.7013e-03 -16.2214 < 2.2e-16 ***
region2               4.6994e-03  1.4268e-03   3.2936 0.0009927 ***
region3              -1.0686e-02  1.1549e-03  -9.2525 < 2.2e-16 ***
region4               3.8812e-03  1.6991e-03   2.2842 0.0223799 *  
region5               1.6438e-02  2.5158e-03   6.5339 6.721e-11 ***
region6               2.5294e-02  1.7635e-03  14.3433 < 2.2e-16 ***
region7               3.8221e-02  1.6418e-03  23.2802 < 2.2e-16 ***
region8               5.7136e-02  2.0991e-03  27.2197 < 2.2e-16 ***
region9               6.8009e-02  2.6757e-03  25.4173 < 2.2e-16 ***
region10              8.7514e-02  2.0537e-03  42.6121 < 2.2e-16 ***
region11              3.0126e-02  2.6721e-03  11.2744 < 2.2e-16 ***
region12              6.3410e-02  2.1592e-03  29.3675 < 2.2e-16 ***
region13             -2.1555e-02  2.7686e-03  -7.7856 7.627e-15 ***
region14              6.2632e-02  1.9128e-03  32.7427 < 2.2e-16 ***
region15              3.2071e-02  5.8804e-03   5.4539 5.046e-08 ***
region16              6.5253e-02  2.1996e-03  29.6660 < 2.2e-16 ***
region17              5.4067e-02  2.8559e-03  18.9320 < 2.2e-16 ***
building2            -4.1390e-03  5.7112e-03  -0.7247 0.4686383    
building3            -9.9325e-03  4.9848e-03  -1.9926 0.0463358 *  
building4            -4.4115e-03  1.0931e-02  -0.4036 0.6865180    
building5             1.4294e-02  7.7223e-02   0.1851 0.8531524    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# assumption diagnostics
vif(bennett_ols_bread)
                          GVIF Df GVIF^(1/(2*Df))
water_source          1.623243  1        1.274065
water_income_interac  2.969923  1        1.723347
water_family_interac  1.991939  1        1.411361
log_income_c          4.429597  1        2.104661
family_size_c         2.584313  1        1.607580
educ_head             1.669494  1        1.292089
age_head             38.521056  1        6.206533
I(age_head^2)        40.128468  1        6.334703
has_kids              1.344721  1        1.159621
head_works            2.197742  1        1.482478
female_head           3.969691  1        1.992408
work_female_interac   3.725754  1        1.930221
has_fridge            1.690478  1        1.300184
has_electricity       1.345908  1        1.160133
roof                  1.620328  1        1.272921
wall                  1.717634  1        1.310585
floor                 1.353128  1        1.163240
toilet                1.546075  1        1.243413
transportation        1.418134  1        1.190854
food_outside          1.624596  1        1.274596
region                2.625678 16        1.030626
building              1.206677  4        1.023762
bgtest(bennett_ols_bread)

    Breusch-Godfrey test for serial correlation of order up to 1

data:  bennett_ols_bread
LM test = 571.12, df = 1, p-value < 2.2e-16
bptest(bennett_ols_bread)

    studentized Breusch-Pagan test

data:  bennett_ols_bread
BP = 651.39, df = 40, p-value < 2.2e-16
resettest(bennett_ols_bread, 
          power = 2:3, 
          type = "fitted", 
          vcov = function(x) vcovCL(x, cluster = ~apis_final$region))

    RESET test

data:  bennett_ols_bread
RESET = 34.925, df1 = 2, df2 = 9953, p-value = 7.678e-16
hist(residuals(bennett_ols_bread), main="Histogram of Residuals", breaks=50)

#----------------------------------------------------------------------------------#

# creating a robustness table

# define the 4 models for bread_share
b1 <- lm(bread_share ~ water_source + region + building, data = apis_final)

b2 <- lm(bread_share ~ water_source + water_income_interac + water_family_interac + 
           log_income_c + family_size_c + 
           educ_head + age_head + I(age_head^2) + has_kids +
           region + building, data = apis_final)

b3 <- lm(bread_share ~ water_source + log_income_c + family_size_c + 
           water_income_interac + water_family_interac +
           educ_head + age_head + I(age_head^2) + has_kids +
           head_works + female_head + work_female_interac + 
           region + building, data = apis_final)

b4 <- lm(bread_share ~ water_source + log_income_c + family_size_c + 
           water_income_interac + water_family_interac + 
           educ_head + age_head + I(age_head^2) + has_kids +
           head_works + female_head + work_female_interac + 
           transportation + food_outside +
           has_fridge + has_electricity + roof + wall + floor + toilet +
           region + building, data = apis_final)

# extract clustered standard errors (at region level)
seb_1 <- coeftest(b1, vcov = vcovCL(b1, cluster = ~region))[, 2]
seb_2 <- coeftest(b2, vcov = vcovCL(b2, cluster = ~region))[, 2]
seb_3 <- coeftest(b3, vcov = vcovCL(b3, cluster = ~region))[, 2]
seb_4 <- coeftest(b4, vcov = vcovCL(b4, cluster = ~region))[, 2]

# create the final table
stargazer(b1, b2, b3, b4,
          type = "text",
          se = list(seb_1, seb_2, seb_3, seb_4),
          intercept.bottom = FALSE,
          digits = 4,
          no.space = TRUE, 
          order = c("Constant", "water_source", "water_income_interac", 
                    "water_family_interac", "log_income_c", "family_size_c"),
          omit = c("region", "building"), 
          column.labels = c("Bivariate", "Socioecon", "Time Poverty", "Full (Infra)"),
          add.lines = list(
            c("Region Fixed Effects", "Yes", "Yes", "Yes", "Yes"),
            c("Building Fixed Effects", "Yes", "Yes", "Yes", "Yes")
          ),
          dep.var.caption = "Dependent variable: Bread expenditure share",
          dep.var.labels.include = FALSE,
          model.numbers = TRUE,
          star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          star.char = c("+", "*", "**", "***"),
          notes = c("Clustered standard errors (at Region level) reported in parentheses.", 
                    "Significance: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001",
                    "This table serves as a falsification test for the preparation-barrier mechanism."),
          notes.append = FALSE
)

======================================================================================================================================
                                                         Dependent variable: Bread expenditure share                                  
                       ---------------------------------------------------------------------------------------------------------------
                                Bivariate                   Socioecon                 Time Poverty                Full (Infra)        
                                   (1)                         (2)                         (3)                         (4)            
--------------------------------------------------------------------------------------------------------------------------------------
Constant                        0.2835***                   0.2312***                   0.2301***                   0.2822***         
                                (0.0014)                    (0.0116)                    (0.0124)                    (0.0143)          
water_source                   -0.0647***                  -0.0185***                  -0.0179***                   -0.0098**         
                                (0.0053)                    (0.0029)                    (0.0029)                    (0.0032)          
water_income_interac                                         0.0050                      0.0052                      0.0010           
                                                            (0.0066)                    (0.0065)                    (0.0053)          
water_family_interac                                        -0.0066**                   -0.0067**                   -0.0056*          
                                                            (0.0022)                    (0.0022)                    (0.0023)          
log_income_c                                               -0.0746***                  -0.0746***                  -0.0511***         
                                                            (0.0033)                    (0.0033)                    (0.0033)          
family_size_c                                               0.0209***                   0.0208***                   0.0206***         
                                                            (0.0013)                    (0.0013)                    (0.0011)          
educ_head                                                  -0.0013***                  -0.0013***                    -0.0005          
                                                            (0.0004)                    (0.0004)                    (0.0005)          
age_head                                                    0.0023***                   0.0021***                   0.0024***         
                                                            (0.0005)                    (0.0005)                    (0.0005)          
I(age_head2)                                               -0.00002***                 -0.00002**                  -0.00002***        
                                                           (0.000005)                  (0.000005)                   (0.00001)         
has_kids                                                     0.0013                      0.0017                      0.0033           
                                                            (0.0026)                    (0.0026)                    (0.0022)          
head_works                                                                               0.0046                      0.0029           
                                                                                        (0.0052)                    (0.0050)          
female_head                                                                              -0.0076                     -0.0050          
                                                                                        (0.0054)                    (0.0052)          
work_female_interac                                                                      0.0020                      0.0023           
                                                                                        (0.0074)                    (0.0073)          
transportation                                                                                                    -0.000000***        
                                                                                                                   (0.000000)         
food_outside                                                                                                       -0.0600***         
                                                                                                                    (0.0037)          
has_fridge                                                                                                         -0.0106***         
                                                                                                                    (0.0030)          
has_electricity                                                                                                      0.0092           
                                                                                                                    (0.0088)          
roof                                                                                                                 0.0023           
                                                                                                                    (0.0051)          
wall                                                                                                                -0.0074+          
                                                                                                                    (0.0044)          
floor                                                                                                                -0.0001          
                                                                                                                    (0.00004)         
toilet                                                                                                              -0.0237**         
                                                                                                                    (0.0072)          
--------------------------------------------------------------------------------------------------------------------------------------
Region Fixed Effects               Yes                         Yes                         Yes                         Yes            
Building Fixed Effects             Yes                         Yes                         Yes                         Yes            
Observations                      9,994                       9,994                       9,994                       9,994           
R2                               0.3202                      0.4887                      0.4893                      0.5291           
Adjusted R2                      0.3188                      0.4872                      0.4877                      0.5272           
Residual Std. Error        0.1083 (df = 9972)          0.0939 (df = 9964)          0.0939 (df = 9961)          0.0902 (df = 9953)     
F Statistic            223.7175*** (df = 21; 9972) 328.3981*** (df = 29; 9964) 298.2398*** (df = 32; 9961) 279.5627*** (df = 40; 9953)
======================================================================================================================================
Note:                                                             Clustered standard errors (at Region level) reported in parentheses.
                                                                               Significance: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001
                                                      This table serves as a falsification test for the preparation-barrier mechanism.

WILD CLUSTER BOOTSTRAP TEST

# we conducted the wild cluster bootstrap 
# to provide more conservative and reliable inference, 
# accounting for the fact that our 17 regional clusters fall below the typical threshold, 
# where standard cluster-robust standard errors remain unbiased.

# 1. define the null hypothesis: water_source has no effect (beta = 0)
# we run the "restricted model" (the full model minus water_source)
res_formula <- update(formula(bennett_ols_protein), . ~ . - water_source)
mod_restricted <- lm(res_formula, data = apis_final)

# 2. get the original t-statistic from the main model
t_orig <- coeftest(bennett_ols_protein, vcov = vcovCL(bennett_ols_protein, cluster = ~region))["water_source", "t value"]

# 3. setup the bootstrap
B <- 999 
boot_t_stats <- numeric(B)
clusters <- unique(apis_final$region)
n_clusters <- length(clusters)

# get fitted values and residuals from the restricted model
y_hat <- predict(mod_restricted)
u_hat <- residuals(mod_restricted)

set.seed(123)
for (b in 1:B) {
  # generate rademacher weights: +1 or -1 for each of the 17 regions
  weights <- sample(c(-1, 1), n_clusters, replace = TRUE)
  names(weights) <- clusters
  
  # apply the cluster-level weight to the residuals
  # every household in region 1 gets the same +1 or -1 flip
  weight_vector <- weights[as.character(apis_final$region)]
  y_boot <- y_hat + (u_hat * weight_vector)
  
  # re-run the OLS on the bootstrapped Y
  boot_mod <- lm(y_boot ~ . , data = model.frame(bennett_ols_protein))
  
  # extract the t-statistic for water_source
  # note: we use the column index to avoid naming issues
  boot_t_stats[b] <- summary(boot_mod)$coefficients["water_source", "t value"]
}

# 4. calculate the bootstrap p-value
# how often is the absolute value of the fake t-stats >= the original t-stat?
p_val_boot <- mean(abs(boot_t_stats) >= abs(t_orig))

cat("--- Manual Wild Cluster Bootstrap Results ---\n")
--- Manual Wild Cluster Bootstrap Results ---
cat("Original T-Stat:", t_orig, "\n")
Original T-Stat: 2.711218 
cat("Bootstrap P-Value:", p_val_boot, "\n")
Bootstrap P-Value: 0.05405405 

VISUALIZATION FOR THE FALSIFICATION TEST

# visualization: protein vs. bread shares as dependent variables

# function to calculate conditional associations and standard errors
calc_conditional_assoc <- function(model, var_seq, vcov_matrix, inter_name) {
  beta_main  <- coef(model)["water_source"]
  beta_inter <- coef(model)[inter_name]
  assoc <- beta_main + (beta_inter * var_seq)
  se <- sqrt(vcov_matrix["water_source", "water_source"] + 
               (var_seq^2) * vcov_matrix[inter_name, inter_name] + 
               2 * var_seq * vcov_matrix["water_source", inter_name])
  
  data.frame(x = var_seq, assoc = assoc, 
             lower = assoc - 1.96*se, upper = assoc + 1.96*se)
}

# data preparation
# focusing on family size (the significant interaction)
avg_fam <- mean(apis_final$family_size, na.rm=T)
fam_seq_raw <- seq(min(apis_final$family_size), 
                   max(apis_final$family_size), length.out = 100)
fam_seq_c <- fam_seq_raw - avg_fam

# extract results
# main model: protein (bennett_ols_protein)
vcov_prot <- vcovCL(bennett_ols_protein, cluster = ~region)
df_prot_fam <- calc_conditional_assoc(bennett_ols_protein, 
                                      fam_seq_c, vcov_prot, "water_family_interac")

# falsification model: bread (bennett_ols_bread)
vcov_bread <- vcovCL(bennett_ols_bread, cluster = ~region)
df_bread_fam <- calc_conditional_assoc(bennett_ols_bread, 
                                       fam_seq_c, vcov_bread, "water_family_interac")

# combine for plotting
df_prot_fam$group <- "protein share (high preparation burden)"
df_bread_fam$group <- "bread share (falsification / low burden)"
plot_data <- rbind(df_prot_fam, df_bread_fam)

comparison_plot <- ggplot(plot_data, aes(x = x + avg_fam, y = assoc, color = group, fill = group)) +
  # confidence intervals
  geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.15, color = NA) +
  # association lines
  geom_line(aes(linetype = group), size = 1.1) +
  # zero reference line
  geom_hline(yintercept = 0, linetype = "dashed", color = "red", alpha = 0.7) +
  # colors and aesthetics
  scale_fill_manual(values = c("gray50", "darkgreen")) +
  scale_color_manual(values = c("gray50", "darkgreen")) +
  scale_linetype_manual(values = c("twodash", "solid")) +
  labs(
    title = "Comparing Nutrient-Dense Protein vs. Ready-to-Eat Staples (Bread)",
    x = "Family Size (Total Members)",
    y = "Association with Expenditure Share",
    caption = "Note: Estimates derived from the significant water-family interaction terms in the bread model (column 4) and protein (column 4). Shaded areas represent 95% CIs with Region-clustered SEs.\nUnlike protein, bread requires no domestic water for preparation, serving as a falsification test."
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = "bottom",
    legend.title = element_blank(),
    panel.grid.minor = element_blank()
  )
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
print(comparison_plot)

ggsave("water_protein_bread_interaction.png", 
       plot = comparison_plot, 
       width = 9, 
       height = 6, 
       dpi = 300)

STRUCTURAL BREAKS (CHOW TEST) FOR THE SUBGROUP ANALYSES

# 1. define the formula (matching the m4 model)
chow_formula <- as.formula("protein_share ~ water_source + log_income_c + family_size + 
                             water_income_interac + water_family_interac + 
                             educ_head + age_head + I(age_head^2) + has_kids + 
                             head_works + female_head + work_female_interac + 
                             transportation + food_outside + 
                             has_fridge + has_electricity + roof + wall + floor + toilet + 
                             region + building")

# 2. function to calculate chow f-statistic
calc_chow <- function(full_data, group_var) {
  # a. Pooled Model
  mod_pooled <- lm(chow_formula, data = full_data)
  ssr_pooled <- sum(residuals(mod_pooled)^2)
  n <- nrow(full_data)
  k <- length(coef(mod_pooled)) # Number of parameters
  
  # b. subsample models
  groups <- unique(full_data[[group_var]])
  ssr_subsamples <- 0
  num_groups <- length(groups)
  
  for(g in groups) {
    sub_data <- full_data[full_data[[group_var]] == g, ]
    mod_sub <- lm(chow_formula, data = sub_data)
    ssr_subsamples <- ssr_subsamples + sum(residuals(mod_sub)^2)
  }
  
  # c. the chow f-statistic
  # F = [(SSR_p - SSR_s) / k*(groups-1)] / [SSR_s / (n - k*groups)]
  numerator <- (ssr_pooled - ssr_subsamples) / (k * (num_groups - 1))
  denominator <- ssr_subsamples / (n - (k * num_groups))
  f_stat <- numerator / denominator
  
  # d. p-value
  p_val <- pf(f_stat, (k * (num_groups - 1)), (n - (k * num_groups)), lower.tail = FALSE)
  return(list(F_stat = f_stat, p_value = p_val))
}

#----------------------------------------------------------------------------------#

# execute tests

# test gender
gender_chow <- calc_chow(apis_final, "female_head")
cat("--- Chow Test for Gender Groups ---\n")
--- Chow Test for Gender Groups ---
print(gender_chow)
$F_stat
[1] 1.346613

$p_value
[1] 0.06871919
# test age groups (create factor first)
apis_final$age_group <- case_when(
    apis_final$age_head < 35 ~ 1,
    apis_final$age_head >= 35 & apis_final$age_head < 65 ~ 2,
    TRUE ~ 3
)

age_chow <- calc_chow(apis_final, "age_group")
cat("\n--- Chow Test for Age Groups ---\n")

--- Chow Test for Age Groups ---
print(age_chow)
$F_stat
[1] 1.541733

$p_value
[1] 0.001263892

SUBSAMPLE ANALYSIS: FEMALE VS. MALE HEADS

# female heads
# in the original model, we include age_head^2.
# however, the RESET test is specifying that the model is misspecified because of this.
# so we only maintain age_head.

apis_subsample_female <- apis_final %>% 
  filter(female_head == 1)

subsample_female <- lm(protein_share ~ 
    water_source +
    water_income_interac +
    water_family_interac + 
    log_income_c +
    family_size_c + 
    # control vec. 1
    educ_head + 
    age_head +
    has_kids +
    # control vec. 2, exc. female_head & work_female_interac
    head_works +
    # control vec. 3
    has_fridge + 
    has_electricity +
    roof + 
    wall + 
    floor +
    toilet + 
    # control vec. 4
    transportation +
    food_outside +
    # fixed effects
    region + 
    building,
  data = apis_subsample_female
)

summary(subsample_female)

Call:
lm(formula = protein_share ~ water_source + water_income_interac + 
    water_family_interac + log_income_c + family_size_c + educ_head + 
    age_head + has_kids + head_works + has_fridge + has_electricity + 
    roof + wall + floor + toilet + transportation + food_outside + 
    region + building, data = apis_subsample_female)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.241371 -0.061832 -0.005229  0.056111  0.303060 

Coefficients:
                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)           2.534e-01  1.998e-02  12.685  < 2e-16 ***
water_source          2.069e-03  5.254e-03   0.394 0.693774    
water_income_interac -1.496e-02  6.247e-03  -2.395 0.016723 *  
water_family_interac  9.343e-03  2.186e-03   4.274 2.01e-05 ***
log_income_c          2.529e-02  5.590e-03   4.523 6.45e-06 ***
family_size_c        -4.722e-03  1.803e-03  -2.619 0.008879 ** 
educ_head             6.837e-04  6.022e-04   1.135 0.256365    
age_head              4.922e-05  1.501e-04   0.328 0.743008    
has_kids              6.459e-03  4.862e-03   1.329 0.184162    
head_works           -4.757e-03  4.365e-03  -1.090 0.276011    
has_fridge            2.226e-02  5.227e-03   4.258 2.16e-05 ***
has_electricity       2.309e-03  9.375e-03   0.246 0.805461    
roof                  2.123e-02  8.493e-03   2.500 0.012517 *  
wall                 -2.735e-03  6.763e-03  -0.404 0.685964    
floor                 2.858e-05  4.857e-05   0.589 0.556240    
toilet                6.280e-03  7.559e-03   0.831 0.406151    
transportation       -2.717e-08  2.827e-07  -0.096 0.923434    
food_outside         -5.635e-02  5.120e-03 -11.005  < 2e-16 ***
region2              -6.398e-03  1.565e-02  -0.409 0.682811    
region3               1.434e-02  1.275e-02   1.125 0.260575    
region4              -4.008e-03  1.411e-02  -0.284 0.776376    
region5              -2.100e-02  1.406e-02  -1.494 0.135267    
region6              -2.591e-02  1.288e-02  -2.012 0.044370 *  
region7              -1.945e-02  1.381e-02  -1.408 0.159216    
region8              -2.219e-02  1.372e-02  -1.617 0.105999    
region9              -2.771e-02  1.546e-02  -1.792 0.073287 .  
region10             -3.455e-02  1.410e-02  -2.451 0.014350 *  
region11              1.731e-02  1.484e-02   1.167 0.243423    
region12             -4.390e-02  1.598e-02  -2.748 0.006053 ** 
region13             -4.940e-02  1.235e-02  -4.001 6.55e-05 ***
region14             -4.225e-02  1.362e-02  -3.103 0.001943 ** 
region15             -6.458e-02  1.763e-02  -3.664 0.000255 ***
region16             -3.712e-02  1.400e-02  -2.652 0.008066 ** 
region17             -2.861e-03  1.442e-02  -0.198 0.842802    
building2            -7.848e-03  1.454e-02  -0.540 0.589549    
building3            -1.421e-02  8.637e-03  -1.645 0.100065    
building4            -2.333e-02  4.520e-02  -0.516 0.605857    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.08975 on 1964 degrees of freedom
Multiple R-squared:  0.1582,    Adjusted R-squared:  0.1427 
F-statistic: 10.25 on 36 and 1964 DF,  p-value: < 2.2e-16
print(coeftest(subsample_female, 
               vcov = vcovCL(subsample_female, cluster = ~region)))

t test of coefficients:

                        Estimate  Std. Error  t value  Pr(>|t|)    
(Intercept)           2.5344e-01  1.8019e-02  14.0653 < 2.2e-16 ***
water_source          2.0689e-03  7.5985e-03   0.2723 0.7854366    
water_income_interac -1.4960e-02  8.9841e-03  -1.6651 0.0960478 .  
water_family_interac  9.3431e-03  2.2616e-03   4.1312 3.759e-05 ***
log_income_c          2.5286e-02  7.3784e-03   3.4270 0.0006228 ***
family_size_c        -4.7217e-03  1.7408e-03  -2.7125 0.0067369 ** 
educ_head             6.8372e-04  6.1053e-04   1.1199 0.2629041    
age_head              4.9221e-05  1.2641e-04   0.3894 0.6970294    
has_kids              6.4588e-03  4.3056e-03   1.5001 0.1337538    
head_works           -4.7565e-03  4.1961e-03  -1.1336 0.2571141    
has_fridge            2.2257e-02  3.8156e-03   5.8331 6.346e-09 ***
has_electricity       2.3093e-03  1.0417e-02   0.2217 0.8245837    
roof                  2.1229e-02  6.4856e-03   3.2733 0.0010813 ** 
wall                 -2.7349e-03  7.6266e-03  -0.3586 0.7199298    
floor                 2.8583e-05  3.2941e-05   0.8677 0.3856561    
toilet                6.2801e-03  1.0803e-02   0.5813 0.5610715    
transportation       -2.7168e-08  3.1804e-07  -0.0854 0.9319333    
food_outside         -5.6348e-02  5.0518e-03 -11.1540 < 2.2e-16 ***
region2              -6.3978e-03  1.1461e-03  -5.5821 2.707e-08 ***
region3               1.4344e-02  4.2135e-03   3.4043 0.0006766 ***
region4              -4.0079e-03  5.0782e-03  -0.7892 0.4300686    
region5              -2.1005e-02  3.9373e-03  -5.3348 1.067e-07 ***
region6              -2.5907e-02  2.2391e-03 -11.5703 < 2.2e-16 ***
region7              -1.9446e-02  4.4568e-03  -4.3632 1.348e-05 ***
region8              -2.2185e-02  2.4533e-03  -9.0431 < 2.2e-16 ***
region9              -2.7713e-02  4.3162e-03  -6.4208 1.695e-10 ***
region10             -3.4550e-02  3.3180e-03 -10.4129 < 2.2e-16 ***
region11              1.7315e-02  3.7431e-03   4.6258 3.975e-06 ***
region12             -4.3904e-02  3.0971e-03 -14.1761 < 2.2e-16 ***
region13             -4.9400e-02  7.3834e-03  -6.6906 2.889e-11 ***
region14             -4.2249e-02  1.6386e-03 -25.7833 < 2.2e-16 ***
region15             -6.4581e-02  8.1496e-03  -7.9245 3.805e-15 ***
region16             -3.7121e-02  3.7454e-03  -9.9110 < 2.2e-16 ***
region17             -2.8606e-03  3.3608e-03  -0.8512 0.3947738    
building2            -7.8475e-03  7.7762e-03  -1.0092 0.3130152    
building3            -1.4210e-02  9.7025e-03  -1.4646 0.1431867    
building4            -2.3328e-02  4.2663e-02  -0.5468 0.5845804    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vif(subsample_female)
                         GVIF Df GVIF^(1/(2*Df))
water_source         1.691807  1        1.300695
water_income_interac 3.554955  1        1.885459
water_family_interac 2.942803  1        1.715460
log_income_c         5.582345  1        2.362699
family_size_c        3.380519  1        1.838619
educ_head            1.834611  1        1.354478
age_head             1.495928  1        1.223082
has_kids             1.340753  1        1.157909
head_works           1.131093  1        1.063528
has_fridge           1.673408  1        1.293603
has_electricity      1.345387  1        1.159908
roof                 1.612160  1        1.269709
wall                 1.658201  1        1.287712
floor                1.378329  1        1.174022
toilet               1.410944  1        1.187832
transportation       1.600071  1        1.264939
food_outside         1.609005  1        1.268466
region               2.499325 16        1.029039
building             1.223044  3        1.034127
bgtest(subsample_female)

    Breusch-Godfrey test for serial correlation of order up to 1

data:  subsample_female
LM test = 60.681, df = 1, p-value = 6.711e-15
bptest(subsample_female)

    studentized Breusch-Pagan test

data:  subsample_female
BP = 71.792, df = 36, p-value = 0.0003603
resettest(subsample_female, 
          power = 2:3, 
          type = "fitted", 
          vcov = function(x) vcovCL(x, cluster = ~apis_subsample_female$region))

    RESET test

data:  subsample_female
RESET = 2.8156, df1 = 2, df2 = 1964, p-value = 0.06011
hist(residuals(subsample_female), main="Histogram of Residuals", breaks=50)

#----------------------------------------------------------------------------------#

# male heads
apis_subsample_male <- apis_final %>% 
  filter(female_head == 0)

subsample_male <- lm(protein_share ~ 
    water_source + 
    water_income_interac +
    water_family_interac + 
    log_income_c +
    family_size_c + 
    # control vec. 1
    educ_head + 
    age_head +
    I(age_head^2) +
    has_kids +
    # control vec. 2, exc. female_head & work_female_interac
    head_works +
    # control vec. 3
    has_fridge + 
    has_electricity +
    roof + 
    wall + 
    floor +
    toilet + 
    # control vec. 4
    transportation +
    food_outside +
    # fixed effects
    region + 
    building,
  data = apis_subsample_male
)

summary(subsample_male)

Call:
lm(formula = protein_share ~ water_source + water_income_interac + 
    water_family_interac + log_income_c + family_size_c + educ_head + 
    age_head + I(age_head^2) + has_kids + head_works + has_fridge + 
    has_electricity + roof + wall + floor + toilet + transportation + 
    food_outside + region + building, data = apis_subsample_male)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.27704 -0.06367 -0.00667  0.05626  0.50464 

Coefficients:
                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)           2.443e-01  1.292e-02  18.905  < 2e-16 ***
water_source          1.090e-02  2.601e-03   4.190 2.81e-05 ***
water_income_interac -1.204e-02  3.360e-03  -3.585 0.000339 ***
water_family_interac  7.345e-03  1.029e-03   7.139 1.02e-12 ***
log_income_c          2.881e-02  2.767e-03  10.410  < 2e-16 ***
family_size_c        -8.389e-03  7.453e-04 -11.255  < 2e-16 ***
educ_head             1.229e-04  2.983e-04   0.412 0.680295    
age_head              1.559e-03  4.601e-04   3.389 0.000706 ***
I(age_head^2)        -1.383e-05  4.754e-06  -2.909 0.003640 ** 
has_kids              3.128e-03  2.362e-03   1.324 0.185441    
head_works            9.161e-03  3.802e-03   2.410 0.015995 *  
has_fridge            1.057e-02  2.812e-03   3.758 0.000173 ***
has_electricity      -9.869e-03  3.929e-03  -2.512 0.012019 *  
roof                  9.105e-05  3.431e-03   0.027 0.978827    
wall                  2.338e-03  2.987e-03   0.783 0.433758    
floor                 1.029e-04  2.908e-05   3.536 0.000408 ***
toilet                7.824e-03  3.225e-03   2.426 0.015275 *  
transportation       -3.873e-07  1.113e-07  -3.480 0.000504 ***
food_outside         -4.964e-02  2.583e-03 -19.215  < 2e-16 ***
region2               2.098e-03  7.101e-03   0.295 0.767677    
region3               7.500e-03  6.386e-03   1.174 0.240235    
region4              -1.833e-02  6.980e-03  -2.626 0.008661 ** 
region5              -1.937e-02  6.912e-03  -2.803 0.005081 ** 
region6              -2.038e-02  6.637e-03  -3.071 0.002144 ** 
region7              -2.590e-02  6.910e-03  -3.748 0.000179 ***
region8              -3.072e-02  6.671e-03  -4.605 4.20e-06 ***
region9              -4.334e-02  7.465e-03  -5.805 6.68e-09 ***
region10             -6.580e-02  6.685e-03  -9.842  < 2e-16 ***
region11              1.084e-02  6.841e-03   1.585 0.113109    
region12             -4.763e-02  7.127e-03  -6.683 2.50e-11 ***
region13             -5.116e-02  6.263e-03  -8.168 3.61e-16 ***
region14             -5.148e-02  6.746e-03  -7.631 2.60e-14 ***
region15             -5.550e-02  7.228e-03  -7.678 1.81e-14 ***
region16             -3.164e-02  7.011e-03  -4.513 6.50e-06 ***
region17             -2.681e-02  6.871e-03  -3.902 9.62e-05 ***
building2            -4.495e-03  6.788e-03  -0.662 0.507896    
building3            -8.190e-03  5.375e-03  -1.524 0.127647    
building4             1.470e-03  1.754e-02   0.084 0.933209    
building5            -2.515e-02  6.404e-02  -0.393 0.694532    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.09022 on 7954 degrees of freedom
Multiple R-squared:  0.1395,    Adjusted R-squared:  0.1354 
F-statistic: 33.94 on 38 and 7954 DF,  p-value: < 2.2e-16
print(coeftest(subsample_male, 
               vcov = vcovCL(subsample_male, cluster = ~region)))

t test of coefficients:

                        Estimate  Std. Error  t value  Pr(>|t|)    
(Intercept)           2.4425e-01  1.3963e-02  17.4928 < 2.2e-16 ***
water_source          1.0901e-02  2.8347e-03   3.8455 0.0001213 ***
water_income_interac -1.2045e-02  7.6061e-03  -1.5836 0.1133325    
water_family_interac  7.3448e-03  2.1785e-03   3.3715 0.0007511 ***
log_income_c          2.8810e-02  4.5170e-03   6.3782 1.892e-10 ***
family_size_c        -8.3885e-03  1.0091e-03  -8.3133 < 2.2e-16 ***
educ_head             1.2291e-04  4.7468e-04   0.2589 0.7956874    
age_head              1.5592e-03  6.5410e-04   2.3837 0.0171616 *  
I(age_head^2)        -1.3829e-05  6.3574e-06  -2.1753 0.0296363 *  
has_kids              3.1276e-03  2.2011e-03   1.4210 0.1553703    
head_works            9.1611e-03  4.8549e-03   1.8870 0.0592023 .  
has_fridge            1.0566e-02  3.1275e-03   3.3786 0.0007321 ***
has_electricity      -9.8691e-03  6.4025e-03  -1.5414 0.1232504    
roof                  9.1055e-05  5.2338e-03   0.0174 0.9861200    
wall                  2.3381e-03  4.2286e-03   0.5529 0.5803299    
floor                 1.0285e-04  5.7519e-05   1.7881 0.0737968 .  
toilet                7.8241e-03  6.5610e-03   1.1925 0.2330892    
transportation       -3.8727e-07  9.7684e-08  -3.9646 7.417e-05 ***
food_outside         -4.9638e-02  4.5713e-03 -10.8585 < 2.2e-16 ***
region2               2.0979e-03  1.8229e-03   1.1509 0.2498223    
region3               7.5000e-03  1.4335e-03   5.2318 1.721e-07 ***
region4              -1.8328e-02  2.3294e-03  -7.8681 4.074e-15 ***
region5              -1.9373e-02  2.8391e-03  -6.8235 9.538e-12 ***
region6              -2.0380e-02  2.4225e-03  -8.4129 < 2.2e-16 ***
region7              -2.5900e-02  2.5960e-03  -9.9771 < 2.2e-16 ***
region8              -3.0719e-02  2.6959e-03 -11.3948 < 2.2e-16 ***
region9              -4.3336e-02  3.1096e-03 -13.9362 < 2.2e-16 ***
region10             -6.5797e-02  2.4399e-03 -26.9669 < 2.2e-16 ***
region11              1.0840e-02  3.7239e-03   2.9109 0.0036139 ** 
region12             -4.7632e-02  3.2918e-03 -14.4700 < 2.2e-16 ***
region13             -5.1155e-02  4.5936e-03 -11.1363 < 2.2e-16 ***
region14             -5.1480e-02  2.3829e-03 -21.6041 < 2.2e-16 ***
region15             -5.5497e-02  6.1054e-03  -9.0898 < 2.2e-16 ***
region16             -3.1637e-02  2.5545e-03 -12.3850 < 2.2e-16 ***
region17             -2.6812e-02  3.1667e-03  -8.4670 < 2.2e-16 ***
building2            -4.4948e-03  6.3007e-03  -0.7134 0.4756361    
building3            -8.1896e-03  1.9141e-03  -4.2786 1.903e-05 ***
building4             1.4698e-03  2.2411e-02   0.0656 0.9477091    
building5            -2.5148e-02  1.5636e-01  -0.1608 0.8722273    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vif(subsample_male)
                          GVIF Df GVIF^(1/(2*Df))
water_source          1.616794  1        1.271532
water_income_interac  2.852839  1        1.689035
water_family_interac  1.866466  1        1.366187
log_income_c          4.214313  1        2.052879
family_size_c         2.412280  1        1.553152
educ_head             1.631705  1        1.277382
age_head             40.039873  1        6.327707
I(age_head^2)        41.610239  1        6.450600
has_kids              1.347899  1        1.160991
head_works            1.337867  1        1.156662
has_fridge            1.674965  1        1.294204
has_electricity       1.348411  1        1.161211
roof                  1.615900  1        1.271181
wall                  1.724114  1        1.313055
floor                 1.345329  1        1.159883
toilet                1.567900  1        1.252158
transportation        1.400192  1        1.183297
food_outside          1.635357  1        1.278811
region                2.738622 16        1.031984
building              1.211424  4        1.024264
bgtest(subsample_male)

    Breusch-Godfrey test for serial correlation of order up to 1

data:  subsample_male
LM test = 573, df = 1, p-value < 2.2e-16
bptest(subsample_male)

    studentized Breusch-Pagan test

data:  subsample_male
BP = 248.9, df = 38, p-value < 2.2e-16
resettest(subsample_male, 
          power = 2:3, 
          type = "fitted", 
          vcov = function(x) vcovCL(x, cluster = ~apis_subsample_male$region))

    RESET test

data:  subsample_male
RESET = 0.71522, df1 = 2, df2 = 7954, p-value = 0.4891
hist(residuals(subsample_male), main="Histogram of Residuals", breaks=50)

#----------------------------------------------------------------------------------#

# presenting the comparison, male vs. female, in table format

# define subsample models
# female model: linear age
subsample_female <- lm(protein_share ~ 
    water_source + water_income_interac + water_family_interac + 
    log_income_c + family_size_c + educ_head + age_head + has_kids +
    head_works + has_fridge + has_electricity + roof + wall + floor + 
    toilet + transportation + food_outside +
    region + building, 
  data = apis_subsample_female)

# male model: quadratic age
subsample_male <- lm(protein_share ~ 
    water_source + water_income_interac + water_family_interac + 
    log_income_c + family_size_c + educ_head + age_head + I(age_head^2) + has_kids +
    head_works + has_fridge + has_electricity + roof + wall + floor + 
    toilet + transportation + food_outside +
    region + building, 
  data = apis_subsample_male)

# extract clustered standard errors
se_f <- coeftest(subsample_female, vcov = vcovCL(subsample_female, cluster = ~region))[, 2]
se_m <- coeftest(subsample_male, vcov = vcovCL(subsample_male, cluster = ~region))[, 2]

stargazer(subsample_female, subsample_male,
          type = "text",
          se = list(se_f, se_m),
          intercept.bottom = FALSE,
          digits = 4,
          no.space = TRUE,
          order = c("Constant", "water_source", "water_income_interac", "water_family_interac"),
          omit = c("region", "building"), 
          column.labels = c("Female Headed", "Male Headed"),
          add.lines = list(
            c("Region Fixed Effects", "Yes", "Yes"),
            c("Building Fixed Effects", "Yes", "Yes")
          ),
          dep.var.labels.include = FALSE,
          model.numbers = TRUE,
          # Star/Plus Configuration
          star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          star.char = c("+", "*", "**", "***"), 
          notes = c("Clustered standard errors (at Region level) reported in parentheses.", 
                    "Significance: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001",
                    "Age specification: Linear for Female, Quadratic for Male."),
          notes.append = FALSE)

============================================================================================
                                                Dependent variable:                         
                       ---------------------------------------------------------------------
                                 Female Headed                       Male Headed            
                                      (1)                                (2)                
--------------------------------------------------------------------------------------------
Constant                           0.2534***                          0.2443***             
                                    (0.0180)                           (0.0140)             
water_source                         0.0021                           0.0109***             
                                    (0.0076)                           (0.0028)             
water_income_interac                -0.0150+                           -0.0120              
                                    (0.0090)                           (0.0076)             
water_family_interac               0.0093***                          0.0073***             
                                    (0.0023)                           (0.0022)             
log_income_c                       0.0253***                          0.0288***             
                                    (0.0074)                           (0.0045)             
family_size_c                      -0.0047**                          -0.0084***            
                                    (0.0017)                           (0.0010)             
educ_head                            0.0007                             0.0001              
                                    (0.0006)                           (0.0005)             
age_head                            0.00005                            0.0016*              
                                    (0.0001)                           (0.0007)             
I(age_head2)                                                          -0.00001*             
                                                                      (0.00001)             
has_kids                             0.0065                             0.0031              
                                    (0.0043)                           (0.0022)             
head_works                          -0.0048                            0.0092+              
                                    (0.0042)                           (0.0049)             
has_fridge                         0.0223***                          0.0106***             
                                    (0.0038)                           (0.0031)             
has_electricity                      0.0023                            -0.0099              
                                    (0.0104)                           (0.0064)             
roof                                0.0212**                            0.0001              
                                    (0.0065)                           (0.0052)             
wall                                -0.0027                             0.0023              
                                    (0.0076)                           (0.0042)             
floor                               0.00003                            0.0001+              
                                   (0.00003)                           (0.0001)             
toilet                               0.0063                             0.0078              
                                    (0.0108)                           (0.0066)             
transportation                     -0.000000                         -0.000000***           
                                   (0.000000)                         (0.000000)            
food_outside                       -0.0563***                         -0.0496***            
                                    (0.0051)                           (0.0046)             
--------------------------------------------------------------------------------------------
Region Fixed Effects                  Yes                                Yes                
Building Fixed Effects                Yes                                Yes                
Observations                         2,001                              7,993               
R2                                   0.1582                             0.1395              
Adjusted R2                          0.1427                             0.1354              
Residual Std. Error            0.0897 (df = 1964)                 0.0902 (df = 7954)        
F Statistic                10.2493*** (df = 36; 1964)         33.9386*** (df = 38; 7954)    
============================================================================================
Note:                   Clustered standard errors (at Region level) reported in parentheses.
                                     Significance: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001
                                   Age specification: Linear for Female, Quadratic for Male.
#----------------------------------------------------------------------------------#

# robustness test for each subsample analysis (included in the appendix)

# female robustness table
f1 <- lm(protein_share ~ water_source + region + building, 
         data = apis_subsample_female)
f2 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac + 
           log_income_c + family_size_c + educ_head + age_head + has_kids +
           region + building, data = apis_subsample_female)
f3 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac + 
           log_income_c + family_size_c + educ_head + age_head + has_kids +
           head_works + transportation + food_outside + has_fridge + has_electricity +
           region + building, data = apis_subsample_female)
f4 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac + 
           log_income_c + family_size_c + educ_head + age_head + has_kids +
           head_works + transportation + food_outside +
           has_fridge + has_electricity + roof + wall + floor + toilet +
           region + building, data = apis_subsample_female)

se_f1 <- coeftest(f1, vcov = vcovCL(f1, cluster = ~region))[, 2]
se_f2 <- coeftest(f2, vcov = vcovCL(f2, cluster = ~region))[, 2]
se_f3 <- coeftest(f3, vcov = vcovCL(f3, cluster = ~region))[, 2]
se_f4 <- coeftest(f4, vcov = vcovCL(f4, cluster = ~region))[, 2]

stargazer(f1, f2, f3, f4, 
          type = "text",
          se = list(se_f1, se_f2, se_f3, se_f4),
          intercept.bottom = FALSE, 
          digits = 4, 
          no.space = TRUE, 
          order = c("Constant", "water_source", "water_income_interac", 
                    "water_family_interac", "log_income_c", "family_size_c"),
          omit = c("region", "building"), 
          column.labels = c("Bivariate", "Socioecon", "Employment", "Full"),
          add.lines = list(
            c("Region Fixed Effects", "Yes", "Yes", "Yes", "Yes"),
            c("Building Fixed Effects", "Yes", "Yes", "Yes", "Yes")
          ),
          title = "Robustness Check: Female-Headed Households",
          dep.var.caption = "Dependent variable: Protein share (protein-consuming female-headed households)",
          dep.var.labels.include = FALSE,
          model.numbers = TRUE,
          # Updated for the plus sign
          star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          star.char = c("+", "*", "**", "***"),
          notes = c("Clustered standard errors (at Region level) reported in parentheses.", 
                    "Significance: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001",
                    "(1): Bivariate", 
                    "(2): + Main interactions and socioeconomic indicators",
                    "(3): + Employment indicators",
                    "(4): + Household infrastructure"),
          notes.append = FALSE)

Robustness Check: Female-Headed Households
================================================================================================================================
                                    Dependent variable: Protein share (protein-consuming female-headed households)              
                       ---------------------------------------------------------------------------------------------------------
                               Bivariate                 Socioecon                 Employment                    Full           
                                  (1)                       (2)                       (3)                        (4)            
--------------------------------------------------------------------------------------------------------------------------------
Constant                       0.2735***                 0.2417***                 0.2714***                  0.2534***         
                               (0.0022)                  (0.0100)                   (0.0164)                   (0.0180)         
water_source                    0.0123+                   0.0056                     0.0042                     0.0021          
                               (0.0071)                  (0.0079)                   (0.0074)                   (0.0076)         
water_income_interac                                     -0.0137+                   -0.0170+                   -0.0150+         
                                                         (0.0079)                   (0.0087)                   (0.0090)         
water_family_interac                                     0.0094***                 0.0096***                  0.0093***         
                                                         (0.0025)                   (0.0023)                   (0.0023)         
log_income_c                                             0.0233***                 0.0284***                  0.0253***         
                                                         (0.0060)                   (0.0070)                   (0.0074)         
family_size_c                                           -0.0063***                 -0.0050**                  -0.0047**         
                                                         (0.0016)                   (0.0018)                   (0.0017)         
educ_head                                                 0.0008                     0.0007                     0.0007          
                                                         (0.0007)                   (0.0006)                   (0.0006)         
age_head                                                  0.0003*                    0.0001                    0.00005          
                                                         (0.0001)                   (0.0001)                   (0.0001)         
has_kids                                                  0.0064                     0.0062                     0.0065          
                                                         (0.0044)                   (0.0043)                   (0.0043)         
head_works                                                                          -0.0051                    -0.0048          
                                                                                    (0.0042)                   (0.0042)         
transportation                                                                     -0.000000                  -0.000000         
                                                                                   (0.000000)                 (0.000000)        
food_outside                                                                       -0.0554***                 -0.0563***        
                                                                                    (0.0049)                   (0.0051)         
has_fridge                                                                         0.0227***                  0.0223***         
                                                                                    (0.0040)                   (0.0038)         
has_electricity                                                                      0.0084                     0.0023          
                                                                                    (0.0090)                   (0.0104)         
roof                                                                                                           0.0212**         
                                                                                                               (0.0065)         
wall                                                                                                           -0.0027          
                                                                                                               (0.0076)         
floor                                                                                                          0.00003          
                                                                                                              (0.00003)         
toilet                                                                                                          0.0063          
                                                                                                               (0.0108)         
--------------------------------------------------------------------------------------------------------------------------------
Region Fixed Effects              Yes                       Yes                       Yes                        Yes            
Building Fixed Effects            Yes                       Yes                       Yes                        Yes            
Observations                     2,001                     2,001                     2,001                      2,001           
R2                              0.0685                    0.0940                     0.1543                     0.1582          
Adjusted R2                     0.0591                    0.0816                     0.1406                     0.1427          
Residual Std. Error       0.0940 (df = 1980)        0.0929 (df = 1973)         0.0899 (df = 1968)         0.0897 (df = 1964)    
F Statistic            7.2792*** (df = 20; 1980) 7.5804*** (df = 27; 1973) 11.2247*** (df = 32; 1968) 10.2493*** (df = 36; 1964)
================================================================================================================================
Note:                                                       Clustered standard errors (at Region level) reported in parentheses.
                                                                         Significance: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001
                                                                                                                  (1): Bivariate
                                                                           (2): + Main interactions and socioeconomic indicators
                                                                                                    (3): + Employment indicators
                                                                                                 (4): + Household infrastructure
# male robustness table
m1 <- lm(protein_share ~ water_source + region + building, 
         data = apis_subsample_male)
m2 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac + 
           log_income_c + family_size_c + educ_head + age_head + I(age_head^2) + has_kids +
           region + building, data = apis_subsample_male)
m3 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac + 
           log_income_c + family_size_c + educ_head + age_head + I(age_head^2) + has_kids +
           head_works + transportation + food_outside + has_fridge + has_electricity +
           region + building, data = apis_subsample_male)
m4 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac + 
           log_income_c + family_size_c + educ_head + age_head + I(age_head^2) + has_kids +
           head_works + transportation + food_outside +
           has_fridge + has_electricity + roof + wall + floor + toilet +
           region + building, data = apis_subsample_male)

se_m1 <- coeftest(m1, vcov = vcovCL(m1, cluster = ~region))[, 2]
se_m2 <- coeftest(m2, vcov = vcovCL(m2, cluster = ~region))[, 2]
se_m3 <- coeftest(m3, vcov = vcovCL(m3, cluster = ~region))[, 2]
se_m4 <- coeftest(m4, vcov = vcovCL(m4, cluster = ~region))[, 2]

stargazer(m1, m2, m3, m4, 
          type = "text",
          se = list(se_m1, se_m2, se_m3, se_m4),
          intercept.bottom = FALSE, 
          digits = 4, 
          no.space = TRUE, 
          order = c("Constant", "water_source", "water_income_interac", 
                    "water_family_interac", "log_income_c", "family_size_c"),
          omit = c("region", "building"), 
          column.labels = c("Bivariate", "Socioecon", "Employment", "Full"),
          add.lines = list(
            c("Region Fixed Effects", "Yes", "Yes", "Yes", "Yes"),
            c("Building Fixed Effects", "Yes", "Yes", "Yes", "Yes")
          ),
          title = "Robustness Check: Male-Headed Households",
          dep.var.caption = "Dependent variable: Protein share (protein-consuming male-headed households)",
          dep.var.labels.include = FALSE,
          model.numbers = TRUE,
          # Updated for the plus sign
          star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          star.char = c("+", "*", "**", "***"),
          notes = c("Clustered standard errors (at Region level) reported in parentheses.", 
                    "Significance: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001",
                    "(1): Bivariate", 
                    "(2): + Main interactions and socioeconomic indicators",
                    "(3): + Employment indicators",
                    "(4): + Household infrastructure"),
          notes.append = FALSE)

Robustness Check: Male-Headed Households
==================================================================================================================================
                                      Dependent variable: Protein share (protein-consuming male-headed households)                
                       -----------------------------------------------------------------------------------------------------------
                               Bivariate                  Socioecon                  Employment                    Full           
                                  (1)                        (2)                        (3)                        (4)            
----------------------------------------------------------------------------------------------------------------------------------
Constant                       0.2687***                  0.2258***                  0.2548***                  0.2443***         
                                (0.0009)                   (0.0160)                   (0.0135)                   (0.0140)         
water_source                   0.0210***                   0.0101**                  0.0123***                  0.0109***         
                                (0.0035)                   (0.0036)                   (0.0029)                   (0.0028)         
water_income_interac                                       -0.0089                    -0.0126+                   -0.0120          
                                                           (0.0069)                   (0.0075)                   (0.0076)         
water_family_interac                                       0.0070**                  0.0074***                  0.0073***         
                                                           (0.0023)                   (0.0022)                   (0.0022)         
log_income_c                                              0.0194***                  0.0312***                  0.0288***         
                                                           (0.0045)                   (0.0050)                   (0.0045)         
family_size_c                                             -0.0095***                 -0.0086***                 -0.0084***        
                                                           (0.0011)                   (0.0011)                   (0.0010)         
educ_head                                                   0.0001                     0.0003                     0.0001          
                                                           (0.0004)                   (0.0004)                   (0.0005)         
age_head                                                   0.0016**                   0.0015*                    0.0016*          
                                                           (0.0006)                   (0.0006)                   (0.0007)         
I(age_head2)                                              -0.00001*                  -0.00001*                  -0.00001*         
                                                          (0.00001)                  (0.00001)                  (0.00001)         
has_kids                                                    0.0027                     0.0032                     0.0031          
                                                           (0.0024)                   (0.0023)                   (0.0022)         
head_works                                                                            0.0083+                    0.0092+          
                                                                                      (0.0050)                   (0.0049)         
transportation                                                                      -0.000000***               -0.000000***       
                                                                                     (0.000000)                 (0.000000)        
food_outside                                                                         -0.0494***                 -0.0496***        
                                                                                      (0.0048)                   (0.0046)         
has_fridge                                                                           0.0119***                  0.0106***         
                                                                                      (0.0030)                   (0.0031)         
has_electricity                                                                       -0.0073                    -0.0099          
                                                                                      (0.0070)                   (0.0064)         
roof                                                                                                              0.0001          
                                                                                                                 (0.0052)         
wall                                                                                                              0.0023          
                                                                                                                 (0.0042)         
floor                                                                                                            0.0001+          
                                                                                                                 (0.0001)         
toilet                                                                                                            0.0078          
                                                                                                                 (0.0066)         
----------------------------------------------------------------------------------------------------------------------------------
Region Fixed Effects              Yes                        Yes                        Yes                        Yes            
Building Fixed Effects            Yes                        Yes                        Yes                        Yes            
Observations                     7,993                      7,993                      7,993                      7,993           
R2                               0.0656                     0.0928                     0.1373                     0.1395          
Adjusted R2                      0.0632                     0.0895                     0.1336                     0.1354          
Residual Std. Error        0.0939 (df = 7971)         0.0926 (df = 7963)         0.0903 (df = 7958)         0.0902 (df = 7954)    
F Statistic            26.6695*** (df = 21; 7971) 28.0951*** (df = 29; 7963) 37.2587*** (df = 34; 7958) 33.9386*** (df = 38; 7954)
==================================================================================================================================
Note:                                                         Clustered standard errors (at Region level) reported in parentheses.
                                                                           Significance: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001
                                                                                                                    (1): Bivariate
                                                                             (2): + Main interactions and socioeconomic indicators
                                                                                                      (3): + Employment indicators
                                                                                                   (4): + Household infrastructure

SUBSAMPLE ANALYSIS: EARLY-STAGE (BELOW 35); MID-STAGE (35-64); ELDERLY/RETIRED (65 AND OVER)

# early-stage (below 35)
apis_subsample_below_35 <- apis_final %>%  
  filter(age_head < 35)

subsample_below_35 <- lm(protein_share ~ 
    water_source + 
    water_income_interac +
    water_family_interac + 
    log_income_c +
    family_size_c +
    # control vec. 1
    educ_head + 
    age_head +
    I(age_head^2) +
    has_kids +
    # control vec. 2
    head_works +
    female_head +
    work_female_interac +
    # control vec. 3
    has_fridge + 
    has_electricity +
    roof + 
    wall + 
    floor +
    toilet + 
    # control vec. 4
    transportation +
    food_outside +
    # fixed effects
    region + 
    building,
  data = apis_subsample_below_35
)

summary(subsample_below_35)

Call:
lm(formula = protein_share ~ water_source + water_income_interac + 
    water_family_interac + log_income_c + family_size_c + educ_head + 
    age_head + I(age_head^2) + has_kids + head_works + female_head + 
    work_female_interac + has_fridge + has_electricity + roof + 
    wall + floor + toilet + transportation + food_outside + region + 
    building, data = apis_subsample_below_35)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.26544 -0.06349 -0.00706  0.05502  0.34949 

Coefficients:
                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)           2.536e-01  8.607e-02   2.946 0.003255 ** 
water_source          1.340e-02  5.530e-03   2.423 0.015469 *  
water_income_interac -1.679e-02  7.440e-03  -2.257 0.024122 *  
water_family_interac  1.336e-02  2.616e-03   5.106 3.59e-07 ***
log_income_c          3.404e-02  5.972e-03   5.700 1.37e-08 ***
family_size_c        -9.595e-03  1.950e-03  -4.921 9.31e-07 ***
educ_head            -5.776e-04  6.390e-04  -0.904 0.366121    
age_head              2.982e-03  6.270e-03   0.476 0.634392    
I(age_head^2)        -3.269e-05  1.137e-04  -0.287 0.773793    
has_kids              6.375e-03  5.081e-03   1.255 0.209760    
head_works            9.740e-03  1.183e-02   0.824 0.410266    
female_head           7.176e-03  1.482e-02   0.484 0.628292    
work_female_interac  -2.399e-02  1.676e-02  -1.432 0.152434    
has_fridge            2.053e-03  6.083e-03   0.337 0.735805    
has_electricity      -1.075e-02  7.114e-03  -1.511 0.130956    
roof                  2.864e-03  6.500e-03   0.441 0.659468    
wall                 -6.788e-03  5.723e-03  -1.186 0.235746    
floor                 2.411e-04  7.496e-05   3.216 0.001320 ** 
toilet                4.330e-03  6.167e-03   0.702 0.482694    
transportation       -3.943e-07  3.898e-07  -1.012 0.311845    
food_outside         -6.372e-02  5.310e-03 -11.999  < 2e-16 ***
region2               4.232e-03  1.648e-02   0.257 0.797338    
region3              -1.680e-03  1.417e-02  -0.119 0.905610    
region4              -3.028e-02  1.525e-02  -1.986 0.047160 *  
region5              -4.254e-02  1.525e-02  -2.790 0.005324 ** 
region6              -4.331e-02  1.471e-02  -2.943 0.003287 ** 
region7              -5.565e-02  1.495e-02  -3.722 0.000203 ***
region8              -4.508e-02  1.485e-02  -3.036 0.002427 ** 
region9              -6.127e-02  1.701e-02  -3.602 0.000323 ***
region10             -8.605e-02  1.467e-02  -5.867 5.16e-09 ***
region11             -1.727e-02  1.506e-02  -1.147 0.251523    
region12             -6.079e-02  1.522e-02  -3.993 6.75e-05 ***
region13             -7.822e-02  1.388e-02  -5.637 1.97e-08 ***
region14             -5.986e-02  1.493e-02  -4.010 6.28e-05 ***
region15             -7.988e-02  1.504e-02  -5.313 1.20e-07 ***
region16             -4.855e-02  1.531e-02  -3.171 0.001544 ** 
region17             -5.186e-02  1.492e-02  -3.475 0.000521 ***
building2            -2.563e-03  1.316e-02  -0.195 0.845577    
building3            -2.245e-03  8.713e-03  -0.258 0.796729    
building4             2.329e-02  2.398e-02   0.971 0.331517    
building5             2.116e-01  9.200e-02   2.301 0.021513 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.09073 on 2047 degrees of freedom
Multiple R-squared:  0.1805,    Adjusted R-squared:  0.1645 
F-statistic: 11.27 on 40 and 2047 DF,  p-value: < 2.2e-16
print(coeftest(subsample_below_35, 
               vcov = vcovCL(subsample_below_35, cluster = ~region)))

t test of coefficients:

                        Estimate  Std. Error  t value  Pr(>|t|)    
(Intercept)           2.5357e-01  6.2805e-02   4.0374 5.604e-05 ***
water_source          1.3401e-02  6.6454e-03   2.0166  0.043867 *  
water_income_interac -1.6790e-02  1.3378e-02  -1.2551  0.209590    
water_family_interac  1.3359e-02  5.1064e-03   2.6160  0.008961 ** 
log_income_c          3.4044e-02  7.5266e-03   4.5232 6.441e-06 ***
family_size_c        -9.5949e-03  2.1906e-03  -4.3801 1.246e-05 ***
educ_head            -5.7761e-04  9.2645e-04  -0.6235  0.533048    
age_head              2.9820e-03  4.6737e-03   0.6380  0.523518    
I(age_head^2)        -3.2686e-05  8.3832e-05  -0.3899  0.696649    
has_kids              6.3748e-03  5.1297e-03   1.2427  0.214115    
head_works            9.7401e-03  1.2216e-02   0.7973  0.425354    
female_head           7.1764e-03  1.6342e-02   0.4391  0.660613    
work_female_interac  -2.3990e-02  1.8810e-02  -1.2754  0.202316    
has_fridge            2.0527e-03  4.1549e-03   0.4940  0.621326    
has_electricity      -1.0749e-02  1.2215e-02  -0.8800  0.378987    
roof                  2.8645e-03  7.6139e-03   0.3762  0.706798    
wall                 -6.7881e-03  5.6716e-03  -1.1969  0.231500    
floor                 2.4108e-04  1.0396e-04   2.3190  0.020491 *  
toilet                4.3299e-03  8.2460e-03   0.5251  0.599573    
transportation       -3.9431e-07  3.4215e-07  -1.1525  0.249264    
food_outside         -6.3721e-02  7.1690e-03  -8.8885 < 2.2e-16 ***
region2               4.2323e-03  2.9946e-03   1.4133  0.157721    
region3              -1.6799e-03  3.4480e-03  -0.4872  0.626170    
region4              -3.0278e-02  5.6537e-03  -5.3555 9.490e-08 ***
region5              -4.2543e-02  5.7344e-03  -7.4189 1.721e-13 ***
region6              -4.3305e-02  4.1176e-03 -10.5170 < 2.2e-16 ***
region7              -5.5646e-02  4.7320e-03 -11.7596 < 2.2e-16 ***
region8              -4.5083e-02  4.8592e-03  -9.2778 < 2.2e-16 ***
region9              -6.1272e-02  5.3895e-03 -11.3687 < 2.2e-16 ***
region10             -8.6047e-02  4.2935e-03 -20.0411 < 2.2e-16 ***
region11             -1.7271e-02  5.4201e-03  -3.1864  0.001462 ** 
region12             -6.0794e-02  4.6562e-03 -13.0566 < 2.2e-16 ***
region13             -7.8222e-02  8.6820e-03  -9.0097 < 2.2e-16 ***
region14             -5.9856e-02  4.1735e-03 -14.3420 < 2.2e-16 ***
region15             -7.9875e-02  7.9477e-03 -10.0501 < 2.2e-16 ***
region16             -4.8546e-02  4.7669e-03 -10.1841 < 2.2e-16 ***
region17             -5.1865e-02  5.6716e-03  -9.1447 < 2.2e-16 ***
building2            -2.5634e-03  1.5174e-02  -0.1689  0.865866    
building3            -2.2445e-03  5.3903e-03  -0.4164  0.677163    
building4             2.3288e-02  2.5712e-02   0.9057  0.365197    
building5             2.1165e-01  2.4970e-02   8.4759 < 2.2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vif(subsample_below_35)
                           GVIF Df GVIF^(1/(2*Df))
water_source           1.863179  1        1.364983
water_income_interac   2.336852  1        1.528677
water_family_interac   2.149937  1        1.466266
log_income_c           3.840384  1        1.959690
family_size_c          2.470122  1        1.571662
educ_head              1.675782  1        1.294520
age_head             161.895849  1       12.723830
I(age_head^2)        161.903175  1       12.724118
has_kids               1.600608  1        1.265151
head_works             2.510244  1        1.584375
female_head            6.193916  1        2.488758
work_female_interac    5.384839  1        2.320526
has_fridge             1.561013  1        1.249405
has_electricity        1.394828  1        1.181028
roof                   1.817525  1        1.348156
wall                   1.835716  1        1.354886
floor                  1.321589  1        1.149604
toilet                 1.664863  1        1.290296
transportation         1.521619  1        1.233539
food_outside           1.766256  1        1.329006
region                 3.741587 16        1.042097
building               1.366710  4        1.039823
bgtest(subsample_below_35)

    Breusch-Godfrey test for serial correlation of order up to 1

data:  subsample_below_35
LM test = 101.46, df = 1, p-value < 2.2e-16
bptest(subsample_below_35)

    studentized Breusch-Pagan test

data:  subsample_below_35
BP = 86.833, df = 40, p-value = 2.586e-05
resettest(subsample_below_35, 
          power = 2:3, 
          type = "fitted", 
          vcov = function(x) vcovCL(x, cluster = ~apis_subsample_below_35$region))

    RESET test

data:  subsample_below_35
RESET = 1.1849, df1 = 2, df2 = 2047, p-value = 0.306
hist(residuals(subsample_below_35), main="Histogram of Residuals", breaks=50)

#----------------------------------------------------------------------------------#

# mid-stage (35-64)
apis_subsample_35_64 <- apis_final %>%  
  filter(age_head >= 35 & age_head < 65)

subsample_35_64 <- lm(protein_share ~ 
    water_source + 
    water_income_interac +
    water_family_interac + 
    log_income_c +
    family_size_c +
    # control vec. 1
    educ_head + 
    age_head +
    I(age_head^2) +
    has_kids +
    # control vec. 2
    head_works +
    female_head +
    work_female_interac +
    # control vec. 3
    has_fridge + 
    has_electricity +
    roof + 
    wall + 
    floor +
    toilet + 
    # control vec. 4
    transportation +
    food_outside +
    # fixed effects
    region + 
    building,
  data = apis_subsample_35_64
)

summary(subsample_35_64)

Call:
lm(formula = protein_share ~ water_source + water_income_interac + 
    water_family_interac + log_income_c + family_size_c + educ_head + 
    age_head + I(age_head^2) + has_kids + head_works + female_head + 
    work_female_interac + has_fridge + has_electricity + roof + 
    wall + floor + toilet + transportation + food_outside + region + 
    building, data = apis_subsample_35_64)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.28561 -0.06356 -0.00664  0.05634  0.43232 

Coefficients:
                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)           2.479e-01  4.189e-02   5.917 3.44e-09 ***
water_source          7.923e-03  2.860e-03   2.770 0.005618 ** 
water_income_interac -1.397e-02  3.604e-03  -3.875 0.000108 ***
water_family_interac  7.675e-03  1.077e-03   7.125 1.15e-12 ***
log_income_c          2.962e-02  3.063e-03   9.669  < 2e-16 ***
family_size_c        -8.480e-03  7.932e-04 -10.691  < 2e-16 ***
educ_head             3.029e-04  3.294e-04   0.919 0.357899    
age_head              1.123e-03  1.699e-03   0.661 0.508714    
I(age_head^2)        -1.018e-05  1.725e-05  -0.590 0.555172    
has_kids              2.343e-03  2.603e-03   0.900 0.368006    
head_works            4.780e-03  5.003e-03   0.955 0.339378    
female_head           2.264e-03  6.762e-03   0.335 0.737748    
work_female_interac  -1.689e-02  7.575e-03  -2.230 0.025793 *  
has_fridge            1.465e-02  2.978e-03   4.919 8.90e-07 ***
has_electricity      -8.357e-03  4.627e-03  -1.806 0.070938 .  
roof                  1.420e-03  3.967e-03   0.358 0.720360    
wall                  5.637e-03  3.358e-03   1.679 0.093259 .  
floor                 8.621e-05  2.995e-05   2.878 0.004011 ** 
toilet                8.785e-03  3.688e-03   2.382 0.017251 *  
transportation       -4.074e-07  1.130e-07  -3.604 0.000316 ***
food_outside         -4.818e-02  2.818e-03 -17.098  < 2e-16 ***
region2               3.384e-03  7.866e-03   0.430 0.667085    
region3               1.070e-02  6.950e-03   1.539 0.123781    
region4              -1.297e-02  7.600e-03  -1.706 0.087979 .  
region5              -1.320e-02  7.655e-03  -1.724 0.084802 .  
region6              -1.465e-02  7.238e-03  -2.024 0.043037 *  
region7              -1.578e-02  7.622e-03  -2.071 0.038413 *  
region8              -2.250e-02  7.362e-03  -3.057 0.002248 ** 
region9              -3.866e-02  8.135e-03  -4.751 2.06e-06 ***
region10             -5.755e-02  7.460e-03  -7.714 1.40e-14 ***
region11              2.126e-02  7.610e-03   2.794 0.005227 ** 
region12             -4.044e-02  7.994e-03  -5.059 4.34e-07 ***
region13             -4.277e-02  6.802e-03  -6.288 3.42e-10 ***
region14             -4.597e-02  7.449e-03  -6.171 7.18e-10 ***
region15             -4.718e-02  8.244e-03  -5.723 1.10e-08 ***
region16             -2.802e-02  7.703e-03  -3.638 0.000277 ***
region17             -1.353e-02  7.644e-03  -1.770 0.076761 .  
building2            -7.751e-03  7.338e-03  -1.056 0.290867    
building3            -1.375e-02  5.699e-03  -2.413 0.015848 *  
building4             6.663e-03  2.405e-02   0.277 0.781732    
building5            -2.365e-01  8.991e-02  -2.630 0.008553 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.08958 on 6484 degrees of freedom
Multiple R-squared:  0.1454,    Adjusted R-squared:  0.1401 
F-statistic: 27.57 on 40 and 6484 DF,  p-value: < 2.2e-16
print(coeftest(subsample_35_64, 
               vcov = vcovCL(subsample_35_64, cluster = ~region)))

t test of coefficients:

                        Estimate  Std. Error  t value  Pr(>|t|)    
(Intercept)           2.4790e-01  4.9371e-02   5.0212 5.273e-07 ***
water_source          7.9232e-03  3.9073e-03   2.0278 0.0426202 *  
water_income_interac -1.3967e-02  6.9531e-03  -2.0087 0.0446104 *  
water_family_interac  7.6752e-03  1.5224e-03   5.0415 4.745e-07 ***
log_income_c          2.9617e-02  4.6358e-03   6.3889 1.787e-10 ***
family_size_c        -8.4802e-03  9.5006e-04  -8.9259 < 2.2e-16 ***
educ_head             3.0290e-04  3.8263e-04   0.7916 0.4286090    
age_head              1.1228e-03  2.1748e-03   0.5163 0.6056927    
I(age_head^2)        -1.0180e-05  2.2115e-05  -0.4603 0.6453020    
has_kids              2.3431e-03  2.5216e-03   0.9292 0.3528187    
head_works            4.7800e-03  5.7448e-03   0.8320 0.4054151    
female_head           2.2642e-03  6.0811e-03   0.3723 0.7096509    
work_female_interac  -1.6890e-02  7.4528e-03  -2.2663 0.0234670 *  
has_fridge            1.4651e-02  3.0189e-03   4.8531 1.244e-06 ***
has_electricity      -8.3573e-03  5.1536e-03  -1.6216 0.1049326    
roof                  1.4203e-03  5.3031e-03   0.2678 0.7888412    
wall                  5.6368e-03  5.0084e-03   1.1255 0.2604297    
floor                 8.6209e-05  4.8245e-05   1.7869 0.0740008 .  
toilet                8.7847e-03  5.8027e-03   1.5139 0.1301016    
transportation       -4.0736e-07  9.6683e-08  -4.2133 2.551e-05 ***
food_outside         -4.8182e-02  4.7817e-03 -10.0763 < 2.2e-16 ***
region2               3.3839e-03  1.6563e-03   2.0431 0.0410861 *  
region3               1.0698e-02  1.6009e-03   6.6827 2.540e-11 ***
region4              -1.2969e-02  2.1169e-03  -6.1263 9.525e-10 ***
region5              -1.3196e-02  2.5173e-03  -5.2420 1.638e-07 ***
region6              -1.4648e-02  2.1986e-03  -6.6622 2.919e-11 ***
region7              -1.5784e-02  2.4427e-03  -6.4620 1.108e-10 ***
region8              -2.2502e-02  2.9134e-03  -7.7236 1.302e-14 ***
region9              -3.8656e-02  3.2147e-03 -12.0245 < 2.2e-16 ***
region10             -5.7548e-02  2.5786e-03 -22.3176 < 2.2e-16 ***
region11              2.1260e-02  3.5261e-03   6.0294 1.737e-09 ***
region12             -4.0437e-02  3.3320e-03 -12.1360 < 2.2e-16 ***
region13             -4.2771e-02  4.5153e-03  -9.4725 < 2.2e-16 ***
region14             -4.5969e-02  2.3280e-03 -19.7458 < 2.2e-16 ***
region15             -4.7180e-02  6.0381e-03  -7.8136 6.439e-15 ***
region16             -2.8020e-02  2.6334e-03 -10.6400 < 2.2e-16 ***
region17             -1.3530e-02  2.9926e-03  -4.5211 6.261e-06 ***
building2            -7.7508e-03  6.4706e-03  -1.1978 0.2310236    
building3            -1.3751e-02  4.1676e-03  -3.2995 0.0009737 ***
building4             6.6626e-03  3.3526e-02   0.1987 0.8424815    
building5            -2.3648e-01  5.4317e-03 -43.5370 < 2.2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vif(subsample_35_64)
                           GVIF Df GVIF^(1/(2*Df))
water_source           1.650600  1        1.284757
water_income_interac   3.123177  1        1.767251
water_family_interac   1.933504  1        1.390505
log_income_c           4.393101  1        2.095973
family_size_c          2.424382  1        1.557043
educ_head              1.650063  1        1.284548
age_head             167.045181  1       12.924596
I(age_head^2)        166.778062  1       12.914258
has_kids               1.369089  1        1.170081
head_works             1.987893  1        1.409927
female_head            5.521773  1        2.349845
work_female_interac    5.253559  1        2.292064
has_fridge             1.683295  1        1.297419
has_electricity        1.319160  1        1.148547
roof                   1.544416  1        1.242745
wall                   1.667012  1        1.291128
floor                  1.311287  1        1.145115
toilet                 1.504700  1        1.226662
transportation         1.400562  1        1.183453
food_outside           1.608977  1        1.268455
region                 2.662254 16        1.031072
building               1.195262  4        1.022546
bgtest(subsample_35_64)

    Breusch-Godfrey test for serial correlation of order up to 1

data:  subsample_35_64
LM test = 416.84, df = 1, p-value < 2.2e-16
bptest(subsample_35_64)

    studentized Breusch-Pagan test

data:  subsample_35_64
BP = 207.86, df = 40, p-value < 2.2e-16
resettest(subsample_35_64, 
          power = 2:3, 
          type = "fitted", 
          vcov = function(x) vcovCL(x, cluster = ~apis_subsample_35_64$region))

    RESET test

data:  subsample_35_64
RESET = 0.49144, df1 = 2, df2 = 6484, p-value = 0.6118
hist(residuals(subsample_35_64), main="Histogram of Residuals", breaks=50)

#----------------------------------------------------------------------------------#

# elderly/retired (65+)
apis_subsample_65_or_above <- apis_final %>%  
  filter(age_head >= 65)

subsample_65_or_above <- lm(protein_share ~ 
    water_source + 
    water_income_interac +
    water_family_interac + 
    log_income_c +
    family_size_c +
    # control vec. 1
    educ_head + 
    age_head +
    I(age_head^2) +
    has_kids +
    # control vec. 2
    head_works +
    female_head +
    work_female_interac +
    # control vec. 3
    has_fridge + 
    has_electricity +
    roof + 
    wall + 
    floor +
    toilet + 
    # control vec. 4
    transportation +
    food_outside +
    # fixed effects
    region + 
    building,
  data = apis_subsample_65_or_above
)

summary(subsample_65_or_above)

Call:
lm(formula = protein_share ~ water_source + water_income_interac + 
    water_family_interac + log_income_c + family_size_c + educ_head + 
    age_head + I(age_head^2) + has_kids + head_works + female_head + 
    work_female_interac + has_fridge + has_electricity + roof + 
    wall + floor + toilet + transportation + food_outside + region + 
    building, data = apis_subsample_65_or_above)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.21940 -0.06541 -0.00416  0.05660  0.47955 

Coefficients:
                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)           5.919e-01  2.663e-01   2.222 0.026423 *  
water_source          9.785e-03  6.480e-03   1.510 0.131293    
water_income_interac -1.040e-02  7.574e-03  -1.373 0.169989    
water_family_interac  4.569e-03  2.643e-03   1.729 0.084124 .  
log_income_c          1.549e-02  6.280e-03   2.467 0.013754 *  
family_size_c        -4.033e-03  2.166e-03  -1.862 0.062860 .  
educ_head             6.435e-04  6.644e-04   0.969 0.332889    
age_head             -8.889e-03  7.026e-03  -1.265 0.206054    
I(age_head^2)         5.863e-05  4.631e-05   1.266 0.205705    
has_kids              3.958e-03  7.025e-03   0.563 0.573247    
head_works            1.214e-02  6.774e-03   1.792 0.073429 .  
female_head          -5.012e-03  6.771e-03  -0.740 0.459280    
work_female_interac  -5.166e-03  1.044e-02  -0.495 0.620660    
has_fridge            1.229e-02  6.664e-03   1.844 0.065454 .  
has_electricity      -4.003e-03  1.028e-02  -0.389 0.697062    
roof                  1.737e-02  9.487e-03   1.831 0.067295 .  
wall                 -4.708e-03  8.303e-03  -0.567 0.570815    
floor                -1.404e-06  5.710e-05  -0.025 0.980385    
toilet                9.125e-03  8.604e-03   1.061 0.289064    
transportation        2.874e-07  3.422e-07   0.840 0.401069    
food_outside         -3.994e-02  6.362e-03  -6.278 4.61e-10 ***
region2              -8.223e-03  1.555e-02  -0.529 0.596914    
region3               1.084e-02  1.438e-02   0.754 0.451063    
region4              -5.359e-03  1.645e-02  -0.326 0.744710    
region5              -2.005e-02  1.473e-02  -1.361 0.173833    
region6              -2.542e-02  1.392e-02  -1.827 0.067966 .  
region7              -1.941e-02  1.516e-02  -1.280 0.200593    
region8              -3.641e-02  1.448e-02  -2.515 0.012022 *  
region9              -2.286e-02  1.679e-02  -1.361 0.173588    
region10             -4.285e-02  1.466e-02  -2.924 0.003518 ** 
region11              1.166e-02  1.567e-02   0.744 0.456772    
region12             -6.472e-02  1.726e-02  -3.750 0.000184 ***
region13             -4.828e-02  1.416e-02  -3.410 0.000670 ***
region14             -5.468e-02  1.444e-02  -3.787 0.000160 ***
region15             -6.454e-02  1.908e-02  -3.382 0.000742 ***
region16             -3.486e-02  1.538e-02  -2.267 0.023543 *  
region17             -2.343e-02  1.540e-02  -1.521 0.128447    
building2             1.183e-04  2.201e-02   0.005 0.995712    
building3             2.035e-02  1.689e-02   1.205 0.228608    
building4            -7.277e-02  6.519e-02  -1.116 0.264468    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.09058 on 1341 degrees of freedom
Multiple R-squared:  0.1091,    Adjusted R-squared:  0.08323 
F-statistic: 4.213 on 39 and 1341 DF,  p-value: 4.163e-16
print(coeftest(subsample_65_or_above, 
               vcov = vcovCL(subsample_65_or_above, cluster = ~region)))

t test of coefficients:

                        Estimate  Std. Error  t value  Pr(>|t|)    
(Intercept)           5.9187e-01  2.7055e-01   2.1876 0.0288701 *  
water_source          9.7850e-03  7.7467e-03   1.2631 0.2067646    
water_income_interac -1.0399e-02  8.9397e-03  -1.1632 0.2449607    
water_family_interac  4.5685e-03  2.2825e-03   2.0016 0.0455294 *  
log_income_c          1.5493e-02  7.2973e-03   2.1230 0.0339325 *  
family_size_c        -4.0330e-03  2.2572e-03  -1.7867 0.0742081 .  
educ_head             6.4355e-04  7.5856e-04   0.8484 0.3963756    
age_head             -8.8886e-03  7.2268e-03  -1.2299 0.2189324    
I(age_head^2)         5.8630e-05  4.8630e-05   1.2056 0.2281661    
has_kids              3.9580e-03  7.7031e-03   0.5138 0.6074664    
head_works            1.2136e-02  7.1676e-03   1.6931 0.0906653 .  
female_head          -5.0122e-03  6.5274e-03  -0.7679 0.4426924    
work_female_interac  -5.1658e-03  1.0782e-02  -0.4791 0.6319287    
has_fridge            1.2285e-02  5.3567e-03   2.2934 0.0219784 *  
has_electricity      -4.0034e-03  1.2866e-02  -0.3112 0.7557182    
roof                  1.7373e-02  8.7047e-03   1.9958 0.0461552 *  
wall                 -4.7077e-03  7.3088e-03  -0.6441 0.5196108    
floor                -1.4042e-06  5.1158e-05  -0.0274 0.9781070    
toilet                9.1252e-03  1.3853e-02   0.6587 0.5101970    
transportation        2.8743e-07  3.6614e-07   0.7850 0.4325857    
food_outside         -3.9944e-02  5.4976e-03  -7.2658 6.272e-13 ***
region2              -8.2235e-03  1.8245e-03  -4.5071 7.143e-06 ***
region3               1.0841e-02  2.8535e-03   3.7993 0.0001516 ***
region4              -5.3592e-03  3.8540e-03  -1.3905 0.1645948    
region5              -2.0049e-02  2.7075e-03  -7.4048 2.312e-13 ***
region6              -2.5422e-02  2.8282e-03  -8.9889 < 2.2e-16 ***
region7              -1.9410e-02  3.4054e-03  -5.6998 1.473e-08 ***
region8              -3.6414e-02  2.5644e-03 -14.2002 < 2.2e-16 ***
region9              -2.2857e-02  3.5647e-03  -6.4121 1.985e-10 ***
region10             -4.2850e-02  2.5577e-03 -16.7533 < 2.2e-16 ***
region11              1.1664e-02  4.3090e-03   2.7069 0.0068773 ** 
region12             -6.4724e-02  2.9585e-03 -21.8774 < 2.2e-16 ***
region13             -4.8277e-02  6.0897e-03  -7.9277 4.663e-15 ***
region14             -5.4678e-02  1.9716e-03 -27.7334 < 2.2e-16 ***
region15             -6.4537e-02  9.6921e-03  -6.6587 4.018e-11 ***
region16             -3.4859e-02  2.7297e-03 -12.7701 < 2.2e-16 ***
region17             -2.3433e-02  2.9438e-03  -7.9602 3.632e-15 ***
building2             1.1832e-04  1.9486e-02   0.0061 0.9951562    
building3             2.0347e-02  1.9926e-02   1.0212 0.3073666    
building4            -7.2771e-02  5.9210e-02  -1.2290 0.2192777    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vif(subsample_65_or_above)
                           GVIF Df GVIF^(1/(2*Df))
water_source           1.756690  1        1.325402
water_income_interac   3.386435  1        1.840227
water_family_interac   2.784816  1        1.668777
log_income_c           5.413928  1        2.326785
family_size_c          3.589774  1        1.894670
educ_head              1.678960  1        1.295747
age_head             330.786716  1       18.187543
I(age_head^2)        329.522195  1       18.152746
has_kids               1.398923  1        1.182761
head_works             1.913571  1        1.383319
female_head            1.850646  1        1.360385
work_female_interac    2.222844  1        1.490920
has_fridge             1.776258  1        1.332763
has_electricity        1.443764  1        1.201567
roof                   1.676710  1        1.294878
wall                   1.778699  1        1.333679
floor                  1.464843  1        1.210307
toilet                 1.637968  1        1.279831
transportation         1.597129  1        1.263776
food_outside           1.641623  1        1.281258
region                 2.903506 16        1.033871
building               1.214310  3        1.032892
bgtest(subsample_65_or_above)

    Breusch-Godfrey test for serial correlation of order up to 1

data:  subsample_65_or_above
LM test = 44.557, df = 1, p-value = 2.471e-11
bptest(subsample_65_or_above)

    studentized Breusch-Pagan test

data:  subsample_65_or_above
BP = 56.018, df = 39, p-value = 0.03796
resettest(subsample_65_or_above, 
          power = 2:3, 
          type = "fitted", 
          vcov = function(x) vcovCL(x, cluster = ~apis_subsample_65_or_above$region))

    RESET test

data:  subsample_65_or_above
RESET = 1.6071, df1 = 2, df2 = 1341, p-value = 0.2009
hist(residuals(subsample_65_or_above), main="Histogram of Residuals", breaks=50)

#----------------------------------------------------------------------------------#

# presenting the comparison
# early-stage (<35)
subsample_below_35 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac + 
                           female_head + work_female_interac + log_income_c + educ_head + age_head + 
                           I(age_head^2) + family_size_c + has_kids + head_works + has_fridge + 
                           has_electricity + roof + wall + floor + toilet + transportation + 
                           food_outside + region + building, data = apis_subsample_below_35)

# mid-stage (35-64)
subsample_35_64 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac + 
                        female_head + work_female_interac + log_income_c + educ_head + age_head + 
                        I(age_head^2) + family_size_c + has_kids + head_works + has_fridge + 
                        has_electricity + roof + wall + floor + toilet + transportation + 
                        food_outside + region + building, data = apis_subsample_35_64)

# senior-stage (65+)
subsample_above_65 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac + 
                           female_head + work_female_interac + log_income_c + educ_head + age_head + 
                           I(age_head^2) + family_size_c + has_kids + head_works + has_fridge + 
                           has_electricity + roof + wall + floor + toilet + transportation + 
                           food_outside + region + building, data = apis_subsample_65_or_above)

# extract standard errors (robust/clustered)
se_below_35 <- coeftest(subsample_below_35, vcov = vcovCL(subsample_below_35, cluster = ~region))[, 2]
se_35_64    <- coeftest(subsample_35_64, vcov = vcovCL(subsample_35_64, cluster = ~region))[, 2]
se_above_65 <- coeftest(subsample_65_or_above, vcov = vcovCL(subsample_65_or_above, cluster = ~region))[, 2]

# generate the final table
stargazer(subsample_below_35, subsample_35_64, subsample_65_or_above,
          type = "text",
          se = list(se_below_35, se_35_64, se_above_65),
          intercept.bottom = FALSE, 
          digits = 4,
          no.space = TRUE,
          order = c("Constant", "water_source", "water_income_interac", "water_family_interac", 
                    "female_head", "work_female_interac"),
          omit = c("region", "building"),
          column.labels = c("Early (<35)", "Mid (35-64)", "Senior (65+)"),
          add.lines = list(
            c("Region Fixed Effects", "Yes", "Yes", "Yes"),
            c("Building Fixed Effects", "Yes", "Yes", "Yes")
          ),
          dep.var.caption = "Dependent variable: Protein share",
          dep.var.labels.include = FALSE,
          model.numbers = TRUE,
          star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          star.char = c("+", "*", "**", "***"),
          notes = c("Clustered standard errors (at Region level) reported in parentheses.",
                    "Significance: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001"),
          notes.append = FALSE)

======================================================================================================
                                              Dependent variable: Protein share                       
                       -------------------------------------------------------------------------------
                              Early (<35)                Mid (35-64)               Senior (65+)       
                                  (1)                        (2)                        (3)           
------------------------------------------------------------------------------------------------------
Constant                       0.2536***                  0.2479***                   0.5919*         
                                (0.0628)                   (0.0494)                  (0.2706)         
water_source                    0.0134*                    0.0079*                    0.0098          
                                (0.0066)                   (0.0039)                  (0.0077)         
water_income_interac            -0.0168                    -0.0140*                   -0.0104         
                                (0.0134)                   (0.0070)                  (0.0089)         
water_family_interac            0.0134**                  0.0077***                   0.0046*         
                                (0.0051)                   (0.0015)                  (0.0023)         
female_head                      0.0072                     0.0023                    -0.0050         
                                (0.0163)                   (0.0061)                  (0.0065)         
work_female_interac             -0.0240                    -0.0169*                   -0.0052         
                                (0.0188)                   (0.0075)                  (0.0108)         
log_income_c                   0.0340***                  0.0296***                   0.0155*         
                                (0.0075)                   (0.0046)                  (0.0073)         
educ_head                       -0.0006                     0.0003                    0.0006          
                                (0.0009)                   (0.0004)                  (0.0008)         
age_head                         0.0030                     0.0011                    -0.0089         
                                (0.0047)                   (0.0022)                  (0.0072)         
I(age_head2)                    -0.00003                   -0.00001                   0.0001          
                                (0.0001)                  (0.00002)                  (0.00005)        
family_size_c                  -0.0096***                 -0.0085***                 -0.0040+         
                                (0.0022)                   (0.0010)                  (0.0023)         
has_kids                         0.0064                     0.0023                    0.0040          
                                (0.0051)                   (0.0025)                  (0.0077)         
head_works                       0.0097                     0.0048                    0.0121+         
                                (0.0122)                   (0.0057)                  (0.0072)         
has_fridge                       0.0021                   0.0147***                   0.0123*         
                                (0.0042)                   (0.0030)                  (0.0054)         
has_electricity                 -0.0107                    -0.0084                    -0.0040         
                                (0.0122)                   (0.0052)                  (0.0129)         
roof                             0.0029                     0.0014                    0.0174*         
                                (0.0076)                   (0.0053)                  (0.0087)         
wall                            -0.0068                     0.0056                    -0.0047         
                                (0.0057)                   (0.0050)                  (0.0073)         
floor                           0.0002*                    0.0001+                   -0.000001        
                                (0.0001)                  (0.00005)                  (0.0001)         
toilet                           0.0043                     0.0088                    0.0091          
                                (0.0082)                   (0.0058)                  (0.0139)         
transportation                 -0.000000                 -0.000000***                0.000000         
                               (0.000000)                 (0.000000)                (0.000000)        
food_outside                   -0.0637***                 -0.0482***                -0.0399***        
                                (0.0072)                   (0.0048)                  (0.0055)         
------------------------------------------------------------------------------------------------------
Region Fixed Effects              Yes                        Yes                        Yes           
Building Fixed Effects            Yes                        Yes                        Yes           
Observations                     2,088                      6,525                      1,381          
R2                               0.1805                     0.1454                    0.1091          
Adjusted R2                      0.1645                     0.1401                    0.0832          
Residual Std. Error        0.0907 (df = 2047)         0.0896 (df = 6484)        0.0906 (df = 1341)    
F Statistic            11.2737*** (df = 40; 2047) 27.5746*** (df = 40; 6484) 4.2126*** (df = 39; 1341)
======================================================================================================
Note:                             Clustered standard errors (at Region level) reported in parentheses.
                                               Significance: + p<0.1; * p<0.05; ** p<0.01; *** p<0.001
#----------------------------------------------------------------------------------#

# robustness test for early-stage (below 35)
y1 <- lm(protein_share ~ water_source + region + building, 
         data = apis_subsample_below_35)
y2 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac + 
           log_income_c + family_size_c + educ_head + age_head + I(age_head^2) + has_kids + 
           region + building, data = apis_subsample_below_35)
y3 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac + 
           log_income_c + family_size_c + educ_head + age_head + I(age_head^2) + has_kids + 
           head_works + female_head + work_female_interac + has_fridge + has_electricity +
           region + building, data = apis_subsample_below_35)
y4 <- subsample_below_35 # full model

se_y1 <- coeftest(y1, vcov = vcovCL(y1, cluster = ~region))[, 2]
se_y2 <- coeftest(y2, vcov = vcovCL(y2, cluster = ~region))[, 2]
se_y3 <- coeftest(y3, vcov = vcovCL(y3, cluster = ~region))[, 2]
se_y4 <- se_below_35

stargazer(y1, y2, y3, y4, 
          type = "text",
          se = list(se_y1, se_y2, se_y3, se_y4),
          intercept.bottom = FALSE,
          digits = 4, 
          no.space = TRUE, 
          order = c("Constant", "water_source", "water_income_interac", "water_family_interac",
                    "log_income_c", "family_size", "educ_head", "age_head", "I(age_head^2)", 
                    "has_kids", "head_works", "female_head", "work_female_interac",
                    "has_fridge", "has_electricity", "transportation", "food_outside",
                    "roof", "wall", "floor", "toilet"),
          omit = c("region", "building"), 
          column.labels = c("Bivariate", "Socioecon", "Time Poverty", "Full"),
          star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          star.char = c("+", "*", "**", "***"),
          dep.var.caption = "Dependent variable: Protein share (head below 35)",
          dep.var.labels.include = FALSE,
          notes = "+ p<0.1; * p<0.05; ** p<0.01; *** p<0.001",
          notes.append = FALSE)

=============================================================================================================================
                                                Dependent variable: Protein share (head below 35)                            
                     --------------------------------------------------------------------------------------------------------
                             Bivariate                 Socioecon               Time Poverty                   Full           
                                (1)                       (2)                       (3)                       (4)            
-----------------------------------------------------------------------------------------------------------------------------
Constant                     0.2817***                  0.1457+                   0.1555*                  0.2536***         
                             (0.0013)                  (0.0772)                  (0.0753)                   (0.0628)         
water_source                  0.0074                    0.0104                    0.0113                    0.0134*          
                             (0.0067)                  (0.0083)                  (0.0080)                   (0.0066)         
water_income_interac                                    -0.0114                   -0.0139                   -0.0168          
                                                       (0.0123)                  (0.0122)                   (0.0134)         
water_family_interac                                   0.0146**                  0.0141**                   0.0134**         
                                                       (0.0050)                  (0.0049)                   (0.0051)         
log_income_c                                           0.0185**                  0.0201**                  0.0340***         
                                                       (0.0065)                  (0.0067)                   (0.0075)         
family_size_c                                         -0.0085***                -0.0089***                 -0.0096***        
                                                       (0.0025)                  (0.0026)                   (0.0022)         
educ_head                                               -0.0011                   -0.0008                   -0.0006          
                                                       (0.0009)                  (0.0010)                   (0.0009)         
age_head                                                0.0093+                   0.0085                     0.0030          
                                                       (0.0056)                  (0.0057)                   (0.0047)         
I(age_head2)                                            -0.0001                   -0.0001                   -0.00003         
                                                       (0.0001)                  (0.0001)                   (0.0001)         
has_kids                                                0.0023                    0.0042                     0.0064          
                                                       (0.0054)                  (0.0055)                   (0.0051)         
head_works                                                                        0.0112                     0.0097          
                                                                                 (0.0121)                   (0.0122)         
female_head                                                                       0.0056                     0.0072          
                                                                                 (0.0160)                   (0.0163)         
work_female_interac                                                               -0.0266                   -0.0240          
                                                                                 (0.0178)                   (0.0188)         
has_fridge                                                                        0.0055                     0.0021          
                                                                                 (0.0042)                   (0.0042)         
has_electricity                                                                   -0.0133                   -0.0107          
                                                                                 (0.0149)                   (0.0122)         
transportation                                                                                             -0.000000         
                                                                                                           (0.000000)        
food_outside                                                                                               -0.0637***        
                                                                                                            (0.0072)         
roof                                                                                                         0.0029          
                                                                                                            (0.0076)         
wall                                                                                                        -0.0068          
                                                                                                            (0.0057)         
floor                                                                                                       0.0002*          
                                                                                                            (0.0001)         
toilet                                                                                                       0.0043          
                                                                                                            (0.0082)         
-----------------------------------------------------------------------------------------------------------------------------
Observations                   2,088                     2,088                     2,088                     2,088           
R2                            0.0908                    0.1115                    0.1167                     0.1805          
Adjusted R2                   0.0816                    0.0990                    0.1020                     0.1645          
Residual Std. Error     0.0951 (df = 2066)        0.0942 (df = 2058)        0.0941 (df = 2053)         0.0907 (df = 2047)    
F Statistic          9.8303*** (df = 21; 2066) 8.9084*** (df = 29; 2058) 7.9748*** (df = 34; 2053) 11.2737*** (df = 40; 2047)
=============================================================================================================================
Note:                                                                               + p<0.1; * p<0.05; ** p<0.01; *** p<0.001
# robustness test for mid-stage (35-64)
m1 <- lm(protein_share ~ water_source + region + building, 
         data = apis_subsample_35_64)
m2 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac + 
           log_income_c + family_size_c + educ_head + age_head + I(age_head^2) + has_kids + 
           region + building, data = apis_subsample_35_64)
m3 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac + 
           log_income_c + family_size_c + educ_head + age_head + I(age_head^2) + has_kids + 
           head_works + female_head + work_female_interac + has_fridge + has_electricity +
           region + building, data = apis_subsample_35_64)
m4 <- subsample_35_64 # full model

se_m1 <- coeftest(m1, vcov = vcovCL(m1, cluster = ~region))[, 2]
se_m2 <- coeftest(m2, vcov = vcovCL(m2, cluster = ~region))[, 2]
se_m3 <- coeftest(m3, vcov = vcovCL(m3, cluster = ~region))[, 2]
se_m4 <- se_35_64

stargazer(m1, m2, m3, m4, 
          type = "text",
          se = list(se_m1, se_m2, se_m3, se_m4),
          intercept.bottom = FALSE,
          digits = 4, 
          no.space = TRUE, 
          order = c("Constant", "water_source", "water_income_interac", "water_family_interac",
                    "log_income_c", "family_size", "educ_head", "age_head", "I(age_head^2)", 
                    "has_kids", "head_works", "female_head", "work_female_interac",
                    "has_fridge", "has_electricity", "transportation", "food_outside",
                    "roof", "wall", "floor", "toilet"),
          omit = c("region", "building"), 
          column.labels = c("Bivariate", "Socioecon", "Time Poverty", "Full"),
          star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          star.char = c("+", "*", "**", "***"),
          dep.var.caption = "Dependent variable: Protein share (head 35-64)",
          dep.var.labels.include = FALSE,
          notes = "+ p<0.1; * p<0.05; ** p<0.01; *** p<0.001",
          notes.append = FALSE)

================================================================================================================================
                                                   Dependent variable: Protein share (head 35-64)                               
                     -----------------------------------------------------------------------------------------------------------
                             Bivariate                  Socioecon                 Time Poverty                   Full           
                                (1)                        (2)                        (3)                        (4)            
--------------------------------------------------------------------------------------------------------------------------------
Constant                     0.2667***                  0.2476***                  0.2413***                  0.2479***         
                              (0.0013)                   (0.0515)                   (0.0518)                   (0.0494)         
water_source                 0.0224***                   0.0092*                    0.0077+                    0.0079*          
                              (0.0046)                   (0.0045)                   (0.0043)                   (0.0039)         
water_income_interac                                     -0.0119+                   -0.0132+                   -0.0140*         
                                                         (0.0062)                   (0.0069)                   (0.0070)         
water_family_interac                                    0.0074***                  0.0073***                  0.0077***         
                                                         (0.0016)                   (0.0016)                   (0.0015)         
log_income_c                                            0.0223***                  0.0187***                  0.0296***         
                                                         (0.0041)                   (0.0052)                   (0.0046)         
family_size_c                                           -0.0096***                 -0.0094***                 -0.0085***        
                                                         (0.0010)                   (0.0010)                   (0.0010)         
educ_head                                                 0.0004                     0.0003                     0.0003          
                                                         (0.0004)                   (0.0004)                   (0.0004)         
age_head                                                  0.0004                     0.0005                     0.0011          
                                                         (0.0022)                   (0.0021)                   (0.0022)         
I(age_head2)                                            -0.000001                  -0.000002                   -0.00001         
                                                        (0.00002)                  (0.00002)                  (0.00002)         
has_kids                                                  0.0021                     0.0018                     0.0023          
                                                         (0.0026)                   (0.0026)                   (0.0025)         
head_works                                                                           0.0051                     0.0048          
                                                                                    (0.0057)                   (0.0057)         
female_head                                                                          0.0035                     0.0023          
                                                                                    (0.0061)                   (0.0061)         
work_female_interac                                                                 -0.0194*                   -0.0169*         
                                                                                    (0.0077)                   (0.0075)         
has_fridge                                                                         0.0153***                  0.0147***         
                                                                                    (0.0030)                   (0.0030)         
has_electricity                                                                     -0.0050                    -0.0084          
                                                                                    (0.0042)                   (0.0052)         
transportation                                                                                               -0.000000***       
                                                                                                              (0.000000)        
food_outside                                                                                                  -0.0482***        
                                                                                                               (0.0048)         
roof                                                                                                            0.0014          
                                                                                                               (0.0053)         
wall                                                                                                            0.0056          
                                                                                                               (0.0050)         
floor                                                                                                          0.0001+          
                                                                                                              (0.00005)         
toilet                                                                                                          0.0088          
                                                                                                               (0.0058)         
--------------------------------------------------------------------------------------------------------------------------------
Observations                   6,525                      6,525                      6,525                      6,525           
R2                             0.0650                     0.0963                     0.1026                     0.1454          
Adjusted R2                    0.0619                     0.0923                     0.0979                     0.1401          
Residual Std. Error      0.0936 (df = 6503)         0.0920 (df = 6495)         0.0918 (df = 6490)         0.0896 (df = 6484)    
F Statistic          21.5154*** (df = 21; 6503) 23.8700*** (df = 29; 6495) 21.8311*** (df = 34; 6490) 27.5746*** (df = 40; 6484)
================================================================================================================================
Note:                                                                                  + p<0.1; * p<0.05; ** p<0.01; *** p<0.001
# robustness test for senior-stage (65+)
s1 <- lm(protein_share ~ water_source + region + building, 
         data = apis_subsample_65_or_above)
s2 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac + 
           log_income_c + family_size_c + educ_head + age_head + I(age_head^2) + has_kids + 
           region + building, data = apis_subsample_65_or_above)
s3 <- lm(protein_share ~ water_source + water_income_interac + water_family_interac + 
           log_income_c + family_size_c + educ_head + age_head + I(age_head^2) + has_kids + 
           head_works + female_head + work_female_interac + has_fridge + has_electricity +
           region + building, data = apis_subsample_65_or_above)
s4 <- subsample_above_65 # full model

se_s1 <- coeftest(s1, vcov = vcovCL(s1, cluster = ~region))[, 2]
se_s2 <- coeftest(s2, vcov = vcovCL(s2, cluster = ~region))[, 2]
se_s3 <- coeftest(s3, vcov = vcovCL(s3, cluster = ~region))[, 2]
se_s4 <- se_above_65

stargazer(s1, s2, s3, s4, 
          type = "text",
          se = list(se_s1, se_s2, se_s3, se_s4),
          intercept.bottom = FALSE,
          digits = 4, 
          no.space = TRUE, 
          order = c("Constant", "water_source", "water_income_interac", "water_family_interac",
                    "log_income_c", "family_size", "educ_head", "age_head", "I(age_head^2)", 
                    "has_kids", "head_works", "female_head", "work_female_interac",
                    "has_fridge", "has_electricity", "transportation", "food_outside",
                    "roof", "wall", "floor", "toilet"),
          omit = c("region", "building"), 
          column.labels = c("Bivariate", "Socioecon", "Time Poverty", "Full"),
          star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          star.char = c("+", "*", "**", "***"),
          dep.var.caption = "Dependent variable: Protein share (head 65+)",
          dep.var.labels.include = FALSE,
          notes = "+ p<0.1; * p<0.05; ** p<0.01; *** p<0.001",
          notes.append = FALSE)

============================================================================================================================
                                                  Dependent variable: Protein share (head 65+)                              
                     -------------------------------------------------------------------------------------------------------
                             Bivariate                 Socioecon               Time Poverty                  Full           
                                (1)                       (2)                       (3)                       (4)           
----------------------------------------------------------------------------------------------------------------------------
Constant                     0.2694***                  0.6413*                   0.5733*                   0.5919*         
                             (0.0016)                  (0.2823)                  (0.2719)                  (0.2706)         
water_source                  0.0130+                   0.0060                    0.0065                    0.0098          
                             (0.0067)                  (0.0077)                  (0.0079)                  (0.0077)         
water_income_interac                                    -0.0101                   -0.0103                   -0.0104         
                                                       (0.0081)                  (0.0076)                  (0.0089)         
water_family_interac                                    0.0034                    0.0035                    0.0046*         
                                                       (0.0022)                  (0.0022)                  (0.0023)         
log_income_c                                           0.0155**                   0.0112+                   0.0155*         
                                                       (0.0058)                  (0.0063)                  (0.0073)         
family_size_c                                          -0.0045*                  -0.0046*                  -0.0040+         
                                                       (0.0021)                  (0.0022)                  (0.0023)         
educ_head                                               0.0006                    0.0006                    0.0006          
                                                       (0.0008)                  (0.0008)                  (0.0008)         
age_head                                                -0.0097                   -0.0086                   -0.0089         
                                                       (0.0075)                  (0.0072)                  (0.0072)         
I(age_head2)                                            0.0001                    0.0001                    0.0001          
                                                       (0.0001)                  (0.00005)                 (0.00005)        
has_kids                                                0.00005                   0.0021                    0.0040          
                                                       (0.0071)                  (0.0070)                  (0.0077)         
head_works                                                                        0.0118                    0.0121+         
                                                                                 (0.0074)                  (0.0072)         
female_head                                                                       -0.0079                   -0.0050         
                                                                                 (0.0065)                  (0.0065)         
work_female_interac                                                               -0.0020                   -0.0052         
                                                                                 (0.0110)                  (0.0108)         
has_fridge                                                                        0.0130*                   0.0123*         
                                                                                 (0.0056)                  (0.0054)         
has_electricity                                                                   0.0043                    -0.0040         
                                                                                 (0.0120)                  (0.0129)         
transportation                                                                                             0.000000         
                                                                                                          (0.000000)        
food_outside                                                                                              -0.0399***        
                                                                                                           (0.0055)         
roof                                                                                                        0.0174*         
                                                                                                           (0.0087)         
wall                                                                                                        -0.0047         
                                                                                                           (0.0073)         
floor                                                                                                      -0.000001        
                                                                                                           (0.0001)         
toilet                                                                                                      0.0091          
                                                                                                           (0.0139)         
----------------------------------------------------------------------------------------------------------------------------
Observations                   1,381                     1,381                     1,381                     1,381          
R2                            0.0610                    0.0726                    0.0800                    0.1091          
Adjusted R2                   0.0472                    0.0534                    0.0574                    0.0832          
Residual Std. Error     0.0923 (df = 1360)        0.0920 (df = 1352)        0.0918 (df = 1347)        0.0906 (df = 1341)    
F Statistic          4.4200*** (df = 20; 1360) 3.7793*** (df = 28; 1352) 3.5488*** (df = 33; 1347) 4.2126*** (df = 39; 1341)
============================================================================================================================
Note:                                                                              + p<0.1; * p<0.05; ** p<0.01; *** p<0.001

SUMMARY OF ASSUMPTION DIAGNOSTICS

# primary analysis
vif(bennett_ols_protein)
                          GVIF Df GVIF^(1/(2*Df))
water_source          1.626700  1        1.275422
water_income_interac  3.221646  1        1.794895
water_family_interac  2.006816  1        1.416621
log_income_c          4.511720  1        2.124081
family_size_c         2.588251  1        1.608804
educ_head             1.694844  1        1.301862
age_head             38.545977  1        6.208541
I(age_head^2)        40.176003  1        6.338454
has_kids              1.345954  1        1.160153
head_works            2.204835  1        1.484869
female_head           3.972155  1        1.993027
work_female_interac   3.726701  1        1.930467
has_fridge            1.779105  1        1.333831
has_electricity       1.347734  1        1.160919
roof                  1.621014  1        1.273191
wall                  1.718226  1        1.310811
floor                 1.383571  1        1.176253
toilet                1.547047  1        1.243804
aircon                1.648638  1        1.283993
telephone             1.345900  1        1.160129
computer              1.770477  1        1.330593
transportation        1.427557  1        1.194804
food_outside          1.630232  1        1.276805
region                2.743580 16        1.032042
building              1.209854  4        1.024098
bgtest(bennett_ols_protein)

    Breusch-Godfrey test for serial correlation of order up to 1

data:  bennett_ols_protein
LM test = 707.67, df = 1, p-value < 2.2e-16
bptest(bennett_ols_protein)

    studentized Breusch-Pagan test

data:  bennett_ols_protein
BP = 292.63, df = 43, p-value < 2.2e-16
resettest(bennett_ols_protein, 
          power = 2:3, 
          type = "fitted", 
          vcov = function(x) vcovCL(x, cluster = ~apis_final$region))

    RESET test

data:  bennett_ols_protein
RESET = 0.45302, df1 = 2, df2 = 9950, p-value = 0.6357
hist(residuals(bennett_ols_protein), main="Histogram of Residuals", breaks=50)

# falsification test
vif(bennett_ols_bread)
                          GVIF Df GVIF^(1/(2*Df))
water_source          1.623243  1        1.274065
water_income_interac  2.969923  1        1.723347
water_family_interac  1.991939  1        1.411361
log_income_c          4.429597  1        2.104661
family_size_c         2.584313  1        1.607580
educ_head             1.669494  1        1.292089
age_head             38.521056  1        6.206533
I(age_head^2)        40.128468  1        6.334703
has_kids              1.344721  1        1.159621
head_works            2.197742  1        1.482478
female_head           3.969691  1        1.992408
work_female_interac   3.725754  1        1.930221
has_fridge            1.690478  1        1.300184
has_electricity       1.345908  1        1.160133
roof                  1.620328  1        1.272921
wall                  1.717634  1        1.310585
floor                 1.353128  1        1.163240
toilet                1.546075  1        1.243413
transportation        1.418134  1        1.190854
food_outside          1.624596  1        1.274596
region                2.625678 16        1.030626
building              1.206677  4        1.023762
bgtest(bennett_ols_bread)

    Breusch-Godfrey test for serial correlation of order up to 1

data:  bennett_ols_bread
LM test = 571.12, df = 1, p-value < 2.2e-16
bptest(bennett_ols_bread)

    studentized Breusch-Pagan test

data:  bennett_ols_bread
BP = 651.39, df = 40, p-value < 2.2e-16
resettest(bennett_ols_bread, 
          power = 2:3, 
          type = "fitted", 
          vcov = function(x) vcovCL(x, cluster = ~apis_final$region))

    RESET test

data:  bennett_ols_bread
RESET = 34.925, df1 = 2, df2 = 9953, p-value = 7.678e-16
hist(residuals(bennett_ols_bread), main="Histogram of Residuals", breaks=50)

# female subsample
vif(subsample_female)
                         GVIF Df GVIF^(1/(2*Df))
water_source         1.691807  1        1.300695
water_income_interac 3.554955  1        1.885459
water_family_interac 2.942803  1        1.715460
log_income_c         5.582345  1        2.362699
family_size_c        3.380519  1        1.838619
educ_head            1.834611  1        1.354478
age_head             1.495928  1        1.223082
has_kids             1.340753  1        1.157909
head_works           1.131093  1        1.063528
has_fridge           1.673408  1        1.293603
has_electricity      1.345387  1        1.159908
roof                 1.612160  1        1.269709
wall                 1.658201  1        1.287712
floor                1.378329  1        1.174022
toilet               1.410944  1        1.187832
transportation       1.600071  1        1.264939
food_outside         1.609005  1        1.268466
region               2.499325 16        1.029039
building             1.223044  3        1.034127
bgtest(subsample_female)

    Breusch-Godfrey test for serial correlation of order up to 1

data:  subsample_female
LM test = 60.681, df = 1, p-value = 6.711e-15
bptest(subsample_female)

    studentized Breusch-Pagan test

data:  subsample_female
BP = 71.792, df = 36, p-value = 0.0003603
resettest(subsample_female, 
          power = 2:3, 
          type = "fitted", 
          vcov = function(x) vcovCL(x, cluster = ~apis_subsample_female$region))

    RESET test

data:  subsample_female
RESET = 2.8156, df1 = 2, df2 = 1964, p-value = 0.06011
hist(residuals(subsample_female), main="Histogram of Residuals", breaks=50)

# male subsample
vif(subsample_male)
                          GVIF Df GVIF^(1/(2*Df))
water_source          1.616794  1        1.271532
water_income_interac  2.852839  1        1.689035
water_family_interac  1.866466  1        1.366187
log_income_c          4.214313  1        2.052879
family_size_c         2.412280  1        1.553152
educ_head             1.631705  1        1.277382
age_head             40.039873  1        6.327707
I(age_head^2)        41.610239  1        6.450600
has_kids              1.347899  1        1.160991
head_works            1.337867  1        1.156662
has_fridge            1.674965  1        1.294204
has_electricity       1.348411  1        1.161211
roof                  1.615900  1        1.271181
wall                  1.724114  1        1.313055
floor                 1.345329  1        1.159883
toilet                1.567900  1        1.252158
transportation        1.400192  1        1.183297
food_outside          1.635357  1        1.278811
region                2.738622 16        1.031984
building              1.211424  4        1.024264
bgtest(subsample_male)

    Breusch-Godfrey test for serial correlation of order up to 1

data:  subsample_male
LM test = 573, df = 1, p-value < 2.2e-16
bptest(subsample_male)

    studentized Breusch-Pagan test

data:  subsample_male
BP = 248.9, df = 38, p-value < 2.2e-16
resettest(subsample_male, 
          power = 2:3, 
          type = "fitted", 
          vcov = function(x) vcovCL(x, cluster = ~apis_subsample_male$region))

    RESET test

data:  subsample_male
RESET = 0.71522, df1 = 2, df2 = 7954, p-value = 0.4891
hist(residuals(subsample_male), main="Histogram of Residuals", breaks=50)

# below 35 subsample
vif(subsample_below_35)
                           GVIF Df GVIF^(1/(2*Df))
water_source           1.863179  1        1.364983
water_income_interac   2.336852  1        1.528677
water_family_interac   2.149937  1        1.466266
female_head            6.193916  1        2.488758
work_female_interac    5.384839  1        2.320526
log_income_c           3.840384  1        1.959690
educ_head              1.675782  1        1.294520
age_head             161.895849  1       12.723830
I(age_head^2)        161.903175  1       12.724118
family_size_c          2.470122  1        1.571662
has_kids               1.600608  1        1.265151
head_works             2.510244  1        1.584375
has_fridge             1.561013  1        1.249405
has_electricity        1.394828  1        1.181028
roof                   1.817525  1        1.348156
wall                   1.835716  1        1.354886
floor                  1.321589  1        1.149604
toilet                 1.664863  1        1.290296
transportation         1.521619  1        1.233539
food_outside           1.766256  1        1.329006
region                 3.741587 16        1.042097
building               1.366710  4        1.039823
bgtest(subsample_below_35)

    Breusch-Godfrey test for serial correlation of order up to 1

data:  subsample_below_35
LM test = 101.46, df = 1, p-value < 2.2e-16
bptest(subsample_below_35)

    studentized Breusch-Pagan test

data:  subsample_below_35
BP = 86.833, df = 40, p-value = 2.586e-05
resettest(subsample_below_35, 
          power = 2:3, 
          type = "fitted", 
          vcov = function(x) vcovCL(x, cluster = ~apis_subsample_below_35$region))

    RESET test

data:  subsample_below_35
RESET = 1.1849, df1 = 2, df2 = 2047, p-value = 0.306
hist(residuals(subsample_below_35), main="Histogram of Residuals", breaks=50)

# 35-64 subsample
vif(subsample_35_64)
                           GVIF Df GVIF^(1/(2*Df))
water_source           1.650600  1        1.284757
water_income_interac   3.123177  1        1.767251
water_family_interac   1.933504  1        1.390505
female_head            5.521773  1        2.349845
work_female_interac    5.253559  1        2.292064
log_income_c           4.393101  1        2.095973
educ_head              1.650063  1        1.284548
age_head             167.045181  1       12.924596
I(age_head^2)        166.778062  1       12.914258
family_size_c          2.424382  1        1.557043
has_kids               1.369089  1        1.170081
head_works             1.987893  1        1.409927
has_fridge             1.683295  1        1.297419
has_electricity        1.319160  1        1.148547
roof                   1.544416  1        1.242745
wall                   1.667012  1        1.291128
floor                  1.311287  1        1.145115
toilet                 1.504700  1        1.226662
transportation         1.400562  1        1.183453
food_outside           1.608977  1        1.268455
region                 2.662254 16        1.031072
building               1.195262  4        1.022546
bgtest(subsample_35_64)

    Breusch-Godfrey test for serial correlation of order up to 1

data:  subsample_35_64
LM test = 416.84, df = 1, p-value < 2.2e-16
bptest(subsample_35_64)

    studentized Breusch-Pagan test

data:  subsample_35_64
BP = 207.86, df = 40, p-value < 2.2e-16
resettest(subsample_35_64, 
          power = 2:3, 
          type = "fitted", 
          vcov = function(x) vcovCL(x, cluster = ~apis_subsample_35_64$region))

    RESET test

data:  subsample_35_64
RESET = 0.49144, df1 = 2, df2 = 6484, p-value = 0.6118
hist(residuals(subsample_35_64), main="Histogram of Residuals", breaks=50)

# 65+ subsample
vif(subsample_65_or_above)
                           GVIF Df GVIF^(1/(2*Df))
water_source           1.756690  1        1.325402
water_income_interac   3.386435  1        1.840227
water_family_interac   2.784816  1        1.668777
log_income_c           5.413928  1        2.326785
family_size_c          3.589774  1        1.894670
educ_head              1.678960  1        1.295747
age_head             330.786716  1       18.187543
I(age_head^2)        329.522195  1       18.152746
has_kids               1.398923  1        1.182761
head_works             1.913571  1        1.383319
female_head            1.850646  1        1.360385
work_female_interac    2.222844  1        1.490920
has_fridge             1.776258  1        1.332763
has_electricity        1.443764  1        1.201567
roof                   1.676710  1        1.294878
wall                   1.778699  1        1.333679
floor                  1.464843  1        1.210307
toilet                 1.637968  1        1.279831
transportation         1.597129  1        1.263776
food_outside           1.641623  1        1.281258
region                 2.903506 16        1.033871
building               1.214310  3        1.032892
bgtest(subsample_65_or_above)

    Breusch-Godfrey test for serial correlation of order up to 1

data:  subsample_65_or_above
LM test = 44.557, df = 1, p-value = 2.471e-11
bptest(subsample_65_or_above)

    studentized Breusch-Pagan test

data:  subsample_65_or_above
BP = 56.018, df = 39, p-value = 0.03796
resettest(subsample_65_or_above, 
          power = 2:3, 
          type = "fitted", 
          vcov = function(x) vcovCL(x, cluster = ~apis_subsample_65_or_above$region))

    RESET test

data:  subsample_65_or_above
RESET = 1.6071, df1 = 2, df2 = 1341, p-value = 0.2009
hist(residuals(subsample_65_or_above), main="Histogram of Residuals", breaks=50)

#----------------------------------------------------------------------------------#

# compiling the VIFs of all models in one table

# helper function to extract and format VIFs
get_vif_data <- function(model, model_name) {
  v_raw <- car::vif(model)
  # Adjust for factors (GVIF) to make it comparable to standard VIF
  v_vals <- if(ncol(as.matrix(v_raw)) > 1) v_raw[, ncol(v_raw)]^2 else v_raw
  df <- data.frame(Variables = names(v_vals), val = as.numeric(v_vals))
  colnames(df)[2] <- model_name
  return(df)
}

# extract VIFs for all 7 models
# note: ensure these model objects are in the environment
list_vifs <- list(
  get_vif_data(bennett_ols_protein, "Model (1)"),
  get_vif_data(bennett_ols_bread,   "Model (2)"),
  get_vif_data(subsample_female,    "Model (3)"),
  get_vif_data(subsample_male,      "Model (4)"),
  get_vif_data(subsample_below_35,  "Model (5)"),
  get_vif_data(subsample_35_64,     "Model (6)"),
  get_vif_data(subsample_above_65,  "Model (7)")
)

# join all into one master table
vif_table <- Reduce(function(x, y) full_join(x, y, by = "Variables"), list_vifs)

# clean variable names for the table
# this removes things like I() or _c for a cleaner look
vif_table$Variables <- gsub("I\\(age_head\\^2\\)", "age_head_sq", vif_table$Variables)

# generate styled html table and save to file
vif_table %>%
  mutate(across(starts_with("Model"), ~ifelse(is.na(.), "-", sprintf("%.3f", .)))) %>%
  kable(format = "html", 
        caption = "Appendix X: Assumption Diagnostics (Multicollinearity)",
        align = "lccccccc",
        table.attr = 'style="font-family: Times New Roman; width: 100%;"') %>%
  kable_classic(full_width = F, html_font = "Times New Roman") %>%
  # Adding the thick horizontal rules from your screenshot
  row_spec(0, bold = TRUE, extra_css = "border-top: 2px solid black; border-bottom: 1.5px solid black;") %>%
  row_spec(nrow(vif_table), extra_css = "border-bottom: 2px solid black;") %>%
  footnote(general = "Model (1) Baseline Protein; (2) Bread Falsification; (3) Female-Headed; (4) Male-Headed; (5) <35; (6) 35-64; (7) 65+. VIF values for fixed effects represent squared Generalized VIF. Age-squared omitted in Model 3 and Age cohorts based on RESET results.",
           general_title = "Note: ",
           footnote_as_chunk = T) %>%
  save_kable(file = "Appendix_X_VIF.html") # This creates the file in your working directory

cat("VIF Table has been exported to Appendix_X_VIF.html")
VIF Table has been exported to Appendix_X_VIF.html
#----------------------------------------------------------------------------------#

library(knitr)
library(kableExtra)
library(tidyverse)

# 1. Improved Helper Function (Returns a single row per model)
get_diag_horizontal <- function(model, model_name, cluster_var = NULL) {
  
  bg_p <- bgtest(model)$p.value
  bp_p <- bptest(model)$p.value
  re_p <- resettest(model, power = 2:3, type = "fitted", 
                    vcov = function(x) vcovCL(x, cluster = cluster_var))$p.value
  
  data.frame(
    Model = model_name,
    `Breusch-Godfrey (p-val)` = bg_p,
    `Breusch-Pagan (p-val)` = bp_p,
    `Ramsey RESET (p-val)` = re_p,
    check.names = FALSE
  )
}

# 2. Compile the rows for all 7 models
horizontal_diags <- bind_rows(
  get_diag_horizontal(bennett_ols_protein, "Model 1: Baseline Protein", apis_final$region),
  get_diag_horizontal(bennett_ols_bread,   "Model 2: Bread Falsification", apis_final$region),
  get_diag_horizontal(subsample_female,    "Model 3: Female-Headed", apis_subsample_female$region),
  get_diag_horizontal(subsample_male,      "Model 4: Male-Headed", apis_subsample_male$region),
  get_diag_horizontal(subsample_below_35,  "Model 5: Head <35", apis_subsample_below_35$region),
  get_diag_horizontal(subsample_35_64,     "Model 6: Head 35-64", apis_subsample_35_64$region),
  get_diag_horizontal(subsample_above_65,  "Model 7: Head 65+", apis_subsample_65_or_above$region)
)

# 3. Generate the styled table
horizontal_diags %>%
  mutate(across(where(is.numeric), ~sprintf("%.4f", .))) %>%
  kable(format = "html", 
        caption = "Appendix Y: Summary of Diagnostic Tests (p-values)",
        align = "lccc",
        table.attr = 'style="font-family: Times New Roman; width: 100%;"') %>%
  kable_classic(full_width = F, html_font = "Times New Roman") %>%
  row_spec(0, bold = TRUE, extra_css = "border-top: 2px solid black; border-bottom: 1.5px solid black;") %>%
  row_spec(nrow(horizontal_diags), extra_css = "border-bottom: 2px solid black;") %>%
  footnote(general = "Table reports p-values for each diagnostic test. Ramsey RESET utilizes clustered standard errors. Significant p-values (< 0.05) in Breusch-Pagan justify the use of Clustered Robust Standard Errors in the main analysis.",
           general_title = "Note: ",
           footnote_as_chunk = T) %>%
  save_kable(file = "Appendix_Y_Horizontal.html")

cat("Horizontal Diagnostic Table exported to Appendix_Y_Horizontal.html")
Horizontal Diagnostic Table exported to Appendix_Y_Horizontal.html