library(haven)    ### package for reading .dta files

## Warning: package 'haven' was built under R version 4.1.3

# Read the Stata file with haven::read_dta() and coerce the result to a plain
# data.frame. as.data.frame() has no `header` argument (that belongs to
# read.table()/read.csv()); the stray `header = TRUE` was silently absorbed by
# `...` and had no effect, so it is removed.
data <- as.data.frame(
  read_dta("C:/Stuffs/IIT/3rd Semester/Advanced Econ Lab/UYdata.dta")
)

# View() opens a data viewer; restrict it to interactive sessions so the
# script can also be run or knitted non-interactively.
if (interactive()) {
  View(data)
}

library(RCT)   ### Github library by Isidoro Garcia-Urquieta for RCT

## Warning: package 'RCT' was built under R version 4.1.3

# Per-variable summary (mean, n and quantiles) for every column of the data set
summary_statistics(data)

## # A tibble: 88 x 12
##    variable       mean     n   `0` `0.05` `0.1` `0.25` `0.5` `0.75` `0.9` `0.95`
##    <chr>         <dbl> <dbl> <dbl>  <dbl> <dbl>  <dbl> <dbl>  <dbl> <dbl>  <dbl>
##  1 google_km   19.7     3003  1.64   6.89  8.27   12.7  19.2   25.4  31.6   34.5
##  2 tehsil_e     3.07    2949  1      1     1       1     4      5     5      5  
##  3 treatment    0.973   2843  0      0     0       0     1      2     2      2  
##  4 noncomplia~  0.0533  3003  0      0     0       0     0      0     0      1  
##  5 treatment0~  0.614   3003  0      0     0       0     1      1     1      1  
##  6 treatment_h  0.307   3003  0      0     0       0     0      1     1      1  
##  7 treatment_~  0.307   3003  0      0     0       0     0      1     1      1  
##  8 hhscr_8_b    0.672   3003  0      0     0       0     1      1     1      1  
##  9 hhgender_1~  0.0613  3003  0      0     0       0     0      0     0      1  
## 10 mrtlstatus~  1.14    3003  1      1     1       1     1      1     1      3  
## # ... with 78 more rows, and 1 more variable: `1` <dbl>

library(expss)   ### library for extracting labels from the .dta file

## Warning: package 'expss' was built under R version 4.1.3

## Loading required package: maditr

## Warning: package 'maditr' was built under R version 4.1.3

## 
## To aggregate several columns with one summary: take(mtcars, mpg, hp, fun = mean, by = am)

## 
## Use 'expss_output_viewer()' to display tables in the RStudio Viewer.
##  To return to the console output, use 'expss_output_default()'.

## 
## Attaching package: 'expss'

## The following objects are masked from 'package:haven':
## 
##     is.labelled, read_spss

# List the variable label stored on each column (NULL where a column has none)
lapply(data, function(column) var_lab(column))

## $villagecode_str
## [1] "2011census"
## 
## $google_km
## NULL
## 
## $tehsil_e
## [1] "Tehsil name"
## 
## $treatment
## [1] "Treatment Assignment - C/H/H+S"
## 
## $noncompliance_village
## [1] "Villages added to C from H/H+S"
## 
## $treatment012
## [1] "Dummy for whether assigned any treament"
## 
## $treatment_h
## [1] "Dummy for whether assigned to treament - health"
## 
## $treatment_hs
## [1] "Dummy for whether assigned to treament - health+subsidy"
## 
## $hhscr_8_b
## [1] "Have LPG connection"
## 
## $hhgender_1_b
## [1] "Female headed hh."
## 
## $mrtlstatus_1_b
## [1] "Marital Status"
## 
## $hhnum_b
## [1] "Household size"
## 
## $hh_caste_b
## [1] "caste"
## 
## $fire_1_b
## [1] "Use firewood for cooking"
## 
## $fire_9_b
## [1] "If collect firewood"
## 
## $dung_1_b
## [1] "Use dungcakes for cooking"
## 
## $lpg_1_b
## [1] "Use LPG for cooking"
## 
## $cookarea_1_b
## [1] "Separate cooking room YesNo"
## 
## $cookarea_2_b
## [1] "Chimney/outlet YesNo"
## 
## $age_pc_b
## [1] "Age of primary cook"
## 
## $occu_b
## [1] "Hh. head self-employed or salaried"
## 
## $obcpop_b
## [1] "OBC"
## 
## $hindu_b
## [1] "Hindu"
## 
## $ashacred_b
## [1] "Trust info. from ASHA"
## 
## $education_b
## [1] "Pvt. primary school"
## 
## $educationmid_b
## [1] "Govt. middle school"
## 
## $healthstatus_b
## [1] "Primary health sub center"
## 
## $irrigation_b
## [1] "Proportion of irrigated land"
## 
## $road_b
## [1] "All weather road"
## 
## $edu_hhhead_b
## [1] "Household head edu. above primary"
## 
## $pc_edu_b
## [1] "Primary cook's edu. above primary"
## 
## $scst_b
## [1] "SC/ST"
## 
## $distance_dealer_close_b
## [1] "Closest distance between each hh to it nearest serving LPG dealer (based on geod"
## 
## $fire_1_e
## [1] "Firewood"
## 
## $fire_9_e
## [1] "Firewood collection YesNo"
## 
## $dung_1_e
## [1] "Dung cakes"
## 
## $cookarea_1_e
## [1] "Separate cooking room YesNo"
## 
## $cookarea_2_e
## [1] "Chimney/outlet YesNo"
## 
## $subdistricthqdist_c
## [1] "Sub District Head Quarter (Distance in km)"
## 
## $bv85_c
## [1] "Lighting Electricity"
## 
## $bv91_c
## [1] "Have latrine within house"
## 
## $assets_index
## [1] "Household wealth index"
## 
## $nonhindu
## NULL
## 
## $totrefills_omc_e2
## NULL
## 
## $totrefills_omc_b2
## [1] "Total no. of LPG refills (annual)"
## 
## $pcdecisionindex_std_b
## [1] "Index for PC's decision making/bargaining power (using PCA)"
## 
## $edu_trt
## NULL
## 
## $edu_trth
## NULL
## 
## $edu_trths
## NULL
## 
## $asset_trt
## NULL
## 
## $asset_trth
## NULL
## 
## $asset_trths
## NULL
## 
## $pcdecision_trt
## NULL
## 
## $pcdecision_trth
## NULL
## 
## $pcdecision_trths
## NULL
## 
## $distdlr_imputed_trt
## NULL
## 
## $distdlr_imputed_trth
## NULL
## 
## $distdlr_imputed_trths
## NULL
## 
## $distdlr_hybrid_b
## NULL
## 
## $distdlr_hybrid_trt
## NULL
## 
## $distdlr_hybrid_trth
## NULL
## 
## $distdlr_hybrid_trths
## NULL
## 
## $last_lpg_e
## NULL
## 
## $last_lpg_b
## NULL
## 
## $last_chulha_e
## NULL
## 
## $last_chulha_b
## NULL
## 
## $last_induction_e
## NULL
## 
## $last_induction_b
## NULL
## 
## $fueluse_index_b
## [1] "Index of Fuel Used in Last Meal"
## 
## $fueluse_index_e
## [1] "Index of Fuel Used in Last Meal"
## 
## $fire_visitb
## NULL
## 
## $fire_visite
## NULL
## 
## $dungmakeorcollect_b
## [1] "HH either makes dungcake or collects dung or does both"
## 
## $dungmakeorcollect_e
## [1] "HH either makes dungcake or collects dung or does both"
## 
## $dungmakeorcollectvisits_b
## [1] "HH makes visits for making dung or collecting dung or both"
## 
## $dungmakeorcollectvisits_e
## [1] "HH makes visits for making dung or collecting dung or both"
## 
## $induction_use_e
## [1] "Use induction stove for cooking"
## 
## $induction_use_b
## [1] "Use induction stove for cooking"
## 
## $monthly_winb
## [1] "No. of LPG refills per month (winter)"
## 
## $monthly_sumb
## [1] "No. of LPG refills per month (summer)"
## 
## $monthly_wetb
## [1] "No. of LPG refills per month (monsoon)"
## 
## $att_missing
## NULL
## 
## $treatment01
## NULL
## 
## $treatment02
## NULL
## 
## $treatment12
## NULL
## 
## $hhhead_mrtlstatus
## [1] "Household head is married"
## 
## $firewood_qty_b
## [1] "Qty. of firewood purchased last month (kg)"
## 
## $dung_qty_b
## [1] "Qty. of dung cakes purchased last month"
## 
## $`_merge`
## NULL

# Drop every row flagged with att_missing == 1.
# `%in%` never returns NA, so a row whose att_missing is NA is kept unchanged;
# with `==`, an NA in the logical row index makes `[` return an all-NA row.
data <- data[!(data$att_missing %in% 1), ]

##Table 1 is a timeline of the survey so I am skipping it

##Table 2: Balance of household characteristics at baseline.

# Keep the baseline household covariates plus the treatment assignment column.
df <- subset(
  data,
  select = c(
    "hhnum_b", "hhgender_1_b", "age_pc_b", "edu_hhhead_b", "pc_edu_b",
    "hhhead_mrtlstatus", "occu_b", "scst_b", "obcpop_b", "hindu_b",
    "assets_index", "ashacred_b", "treatment"
  )
)

# Covariate balance table, grouped by the `treatment` column
balance_table(df, treatment = "treatment")

## # A tibble: 12 x 6
##    variables1        Media_control1 Media_trat1 Media_trat2 p_value1 p_value2
##    <chr>                      <dbl>       <dbl>       <dbl>    <dbl>    <dbl>
##  1 age_pc_b                 34.2        33.9        33.6      0.611   0.267  
##  2 ashacred_b                0.830       0.813       0.839    0.326   0.587  
##  3 assets_index              1.55        1.63        1.51     0.0192  0.211  
##  4 edu_hhhead_b              0.416       0.430       0.368    0.553   0.0314 
##  5 hhgender_1_b              0.0611      0.0551      0.0732   0.579   0.297  
##  6 hhhead_mrtlstatus         0.930       0.928       0.926    0.906   0.737  
##  7 hhnum_b                   6.13        6.15        6.17     0.853   0.738  
##  8 hindu_b                   0.930       0.931       0.890    0.945   0.00288
##  9 obcpop_b                  0.435       0.421       0.436    0.549   0.970  
## 10 occu_b                    0.511       0.534       0.493    0.330   0.439  
## 11 pc_edu_b                  0.372       0.364       0.343    0.724   0.188  
## 12 scst_b                    0.393       0.407       0.428    0.542   0.125

##Table 3: Balance of household fuel consumption at baseline.

# Keep the baseline fuel-use variables plus the treatment assignment column.
df2 <- subset(
  data,
  select = c(
    "fire_1_b", "lpg_1_b", "dung_1_b", "induction_use_b",
    "firewood_qty_b", "dung_qty_b", "hhscr_8_b", "totrefills_omc_b2",
    "monthly_winb", "monthly_sumb", "monthly_wetb", "treatment"
  )
)

# Balance table for the fuel-use variables, grouped by the `treatment` column
balance_table(df2, treatment = "treatment")

## # A tibble: 11 x 6
##    variables1        Media_control1 Media_trat1 Media_trat2 p_value1 p_value2
##    <chr>                      <dbl>       <dbl>       <dbl>    <dbl>    <dbl>
##  1 dung_1_b                  0.874       0.886       0.874   0.395    0.994  
##  2 dung_qty_b               20.5        38.3        32.7     0.0709   0.00465
##  3 fire_1_b                  0.755       0.728       0.763   0.183    0.679  
##  4 firewood_qty_b            9.43       15.8        12.4     0.145    0.309  
##  5 hhscr_8_b                 0.643       0.705       0.672   0.00407  0.181  
##  6 induction_use_b           0.0560      0.0761      0.0532  0.0803   0.790  
##  7 lpg_1_b                   0.719       0.771       0.738   0.00986  0.344  
##  8 monthly_sumb              0.279       0.299       0.306   0.255    0.118  
##  9 monthly_wetb              0.299       0.324       0.321   0.121    0.190  
## 10 monthly_winb              0.273       0.285       0.276   0.435    0.845  
## 11 totrefills_omc_b2         3.12        3.33        3.30    0.184    0.260

library(sjPlot)

## Warning: package 'sjPlot' was built under R version 4.1.3

## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!

Regressions Replication

\[ Y_{iv}^1=\beta_c+\beta_T T_v+\beta_0 Y_{iv}^0+\beta_X^{\prime} \mathbf{X}_{iv}+\beta_Z^{\prime} \mathbf{Z}_v+\varepsilon_{iv} \quad \text{(Overall treatment)} \]
\[ Y_{iv}^1=\beta_c+\beta_T^h T_v^h+\beta_T^{hs} T_v^{hs}+\beta_0 Y_{iv}^0+\beta_X^{\prime} \mathbf{X}_{iv}+\beta_Z^{\prime} \mathbf{Z}_v+\nu_{iv} \quad \text{(Segregated effect)} \]

##Table 4 (1) non-FE

# Outcome totrefills_omc_e2 regressed on baseline annual refills and the
# any-treatment dummy (treatment012), with no further covariates.
# NOTE(review): summary() reports classical OLS standard errors; the stated
# specification assigns treatment at village level (T_v) -- confirm whether
# village-clustered errors are wanted.
reg1 <- lm(
  formula = totrefills_omc_e2 ~ totrefills_omc_b2 + treatment012,
  data = data
)
summary(reg1)

## 
## Call:
## lm(formula = totrefills_omc_e2 ~ totrefills_omc_b2 + treatment012, 
##     data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.1429  -1.0632  -0.7271   1.1277  15.3186 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1.00597    0.08397  11.980   <2e-16 ***
## totrefills_omc_b2  0.80913    0.01290  62.721   <2e-16 ***
## treatment012       0.05721    0.09145   0.626    0.532    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.279 on 2726 degrees of freedom
##   (62 observations deleted due to missingness)
## Multiple R-squared:  0.591,  Adjusted R-squared:  0.5907 
## F-statistic:  1970 on 2 and 2726 DF,  p-value: < 2.2e-16

##Table 4 (2) non-FE

# Same outcome and baseline control as column (1), but with the treatment split
# into its two arms: health (treatment_h) and health+subsidy (treatment_hs).
# NOTE(review): summary() reports classical OLS standard errors; the stated
# specification assigns treatment at village level -- confirm whether
# village-clustered errors are wanted.
reg2 <- lm(
  formula = totrefills_omc_e2 ~ totrefills_omc_b2 + treatment_h + treatment_hs,
  data = data
)
summary(reg2)

## 
## Call:
## lm(formula = totrefills_omc_e2 ~ totrefills_omc_b2 + treatment_h + 
##     treatment_hs, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.1433  -1.1088  -0.7272   1.1392  15.3648 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1.00585    0.08398  11.978   <2e-16 ***
## totrefills_omc_b2  0.80916    0.01290  62.721   <2e-16 ***
## treatment_h        0.01100    0.10641   0.103    0.918    
## treatment_hs       0.10299    0.10615   0.970    0.332    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.279 on 2725 degrees of freedom
##   (62 observations deleted due to missingness)
## Multiple R-squared:  0.5911, Adjusted R-squared:  0.5907 
## F-statistic:  1313 on 3 and 2725 DF,  p-value: < 2.2e-16

##Table 4 (3) FE

# Pooled-treatment regression with the additional covariates listed in the
# formula below.
# NOTE(review): labelled "FE", but the formula contains no explicit factor()
# or fixed-effect term -- confirm this matches the intended specification.
reg3 <- lm(
  formula = totrefills_omc_e2 ~ totrefills_omc_b2 + treatment012 +
    hhnum_b + edu_hhhead_b + occu_b + age_pc_b + pc_edu_b + hh_caste_b +
    nonhindu + assets_index + irrigation_b + education_b + healthstatus_b +
    road_b + subdistricthqdist_c,
  data = data
)

# Report only the baseline-refill and treatment coefficients
tab_model(
  reg3,
  terms = c("totrefills_omc_b2", "treatment012")
)

	totrefills_omc_e2
Predictors	Estimates	CI	p
totrefills_omc_b2	0.81	0.78 – 0.83	<0.001
Dummy for whether assigned any treament	0.04	-0.15 – 0.23	0.664
Observations	2498
R² / R² adjusted	0.590 / 0.588

##Table 4 (4) FE

# Split-arm regression (health vs. health+subsidy) with the additional
# covariates listed in the formula below.
# NOTE(review): labelled "FE", but the formula contains no explicit factor()
# or fixed-effect term -- confirm this matches the intended specification.
reg4 <- lm(
  formula = totrefills_omc_e2 ~ totrefills_omc_b2 + treatment_h +
    treatment_hs + hhnum_b + edu_hhhead_b + occu_b + age_pc_b + pc_edu_b +
    hh_caste_b + nonhindu + assets_index + irrigation_b + education_b +
    healthstatus_b + road_b + subdistricthqdist_c,
  data = data
)

# Report only the baseline-refill and the two treatment-arm coefficients
tab_model(
  reg4,
  terms = c("totrefills_omc_b2", "treatment_h", "treatment_hs"),
  dv.labels = "Impact of information campaign on annual LPG refill consumption."
)

	Impact of information campaign on annual LPG refill consumption.
Predictors	Estimates	CI	p
totrefills_omc_b2	0.81	0.78 – 0.83	<0.001
Dummy for whether assigned to treament - health	0.00	-0.22 – 0.22	0.985
Dummy for whether assigned to treament - health+subsidy	0.08	-0.14 – 0.31	0.457
Observations	2498
R² / R² adjusted	0.590 / 0.588

##Table 5: the data are missing; the author was contacted and confirmed this.

Replication of “A Breath of Fresh Air: Raising Awareness for Clean Fuel Adoption”

Ayanik Anwesh Patra

19/10/2022

Regressions Replication