# Load the cash-transfer survey data (Stata 13+ .dta file) into `df`.
# NOTE(review): the original `rm(list = ls())` was removed. It deletes the
# caller's workspace when this script is sourced, yet does not give a clean
# session (attached packages and options survive); restart R instead.
# NOTE(review): setwd() is kept only because the relative data paths read by
# this script resolve against it; prefer running from the project directory.
setwd("~/Desktop")
library(readstata13)
df <- read.dta13("cashtransfers_v12.dta")
Libraries/themes
library(ggplot2)
library(tidyverse)
library(gridExtra)
library(finalfit)
library(kableExtra)
library(xtable)
# Shared ggplot2 theme added to the plots in this script: theme_bw() with the
# panel grid removed and custom text sizes/colours for axes, legend, subtitle.
jrothsch_theme <- theme_bw() +
  theme(
    text = element_text(size = 10, face = "bold", color = "deepskyblue4"),
    panel.grid = element_blank(),
    axis.text = element_text(size = 10, color = "gray13"),
    axis.title = element_text(size = 10, color = "red"),
    legend.text = element_text(colour = "Black", size = 10),
    legend.title = element_text(colour = "Black", size = 7),
    plot.subtitle = element_text(size = 14, face = "italic", color = "black")
  )
Randomization is important for this study for the same reason it is for any social science experiment: to provide an unbiased way of measuring the effect of the intervention. In order to understand how well the intervention worked, we must have another, similar group to compare to. Randomizing ensures that all group variables are identical in expectation, and for large enough samples is extremely likely to create similar groups. This study uses two levels of randomization. By randomizing at the village level, we will be able to measure whether villages in the treatment group have better outcomes than those in the control group, and be more confident that these differences are a result of the treatment, not prior factors (additional measures can be created to test whether the treatment and control villages are similar). Similarly, by randomizing at the individual level we can ensure that the differences between treated and untreated individuals reflect the individualized effects of cash transfers.
# One row per (purecontrol, village) pair: summarise() with no arguments
# keeps only the grouping columns.
village_rand <- df %>%
  group_by(purecontrol, village) %>%
  summarise()
# Count villages by pure-control status (purecontrol == 1 is the control arm).
table(village_rand[["purecontrol"]])
##
## 0 1
## 62 63
# Three-arm label: treated households, pure-control villages, and everyone
# else (spillover). "atreat" sorts first alphabetically, so the treated group
# ends up as the reference level in the lm() fits further down.
df <- mutate(
  df,
  treatment_cat = ifelse(
    treat == 1,
    "atreat",
    ifelse(purecontrol == 1, "control", "spillover")
  )
)
# Rows with no recorded age, tabulated by treatment arm; used here as a
# check on dropouts per condition.
df_NAs <- filter(df, is.na(age))
table(df_NAs[["treatment_cat"]])
##
## atreat control spillover
## 20 1 13
# Balance check: sample size and mean demographics for each treatment arm,
# restricted to respondents with a positive age and a non-missing
# life-satisfaction answer.
indiv_sum <- df %>%
  filter(age > 0, !is.na(wvs_life_sat)) %>%
  group_by(treatment_cat) %>%
  summarise(
    n = n(),
    avg_age = mean(age),
    male_pct = sum(gender == "male") / n(),
    avg_hhs = mean(hh_size),
    avg_children = mean(children)
  )
indiv_sum
## # A tibble: 3 x 6
## treatment_cat n avg_age male_pct avg_hhs avg_children
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 atreat 281 35.3 0.228 5.36 2.76
## 2 control 250 34.8 0.292 5.05 2.31
## 3 spillover 269 36.5 0.257 5.20 2.51
It appears that the household-level (meaning only spillover vs. treatment) randomization was successful. There are a similar number of participants in each group. While there are slight differences in household size, number of children, average age, and male percentage, all seem to be small enough as to be unproblematic and unlikely to be the result of poor randomization.
#table(df$wvs_life_sat)
#table(df$wvs_happiness)
# Numeric happiness scale: 1 = "Not at all happy" ... 4 = "Very happy".
# Each label is matched explicitly, so a missing or unrecognised response
# becomes NA rather than being silently coded as 1 by a catch-all else branch.
df <- df %>%
  mutate(
    wvs_happiness_num = as.numeric(match(
      as.character(wvs_happiness),
      c("Not at all happy", "Not very happy", "Quite happy", "Very happy")
    ))
  )
wvs_life_sat is already coded from 1-10, presumably with 1 being very unsatisfied, and 10 being very satisfied. For the purpose of group level analysis, this seems like a good form for it. I changed wvs_happiness such that it is also a numeric scale from 1-4, where 1 is “Not at all happy” and 4 is “Very happy”.
# Mean well-being and consumption outcomes per treatment arm, among
# respondents with a non-missing life-satisfaction answer.
outcomes_sum <- df %>%
  filter(!is.na(wvs_life_sat)) %>%
  group_by(treatment_cat) %>%
  summarise(
    n = n(),
    sat = mean(wvs_life_sat),
    happy = mean(wvs_happiness_num),
    food = mean(cons_food),
    social = mean(cons_social),
    total = mean(cons_total)
  )
outcomes_sum
## # A tibble: 3 x 7
## treatment_cat n sat happy food social total
## <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 atreat 281 3.60 1.85 1756. 4575. 13104.
## 2 control 251 2.98 2.12 1597. 4671. 10896.
## 3 spillover 269 3.20 2.06 1493. 3269. 10250.
# Unadjusted OLS of each outcome on treatment arm; only the summaries are
# stored. `data = .` is kept inside the pipe so the printed Call is unchanged.
r1 <- summary(df %>% lm(wvs_life_sat ~ treatment_cat, data = .))
r2 <- summary(df %>% lm(wvs_happiness_num ~ treatment_cat, data = .))
r3 <- summary(df %>% lm(cons_food ~ treatment_cat, data = .))
r4 <- summary(df %>% lm(cons_social ~ treatment_cat, data = .))
r5 <- summary(df %>% lm(cons_total ~ treatment_cat, data = .))
r1
##
## Call:
## lm(formula = wvs_life_sat ~ treatment_cat, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6014 -1.9801 -0.2007 1.3986 7.0199
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.6014 0.1280 28.146 < 2e-16 ***
## treatment_catcontrol -0.6213 0.1863 -3.335 0.000891 ***
## treatment_catspillover -0.4007 0.1830 -2.190 0.028818 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.145 on 798 degrees of freedom
## (148 observations deleted due to missingness)
## Multiple R-squared: 0.0143, Adjusted R-squared: 0.01183
## F-statistic: 5.787 on 2 and 798 DF, p-value: 0.003197
r2
##
## Call:
## lm(formula = wvs_happiness_num ~ treatment_cat, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.11952 -0.11952 -0.05948 0.14947 2.14947
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.85053 0.04230 43.750 < 2e-16 ***
## treatment_catcontrol 0.26899 0.06158 4.368 1.42e-05 ***
## treatment_catspillover 0.20895 0.06048 3.455 0.00058 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.709 on 798 degrees of freedom
## (148 observations deleted due to missingness)
## Multiple R-squared: 0.0262, Adjusted R-squared: 0.02376
## F-statistic: 10.74 on 2 and 798 DF, p-value: 2.506e-05
r3
##
## Call:
## lm(formula = cons_food ~ treatment_cat, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1654.0 -616.7 -164.5 442.6 4374.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1653.98 52.99 31.215 <2e-16 ***
## treatment_catcontrol -56.16 77.69 -0.723 0.470
## treatment_catspillover -219.75 76.17 -2.885 0.004 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 974.1 on 946 degrees of freedom
## Multiple R-squared: 0.009282, Adjusted R-squared: 0.007187
## F-statistic: 4.431 on 2 and 946 DF, p-value: 0.01215
r4
##
## Call:
## lm(formula = cons_social ~ treatment_cat, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4587 -3087 -1956 394 38713
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4387.2 300.4 14.606 < 2e-16 ***
## treatment_catcontrol 199.6 440.4 0.453 0.65055
## treatment_catspillover -1280.7 431.8 -2.966 0.00309 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5522 on 946 degrees of freedom
## Multiple R-squared: 0.01383, Adjusted R-squared: 0.01174
## F-statistic: 6.633 on 2 and 946 DF, p-value: 0.001378
r5
##
## Call:
## lm(formula = cons_total ~ treatment_cat, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12241.7 -4037.1 -833.2 2841.1 24939.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12241.7 339.9 36.018 < 2e-16 ***
## treatment_catcontrol -1399.8 498.3 -2.809 0.00507 **
## treatment_catspillover -2402.0 488.6 -4.916 1.04e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6249 on 946 degrees of freedom
## Multiple R-squared: 0.02521, Adjusted R-squared: 0.02315
## F-statistic: 12.23 on 2 and 946 DF, p-value: 5.692e-06
# Density of total consumption and of life satisfaction, one curve per arm,
# followed by the raw happiness responses cross-tabulated against arm.
ggplot(df, aes(x = cons_total)) +
  geom_density(aes(color = treatment_cat)) +
  jrothsch_theme
ggplot(df, aes(x = wvs_life_sat)) +
  geom_density(aes(color = treatment_cat)) +
  jrothsch_theme
table(df[["wvs_happiness"]], df[["treatment_cat"]])
##
## atreat control spillover
## Very happy 7 10 10
## Quite happy 30 49 48
## Not very happy 158 153 159
## Not at all happy 86 39 52
The effect of cash transfers on outcomes appears largely positive. Total consumption and life satisfaction are both significantly higher for the treatment condition than the control condition at the .05 level. A quick look at the distribution doesn’t suggest any reason to be suspicious of these numbers. Food and social spending aren’t very different between treatment and control. We therefore have some reason to believe that a cash transfer improves critical outcomes.
One concerning outcome is that the WVS happiness metric is lower for the treatment group. This appears mainly driven by a large increase in people who rate themselves “Not at all happy”. It would be worth looking further into possible reasons that cash transfers could cause extremely low self-reported happiness.
Analyzing spillover effects is interesting, as there are plausible a priori explanations by which it could raise or lower different outcomes. Similarly to the difference between treatment and control, the spillover condition shows lower total spending and life satisfaction, but higher happiness than the treatment condition. However, the psychological effects are weaker. This suggests that total consumption isn’t affected by having neighbors receive cash transfers, but there is likely a psychological impact.
The social spending outcome is suggestive of an interesting spillover effect. Specifically, social spending is much lower in the spillover condition than the control condition, while food spending and total spending are only slightly lower. One possible intuition for this is that for social events, those with more money are willing to pay for it, but this kind of obligation doesn’t extend to other forms of spending.
Note: Regressions with age/gender controls are included in appendix; I didn’t discuss in main analysis because they show similar results to the above regressions.
# Presentation table of the per-arm outcome means held in `outcomes_sum`.
tx1 <- xtable(outcomes_sum)
colnames(tx1) <- c(
  "Condition", "Participants", "Satisfaction", "Happiness",
  "Food_spending", "Social_spending", "Total_spending"
)
# Display labels are looked up by arm code rather than assigned by row
# position, so the table stays correct if the row order ever changes.
condition_labels <- c(
  atreat = "Treatment",
  control = "Control",
  spillover = "Spillover"
)
tx1 <- tx1 %>%
  mutate(
    Satisfaction = round(Satisfaction, 2),
    Happiness = round(Happiness, 2),
    Food_spending = round(Food_spending, 0),
    Social_spending = round(Social_spending, 0),
    Total_spending = round(Total_spending, 0),
    Condition = unname(condition_labels[as.character(Condition)])
  )
# One alignment entry per data column (the original supplied 6 for 7 columns);
# kable handles the row-name column's alignment itself when row.names = TRUE.
# TRUE/FALSE are spelled out because T and F can be reassigned.
kable(
  tx1,
  row.names = TRUE,
  align = rep("l", ncol(tx1)),
  booktabs = TRUE,
  escape = FALSE
) %>%
  kable_styling(font_size = 8)
| Condition | Participants | Satisfaction | Happiness | Food_spending | Social_spending | Total_spending | |
|---|---|---|---|---|---|---|---|
| 1 | Treatment | 281 | 3.60 | 1.85 | 1756 | 4575 | 13104 |
| 2 | Control | 251 | 2.98 | 2.12 | 1597 | 4671 | 10896 |
| 3 | Spillover | 269 | 3.20 | 2.06 | 1493 | 3269 | 10250 |
library(readxl)
# Village-level price data merged onto the individual records by village.
# all.x = TRUE keeps every row of `df` even when its village has no match in
# `prices`; TRUE is spelled out because `T` is an ordinary, reassignable name.
prices <- read_excel("villages.xls")
df2 <- merge(df, prices, by = "village", all.x = TRUE)
See code above. Note: There were some merging problems caused by differing names across the datasets, but for the sake of time, and because I doubt it’s the purpose of this assignment, I’m just going to ignore the ones that didn’t merge correctly, which I wouldn’t do in an analysis intended for real use.
# Collapse to one row per village among rows with a price index: respondent
# count, mean price index, and the village's pure-control flag.
summary_village <- df2 %>%
  filter(!is.na(v_price_index)) %>%
  group_by(village) %>%
  summarise(
    n = n(),
    price_index = mean(v_price_index),
    purecontrol = max(purecontrol)
  )
# Distribution of village price indices, split by pure-control status.
ggplot(
  summary_village,
  aes(x = price_index, color = as.factor(purecontrol))
) +
  geom_density() +
  jrothsch_theme
# Mean village price index within each pure-control group.
summary_village_control <- summary_village %>%
  group_by(purecontrol) %>%
  summarise(mean_vpi = mean(price_index))
summary_village_control
## # A tibble: 2 x 2
## purecontrol mean_vpi
## <dbl> <dbl>
## 1 0 0.00332
## 2 1 0.00344
# Village-level OLS of the price index on pure-control status.
summary(
  summary_village %>% lm(price_index ~ purecontrol, data = .)
)
##
## Call:
## lm(formula = price_index ~ purecontrol, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.37248 -0.07674 0.00781 0.07737 0.61427
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0033168 0.0198284 0.167 0.867
## purecontrol 0.0001212 0.0274305 0.004 0.996
##
## Residual standard error: 0.1444 on 109 degrees of freedom
## Multiple R-squared: 1.79e-07, Adjusted R-squared: -0.009174
## F-statistic: 1.952e-05 on 1 and 109 DF, p-value: 0.9965
A distribution plot, mean comparison, and regression all suggest that there is no meaningful difference in price levels between villages.
I wasn’t able to answer this question. I did some research and was able to find an r library that checks the distance between two geopoints. However, I wasn’t able to figure out how to use it to check how many neighbors each point has.
For g, I intended to use a linear regression on the created variable. For h, I would have used a linear regression with an interaction term for treatmentXvariable.
# Appendix: life satisfaction on treatment arm with age, gender, and
# education as controls.
summary(
  df %>%
    lm(wvs_life_sat ~ treatment_cat + age + gender + education, data = .)
)
##
## Call:
## lm(formula = wvs_life_sat ~ treatment_cat + age + gender + education,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1417 -1.7087 -0.4129 1.2680 7.1949
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.518822 0.599566 7.537 1.34e-13 ***
## treatment_catcontrol -0.673192 0.189355 -3.555 0.0004 ***
## treatment_catspillover -0.376060 0.182348 -2.062 0.0395 *
## age -0.016516 0.007298 -2.263 0.0239 *
## genderfemale -0.378645 0.181807 -2.083 0.0376 *
## educationPre-school 2.152377 1.547154 1.391 0.1646
## educationStandard 1 0.181894 1.275247 0.143 0.8866
## educationStandard 2 -0.493836 0.591597 -0.835 0.4041
## educationStandard 3 0.067103 0.574154 0.117 0.9070
## educationStandard 4 0.072451 0.508947 0.142 0.8868
## educationStandard 5 0.119590 0.464091 0.258 0.7967
## educationStandard 6 -0.574911 0.407189 -1.412 0.1584
## educationStandard 7 -0.068070 0.392681 -0.173 0.8624
## educationStandard 8 0.118389 0.395564 0.299 0.7648
## educationForm 1 -0.304035 0.564912 -0.538 0.5906
## educationForm 2 -0.068014 0.498546 -0.136 0.8915
## educationForm 3 0.358696 0.737930 0.486 0.6270
## educationForm 4 0.373219 0.497130 0.751 0.4530
## educationCollege 1 0.094570 2.162402 0.044 0.9651
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.132 on 781 degrees of freedom
## (149 observations deleted due to missingness)
## Multiple R-squared: 0.04671, Adjusted R-squared: 0.02474
## F-statistic: 2.126 on 18 and 781 DF, p-value: 0.004187
# Appendix: numeric happiness score on treatment arm with age, gender, and
# education as controls.
summary(
  df %>%
    lm(wvs_happiness_num ~ treatment_cat + age + gender + education, data = .)
)
##
## Call:
## lm(formula = wvs_happiness_num ~ treatment_cat + age + gender +
## education, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.40543 -0.15298 -0.05336 0.15874 2.15695
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.864162 0.199976 9.322 < 2e-16 ***
## treatment_catcontrol 0.257839 0.063157 4.083 4.91e-05 ***
## treatment_catspillover 0.208802 0.060820 3.433 0.000628 ***
## age 0.000368 0.002434 0.151 0.879885
## genderfemale -0.044505 0.060639 -0.734 0.463208
## educationPre-school -0.591295 0.516030 -1.146 0.252207
## educationStandard 1 0.312107 0.425339 0.734 0.463301
## educationStandard 2 0.091831 0.197318 0.465 0.641779
## educationStandard 3 0.024813 0.191500 0.130 0.896940
## educationStandard 4 0.045903 0.169752 0.270 0.786913
## educationStandard 5 0.019303 0.154791 0.125 0.900789
## educationStandard 6 -0.046689 0.135812 -0.344 0.731109
## educationStandard 7 0.011666 0.130973 0.089 0.929045
## educationStandard 8 0.016068 0.131934 0.122 0.903096
## educationForm 1 0.054366 0.188418 0.289 0.773009
## educationForm 2 -0.081257 0.166282 -0.489 0.625212
## educationForm 3 0.194713 0.246125 0.791 0.429118
## educationForm 4 0.025104 0.165810 0.151 0.879697
## educationCollege 1 -0.090007 0.721237 -0.125 0.900718
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7112 on 781 degrees of freedom
## (149 observations deleted due to missingness)
## Multiple R-squared: 0.03188, Adjusted R-squared: 0.009565
## F-statistic: 1.429 on 18 and 781 DF, p-value: 0.1103
# Appendix: food consumption on treatment arm with age, gender, and
# education as controls.
summary(
  df %>%
    lm(cons_food ~ treatment_cat + age + gender + education, data = .)
)
##
## Call:
## lm(formula = cons_food ~ treatment_cat + age + gender + education,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1790.7 -627.9 -182.3 382.0 4492.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1515.872 237.726 6.377 2.9e-10 ***
## treatment_catcontrol -165.948 76.342 -2.174 0.029986 *
## treatment_catspillover -268.702 74.657 -3.599 0.000337 ***
## age 2.188 2.826 0.774 0.438896
## genderfemale -258.756 69.574 -3.719 0.000212 ***
## educationPre-school -244.898 553.444 -0.442 0.658235
## educationStandard 1 -390.095 482.322 -0.809 0.418853
## educationStandard 2 49.529 249.494 0.199 0.842684
## educationStandard 3 269.628 243.565 1.107 0.268587
## educationStandard 4 151.640 201.611 0.752 0.452163
## educationStandard 5 385.122 187.741 2.051 0.040524 *
## educationStandard 6 378.348 165.961 2.280 0.022857 *
## educationStandard 7 323.405 161.239 2.006 0.045183 *
## educationStandard 8 448.353 160.720 2.790 0.005388 **
## educationForm 1 695.603 233.710 2.976 0.002995 **
## educationForm 2 117.609 201.032 0.585 0.558677
## educationForm 3 516.572 284.073 1.818 0.069329 .
## educationForm 4 358.785 197.748 1.814 0.069958 .
## educationCollege 1 1456.886 939.964 1.550 0.121510
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 928.2 on 896 degrees of freedom
## (34 observations deleted due to missingness)
## Multiple R-squared: 0.06037, Adjusted R-squared: 0.04149
## F-statistic: 3.198 on 18 and 896 DF, p-value: 8.038e-06
# Appendix: social consumption on treatment arm with age, gender, and
# education as controls.
summary(
  df %>%
    lm(cons_social ~ treatment_cat + age + gender + education, data = .)
)
##
## Call:
## lm(formula = cons_social ~ treatment_cat + age + gender + education,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7474 -3015 -1697 486 37180
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4655.028 1410.950 3.299 0.001008 **
## treatment_catcontrol -181.656 453.104 -0.401 0.688578
## treatment_catspillover -1482.945 443.102 -3.347 0.000852 ***
## age 2.318 16.772 0.138 0.890128
## genderfemale -1383.759 412.934 -3.351 0.000839 ***
## educationPre-school -1570.789 3284.791 -0.478 0.632624
## educationStandard 1 -2297.986 2862.672 -0.803 0.422337
## educationStandard 2 4416.491 1480.796 2.983 0.002936 **
## educationStandard 3 491.695 1445.603 0.340 0.733837
## educationStandard 4 853.083 1196.598 0.713 0.476079
## educationStandard 5 735.922 1114.280 0.660 0.509137
## educationStandard 6 560.462 985.007 0.569 0.569504
## educationStandard 7 623.376 956.981 0.651 0.514957
## educationStandard 8 1579.071 953.902 1.655 0.098197 .
## educationForm 1 465.259 1387.113 0.335 0.737390
## educationForm 2 -170.820 1193.163 -0.143 0.886192
## educationForm 3 397.582 1686.026 0.236 0.813634
## educationForm 4 462.224 1173.674 0.394 0.693803
## educationCollege 1 13831.589 5578.864 2.479 0.013348 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5509 on 896 degrees of freedom
## (34 observations deleted due to missingness)
## Multiple R-squared: 0.05183, Adjusted R-squared: 0.03278
## F-statistic: 2.721 on 18 and 896 DF, p-value: 0.0001451
# Appendix: total consumption on treatment arm with age, gender, and
# education as controls.
summary(
  df %>%
    lm(cons_total ~ treatment_cat + age + gender + education, data = .)
)
##
## Call:
## lm(formula = cons_total ~ treatment_cat + age + gender + education,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11025 -4037 -1133 2870 24473
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10982.852 1499.912 7.322 5.43e-13 ***
## treatment_catcontrol -2199.544 481.673 -4.566 5.65e-06 ***
## treatment_catspillover -2771.294 471.040 -5.883 5.67e-09 ***
## age 7.239 17.830 0.406 0.684846
## genderfemale -1210.874 438.970 -2.758 0.005926 **
## educationPre-school -2563.870 3491.901 -0.734 0.462999
## educationStandard 1 -2633.902 3043.167 -0.866 0.386989
## educationStandard 2 1806.819 1574.162 1.148 0.251358
## educationStandard 3 2247.417 1536.750 1.462 0.143969
## educationStandard 4 1502.212 1272.045 1.181 0.237939
## educationStandard 5 3275.931 1184.537 2.766 0.005799 **
## educationStandard 6 2148.765 1047.113 2.052 0.040452 *
## educationStandard 7 2651.154 1017.320 2.606 0.009312 **
## educationStandard 8 3387.198 1014.046 3.340 0.000872 ***
## educationForm 1 4585.327 1474.572 3.110 0.001933 **
## educationForm 2 1336.659 1268.393 1.054 0.292249
## educationForm 3 3228.408 1792.332 1.801 0.072002 .
## educationForm 4 3092.926 1247.676 2.479 0.013360 *
## educationCollege 1 17767.877 5930.619 2.996 0.002811 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5857 on 896 degrees of freedom
## (34 observations deleted due to missingness)
## Multiple R-squared: 0.08748, Adjusted R-squared: 0.06915
## F-statistic: 4.772 on 18 and 896 DF, p-value: 2.821e-10