# Start from an empty workspace and resolve data files relative to ~/Desktop.
# NOTE(review): rm(list = ls()) and setwd() tie this script to one machine;
# the relative file paths used for reading data depend on this working
# directory.
rm(list = ls())
setwd("~/Desktop")
library(readstata13)
# Load the main dataset from the Stata .dta file.
df <- read.dta13("cashtransfers_v12.dta")

Libraries/themes

library(ggplot2)
library(tidyverse)
library(gridExtra)
library(finalfit)
library(kableExtra)
library(xtable)
# Shared plot theme: black-and-white base with grid lines removed, bold blue
# text by default, and per-element overrides for axes, legend and subtitle.
jrothsch_theme <- theme_bw() +
  theme(
    text = element_text(size = 10, face = "bold", color = "deepskyblue4"),
    panel.grid = element_blank(),
    axis.text = element_text(size = 10, color = "gray13"),
    axis.title = element_text(size = 10, color = "red"),
    legend.text = element_text(colour = "Black", size = 10),
    legend.title = element_text(colour = "Black", size = 7),
    plot.subtitle = element_text(size = 14, face = "italic", color = "black")
  )

Question 1

Randomization is important for this study for the same reason it is for any social science experiment: to provide an unbiased way of measuring the effect of the intervention. In order to understand how well the intervention worked, we must have another, similar group to compare to. Randomizing assures that all group variables are identical in expectation, and for large enough samples is extremely likely to create similar groups. This study uses two levels of randomization. By randomizing at the village level, we will be able to measure whether villages in the treatment group have better outcomes than those in the control group, and be more confident that these differences are a result of the treatment, not prior factors (additional measures can be created to test whether the treatment and control villages are similar). Similarly, by randomizing at the individual level we can ensure that the differences between treated and untreated individuals reflect the individualized effects of cash transfers.

a)

# Collapse the data to one row per (purecontrol, village) combination.
village_rand <- summarize(group_by(df, purecontrol, village))

# Count how many villages fall under each purecontrol value.
table(village_rand$purecontrol)
## 
##  0  1 
## 62 63
# Three-level treatment label: "atreat" where treat == 1, otherwise "control"
# where purecontrol == 1, otherwise "spillover". The "a" prefix makes the
# treated group sort first alphabetically.
df <- mutate(
  df,
  treatment_cat = ifelse(
    treat == 1,
    "atreat",
    ifelse(purecontrol == 1, "control", "spillover")
  )
)

# Rows with a missing age, tabulated by treatment arm to compare missingness
# across conditions.
df_NAs <- filter(df, is.na(age))
table(df_NAs$treatment_cat)
## 
##    atreat   control spillover 
##        20         1        13
# Group sizes and key demographics by treatment arm, restricted to rows with a
# positive age and a non-missing life-satisfaction score.
indiv_sum <- df %>%
  filter(age > 0, !is.na(wvs_life_sat)) %>%
  group_by(treatment_cat) %>%
  summarise(
    n = n(),
    avg_age = mean(age),
    male_pct = sum(gender == "male") / n(),
    avg_hhs = mean(hh_size),
    avg_children = mean(children)
  )

indiv_sum
## # A tibble: 3 x 6
##   treatment_cat     n avg_age male_pct avg_hhs avg_children
##   <chr>         <int>   <dbl>    <dbl>   <dbl>        <dbl>
## 1 atreat          281    35.3    0.228    5.36         2.76
## 2 control         250    34.8    0.292    5.05         2.31
## 3 spillover       269    36.5    0.257    5.20         2.51

It appears that the household-level (meaning only spillover vs. treatment) randomization was successful. There are a similar number of participants in each group. While there are slight differences in household size, number of children, average age, and male percentage, all seem to be small enough as to be unproblematic and unlikely to be the result of poor randomization.

Question 2

#table(df$wvs_life_sat)
#table(df$wvs_happiness)

# Recode the WVS happiness item to a numeric 1-4 scale
# (1 = "Not at all happy", 4 = "Very happy"). Every label is matched
# explicitly, so a missing or unexpected response becomes NA instead of being
# silently scored as 1 by a catch-all branch.
df <- df %>%
  mutate(
    wvs_happiness_num = case_when(
      wvs_happiness == "Very happy" ~ 4,
      wvs_happiness == "Quite happy" ~ 3,
      wvs_happiness == "Not very happy" ~ 2,
      wvs_happiness == "Not at all happy" ~ 1
    )
  )

a)

wvs_life_sat is already coded from 1-10, presumably with 1 being very unsatisfied, and 10 being very satisfied. For the purpose of group level analysis, this seems like a good form for it. I changed wvs_happiness such that it is also a numeric scale from 1-4, where 1 is “Not at all happy” and 4 is “Very happy”.

b,c)

# Mean well-being and consumption outcomes by treatment arm. Rows without a
# life-satisfaction score are dropped first, so every mean (including the
# consumption ones) is computed on that same subsample.
outcomes_sum <- summarise(
  group_by(filter(df, !is.na(wvs_life_sat)), treatment_cat),
  n = n(),
  sat = mean(wvs_life_sat),
  happy = mean(wvs_happiness_num),
  food = mean(cons_food),
  social = mean(cons_social),
  total = mean(cons_total)
)

outcomes_sum
## # A tibble: 3 x 7
##   treatment_cat     n   sat happy  food social  total
##   <chr>         <int> <dbl> <dbl> <dbl>  <dbl>  <dbl>
## 1 atreat          281  3.60  1.85 1756.  4575. 13104.
## 2 control         251  2.98  2.12 1597.  4671. 10896.
## 3 spillover       269  3.20  2.06 1493.  3269. 10250.
# Unadjusted OLS of each outcome on treatment arm; only the model summaries
# are stored. treatment_cat is a character column, so lm() treats it as a
# factor whose first level in sort order ("atreat") is the omitted baseline:
# each reported coefficient is that arm's mean minus the treated-group mean.
# lm() drops rows with missing values in the model variables, so the
# estimation sample can differ from one outcome to the next.

# r1: life satisfaction.
r1 <- df %>%
  lm(wvs_life_sat ~ treatment_cat , data = .) %>%
  summary()
# r2: happiness on the numeric 1-4 scale.
r2 <- df %>%
  lm(wvs_happiness_num ~ treatment_cat , data = .) %>%
  summary()
# r3: food consumption.
r3 <- df %>%
  lm(cons_food ~ treatment_cat , data = .) %>%
  summary()
# r4: social consumption.
r4 <- df %>%
  lm(cons_social ~ treatment_cat , data = .) %>%
  summary()
# r5: total consumption.
r5 <- df %>%
  lm(cons_total ~ treatment_cat , data = .) %>%
  summary()
# Print the life-satisfaction regression summary.
r1
## 
## Call:
## lm(formula = wvs_life_sat ~ treatment_cat, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6014 -1.9801 -0.2007  1.3986  7.0199 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              3.6014     0.1280  28.146  < 2e-16 ***
## treatment_catcontrol    -0.6213     0.1863  -3.335 0.000891 ***
## treatment_catspillover  -0.4007     0.1830  -2.190 0.028818 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.145 on 798 degrees of freedom
##   (148 observations deleted due to missingness)
## Multiple R-squared:  0.0143, Adjusted R-squared:  0.01183 
## F-statistic: 5.787 on 2 and 798 DF,  p-value: 0.003197
# Print the happiness (numeric 1-4 scale) regression summary.
r2
## 
## Call:
## lm(formula = wvs_happiness_num ~ treatment_cat, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.11952 -0.11952 -0.05948  0.14947  2.14947 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             1.85053    0.04230  43.750  < 2e-16 ***
## treatment_catcontrol    0.26899    0.06158   4.368 1.42e-05 ***
## treatment_catspillover  0.20895    0.06048   3.455  0.00058 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.709 on 798 degrees of freedom
##   (148 observations deleted due to missingness)
## Multiple R-squared:  0.0262, Adjusted R-squared:  0.02376 
## F-statistic: 10.74 on 2 and 798 DF,  p-value: 2.506e-05
# Print the food-consumption regression summary.
r3
## 
## Call:
## lm(formula = cons_food ~ treatment_cat, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1654.0  -616.7  -164.5   442.6  4374.4 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             1653.98      52.99  31.215   <2e-16 ***
## treatment_catcontrol     -56.16      77.69  -0.723    0.470    
## treatment_catspillover  -219.75      76.17  -2.885    0.004 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 974.1 on 946 degrees of freedom
## Multiple R-squared:  0.009282,   Adjusted R-squared:  0.007187 
## F-statistic: 4.431 on 2 and 946 DF,  p-value: 0.01215
# Print the social-consumption regression summary.
r4
## 
## Call:
## lm(formula = cons_social ~ treatment_cat, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -4587  -3087  -1956    394  38713 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              4387.2      300.4  14.606  < 2e-16 ***
## treatment_catcontrol      199.6      440.4   0.453  0.65055    
## treatment_catspillover  -1280.7      431.8  -2.966  0.00309 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5522 on 946 degrees of freedom
## Multiple R-squared:  0.01383,    Adjusted R-squared:  0.01174 
## F-statistic: 6.633 on 2 and 946 DF,  p-value: 0.001378
# Print the total-consumption regression summary.
r5
## 
## Call:
## lm(formula = cons_total ~ treatment_cat, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12241.7  -4037.1   -833.2   2841.1  24939.8 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             12241.7      339.9  36.018  < 2e-16 ***
## treatment_catcontrol    -1399.8      498.3  -2.809  0.00507 ** 
## treatment_catspillover  -2402.0      488.6  -4.916 1.04e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6249 on 946 degrees of freedom
## Multiple R-squared:  0.02521,    Adjusted R-squared:  0.02315 
## F-statistic: 12.23 on 2 and 946 DF,  p-value: 5.692e-06
# Density of total consumption, one curve per treatment arm.
ggplot(df, aes(x = cons_total)) +
  geom_density(aes(color = treatment_cat)) +
  jrothsch_theme

# Density of life satisfaction, one curve per treatment arm.
ggplot(df, aes(x = wvs_life_sat)) +
  geom_density(aes(color = treatment_cat)) +
  jrothsch_theme

# Cross-tabulate the original happiness labels against treatment arm.
table(df$wvs_happiness, df$treatment_cat)
##                   
##                    atreat control spillover
##   Very happy            7      10        10
##   Quite happy          30      49        48
##   Not very happy      158     153       159
##   Not at all happy     86      39        52

The effect of cash transfers on outcomes appears largely positive. Total consumption and life satisfaction are both significantly higher for the treatment condition than the control condition at the .05 level. A quick look at the distribution doesn’t suggest any reason to be suspicious of these numbers. Food and social spending aren’t very different between treatment and control. We therefore have some reason to believe that a cash transfer improves critical outcomes.

One concerning outcome is that the WVS happiness metric is lower for the treatment group. This appears mainly driven by a large increase in people who rate themselves “Not at all happy”. It would be worth looking further into possible reasons that cash transfers could cause extremely low self-reported happiness.

Analyzing spillover effects is interesting, as there are plausible a priori explanations by which it could raise or lower different outcomes. Similarly to the difference between treatment and control, the spillover condition shows lower total spending and life satisfaction, but higher happiness than the treatment condition. However, the psychological effects are weaker. This suggests that total consumption isn’t affected by having neighbors receive cash transfers, but there is likely a psychological impact.

The social spending outcome is suggestive of an interesting spillover effect. Specifically, social spending is much lower in the spillover condition than the control condition, while food spending and total spending are only slightly lower. One possible intuition for this is that for social events, those with more money are willing to pay for it, but this kind of obligation doesn’t extend to other forms of spending.

Note: Regressions with age/gender controls are included in appendix; I didn’t discuss in main analysis because they show similar results to the above regressions.

d

# Presentation table of mean outcomes by treatment arm.
tx1 <- xtable(outcomes_sum)
colnames(tx1) <- c(
  "Condition", "Participants", "Satisfaction", "Happiness",
  "Food_spending", "Social_spending", "Total_spending"
)

# Display labels keyed by the raw treatment_cat value. Looking labels up by
# value (instead of assigning them by row position) keeps them correct even if
# the row order of outcomes_sum changes.
condition_labels <- c(
  atreat = "Treatment",
  control = "Control",
  spillover = "Spillover"
)

tx1 <- tx1 %>%
  mutate(
    Satisfaction = round(Satisfaction, 2),
    Happiness = round(Happiness, 2),
    Food_spending = round(Food_spending, 0),
    Social_spending = round(Social_spending, 0),
    Total_spending = round(Total_spending, 0),
    Condition = unname(condition_labels[as.character(Condition)])
  )

# Left-align every column: one alignment entry per data column, and
# TRUE/FALSE spelled out because T and F can be reassigned.
kable(
  tx1,
  row.names = TRUE,
  align = rep("l", ncol(tx1)),
  booktabs = TRUE,
  escape = FALSE
) %>%
  kable_styling(font_size = 8)
Condition Participants Satisfaction Happiness Food_spending Social_spending Total_spending
1 Treatment 281 3.60 1.85 1756 4575 13104
2 Control 251 2.98 2.12 1597 4671 10896
3 Spillover 269 3.20 2.06 1493 3269 10250

3

a)

library(readxl)
# Village-level price data, joined onto the main data by village.
prices <- read_excel("villages.xls")

# Report villages that have no match in the price file instead of letting the
# left join fill their price columns with NA silently.
unmatched_villages <- setdiff(unique(df$village), prices$village)
if (length(unmatched_villages) > 0) {
  message(
    length(unmatched_villages),
    " village(s) in df have no match in villages.xls: ",
    paste(unmatched_villages, collapse = ", ")
  )
}

# Keep every row of df (left join); TRUE is spelled out because T can be
# reassigned.
df2 <- merge(df, prices, by = "village", all.x = TRUE)

See code above. Note: There were some merging problems with different dataset names, but for time purposes/because I doubt it’s the purpose of this assignment, I’m just going to ignore the ones that didn’t merge correctly, which I wouldn’t do for a used analysis.

b)

# Collapse to one row per village, using only rows that have a price index:
# row count, mean price index, and the village's purecontrol flag.
summary_village <- summarise(
  group_by(filter(df2, !is.na(v_price_index)), village),
  n = n(),
  price_index = mean(v_price_index),
  purecontrol = max(purecontrol)
)

# Density of village price indices, one curve per purecontrol value.
ggplot(
  summary_village,
  aes(x = price_index, color = as.factor(purecontrol))
) +
  geom_density() +
  jrothsch_theme

# Mean village price index for each purecontrol value.
summary_village_control <- summarise(
  group_by(summary_village, purecontrol),
  mean_vpi = mean(price_index)
)

summary_village_control
## # A tibble: 2 x 2
##   purecontrol mean_vpi
##         <dbl>    <dbl>
## 1           0  0.00332
## 2           1  0.00344
# Village-level OLS of the mean price index on the purecontrol flag; the
# purecontrol coefficient is the difference in mean price index between the
# two groups of villages.
summary_village %>%
  lm(price_index ~ purecontrol, data = .)  %>%
  summary()
## 
## Call:
## lm(formula = price_index ~ purecontrol, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.37248 -0.07674  0.00781  0.07737  0.61427 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0033168  0.0198284   0.167    0.867
## purecontrol 0.0001212  0.0274305   0.004    0.996
## 
## Residual standard error: 0.1444 on 109 degrees of freedom
## Multiple R-squared:  1.79e-07,   Adjusted R-squared:  -0.009174 
## F-statistic: 1.952e-05 on 1 and 109 DF,  p-value: 0.9965

A distribution plot, mean comparison, and regression all suggest that there is no meaningful difference in price levels between villages.

4

I wasn’t able to answer this question. I did some research and was able to find an R library that checks the distance between two geopoints. However, I wasn’t able to figure out how to use it to check how many neighbors each point has.

For g, I intended to use a linear regression on the created variable. For h, I would have used a linear regression with an interaction term for treatmentXvariable.

appendix

# Appendix: life satisfaction on treatment arm, adjusting for age, gender and
# education.
df %>%
  lm(wvs_life_sat ~ treatment_cat + age + gender + education , data = .) %>%
  summary()
## 
## Call:
## lm(formula = wvs_life_sat ~ treatment_cat + age + gender + education, 
##     data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1417 -1.7087 -0.4129  1.2680  7.1949 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             4.518822   0.599566   7.537 1.34e-13 ***
## treatment_catcontrol   -0.673192   0.189355  -3.555   0.0004 ***
## treatment_catspillover -0.376060   0.182348  -2.062   0.0395 *  
## age                    -0.016516   0.007298  -2.263   0.0239 *  
## genderfemale           -0.378645   0.181807  -2.083   0.0376 *  
## educationPre-school     2.152377   1.547154   1.391   0.1646    
## educationStandard 1     0.181894   1.275247   0.143   0.8866    
## educationStandard 2    -0.493836   0.591597  -0.835   0.4041    
## educationStandard 3     0.067103   0.574154   0.117   0.9070    
## educationStandard 4     0.072451   0.508947   0.142   0.8868    
## educationStandard 5     0.119590   0.464091   0.258   0.7967    
## educationStandard 6    -0.574911   0.407189  -1.412   0.1584    
## educationStandard 7    -0.068070   0.392681  -0.173   0.8624    
## educationStandard 8     0.118389   0.395564   0.299   0.7648    
## educationForm 1        -0.304035   0.564912  -0.538   0.5906    
## educationForm 2        -0.068014   0.498546  -0.136   0.8915    
## educationForm 3         0.358696   0.737930   0.486   0.6270    
## educationForm 4         0.373219   0.497130   0.751   0.4530    
## educationCollege 1      0.094570   2.162402   0.044   0.9651    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.132 on 781 degrees of freedom
##   (149 observations deleted due to missingness)
## Multiple R-squared:  0.04671,    Adjusted R-squared:  0.02474 
## F-statistic: 2.126 on 18 and 781 DF,  p-value: 0.004187
# Appendix: happiness (numeric 1-4 scale) on treatment arm, adjusting for age,
# gender and education.
df %>%
  lm(wvs_happiness_num ~ treatment_cat + age + gender + education , data = .) %>%
  summary()
## 
## Call:
## lm(formula = wvs_happiness_num ~ treatment_cat + age + gender + 
##     education, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.40543 -0.15298 -0.05336  0.15874  2.15695 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             1.864162   0.199976   9.322  < 2e-16 ***
## treatment_catcontrol    0.257839   0.063157   4.083 4.91e-05 ***
## treatment_catspillover  0.208802   0.060820   3.433 0.000628 ***
## age                     0.000368   0.002434   0.151 0.879885    
## genderfemale           -0.044505   0.060639  -0.734 0.463208    
## educationPre-school    -0.591295   0.516030  -1.146 0.252207    
## educationStandard 1     0.312107   0.425339   0.734 0.463301    
## educationStandard 2     0.091831   0.197318   0.465 0.641779    
## educationStandard 3     0.024813   0.191500   0.130 0.896940    
## educationStandard 4     0.045903   0.169752   0.270 0.786913    
## educationStandard 5     0.019303   0.154791   0.125 0.900789    
## educationStandard 6    -0.046689   0.135812  -0.344 0.731109    
## educationStandard 7     0.011666   0.130973   0.089 0.929045    
## educationStandard 8     0.016068   0.131934   0.122 0.903096    
## educationForm 1         0.054366   0.188418   0.289 0.773009    
## educationForm 2        -0.081257   0.166282  -0.489 0.625212    
## educationForm 3         0.194713   0.246125   0.791 0.429118    
## educationForm 4         0.025104   0.165810   0.151 0.879697    
## educationCollege 1     -0.090007   0.721237  -0.125 0.900718    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7112 on 781 degrees of freedom
##   (149 observations deleted due to missingness)
## Multiple R-squared:  0.03188,    Adjusted R-squared:  0.009565 
## F-statistic: 1.429 on 18 and 781 DF,  p-value: 0.1103
# Appendix: food consumption on treatment arm, adjusting for age, gender and
# education.
df %>%
  lm(cons_food ~ treatment_cat + age + gender + education , data = .) %>%
  summary()
## 
## Call:
## lm(formula = cons_food ~ treatment_cat + age + gender + education, 
##     data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1790.7  -627.9  -182.3   382.0  4492.0 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            1515.872    237.726   6.377  2.9e-10 ***
## treatment_catcontrol   -165.948     76.342  -2.174 0.029986 *  
## treatment_catspillover -268.702     74.657  -3.599 0.000337 ***
## age                       2.188      2.826   0.774 0.438896    
## genderfemale           -258.756     69.574  -3.719 0.000212 ***
## educationPre-school    -244.898    553.444  -0.442 0.658235    
## educationStandard 1    -390.095    482.322  -0.809 0.418853    
## educationStandard 2      49.529    249.494   0.199 0.842684    
## educationStandard 3     269.628    243.565   1.107 0.268587    
## educationStandard 4     151.640    201.611   0.752 0.452163    
## educationStandard 5     385.122    187.741   2.051 0.040524 *  
## educationStandard 6     378.348    165.961   2.280 0.022857 *  
## educationStandard 7     323.405    161.239   2.006 0.045183 *  
## educationStandard 8     448.353    160.720   2.790 0.005388 ** 
## educationForm 1         695.603    233.710   2.976 0.002995 ** 
## educationForm 2         117.609    201.032   0.585 0.558677    
## educationForm 3         516.572    284.073   1.818 0.069329 .  
## educationForm 4         358.785    197.748   1.814 0.069958 .  
## educationCollege 1     1456.886    939.964   1.550 0.121510    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 928.2 on 896 degrees of freedom
##   (34 observations deleted due to missingness)
## Multiple R-squared:  0.06037,    Adjusted R-squared:  0.04149 
## F-statistic: 3.198 on 18 and 896 DF,  p-value: 8.038e-06
# Appendix: social consumption on treatment arm, adjusting for age, gender and
# education.
df %>%
  lm(cons_social ~ treatment_cat + age + gender + education , data = .) %>%
  summary()
## 
## Call:
## lm(formula = cons_social ~ treatment_cat + age + gender + education, 
##     data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -7474  -3015  -1697    486  37180 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             4655.028   1410.950   3.299 0.001008 ** 
## treatment_catcontrol    -181.656    453.104  -0.401 0.688578    
## treatment_catspillover -1482.945    443.102  -3.347 0.000852 ***
## age                        2.318     16.772   0.138 0.890128    
## genderfemale           -1383.759    412.934  -3.351 0.000839 ***
## educationPre-school    -1570.789   3284.791  -0.478 0.632624    
## educationStandard 1    -2297.986   2862.672  -0.803 0.422337    
## educationStandard 2     4416.491   1480.796   2.983 0.002936 ** 
## educationStandard 3      491.695   1445.603   0.340 0.733837    
## educationStandard 4      853.083   1196.598   0.713 0.476079    
## educationStandard 5      735.922   1114.280   0.660 0.509137    
## educationStandard 6      560.462    985.007   0.569 0.569504    
## educationStandard 7      623.376    956.981   0.651 0.514957    
## educationStandard 8     1579.071    953.902   1.655 0.098197 .  
## educationForm 1          465.259   1387.113   0.335 0.737390    
## educationForm 2         -170.820   1193.163  -0.143 0.886192    
## educationForm 3          397.582   1686.026   0.236 0.813634    
## educationForm 4          462.224   1173.674   0.394 0.693803    
## educationCollege 1     13831.589   5578.864   2.479 0.013348 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5509 on 896 degrees of freedom
##   (34 observations deleted due to missingness)
## Multiple R-squared:  0.05183,    Adjusted R-squared:  0.03278 
## F-statistic: 2.721 on 18 and 896 DF,  p-value: 0.0001451
# Appendix: total consumption on treatment arm, adjusting for age, gender and
# education.
df %>%
  lm(cons_total ~ treatment_cat + age + gender + education , data = .) %>%
  summary()
## 
## Call:
## lm(formula = cons_total ~ treatment_cat + age + gender + education, 
##     data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -11025  -4037  -1133   2870  24473 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            10982.852   1499.912   7.322 5.43e-13 ***
## treatment_catcontrol   -2199.544    481.673  -4.566 5.65e-06 ***
## treatment_catspillover -2771.294    471.040  -5.883 5.67e-09 ***
## age                        7.239     17.830   0.406 0.684846    
## genderfemale           -1210.874    438.970  -2.758 0.005926 ** 
## educationPre-school    -2563.870   3491.901  -0.734 0.462999    
## educationStandard 1    -2633.902   3043.167  -0.866 0.386989    
## educationStandard 2     1806.819   1574.162   1.148 0.251358    
## educationStandard 3     2247.417   1536.750   1.462 0.143969    
## educationStandard 4     1502.212   1272.045   1.181 0.237939    
## educationStandard 5     3275.931   1184.537   2.766 0.005799 ** 
## educationStandard 6     2148.765   1047.113   2.052 0.040452 *  
## educationStandard 7     2651.154   1017.320   2.606 0.009312 ** 
## educationStandard 8     3387.198   1014.046   3.340 0.000872 ***
## educationForm 1         4585.327   1474.572   3.110 0.001933 ** 
## educationForm 2         1336.659   1268.393   1.054 0.292249    
## educationForm 3         3228.408   1792.332   1.801 0.072002 .  
## educationForm 4         3092.926   1247.676   2.479 0.013360 *  
## educationCollege 1     17767.877   5930.619   2.996 0.002811 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5857 on 896 degrees of freedom
##   (34 observations deleted due to missingness)
## Multiple R-squared:  0.08748,    Adjusted R-squared:  0.06915 
## F-statistic: 4.772 on 18 and 896 DF,  p-value: 2.821e-10