library(haven)
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(broom)
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.1. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(questionr)
#importing RECS 2015 data
recs2015v2<-read.csv("C:/Users/tobik/Downloads/recs2015_public_v2.csv")
Creating a subset without missing values:
myrecs3<-subset(recs2015v2,ZMICRO=1) #has a microwave
myrecs3<-subset(myrecs3,ZOVEN=1) #has an oven
myrecs3<-subset(myrecs3,ZEDUCATION=1) #eductaion question answered
myrecs3<-subset(myrecs3,ZSTOVE=1) #has a stove
myrecs3<-subset(myrecs3,ZOVEN=1) #has an oven
myrecs3<-subset(myrecs3,ZMONEYPY=1) #reported income
Recoding educational attainment and household income:
myrecs3$edu<-recode(myrecs3$EDUCATION,recodes="
1='No HS';
2='HS';
3='Some Col.';
4='BS/BA';
5='MS/PhD'
",as.factor.result=T)
myrecs3$inc<-recode(myrecs3$MONEYPY,recodes="
1='<$20k';
2='$20k-39k';
3='$40k-59k';
4='$60k-79k';
5='$80k-99k';
6='$100k-119k';
7='$120k-139k';
8='$140k+'
",as.factor.result=T)
Recoding regions:
myrecs3$reg<-recode(myrecs3$REGIONC,recodes="
1='Northeast';
2='Midwest';
3='South';
4='West';
",as.factor.result=T)
Recoding missing values as zeros for calcuation of cooking methods (-2 indicates “NA”, thus it is effectively a not used case, or 0)
myrecs3$COOKTUSE<-recode(myrecs3$COOKTUSE, recodes = "-2:0=0")
myrecs3$OVENUSE<-recode(myrecs3$OVENUSE, recodes = "-2:0=0")
myrecs3$SEPCOOKTUSE<-recode(myrecs3$SEPCOOKTUSE, recodes = "-2:0=0")
myrecs3$SEPOVENUSE<-recode(myrecs3$SEPOVENUSE, recodes = "-2:0=0")
myrecs3$AMTMICRO<-recode(myrecs3$AMTMICRO, recodes = "-2:0=0")
Recoding NUMMEAL so I can easily use it because originally, this is the coding: 1 Three or more times a day 2 Two times a day 3 Once a day 4 A few times each week 5 About once a week 6 Less than once a week 0 Never
myrecs3$NUMMEAL<-recode(myrecs3$NUMMEAL,recodes="
1=60;
2=50;
3=40;
4=30;
5=20;
6=10;
",as.factor.result=T)
Calculating the value to be used for binary indicator
myrecs3$mycooks<-myrecs3$COOKTUSE
myrecs3$mycooks<-myrecs3$mycooks+myrecs3$OVENUSE
myrecs3$mycooks<-myrecs3$mycooks+myrecs3$SEPCOOKTUSE
myrecs3$mycooks<-myrecs3$mycooks+myrecs3$SEPOVENUSE
myrecs3$mycooks<-myrecs3$mycooks-myrecs3$AMTMICRO
Recoding as binary variable:
myrecs3$mycooks<-recode(myrecs3$mycooks, recodes="0:396=1;-99:-1=0")
## 1 = prefers cooking with stove / stovetop
## 0 = uses microwave at least as often
Test<-wtd.table(myrecs3$NUMMEAL, weights = myrecs3$NWEIGHT)
plot(Test)
There should be no offest term necessary. Both measure the same construct (cooked meals per week).
des<-svydesign(ids=~1, weights=myrecs3$NWEIGHT, data=myrecs3[is.na(myrecs3$NWEIGHT)==F,])
svyby(~AMTMICRO, ~mycooks, des, svymean, na.rm=T)
## mycooks AMTMICRO se
## 0 0 17.833559 0.2944783
## 1 1 7.428422 0.1260241
pois1<-svyglm(AMTMICRO~factor(mycooks)+factor(inc)+factor(edu)+factor(reg), design=des,family=poisson)
summary(pois1)
##
## Call:
## svyglm(formula = AMTMICRO ~ factor(mycooks) + factor(inc) + factor(edu) +
## factor(reg), design = des, family = poisson)
##
## Survey design:
## svydesign(ids = ~1, weights = myrecs3$NWEIGHT, data = myrecs3[is.na(myrecs3$NWEIGHT) ==
## F, ])
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.003568 0.048453 61.989 < 2e-16 ***
## factor(mycooks)1 -0.871971 0.023749 -36.716 < 2e-16 ***
## factor(inc)$120k-139k -0.004994 0.054336 -0.092 0.92678
## factor(inc)$140k+ 0.074143 0.049213 1.507 0.13197
## factor(inc)$20k-39k -0.123888 0.045667 -2.713 0.00669 **
## factor(inc)$40k-59k -0.114761 0.047731 -2.404 0.01623 *
## factor(inc)$60k-79k -0.055126 0.054895 -1.004 0.31532
## factor(inc)$80k-99k -0.137554 0.052971 -2.597 0.00943 **
## factor(inc)<$20k -0.168129 0.053013 -3.171 0.00152 **
## factor(edu)HS 0.025392 0.042185 0.602 0.54726
## factor(edu)MS/PhD 0.005351 0.035000 0.153 0.87850
## factor(edu)No HS 0.089588 0.065398 1.370 0.17078
## factor(edu)Some Col. -0.002027 0.033145 -0.061 0.95125
## factor(reg)Northeast -0.071223 0.043407 -1.641 0.10089
## factor(reg)South -0.050231 0.032273 -1.556 0.11966
## factor(reg)West -0.083460 0.034254 -2.437 0.01486 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 6.797432)
##
## Number of Fisher Scoring iterations: 5
pois2<-glm(AMTMICRO~factor(mycooks)+factor(inc)+factor(edu)+factor(reg), data=myrecs3,family=poisson)
summary(pois2)
##
## Call:
## glm(formula = AMTMICRO ~ factor(mycooks) + factor(inc) + factor(edu) +
## factor(reg), family = poisson, data = myrecs3)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -5.4903 -1.8032 -0.5373 0.8239 13.6068
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.979323 0.016463 180.966 < 2e-16 ***
## factor(mycooks)1 -0.865730 0.008065 -107.340 < 2e-16 ***
## factor(inc)$120k-139k 0.019433 0.020797 0.934 0.350107
## factor(inc)$140k+ 0.096517 0.017255 5.594 2.22e-08 ***
## factor(inc)$20k-39k -0.108970 0.016067 -6.782 1.18e-11 ***
## factor(inc)$40k-59k -0.116742 0.016696 -6.992 2.71e-12 ***
## factor(inc)$60k-79k -0.047488 0.016874 -2.814 0.004888 **
## factor(inc)$80k-99k -0.107780 0.018670 -5.773 7.80e-09 ***
## factor(inc)<$20k -0.152851 0.017126 -8.925 < 2e-16 ***
## factor(edu)HS 0.005552 0.012343 0.450 0.652870
## factor(edu)MS/PhD 0.010017 0.012603 0.795 0.426704
## factor(edu)No HS 0.091574 0.018030 5.079 3.79e-07 ***
## factor(edu)Some Col. -0.002520 0.011048 -0.228 0.819562
## factor(reg)Northeast -0.053432 0.013027 -4.102 4.10e-05 ***
## factor(reg)South -0.036675 0.010023 -3.659 0.000253 ***
## factor(reg)West -0.052734 0.010620 -4.965 6.86e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 45585 on 5685 degrees of freedom
## Residual deviance: 32665 on 5670 degrees of freedom
## AIC: 54992
##
## Number of Fisher Scoring iterations: 5
scale<-sqrt<-(pois2$deviance/pois2$df.residual)
scale
## [1] 5.761103
1-pchisq(pois2$deviance, df=pois2$df.residual)
## [1] 0
p=0 means that the model does not fit the data.
#I stole this from my professor who stole it from: http://drewdimmery.com/robust-ses-in-r/
#and http://people.su.se/~ma/clustering.pdf
#I also added a correction to use this with the hurdle and zero-inflated models
#This is how stata gets robust se's
clx2 <- function(fm, dfcw, cluster){
# R-codes (www.r-project.org) for computing
# clustered-standard errors. Mahmood Arai, Jan 26, 2008.
# The arguments of the function are:
# fitted model, cluster1
# You need to install libraries `sandwich' and `lmtest'
# reweighting the var-cov matrix for the within model
require(sandwich);require(lmtest)
if(class(fm)=="zeroinfl"|class(fm)=="hurdle") {
M <- length(unique(cluster))
N <- length(cluster)
K <- dim(fm$vcov)[1] #here is the rank from the zero inflated fits
dfc <- (M/(M-1))*((N-1)/(N-K))
uj <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum, na.rm=T));
vcovCL <- dfc[1]*sandwich(fm, meat=crossprod(uj)/N)*dfcw #fix a length problem in dfc
list(summary=coeftest(fm, vcovCL))}
else if(class(fm)!="zeroinfl"){
M <- length(unique(cluster))
N <- length(cluster)
K <- fm$rank
dfc <- (M/(M - 1)) * ((N - 1)/(N - K))
uj <- apply(estfun(fm), 2, function(x) tapply(x, cluster, sum, na.rm=T));
rcse.cov <- dfc * sandwich(fm, meat = crossprod(uj)/N)
rcse.se <- coeftest(fm, rcse.cov)
return(list( rcse.se))}
}
setting new weights:
myrecs3$newweight<-myrecs3$NWEIGHT/mean(myrecs3$NWEIGHT, na.rm=T)
pois3<-glm(AMTMICRO~factor(mycooks)+factor(inc)+factor(edu)+factor(reg), data=myrecs3, weights=newweight, family=poisson)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(sandwich)
clx2(pois3,cluster=myrecs3$reg)
## Warning in if (class(fm) == "zeroinfl" | class(fm) == "hurdle") {: the
## condition has length > 1 and only the first element will be used
## Warning in if (class(fm) != "zeroinfl") {: the condition has length > 1 and
## only the first element will be used
## [[1]]
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.00356761 0.07522223 39.9293 < 2.2e-16 ***
## factor(mycooks)1 -0.87197139 0.02346390 -37.1622 < 2.2e-16 ***
## factor(inc)$120k-139k -0.00499365 0.03614764 -0.1381 0.8901251
## factor(inc)$140k+ 0.07414339 0.04036440 1.8369 0.0662319 .
## factor(inc)$20k-39k -0.12388772 0.05426464 -2.2830 0.0224287 *
## factor(inc)$40k-59k -0.11476128 0.04408307 -2.6033 0.0092332 **
## factor(inc)$60k-79k -0.05512636 0.04920607 -1.1203 0.2625790
## factor(inc)$80k-99k -0.13755357 0.09253244 -1.4865 0.1371352
## factor(inc)<$20k -0.16812909 0.04571244 -3.6780 0.0002351 ***
## factor(edu)HS 0.02539163 0.09101702 0.2790 0.7802627
## factor(edu)MS/PhD 0.00535074 0.06140770 0.0871 0.9305645
## factor(edu)No HS 0.08958803 0.05850169 1.5314 0.1256767
## factor(edu)Some Col. -0.00202656 0.06138705 -0.0330 0.9736644
## factor(reg)Northeast -0.07122292 0.00300426 -23.7073 < 2.2e-16 ***
## factor(reg)South -0.05023091 0.00058027 -86.5648 < 2.2e-16 ***
## factor(reg)West -0.08345961 0.00212707 -39.2369 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(pois3, vcov=vcovHC(pois3,type="HC1",cluster="reg"))
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.0035676 0.0485171 61.9073 < 2.2e-16 ***
## factor(mycooks)1 -0.8719714 0.0237806 -36.6674 < 2.2e-16 ***
## factor(inc)$120k-139k -0.0049936 0.0544074 -0.0918 0.926871
## factor(inc)$140k+ 0.0741434 0.0492776 1.5046 0.132425
## factor(inc)$20k-39k -0.1238877 0.0457278 -2.7092 0.006744 **
## factor(inc)$40k-59k -0.1147613 0.0477944 -2.4011 0.016344 *
## factor(inc)$60k-79k -0.0551264 0.0549677 -1.0029 0.315916
## factor(inc)$80k-99k -0.1375536 0.0530408 -2.5934 0.009505 **
## factor(inc)<$20k -0.1681291 0.0530833 -3.1673 0.001539 **
## factor(edu)HS 0.0253916 0.0422412 0.6011 0.547766
## factor(edu)MS/PhD 0.0053507 0.0350464 0.1527 0.878654
## factor(edu)No HS 0.0895880 0.0654847 1.3681 0.171288
## factor(edu)Some Col. -0.0020266 0.0331887 -0.0611 0.951310
## factor(reg)Northeast -0.0712229 0.0434645 -1.6386 0.101287
## factor(reg)South -0.0502309 0.0323160 -1.5544 0.120097
## factor(reg)West -0.0834596 0.0342990 -2.4333 0.014962 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Negative Binomial Significances:
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
fit.nb2<-glm.nb(AMTMICRO~factor(mycooks)+factor(inc)+factor(edu)+factor(reg),
data=myrecs3,
weights=newweight)
clx2(fit.nb2,cluster =myrecs3$reg)
## Warning in if (class(fm) == "zeroinfl" | class(fm) == "hurdle") {: the
## condition has length > 1 and only the first element will be used
## Warning in if (class(fm) != "zeroinfl") {: the condition has length > 1 and
## only the first element will be used
## [[1]]
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.01240224 0.08366402 36.0059 < 2.2e-16 ***
## factor(mycooks)1 -0.87237181 0.02235833 -39.0178 < 2.2e-16 ***
## factor(inc)$120k-139k -0.00707861 0.04929939 -0.1436 0.885829
## factor(inc)$140k+ 0.07122505 0.06040915 1.1790 0.238381
## factor(inc)$20k-39k -0.13601561 0.07057680 -1.9272 0.053955 .
## factor(inc)$40k-59k -0.12157869 0.05595107 -2.1729 0.029784 *
## factor(inc)$60k-79k -0.09110493 0.05258002 -1.7327 0.083151 .
## factor(inc)$80k-99k -0.14246946 0.09848405 -1.4466 0.148002
## factor(inc)<$20k -0.18240518 0.05723794 -3.1868 0.001439 **
## factor(edu)HS 0.02625496 0.08469697 0.3100 0.756571
## factor(edu)MS/PhD 0.01749805 0.06739108 0.2596 0.795134
## factor(edu)No HS 0.05215241 0.03468325 1.5037 0.132664
## factor(edu)Some Col. 0.00039321 0.05785401 0.0068 0.994577
## factor(reg)Northeast -0.07800398 0.00416575 -18.7251 < 2.2e-16 ***
## factor(reg)South -0.04011010 0.00106667 -37.6032 < 2.2e-16 ***
## factor(reg)West -0.08054408 0.00349429 -23.0502 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(fit.nb2, vcov=vcovHC(fit.nb2, type="HC1",cluster="reg"))
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.01240224 0.04795006 62.8237 < 2.2e-16 ***
## factor(mycooks)1 -0.87237181 0.02384662 -36.5826 < 2.2e-16 ***
## factor(inc)$120k-139k -0.00707861 0.05626657 -0.1258 0.8998864
## factor(inc)$140k+ 0.07122505 0.04954508 1.4376 0.1505531
## factor(inc)$20k-39k -0.13601561 0.04467738 -3.0444 0.0023315 **
## factor(inc)$40k-59k -0.12157869 0.04759767 -2.5543 0.0106402 *
## factor(inc)$60k-79k -0.09110493 0.05146585 -1.7702 0.0766936 .
## factor(inc)$80k-99k -0.14246946 0.05035365 -2.8294 0.0046639 **
## factor(inc)<$20k -0.18240518 0.05152240 -3.5403 0.0003997 ***
## factor(edu)HS 0.02625496 0.03990549 0.6579 0.5105841
## factor(edu)MS/PhD 0.01749805 0.03699903 0.4729 0.6362613
## factor(edu)No HS 0.05215241 0.05860446 0.8899 0.3735168
## factor(edu)Some Col. 0.00039321 0.03348884 0.0117 0.9906319
## factor(reg)Northeast -0.07800398 0.04201566 -1.8565 0.0633758 .
## factor(reg)South -0.04011010 0.03078791 -1.3028 0.1926474
## factor(reg)West -0.08054408 0.03244942 -2.4821 0.0130595 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
We find significant differences in preference of cooking with stove top vs microwave use, differences among income groups, no difference in educational attainment, but some in regions.