Homework 4

library(haven)
library(readr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
library(broom)
library(car)

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.1. https://CRAN.R-project.org/package=stargazer

library(survey)

## Loading required package: grid

## Loading required package: Matrix

## Loading required package: survival

## 
## Attaching package: 'survey'

## The following object is masked from 'package:graphics':
## 
##     dotchart

library(questionr)

#importing RECS 2015 data
recs2015v2<-read.csv("C:/Users/tobik/Downloads/recs2015_public_v2.csv")

Creating a subset without missing values:

myrecs3<-subset(recs2015v2,ZMICRO=1) #has a microwave
myrecs3<-subset(myrecs3,ZOVEN=1) #has an oven
myrecs3<-subset(myrecs3,ZEDUCATION=1) #eductaion question answered
myrecs3<-subset(myrecs3,ZSTOVE=1) #has a stove
myrecs3<-subset(myrecs3,ZOVEN=1) #has an oven
myrecs3<-subset(myrecs3,ZMONEYPY=1) #reported income

Recoding educational attainment and household income:

myrecs3$edu<-recode(myrecs3$EDUCATION,recodes="
                        1='No HS';
                        2='HS';
                        3='Some Col.';
                        4='BS/BA';
                        5='MS/PhD'
                        ",as.factor.result=T)
myrecs3$inc<-recode(myrecs3$MONEYPY,recodes="
                        1='<$20k';
                        2='$20k-39k';
                        3='$40k-59k';
                        4='$60k-79k';
                        5='$80k-99k';
                        6='$100k-119k';
                        7='$120k-139k';
                        8='$140k+'
                        ",as.factor.result=T)

Recoding regions:

myrecs3$reg<-recode(myrecs3$REGIONC,recodes="
                        1='Northeast';
                        2='Midwest';
                        3='South';
                        4='West';
                        ",as.factor.result=T)

Recoding missing values as zeros for calcuation of cooking methods (-2 indicates “NA”, thus it is effectively a not used case, or 0)

myrecs3$COOKTUSE<-recode(myrecs3$COOKTUSE, recodes = "-2:0=0")
myrecs3$OVENUSE<-recode(myrecs3$OVENUSE, recodes = "-2:0=0")
myrecs3$SEPCOOKTUSE<-recode(myrecs3$SEPCOOKTUSE, recodes = "-2:0=0")
myrecs3$SEPOVENUSE<-recode(myrecs3$SEPOVENUSE, recodes = "-2:0=0")
myrecs3$AMTMICRO<-recode(myrecs3$AMTMICRO, recodes = "-2:0=0")

Recoding NUMMEAL so I can easily use it because originally, this is the coding: 1 Three or more times a day 2 Two times a day 3 Once a day 4 A few times each week 5 About once a week 6 Less than once a week 0 Never

myrecs3$NUMMEAL<-recode(myrecs3$NUMMEAL,recodes="
                        1=60;
                        2=50;
                        3=40;
                        4=30;
                        5=20;
                        6=10;
                        ",as.factor.result=T)

Calculating the value to be used for binary indicator

myrecs3$mycooks<-myrecs3$COOKTUSE
myrecs3$mycooks<-myrecs3$mycooks+myrecs3$OVENUSE
myrecs3$mycooks<-myrecs3$mycooks+myrecs3$SEPCOOKTUSE
myrecs3$mycooks<-myrecs3$mycooks+myrecs3$SEPOVENUSE
myrecs3$mycooks<-myrecs3$mycooks-myrecs3$AMTMICRO

Recoding as binary variable:

myrecs3$mycooks<-recode(myrecs3$mycooks, recodes="0:396=1;-99:-1=0")
## 1 = prefers cooking with stove / stovetop
## 0 = uses microwave at least as often

Do people cook more often depending in which region they live, eductional attainment they achieved, different income they have, and when they prefer the oven over a microwave for cooking?

Hot meals have the following distribution in households:

Test<-wtd.table(myrecs3$NUMMEAL, weights = myrecs3$NWEIGHT)
plot(Test)

There should be no offest term necessary. Both measure the same construct (cooked meals per week).

Checking for difference in preferences for Microwave or Stovetop/Oven Use:

des<-svydesign(ids=~1, weights=myrecs3$NWEIGHT, data=myrecs3[is.na(myrecs3$NWEIGHT)==F,])

svyby(~AMTMICRO, ~mycooks, des, svymean, na.rm=T)

##   mycooks  AMTMICRO        se
## 0       0 17.833559 0.2944783
## 1       1  7.428422 0.1260241

Checking the Poisson distribution for fit:

pois1<-svyglm(AMTMICRO~factor(mycooks)+factor(inc)+factor(edu)+factor(reg), design=des,family=poisson)
summary(pois1)

## 
## Call:
## svyglm(formula = AMTMICRO ~ factor(mycooks) + factor(inc) + factor(edu) + 
##     factor(reg), design = des, family = poisson)
## 
## Survey design:
## svydesign(ids = ~1, weights = myrecs3$NWEIGHT, data = myrecs3[is.na(myrecs3$NWEIGHT) == 
##     F, ])
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            3.003568   0.048453  61.989  < 2e-16 ***
## factor(mycooks)1      -0.871971   0.023749 -36.716  < 2e-16 ***
## factor(inc)$120k-139k -0.004994   0.054336  -0.092  0.92678    
## factor(inc)$140k+      0.074143   0.049213   1.507  0.13197    
## factor(inc)$20k-39k   -0.123888   0.045667  -2.713  0.00669 ** 
## factor(inc)$40k-59k   -0.114761   0.047731  -2.404  0.01623 *  
## factor(inc)$60k-79k   -0.055126   0.054895  -1.004  0.31532    
## factor(inc)$80k-99k   -0.137554   0.052971  -2.597  0.00943 ** 
## factor(inc)<$20k      -0.168129   0.053013  -3.171  0.00152 ** 
## factor(edu)HS          0.025392   0.042185   0.602  0.54726    
## factor(edu)MS/PhD      0.005351   0.035000   0.153  0.87850    
## factor(edu)No HS       0.089588   0.065398   1.370  0.17078    
## factor(edu)Some Col.  -0.002027   0.033145  -0.061  0.95125    
## factor(reg)Northeast  -0.071223   0.043407  -1.641  0.10089    
## factor(reg)South      -0.050231   0.032273  -1.556  0.11966    
## factor(reg)West       -0.083460   0.034254  -2.437  0.01486 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 6.797432)
## 
## Number of Fisher Scoring iterations: 5

pois2<-glm(AMTMICRO~factor(mycooks)+factor(inc)+factor(edu)+factor(reg), data=myrecs3,family=poisson)
summary(pois2)

## 
## Call:
## glm(formula = AMTMICRO ~ factor(mycooks) + factor(inc) + factor(edu) + 
##     factor(reg), family = poisson, data = myrecs3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -5.4903  -1.8032  -0.5373   0.8239  13.6068  
## 
## Coefficients:
##                        Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)            2.979323   0.016463  180.966  < 2e-16 ***
## factor(mycooks)1      -0.865730   0.008065 -107.340  < 2e-16 ***
## factor(inc)$120k-139k  0.019433   0.020797    0.934 0.350107    
## factor(inc)$140k+      0.096517   0.017255    5.594 2.22e-08 ***
## factor(inc)$20k-39k   -0.108970   0.016067   -6.782 1.18e-11 ***
## factor(inc)$40k-59k   -0.116742   0.016696   -6.992 2.71e-12 ***
## factor(inc)$60k-79k   -0.047488   0.016874   -2.814 0.004888 ** 
## factor(inc)$80k-99k   -0.107780   0.018670   -5.773 7.80e-09 ***
## factor(inc)<$20k      -0.152851   0.017126   -8.925  < 2e-16 ***
## factor(edu)HS          0.005552   0.012343    0.450 0.652870    
## factor(edu)MS/PhD      0.010017   0.012603    0.795 0.426704    
## factor(edu)No HS       0.091574   0.018030    5.079 3.79e-07 ***
## factor(edu)Some Col.  -0.002520   0.011048   -0.228 0.819562    
## factor(reg)Northeast  -0.053432   0.013027   -4.102 4.10e-05 ***
## factor(reg)South      -0.036675   0.010023   -3.659 0.000253 ***
## factor(reg)West       -0.052734   0.010620   -4.965 6.86e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 45585  on 5685  degrees of freedom
## Residual deviance: 32665  on 5670  degrees of freedom
## AIC: 54992
## 
## Number of Fisher Scoring iterations: 5

scale<-sqrt<-(pois2$deviance/pois2$df.residual)
scale

## [1] 5.761103

1-pchisq(pois2$deviance, df=pois2$df.residual)

## [1] 0

p=0 means that the model does not fit the data.

Moving on to negative binomial distribution

#I stole this from my professor who stole it from: http://drewdimmery.com/robust-ses-in-r/
#and http://people.su.se/~ma/clustering.pdf
#I also added a correction to use this with the hurdle and zero-inflated models
#This is how stata gets robust se's


clx2 <-   function(fm, dfcw,  cluster){
    # R-codes (www.r-project.org) for computing
    # clustered-standard errors. Mahmood Arai, Jan 26, 2008.
    
    # The arguments of the function are:
    # fitted model, cluster1
    # You need to install libraries `sandwich' and `lmtest'
    
    # reweighting the var-cov matrix for the within model
    require(sandwich);require(lmtest)
    if(class(fm)=="zeroinfl"|class(fm)=="hurdle") {
    M <- length(unique(cluster))   
    N <- length(cluster)           
    K <- dim(fm$vcov)[1]        #here is the rank from the zero inflated fits             
    dfc <- (M/(M-1))*((N-1)/(N-K))  
    uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum, na.rm=T));
    vcovCL <- dfc[1]*sandwich(fm, meat=crossprod(uj)/N)*dfcw #fix a length problem in dfc
    list(summary=coeftest(fm, vcovCL))}
    else if(class(fm)!="zeroinfl"){
    M <- length(unique(cluster))
    N <- length(cluster)
    K <- fm$rank
    dfc <- (M/(M - 1)) * ((N - 1)/(N - K))
    uj <- apply(estfun(fm), 2, function(x) tapply(x, cluster, sum, na.rm=T));
    rcse.cov <- dfc * sandwich(fm, meat = crossprod(uj)/N)
    rcse.se <- coeftest(fm, rcse.cov)
    return(list( rcse.se))}
}

setting new weights:

myrecs3$newweight<-myrecs3$NWEIGHT/mean(myrecs3$NWEIGHT, na.rm=T)
pois3<-glm(AMTMICRO~factor(mycooks)+factor(inc)+factor(edu)+factor(reg), data=myrecs3, weights=newweight, family=poisson)

library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(sandwich)

clx2(pois3,cluster=myrecs3$reg)

## Warning in if (class(fm) == "zeroinfl" | class(fm) == "hurdle") {: the
## condition has length > 1 and only the first element will be used

## Warning in if (class(fm) != "zeroinfl") {: the condition has length > 1 and
## only the first element will be used

## [[1]]
## 
## z test of coefficients:
## 
##                          Estimate  Std. Error  z value  Pr(>|z|)    
## (Intercept)            3.00356761  0.07522223  39.9293 < 2.2e-16 ***
## factor(mycooks)1      -0.87197139  0.02346390 -37.1622 < 2.2e-16 ***
## factor(inc)$120k-139k -0.00499365  0.03614764  -0.1381 0.8901251    
## factor(inc)$140k+      0.07414339  0.04036440   1.8369 0.0662319 .  
## factor(inc)$20k-39k   -0.12388772  0.05426464  -2.2830 0.0224287 *  
## factor(inc)$40k-59k   -0.11476128  0.04408307  -2.6033 0.0092332 ** 
## factor(inc)$60k-79k   -0.05512636  0.04920607  -1.1203 0.2625790    
## factor(inc)$80k-99k   -0.13755357  0.09253244  -1.4865 0.1371352    
## factor(inc)<$20k      -0.16812909  0.04571244  -3.6780 0.0002351 ***
## factor(edu)HS          0.02539163  0.09101702   0.2790 0.7802627    
## factor(edu)MS/PhD      0.00535074  0.06140770   0.0871 0.9305645    
## factor(edu)No HS       0.08958803  0.05850169   1.5314 0.1256767    
## factor(edu)Some Col.  -0.00202656  0.06138705  -0.0330 0.9736644    
## factor(reg)Northeast  -0.07122292  0.00300426 -23.7073 < 2.2e-16 ***
## factor(reg)South      -0.05023091  0.00058027 -86.5648 < 2.2e-16 ***
## factor(reg)West       -0.08345961  0.00212707 -39.2369 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

coeftest(pois3, vcov=vcovHC(pois3,type="HC1",cluster="reg"))

## 
## z test of coefficients:
## 
##                         Estimate Std. Error  z value  Pr(>|z|)    
## (Intercept)            3.0035676  0.0485171  61.9073 < 2.2e-16 ***
## factor(mycooks)1      -0.8719714  0.0237806 -36.6674 < 2.2e-16 ***
## factor(inc)$120k-139k -0.0049936  0.0544074  -0.0918  0.926871    
## factor(inc)$140k+      0.0741434  0.0492776   1.5046  0.132425    
## factor(inc)$20k-39k   -0.1238877  0.0457278  -2.7092  0.006744 ** 
## factor(inc)$40k-59k   -0.1147613  0.0477944  -2.4011  0.016344 *  
## factor(inc)$60k-79k   -0.0551264  0.0549677  -1.0029  0.315916    
## factor(inc)$80k-99k   -0.1375536  0.0530408  -2.5934  0.009505 ** 
## factor(inc)<$20k      -0.1681291  0.0530833  -3.1673  0.001539 ** 
## factor(edu)HS          0.0253916  0.0422412   0.6011  0.547766    
## factor(edu)MS/PhD      0.0053507  0.0350464   0.1527  0.878654    
## factor(edu)No HS       0.0895880  0.0654847   1.3681  0.171288    
## factor(edu)Some Col.  -0.0020266  0.0331887  -0.0611  0.951310    
## factor(reg)Northeast  -0.0712229  0.0434645  -1.6386  0.101287    
## factor(reg)South      -0.0502309  0.0323160  -1.5544  0.120097    
## factor(reg)West       -0.0834596  0.0342990  -2.4333  0.014962 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Negative Binomial Significances:

library(MASS)

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

fit.nb2<-glm.nb(AMTMICRO~factor(mycooks)+factor(inc)+factor(edu)+factor(reg),
              data=myrecs3,
              weights=newweight)
clx2(fit.nb2,cluster =myrecs3$reg)

## Warning in if (class(fm) == "zeroinfl" | class(fm) == "hurdle") {: the
## condition has length > 1 and only the first element will be used

## Warning in if (class(fm) != "zeroinfl") {: the condition has length > 1 and
## only the first element will be used

## [[1]]
## 
## z test of coefficients:
## 
##                          Estimate  Std. Error  z value  Pr(>|z|)    
## (Intercept)            3.01240224  0.08366402  36.0059 < 2.2e-16 ***
## factor(mycooks)1      -0.87237181  0.02235833 -39.0178 < 2.2e-16 ***
## factor(inc)$120k-139k -0.00707861  0.04929939  -0.1436  0.885829    
## factor(inc)$140k+      0.07122505  0.06040915   1.1790  0.238381    
## factor(inc)$20k-39k   -0.13601561  0.07057680  -1.9272  0.053955 .  
## factor(inc)$40k-59k   -0.12157869  0.05595107  -2.1729  0.029784 *  
## factor(inc)$60k-79k   -0.09110493  0.05258002  -1.7327  0.083151 .  
## factor(inc)$80k-99k   -0.14246946  0.09848405  -1.4466  0.148002    
## factor(inc)<$20k      -0.18240518  0.05723794  -3.1868  0.001439 ** 
## factor(edu)HS          0.02625496  0.08469697   0.3100  0.756571    
## factor(edu)MS/PhD      0.01749805  0.06739108   0.2596  0.795134    
## factor(edu)No HS       0.05215241  0.03468325   1.5037  0.132664    
## factor(edu)Some Col.   0.00039321  0.05785401   0.0068  0.994577    
## factor(reg)Northeast  -0.07800398  0.00416575 -18.7251 < 2.2e-16 ***
## factor(reg)South      -0.04011010  0.00106667 -37.6032 < 2.2e-16 ***
## factor(reg)West       -0.08054408  0.00349429 -23.0502 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

coeftest(fit.nb2, vcov=vcovHC(fit.nb2, type="HC1",cluster="reg"))

## 
## z test of coefficients:
## 
##                          Estimate  Std. Error  z value  Pr(>|z|)    
## (Intercept)            3.01240224  0.04795006  62.8237 < 2.2e-16 ***
## factor(mycooks)1      -0.87237181  0.02384662 -36.5826 < 2.2e-16 ***
## factor(inc)$120k-139k -0.00707861  0.05626657  -0.1258 0.8998864    
## factor(inc)$140k+      0.07122505  0.04954508   1.4376 0.1505531    
## factor(inc)$20k-39k   -0.13601561  0.04467738  -3.0444 0.0023315 ** 
## factor(inc)$40k-59k   -0.12157869  0.04759767  -2.5543 0.0106402 *  
## factor(inc)$60k-79k   -0.09110493  0.05146585  -1.7702 0.0766936 .  
## factor(inc)$80k-99k   -0.14246946  0.05035365  -2.8294 0.0046639 ** 
## factor(inc)<$20k      -0.18240518  0.05152240  -3.5403 0.0003997 ***
## factor(edu)HS          0.02625496  0.03990549   0.6579 0.5105841    
## factor(edu)MS/PhD      0.01749805  0.03699903   0.4729 0.6362613    
## factor(edu)No HS       0.05215241  0.05860446   0.8899 0.3735168    
## factor(edu)Some Col.   0.00039321  0.03348884   0.0117 0.9906319    
## factor(reg)Northeast  -0.07800398  0.04201566  -1.8565 0.0633758 .  
## factor(reg)South      -0.04011010  0.03078791  -1.3028 0.1926474    
## factor(reg)West       -0.08054408  0.03244942  -2.4821 0.0130595 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We find significant differences in preference of cooking with stove top vs microwave use, differences among income groups, no difference in educational attainment, but some in regions.