Loading the required libraries and the RECS file

library(haven)
library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(broom)
library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.1. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(questionr)

# Read the RECS 2015 public-use microdata (v2) from a pinned GitHub commit.
recs_url <- "https://raw.githubusercontent.com/demograf/_Thesis/b9101bf3e4a94c9437557c6a7361e380384ce365/recs2015_public_v2.csv"
recs2015v2 <- read.csv(recs_url)

Creating a dataset that only contains the elements I am interested in (only theoretical: all households had both an oven and a microwave, and answered the educational attainment question):

# Working copy of the RECS data used for every step that follows.
#
# NOTE(review): this replaces five calls of the form subset(df, ZMICRO=1).
# `ZMICRO=1` is a named argument, not a logical condition (that would be
# `ZMICRO == 1`), so subset() received no row filter and returned every row;
# the fourth call also restarted from recs2015v2, discarding the first three.
# The net effect was a plain copy, which is made explicit here so that the
# results reported further down remain unchanged.
#
# TODO(review): if households without a microwave, oven/stove, or education
# answer should really be excluded, add an explicit comparison filter here and
# re-run the analysis. The Z-prefixed columns (ZMICRO, ZOVEN, ZSTOVE,
# ZEDUCATION) look like imputation flags rather than ownership indicators --
# confirm against the RECS 2015 codebook before filtering on them.
myrecs <- recs2015v2

Recoding missing values as zeros for the calculation of cooking methods (-2 indicates “not applicable”, so it is effectively a “not used” case, i.e. 0):

# Appliance-usage columns in which -2 marks "not applicable".
use_cols <- c("COOKTUSE", "OVENUSE", "SEPCOOKTUSE", "SEPOVENUSE", "AMTMICRO")

# Map every value in the range -2..0 to 0 so the columns can be summed.
# `recode` is car's version here (car is attached after dplyr and masks it).
myrecs[use_cols] <- lapply(myrecs[use_cols], recode, recodes = "-2:0=0")

Calculating the value to be used for binary indicator

# Score = total cooktop/oven use (built-in and separate units) minus
# microwave use. Non-negative means the stove/oven is used at least as often.
myrecs$mycooks <- with(
  myrecs,
  COOKTUSE + OVENUSE + SEPCOOKTUSE + SEPOVENUSE - AMTMICRO
)

Recoding previous outcomes as binary value for preferred cooking method

# Collapse the stove-minus-microwave score into a binary indicator.
# `lo` and `hi` are car::recode keywords for the smallest and largest observed
# value, so the mapping no longer relies on the assumed bounds -99 and 396
# (values outside those bounds were previously left un-recoded).
myrecs$mycooks <- recode(myrecs$mycooks, recodes = "lo:-1=0;0:hi=1")
## 1 = uses stove top / oven at least as often as the microwave (score >= 0)
## 0 = uses the microwave more often (score < 0)

Recoding educational attainment level

# Label the education codes 1-5; the numeric prefix keeps the factor levels in
# attainment order when sorted alphabetically.
# `as.factor` is the current car::recode argument name. The older spelling
# `as.factor.result` is rejected as an unused argument by car >= 3.0, while
# `as.factor` still reaches it on older versions through partial matching.
myrecs$myedu <- recode(
  myrecs$EDUCATION,
  recodes = "1='1NoHS';2='2HighSchool';3='3SomeCollege';4='4Bachelors';5='5MastersPhD'",
  as.factor = TRUE
)

Research Question:

Do households with a different maximum educational attainment level differ in the preference of oven or stove top use versus the use of microwave ovens?

To test this question, the RECS 2015 (https://www.eia.gov/consumption/residential/about.php) was used.

Simple Table non-weighted, frequency

The raw frequency of answers:

# Unweighted cross-tabulation: education level (rows) by cooking indicator
# (columns).
table(myrecs[["myedu"]], myrecs[["mycooks"]])
##               
##                   0    1
##   1NoHS         146  259
##   2HighSchool   627  717
##   3SomeCollege  880 1015
##   4Bachelors    514  671
##   5MastersPhD   402  455

Simple Table non-weighted, percentages:

# Unweighted column proportions: within each education level (columns), the
# share of households with cooking indicator 0 and 1 (rows).
prop.table(
  table(myrecs[["mycooks"]], myrecs[["myedu"]]),
  2
)
##    
##         1NoHS 2HighSchool 3SomeCollege 4Bachelors 5MastersPhD
##   0 0.3604938   0.4665179    0.4643799  0.4337553   0.4690782
##   1 0.6395062   0.5334821    0.5356201  0.5662447   0.5309218

First, analysis without weights (NWEIGHT) via Pearson’s chi-squared test:

# Pearson chi-squared test on the unweighted education x cooking-indicator
# counts. The survey weights (NWEIGHT) are not used in this test.
chisq.test(table(myrecs$myedu,myrecs$mycooks))
## 
##  Pearson's Chi-squared test
## 
## data:  table(myrecs$myedu, myrecs$mycooks)
## X-squared = 18.609, df = 4, p-value = 0.0009377

Now, with simple weights:

# Weighted cross-tabulation (cooking indicator x education) using NWEIGHT.
# The result was previously stored in a variable named `cat`, which shadows
# base::cat(), and the same table was then recomputed; it is now computed
# once under a non-clashing name and reused.
wtd_tab <- wtd.table(myrecs$mycooks, myrecs$myedu, weights = myrecs$NWEIGHT)

# Weighted column proportions: share of each indicator value within each
# education level.
prop.table(wtd_tab, margin = 2)
##       1NoHS 2HighSchool 3SomeCollege 4Bachelors 5MastersPhD
## 0 0.3854068   0.4584732    0.4638190  0.4277120   0.4661823
## 1 0.6145932   0.5415268    0.5361810  0.5722880   0.5338177

Calculating n and p for the standard errors. Note: the standard errors in this table are incorrect, because the proportions are shares of the whole population and the formula ignores the survey design. The correct standard errors are in the Survey Design part.

# Number of households with a non-missing education level.
# The previous form, table(is.na(x) == F)[1], selects the first table cell,
# which is the FALSE cell (the count of *missing* values) as soon as any NA is
# present; counting directly gives the intended n in every case.
n <- sum(!is.na(myrecs$myedu))

# Weighted cell proportions over the whole table (no margin), i.e. each cell
# as a share of all households.
p <- prop.table(wtd.table(myrecs$mycooks, myrecs$myedu, weights = myrecs$NWEIGHT))

# Simple-random-sample standard error of each proportion; it does not account
# for the survey design.
se <- sqrt((p * (1 - p)) / n)
data.frame(proportion = p, se = se)
##    proportion.Var1 proportion.Var2 proportion.Freq se.Var1      se.Var2
## 1                0           1NoHS      0.02899922       0        1NoHS
## 2                1           1NoHS      0.04624392       1        1NoHS
## 3                0     2HighSchool      0.11225346       0  2HighSchool
## 4                1     2HighSchool      0.13258848       1  2HighSchool
## 5                0    3SomeCollege      0.15260225       0 3SomeCollege
## 6                1    3SomeCollege      0.17641023       1 3SomeCollege
## 7                0      4Bachelors      0.08907532       0   4Bachelors
## 8                1      4Bachelors      0.11918471       1   4Bachelors
## 9                0     5MastersPhD      0.06649736       0  5MastersPhD
## 10               1     5MastersPhD      0.07614504       1  5MastersPhD
##        se.Freq
## 1  0.002225356
## 2  0.002785111
## 3  0.004186402
## 4  0.004497406
## 5  0.004768927
## 6  0.005054916
## 7  0.003777605
## 8  0.004296841
## 9  0.003304126
## 10 0.003517380

Survey Design

options(survey.lonely.psu = "adjust")

# Weighted design over households with a known education level.
# - The former `strada=~ststr` argument is dropped: `strada` is not a
#   svydesign() argument (the parameter is `strata`), so it was ignored and no
#   stratification was ever applied.
# - Weights and table variables are given as bare column names so they are
#   looked up in `data`; `~myrecs$NWEIGHT` bypassed the row subset and would
#   misalign as soon as any row is dropped.
# NOTE(review): with ids = ~1 and no strata the standard errors treat the data
# as an unclustered weighted sample -- confirm against the RECS documentation
# whether strata or replicate weights should be used instead.
des <- svydesign(
  ids = ~1,
  weights = ~NWEIGHT,
  data = myrecs[!is.na(myrecs$myedu), ]
)

# Weighted counts (stored under a name that does not shadow base::cat()).
sv_counts <- svytable(~mycooks + myedu, design = des)
## prop.table(sv_counts, margin = 2)

# Design-based mean of the binary indicator (the share coded 1) and its
# standard error, by education level.
sv.table <- svyby(
  formula = ~mycooks,
  by = ~myedu,
  design = des,
  FUN = svymean,
  na.rm = TRUE
)
sv.table
##                     myedu   mycooks         se
## 1NoHS               1NoHS 0.6145932 0.02795179
## 2HighSchool   2HighSchool 0.5415268 0.01548395
## 3SomeCollege 3SomeCollege 0.5361810 0.01318545
## 4Bachelors     4Bachelors 0.5722880 0.01639110
## 5MastersPhD   5MastersPhD 0.5338177 0.01867883

Conclusion

The share of households that use stove tops or ovens at least as often as microwaves, by educational attainment (± standard error), is the following:

- 61.46% (+/-2.80%) of households without a high school diploma
- 54.15% (+/-1.55%) of households with a high school diploma
- 53.62% (+/-1.32%) of households with some college or an Associate's degree
- 57.23% (+/-1.64%) of households with a Bachelor's degree
- 53.38% (+/-1.87%) of households with a Master's degree or doctorate

Households without a high school diploma and households with a Bachelor's degree as their highest attainment use microwaves significantly less often, relative to ovens and stove tops, than the rest of the population.