Loading the required libraries and the RECS file
library(haven)
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(broom)
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.1. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(questionr)
# Read the RECS 2015 public-use file (v2) from a pinned GitHub commit
recs2015v2 <- read.csv(
  file = "https://raw.githubusercontent.com/demograf/_Thesis/b9101bf3e4a94c9437557c6a7361e380384ce365/recs2015_public_v2.csv"
)
Creating a data set that only contains the elements I am interested in (only theoretical: all households had both an oven and a microwave, and answered the educational attainment question):
# Build the working data set `myrecs` from the raw RECS file.
# NOTE(review): every call below writes `NAME=1` (a named argument) instead of
# `NAME==1` (a comparison), so subset() receives no filter condition and
# returns all rows unchanged. The cell counts printed further down are
# consistent with the full, unfiltered sample.
# NOTE(review): the Z-prefixed columns look like RECS imputation flags rather
# than appliance counts -- confirm against the codebook before switching to
# `==`, since that would change every downstream result.
myrecs<-subset(recs2015v2,ZMICRO=1) #has a microwave
myrecs<-subset(myrecs,ZOVEN=1) #has an oven
myrecs<-subset(myrecs,ZEDUCATION=1) #education question answered
# NOTE(review): this line starts again from recs2015v2, so the three
# assignments above are discarded.
myrecs<-subset(recs2015v2,ZSTOVE=1) #has a stove
myrecs<-subset(myrecs,ZOVEN=1) #has an oven
Recoding missing values as zeros for the calculation of cooking methods (-2 indicates “NA”, so it is effectively a not-used case, i.e. 0)
# Map every usage value in the range -2..0 (the "not applicable" code and
# genuine zeros) to 0 for the five cooking-frequency columns.
myrecs <- within(myrecs, {
  COOKTUSE <- recode(COOKTUSE, recodes = "-2:0=0")
  OVENUSE <- recode(OVENUSE, recodes = "-2:0=0")
  SEPCOOKTUSE <- recode(SEPCOOKTUSE, recodes = "-2:0=0")
  SEPOVENUSE <- recode(SEPOVENUSE, recodes = "-2:0=0")
  AMTMICRO <- recode(AMTMICRO, recodes = "-2:0=0")
})
Calculating the value to be used for binary indicator
# Net usage: stove-top and oven uses (built-in and separate units) minus
# microwave uses. Non-negative means the stove/oven is used at least as often.
myrecs$mycooks <- with(
  myrecs,
  COOKTUSE + OVENUSE + SEPCOOKTUSE + SEPOVENUSE - AMTMICRO
)
Recoding previous outcomes as binary value for preferred cooking method
myrecs$mycooks <- recode(var = myrecs$mycooks, recodes = "0:396=1;-99:-1=0")
## 1 = uses stove / stove top / oven at least as often as the microwave
##     (net usage 0 to 396; ties count as 1)
## 0 = uses the microwave more often (net usage -99 to -1)
Recoding educational attainment level
# Label the five EDUCATION codes as a factor. The numeric prefixes make the
# labels sort in attainment order.
# `as.factor = TRUE` replaces `as.factor.result = T`: the argument was renamed
# in car 3.x, where the old spelling fails as an unused argument; the new
# spelling still partial-matches the old name in car 2.x.
myrecs$myedu <- car::recode(
  myrecs$EDUCATION,
  recodes = "1='1NoHS';2='2HighSchool';3='3SomeCollege';4='4Bachelors';5='5MastersPhD'",
  as.factor = TRUE
)
To test this question, the RECS 2015 (https://www.eia.gov/consumption/residential/about.php) was used.
The raw frequency of answers:
# Unweighted cross-tabulation: education level (rows) by cooking indicator
# (columns).
table(myrecs[["myedu"]], myrecs[["mycooks"]])
##
## 0 1
## 1NoHS 146 259
## 2HighSchool 627 717
## 3SomeCollege 880 1015
## 4Bachelors 514 671
## 5MastersPhD 402 455
Simple Table non-weighted, percentages:
# Unweighted shares of each cooking indicator value within every education
# level (each column sums to 1).
prop.table(
  table(myrecs[["mycooks"]], myrecs[["myedu"]]),
  margin = 2
)
##
## 1NoHS 2HighSchool 3SomeCollege 4Bachelors 5MastersPhD
## 0 0.3604938 0.4665179 0.4643799 0.4337553 0.4690782
## 1 0.6395062 0.5334821 0.5356201 0.5662447 0.5309218
First, analysis without weights (NWEIGHT) via Pearson’s chi-squared test:
# Pearson chi-squared test of independence between education level and the
# cooking indicator, computed on the unweighted cell counts.
chisq.test(table(myrecs$myedu,myrecs$mycooks))
##
## Pearson's Chi-squared test
##
## data: table(myrecs$myedu, myrecs$mycooks)
## X-squared = 18.609, df = 4, p-value = 0.0009377
Now, with simple weights:
# Weighted cross-tabulation (NWEIGHT) of cooking indicator by education level,
# stored in `cat` and then shown as within-education shares.
cat <- wtd.table(
  myrecs$mycooks,
  myrecs$myedu,
  weights = myrecs$NWEIGHT
)
prop.table(cat, margin = 2)
## 1NoHS 2HighSchool 3SomeCollege 4Bachelors 5MastersPhD
## 0 0.3854068 0.4584732 0.4638190 0.4277120 0.4661823
## 1 0.6145932 0.5415268 0.5361810 0.5722880 0.5338177
Calculating n and p for standard errors. Note: the standard errors in this table are incorrect because they are computed over the whole population; the correct errors are in the Survey Design part.
# Number of households with a non-missing education level.
# The original `table(is.na(x) == F)[1]` took the first table cell, which is
# the FALSE count (i.e. the number of missing values) whenever any NA exists.
n <- sum(!is.na(myrecs$myedu))
# Weighted joint proportions over all (mycooks, myedu) cells; sums to 1.
p <- prop.table(
  wtd.table(myrecs$mycooks, myrecs$myedu, weights = myrecs$NWEIGHT)
)
# Binomial standard error of each cell proportion, ignoring the survey design.
se <- sqrt((p * (1 - p)) / n)
data.frame(proportion = p, se = se)
## proportion.Var1 proportion.Var2 proportion.Freq se.Var1 se.Var2
## 1 0 1NoHS 0.02899922 0 1NoHS
## 2 1 1NoHS 0.04624392 1 1NoHS
## 3 0 2HighSchool 0.11225346 0 2HighSchool
## 4 1 2HighSchool 0.13258848 1 2HighSchool
## 5 0 3SomeCollege 0.15260225 0 3SomeCollege
## 6 1 3SomeCollege 0.17641023 1 3SomeCollege
## 7 0 4Bachelors 0.08907532 0 4Bachelors
## 8 1 4Bachelors 0.11918471 1 4Bachelors
## 9 0 5MastersPhD 0.06649736 0 5MastersPhD
## 10 1 5MastersPhD 0.07614504 1 5MastersPhD
## se.Freq
## 1 0.002225356
## 2 0.002785111
## 3 0.004186402
## 4 0.004497406
## 5 0.004768927
## 6 0.005054916
## 7 0.003777605
## 8 0.004296841
## 9 0.003304126
## 10 0.003517380
Survey Design
# survey-package option controlling how strata with a single PSU are handled.
options(survey.lonely.psu = "adjust")
# Weighted design without clustering (ids = ~1), restricted to households with
# a known education level.
# - The original passed `strada = ~ststr`; the misspelled argument was silently
#   ignored, so no stratification was ever applied. It is dropped here to keep
#   the estimates unchanged.
#   NOTE(review): add `strata = ~<column>` only if the data really contain a
#   stratum variable -- confirm against the RECS documentation.
# - Weights and table variables are now taken from the design's own data
#   (`~NWEIGHT`, `~mycooks + myedu`) rather than from the global `myrecs`
#   columns, so they stay aligned if the NA filter ever drops rows.
des <- svydesign(
  ids = ~1,
  weights = ~NWEIGHT,
  data = myrecs[!is.na(myrecs$myedu), ]
)
# Weighted cross-tabulation; stored in `cat` as in the original script.
cat <- svytable(~mycooks + myedu, design = des)
## prop.table(svytable(~mycooks + myedu, design = des), margin = 2)
# Design-based mean of the 0/1 cooking indicator (a proportion) with its
# standard error, by education level.
sv.table <- svyby(
  formula = ~mycooks,
  by = ~myedu,
  design = des,
  FUN = svymean,
  na.rm = TRUE
)
sv.table
## myedu mycooks se
## 1NoHS 1NoHS 0.6145932 0.02795179
## 2HighSchool 2HighSchool 0.5415268 0.01548395
## 3SomeCollege 3SomeCollege 0.5361810 0.01318545
## 4Bachelors 4Bachelors 0.5722880 0.01639110
## 5MastersPhD 5MastersPhD 0.5338177 0.01867883
The share of households that use stove tops or ovens at least as often as microwaves, by educational attainment (weighted estimate +/- one standard error), is the following:
61.46% (+/-2.80%) of households without a high school diploma; 54.15% (+/-1.55%) of households with a high school diploma; 53.62% (+/-1.32%) of households with some college or an Associate's degree; 57.23% (+/-1.64%) of households with a Bachelor's degree; 53.38% (+/-1.87%) of households with a Master's degree or doctorate.
Households without a high school diploma and households whose highest attainment is a Bachelor's degree use microwaves significantly less, relative to ovens and stove tops, than the rest of the population.