library(haven)
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(broom)
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.1. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(questionr)
library(pander)
# Load the RECS 2015 public-use file (version 2) from a local CSV.
recs2015v2 <- read.csv(file = "C:/Users/tobik/Downloads/recs2015_public_v2.csv")
Creating a subset without missing values:
# Keep only the rows that pass the three filters that were actually in effect.
#
# NOTE(review): the original chain also called subset() with `ZMICRO=1`,
# `ZOVEN=1`, `ZSTOVE=1` and `ZLGTINLED=1`. A single `=` passes a named
# argument that subset() ignores, so those four calls returned every row and
# never filtered anything. They are dropped here so the code shows what really
# runs. If those filters are wanted, add them to the condition below with `==`
# -- but first confirm in the RECS codebook what the Z-prefixed columns mean
# (presumably imputation flags rather than "has a microwave / oven / stove"
# indicators -- TODO confirm), because enabling them changes the sample.
myrecs3 <- subset(
  recs2015v2,
  PROTHERM > -1 & # programmable-thermostat question answered / applicable
    EQUIPAGE > 0 & # heating-equipment age answered / applicable
    AGECENAC > 0 # central-AC age answered / applicable
)
Recoding missing values as zeros for the calculation of cooking methods (-2 indicates “NA”, so it is effectively a “not used” case, i.e. 0):
# Collapse every code from -2 through 0 to 0 in the five cooking-use columns,
# so that "not applicable" counts as "not used" when the columns are combined.
cook_use_cols <- c("COOKTUSE", "OVENUSE", "SEPCOOKTUSE", "SEPOVENUSE", "AMTMICRO")
myrecs3[cook_use_cols] <- lapply(
  myrecs3[cook_use_cols],
  function(codes) recode(codes, recodes = "-2:0=0")
)
Calculating the value to be used for binary indicator
Microwave use over Stove/Cook-top use:
# Binary cooking indicator.
# Add the cooktop, oven, separate-cooktop and separate-oven use codes and
# subtract the microwave use code; the sign of the difference sets the class.
myrecs3$mycooks <- with(
  myrecs3,
  COOKTUSE + OVENUSE + SEPCOOKTUSE + SEPOVENUSE - AMTMICRO
)
# A single recode replaces the original three-step "flip"
# (0:396 -> 1 -> 2 -> 0 and -99:-1 -> 0 -> 3 -> 1). The outcome is the same
# as long as the difference is a whole number (assumed: the inputs are survey
# category codes -- confirm). Values outside -99..396 are carried over as is.
myrecs3$mycooks <- recode(myrecs3$mycooks, recodes = "0:396=0;-99:-1=1")
## 0 = stove / oven / cooktop use is at least as high as microwave use
## 1 = microwave use is higher than stove / oven / cooktop use
Age of heating equipment:
# Heating-equipment age as a binary flag, recoded directly from EQUIPAGE:
# codes 1 to 3 become 1, codes 5 to 42 become 0.
myrecs3$heating <- recode(myrecs3$EQUIPAGE, recodes = "1:3=1;5:42=0")
## 0 = equipment 10 years old or older
## 1 = equipment less than 10 years old
Programmable thermostat for heating: the thermostat is coded as PROTHERM = 1 for having one and PROTHERM = 0 for not having one.
Age of AC equipment:
# Central-AC age as a binary flag, recoded directly from AGECENAC:
# codes 1 to 3 become 1, codes 5 to 42 become 0.
myrecs3$cooling <- recode(myrecs3$AGECENAC, recodes = "1:3=1;5:42=0")
## 0 = equipment 10 years old or older
## 1 = equipment less than 10 years old
Uses at least most LED light fixtures:
# LED share as a binary flag, recoded directly from LGTINLED:
# codes 1 to 2 become 1, codes 3 to 4 become 0. Any other code is carried
# over unchanged by recode().
myrecs3$LEDbulbs <- recode(myrecs3$LGTINLED, recodes = "1:2=1;3:4=0")
## 0 = about half to none
## 1 = most or all LED bulbs
The variables are: mycooks heating PROTHERM cooling LEDbulbs
The scores do not need to be standardized because they are all binary.
# Principal components of the five indicators. The variables are centered but
# left on their original scale (scale. is not set, so prcomp's default of no
# rescaling applies). TRUE is spelled out because T is an ordinary variable
# that can be reassigned.
myrecs3.pc <- prcomp(
  ~ LEDbulbs + PROTHERM + heating + cooling + mycooks,
  data = myrecs3,
  center = TRUE,
  retx = TRUE
)
summary(myrecs3.pc)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5
## Standard deviation 0.6513 0.4995 0.4691 0.2819 0.21944
## Proportion of Variance 0.4153 0.2443 0.2154 0.0778 0.04715
## Cumulative Proportion 0.4153 0.6596 0.8751 0.9528 1.00000
# Loadings matrix: one row per input variable, one column per component.
myrecs3.pc[["rotation"]]
## PC1 PC2 PC3 PC4 PC5
## LEDbulbs 0.0007640987 -0.004936221 0.01662451 0.0428534040 0.998930559
## PROTHERM 0.1665177163 0.072674156 0.98319585 -0.0082016587 -0.015779058
## heating 0.6989498882 -0.017298808 -0.12246090 -0.7036861672 0.031605543
## cooling 0.6954152884 -0.016846168 -0.11108560 0.7091697657 -0.029189333
## mycooks -0.0117429476 -0.997051182 0.07558354 -0.0005831635 -0.006150819
# Histograms of the scores on the first and second principal components.
# hist() builds the default title and axis label from the argument text, so
# the calls are kept exactly as written to leave the figures unchanged.
hist(myrecs3.pc$x[,1])
hist(myrecs3.pc$x[,2])
Correlations between the first 2 components:
# Correlation matrix of the scores on the first two components.
cor(myrecs3.pc$x[, c("PC1", "PC2")])
## PC1 PC2
## PC1 1.00000e+00 -2.47691e-15
## PC2 -2.47691e-15 1.00000e+00
# Attach the component scores to the survey rows. Both tables get a `name`
# key taken from their row names, and merge() keeps only the keys present in
# both tables (all.x = FALSE, all.y left at its default).
scores <- data.frame(myrecs3.pc$x)
scores$name <- rownames(myrecs3.pc$x)
myrecs3$name <- rownames(myrecs3)
# by = "name" is equivalent to the former by.x/by.y pair; FALSE replaces the
# reassignable shorthand F.
myrecs3 <- merge(myrecs3, scores, by = "name", all.x = FALSE)
# Show the last 20 column names to confirm the PC columns were appended.
tail(names(myrecs3), 20)
## [1] "BRRWT86" "BRRWT87" "BRRWT88" "BRRWT89" "BRRWT90" "BRRWT91"
## [7] "BRRWT92" "BRRWT93" "BRRWT94" "BRRWT95" "BRRWT96" "mycooks"
## [13] "heating" "cooling" "LEDbulbs" "PC1" "PC2" "PC3"
## [19] "PC4" "PC5"
Correlations: (LEDbulbs PROTHERM mycooks heating cooling )
# Spearman rank correlations among the five indicators, rounded to 3 decimals.
round(
  cor(
    myrecs3[, c("LEDbulbs", "PROTHERM", "heating", "cooling", "mycooks")],
    method = "spearman"
  ),
  digits = 3
)
## LEDbulbs PROTHERM heating cooling mycooks
## LEDbulbs 1.000 0.027 -0.010 0.008 0.011
## PROTHERM 0.027 1.000 0.097 0.103 -0.011
## heating -0.010 0.097 1.000 0.681 -0.005
## cooling 0.008 0.103 0.681 1.000 -0.005
## mycooks 0.011 -0.011 -0.005 -0.005 1.000
This exercise can only be seen as a programming task, because that is most of what is left. The only strong correlation is between newer central heating and newer central AC, which are understandably correlated since they are - when new - most of the time the same unit. The concept was that people who are eco-friendly would have LED light bulbs, a programmable thermostat (which I found out is always attached to the heater - with the exception of one case where it is only for the AC), have a newer heater or AC (less than 10 years old) and prefer the microwave over oven/stove use to save energy by cooking once and reheating with more energy-efficient microwaves.