library(car)
## Loading required package: carData
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(ggplot2)
library(pander)
## Warning: package 'pander' was built under R version 4.0.4
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr)
load("C:/Users/Steve/DEM7283/rsconnect/documents/homework7/data.RData")
summary(data)
## STRATA PSU SAMPWEIGHT HEALTH
## Min. :100.0 Min. : 1.00 Min. : 652.1 Min. :1.000
## 1st Qu.:111.0 1st Qu.: 8.00 1st Qu.: 4074.0 1st Qu.:1.000
## Median :125.0 Median : 23.00 Median : 6493.9 Median :2.000
## Mean :125.4 Mean : 31.13 Mean : 7867.2 Mean :2.207
## 3rd Qu.:139.0 3rd Qu.: 47.00 3rd Qu.: 9646.1 3rd Qu.:3.000
## Max. :151.0 Max. :153.00 Max. :121704.8 Max. :9.000
## ASTHMAEV CHEARTDIEV DIABETICEV STROKEV
## Min. :1.000 Min. :0.0000 Min. :1.000 Min. :0.0000
## 1st Qu.:1.000 1st Qu.:1.0000 1st Qu.:1.000 1st Qu.:1.0000
## Median :1.000 Median :1.0000 Median :1.000 Median :1.0000
## Mean :1.138 Mean :0.8399 Mean :1.092 Mean :0.8143
## 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:1.0000
## Max. :9.000 Max. :9.0000 Max. :9.000 Max. :9.0000
## SMOKFREQNOW
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.4851
## 3rd Qu.:1.0000
## Max. :8.0000
#recode variables
data$ast<-Recode(data$ASTHMAEV,
recodes="2=1; 1=0; else=NA")
#1 is good health while 0 is bad health
data$hlth<-Recode(data$HEALTH,
recodes="1=1; 2=1; 3=1; 4=0; 5=0; else=NA")
data$cohrt<-Recode(data$CHEARTDIEV,
recodes= "1=1; 2=0; else=NA")
data$dia<-Recode(data$DIABETICEV,
recodes= "1=1; 2=0; 3=0; else=NA")
data$strke<-Recode(data$STROKEV,
recodes= "1=1; 2=0; else=NA")
data$smke<-Recode(data$SMOKFREQNOW,
recodes= "1=1; 2=0; 3=0; else=NA")
data <- na.omit(data)
data1<-data%>%
filter(complete.cases(SAMPWEIGHT, STRATA, HEALTH, ASTHMAEV,CHEARTDIEV,DIABETICEV, STROKEV, SMOKFREQNOW, ast, hlth, cohrt, dia, strke, smke))%>%
select(SAMPWEIGHT, STRATA, HEALTH, ASTHMAEV,CHEARTDIEV,DIABETICEV, STROKEV, SMOKFREQNOW, ast, hlth, cohrt, dia, strke, smke)%>%
mutate_at(vars(ast, hlth, cohrt, dia, strke, smke), scale)
data.pc<-princomp(~hlth+ast+cohrt+dia+strke+smke,
data=data1,
scores=T)
#Screeplot
screeplot(data.pc,
type = "l",
main = "Scree Plot")
abline(h=1)

summary(data.pc)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 1.254056 1.0455280 0.9839541 0.9347115 0.8864570
## Proportion of Variance 0.262131 0.1822030 0.1613742 0.1456262 0.1309784
## Cumulative Proportion 0.262131 0.4443341 0.6057082 0.7513344 0.8823128
## Comp.6
## Standard deviation 0.8402770
## Proportion of Variance 0.1176872
## Cumulative Proportion 1.0000000
loadings(data.pc )
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## hlth 0.540 0.308 0.122 0.769
## ast -0.168 -0.530 0.743 0.308 0.209
## cohrt 0.510 -0.233 0.771 -0.302
## dia 0.479 -0.144 -0.150 0.609 -0.491 -0.339
## strke 0.432 0.406 -0.675 -0.388 -0.202
## smke 0.739 0.504 0.252 0.114 -0.344
##
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## SS loadings 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.167 0.167 0.167 0.167 0.167 0.167
## Cumulative Var 0.167 0.333 0.500 0.667 0.833 1.000
#There were no eigenvalues that were all positive or all negative
scores<-data.frame(data.pc$scores)
hist(scores$Comp.1)

hist(scores$Comp.2)

cor(scores[,1:2])
## Comp.1 Comp.2
## Comp.1 1.000000e+00 -1.868843e-14
## Comp.2 -1.868843e-14 1.000000e+00
data2<-cbind(data1, scores)
names(data2)
## [1] "SAMPWEIGHT" "STRATA" "HEALTH" "ASTHMAEV" "CHEARTDIEV"
## [6] "DIABETICEV" "STROKEV" "SMOKFREQNOW" "ast" "hlth"
## [11] "cohrt" "dia" "strke" "smke" "Comp.1"
## [16] "Comp.2" "Comp.3" "Comp.4" "Comp.5" "Comp.6"
round(cor(data2[,c(-1:-8, -15:-20)]), 3)
## ast hlth cohrt dia strke smke
## ast 1.000 -0.121 -0.020 -0.038 -0.010 -0.031
## hlth -0.121 1.000 0.214 0.215 0.190 0.072
## cohrt -0.020 0.214 1.000 0.194 0.173 -0.086
## dia -0.038 0.215 0.194 1.000 0.112 -0.068
## strke -0.010 0.190 0.173 0.112 1.000 -0.016
## smke -0.031 0.072 -0.086 -0.068 -0.016 1.000
round(cor(data2[,c(-1:-8, -17:-20)]),3)
## ast hlth cohrt dia strke smke Comp.1 Comp.2
## ast 1.000 -0.121 -0.020 -0.038 -0.010 -0.031 -0.211 -0.554
## hlth -0.121 1.000 0.214 0.215 0.190 0.072 0.678 0.323
## cohrt -0.020 0.214 1.000 0.194 0.173 -0.086 0.639 -0.244
## dia -0.038 0.215 0.194 1.000 0.112 -0.068 0.600 -0.150
## strke -0.010 0.190 0.173 0.112 1.000 -0.016 0.542 -0.044
## smke -0.031 0.072 -0.086 -0.068 -0.016 1.000 -0.084 0.773
## Comp.1 -0.211 0.678 0.639 0.600 0.542 -0.084 1.000 0.000
## Comp.2 -0.554 0.323 -0.244 -0.150 -0.044 0.773 0.000 1.000
#The results show negative correlation more than likely due to health beahavior coded as 0
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1,
strata=~STRATA,
weights=~SAMPWEIGHT,
data = data2)
library(ggplot2)
ggplot(aes(x=hlth, y=Comp.1),
data=data2)+
geom_boxplot()
## Warning: Continuous x aesthetic -- did you forget aes(group=...)?

ggplot(aes(x=HEALTH, y=Comp.1),
data=data2)+
geom_boxplot()
## Don't know how to automatically pick scale for object of type haven_labelled/vctrs_vctr/integer. Defaulting to continuous.
## Warning: Continuous x aesthetic -- did you forget aes(group=...)?

fit.1<-svyglm(Comp.1~hlth+smke+cohrt,
des,
family=gaussian)
summary(fit.1)
##
## Call:
## svyglm(formula = Comp.1 ~ hlth + smke + cohrt, design = des,
## family = gaussian)
##
## Survey design:
## svydesign(ids = ~1, strata = ~STRATA, weights = ~SAMPWEIGHT,
## data = data2)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.014545 0.007067 2.058 0.0396 *
## hlth 0.716224 0.009434 75.920 <2e-16 ***
## smke -0.105647 0.006564 -16.095 <2e-16 ***
## cohrt 0.645263 0.011439 56.410 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.4041208)
##
## Number of Fisher Scoring iterations: 2