#Load packages
library(car)
## Loading required package: carData
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(ggplot2)
library(pander)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr)
library(haven)
#Load BRFSS
BRFSS19<-read_xpt("C:/Users/adolp/Desktop/Statistics 2/BRFSS/LLCP2019.XPT")
#Fix BRFSS names
nams<-names(BRFSS19)
head(BRFSS19, n=10)
## # A tibble: 10 x 342
## `_STATE` FMONTH IDATE IMONTH IDAY IYEAR DISPCODE SEQNO `_PSU` CTELENM1
## <dbl> <dbl> <chr> <chr> <chr> <chr> <dbl> <chr> <dbl> <dbl>
## 1 1 1 011820~ 01 18 2019 1100 201900~ 2.02e9 1
## 2 1 1 011320~ 01 13 2019 1100 201900~ 2.02e9 1
## 3 1 1 011820~ 01 18 2019 1100 201900~ 2.02e9 1
## 4 1 1 011820~ 01 18 2019 1200 201900~ 2.02e9 1
## 5 1 1 010420~ 01 04 2019 1100 201900~ 2.02e9 1
## 6 1 1 011820~ 01 18 2019 1200 201900~ 2.02e9 1
## 7 1 1 010420~ 01 04 2019 1100 201900~ 2.02e9 1
## 8 1 1 012320~ 01 23 2019 1100 201900~ 2.02e9 1
## 9 1 1 012420~ 01 24 2019 1100 201900~ 2.02e9 1
## 10 1 1 011320~ 01 13 2019 1100 201900~ 2.02e9 1
## # ... with 332 more variables: PVTRESD1 <dbl>, COLGHOUS <dbl>, STATERE1 <dbl>,
## # CELPHONE <dbl>, LADULT1 <dbl>, COLGSEX <dbl>, NUMADULT <dbl>,
## # LANDSEX <dbl>, NUMMEN <dbl>, NUMWOMEN <dbl>, RESPSLCT <dbl>,
## # SAFETIME <dbl>, CTELNUM1 <dbl>, CELLFON5 <dbl>, CADULT1 <dbl>,
## # CELLSEX <dbl>, PVTRESD3 <dbl>, CCLGHOUS <dbl>, CSTATE1 <dbl>,
## # LANDLINE <dbl>, HHADULT <dbl>, SEXVAR <dbl>, GENHLTH <dbl>, PHYSHLTH <dbl>,
## # MENTHLTH <dbl>, POORHLTH <dbl>, HLTHPLN1 <dbl>, PERSDOC2 <dbl>,
## # MEDCOST <dbl>, CHECKUP1 <dbl>, BPHIGH4 <dbl>, BPMEDS <dbl>, CHOLCHK2 <dbl>,
## # TOLDHI2 <dbl>, CHOLMED2 <dbl>, CVDINFR4 <dbl>, CVDCRHD4 <dbl>,
## # CVDSTRK3 <dbl>, ASTHMA3 <dbl>, ASTHNOW <dbl>, CHCSCNCR <dbl>,
## # CHCOCNCR <dbl>, CHCCOPD2 <dbl>, ADDEPEV3 <dbl>, CHCKDNY2 <dbl>,
## # DIABETE4 <dbl>, DIABAGE3 <dbl>, HAVARTH4 <dbl>, ARTHEXER <dbl>,
## # ARTHEDU <dbl>, LMTJOIN3 <dbl>, ARTHDIS2 <dbl>, JOINPAI2 <dbl>,
## # MARITAL <dbl>, EDUCA <dbl>, RENTHOM1 <dbl>, NUMHHOL3 <dbl>, NUMPHON3 <dbl>,
## # CPDEMO1B <dbl>, VETERAN3 <dbl>, EMPLOY1 <dbl>, CHILDREN <dbl>,
## # INCOME2 <dbl>, WEIGHT2 <dbl>, HEIGHT3 <dbl>, PREGNANT <dbl>, DEAF <dbl>,
## # BLIND <dbl>, DECIDE <dbl>, DIFFWALK <dbl>, DIFFDRES <dbl>, DIFFALON <dbl>,
## # SMOKE100 <dbl>, SMOKDAY2 <dbl>, STOPSMK2 <dbl>, LASTSMK2 <dbl>,
## # USENOW3 <dbl>, ALCDAY5 <dbl>, AVEDRNK3 <dbl>, DRNK3GE5 <dbl>,
## # MAXDRNKS <dbl>, EXERANY2 <dbl>, EXRACT11 <dbl>, EXEROFT1 <dbl>,
## # EXERHMM1 <dbl>, EXRACT21 <dbl>, EXEROFT2 <dbl>, EXERHMM2 <dbl>,
## # STRENGTH <dbl>, FRUIT2 <dbl>, FRUITJU2 <dbl>, FVGREEN1 <dbl>,
## # FRENCHF1 <dbl>, POTATOE1 <dbl>, VEGETAB2 <dbl>, FLUSHOT7 <dbl>,
## # FLSHTMY3 <dbl>, TETANUS1 <dbl>, PNEUVAC4 <dbl>, HIVTST7 <dbl>, ...
newnames<-tolower(gsub(pattern = "_",replacement = "",x = nams))
names(BRFSS19)<-newnames
#Recode variables
BRFSS19$Hrt<-Recode(BRFSS19$cvdinfr4, recodes ="7:9=NA; 1=1;2=0")
BRFSS19$Strk<-Recode(BRFSS19$cvdstrk3, recodes ="7:9=NA; 1=1;2=0")
BRFSS19$Cncr<-Recode(BRFSS19$chcocncr, recodes ="7:9=NA; 1=1;2=0")
BRFSS19$Air<-Recode(BRFSS19$chccopd2, recodes ="7:9=NA; 1=1;2=0")
BRFSS19$Dep<-Recode(BRFSS19$addepev3, recodes ="7:9=NA; 1=1;2=0")
BRFSS19$Kdny<-Recode(BRFSS19$chckdny2, recodes ="7:9=NA; 1=1;2=0")
BRFSS19$Gen<-Recode(BRFSS19$birthsex, recodes = "7:9=NA; 1=1;2=0")
BRFSS19$Hispanic<-Recode(BRFSS19$racegr3, recodes="5=1; 9=NA; else=0")
#SUbset and standardize variables
BRFSS19b<-BRFSS19%>%
filter(complete.cases(llcpwt, ststr, Gen, Hispanic,Hrt, Strk, Cncr, Air, Dep, Kdny))%>%
select(llcpwt, ststr, Gen, Hispanic,Hrt, Strk, Cncr, Air, Dep, Kdny)%>%
mutate_at(vars(Hrt, Strk, Cncr, Air, Dep, Kdny), scale)
#Summary statistics of scaled variables
summary(BRFSS19b)
## llcpwt ststr Gen Hispanic
## Min. : 1.82 Min. :151011 Min. :0.0000 Min. :0.00000
## 1st Qu.: 113.17 1st Qu.:272049 1st Qu.:0.0000 1st Qu.:0.00000
## Median : 250.13 Median :361151 Median :0.0000 Median :0.00000
## Mean : 544.26 Mean :348506 Mean :0.4694 Mean :0.06503
## 3rd Qu.: 586.04 3rd Qu.:491081 3rd Qu.:1.0000 3rd Qu.:0.00000
## Max. :22131.79 Max. :502109 Max. :1.0000 Max. :1.00000
## Hrt.V1 Strk.V1 Cncr.V1
## Min. :-0.226653 Min. :-0.195770 Min. :-0.315724
## 1st Qu.:-0.226653 1st Qu.:-0.195770 1st Qu.:-0.315724
## Median :-0.226653 Median :-0.195770 Median :-0.315724
## Mean : 0.000000 Mean : 0.000000 Mean : 0.000000
## 3rd Qu.:-0.226653 3rd Qu.:-0.195770 3rd Qu.:-0.315724
## Max. : 4.411967 Max. : 5.107943 Max. : 3.167270
## Air.V1 Dep.V1 Kdny.V1
## Min. :-0.267777 Min. :-0.4860826 Min. :-0.185114
## 1st Qu.:-0.267777 1st Qu.:-0.4860826 1st Qu.:-0.185114
## Median :-0.267777 Median :-0.4860826 Median :-0.185114
## Mean : 0.000000 Mean : 0.0000000 Mean : 0.000000
## 3rd Qu.:-0.267777 3rd Qu.:-0.4860826 3rd Qu.:-0.185114
## Max. : 3.734393 Max. : 2.0572303 Max. : 5.401997
#PCA
brfss.pc<-princomp(~Hrt+ Strk+ Cncr+ Air+ Dep+ Kdny,
data=BRFSS19b,
scores=T)
#Screeplot
screeplot(brfss.pc,
type = "l",
main = "Scree Plot")
abline(h=1)
Six chronic health variables were selected to create a chronic health index.
#Summary statistics of components
summary(brfss.pc)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 1.2045106 1.0114657 0.9871635 0.9535046 0.9225369
## Proportion of Variance 0.2418115 0.1705132 0.1624179 0.1515310 0.1418480
## Cumulative Proportion 0.2418115 0.4123247 0.5747427 0.7262736 0.8681216
## Comp.6
## Standard deviation 0.8895266
## Proportion of Variance 0.1318784
## Cumulative Proportion 1.0000000
The first component accounts for 24% of the total variation.
loadings(brfss.pc )
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## Hrt 0.493 0.317 0.353 0.190 0.703
## Strk 0.469 0.253 0.419 0.141 -0.493 -0.527
## Cncr 0.317 0.180 -0.743 0.518 -0.193
## Air 0.467 -0.372 0.178 0.694 -0.360
## Dep 0.267 -0.802 -0.449 0.283
## Kdny 0.384 0.146 -0.382 -0.822
##
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## SS loadings 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.167 0.167 0.167 0.167 0.167 0.167
## Cumulative Var 0.167 0.333 0.500 0.667 0.833 1.000
#plot the first 2 components
scores<-data.frame(brfss.pc$scores)
hist(scores$Comp.1)
hist(scores$Comp.2)
cor(scores[,1:2])
## Comp.1 Comp.2
## Comp.1 1.000000e+00 1.421011e-14
## Comp.2 1.421011e-14 1.000000e+00
BRFSS19c<-cbind(BRFSS19b, scores)
names(BRFSS19c)
## [1] "llcpwt" "ststr" "Gen" "Hispanic" "Hrt" "Strk"
## [7] "Cncr" "Air" "Dep" "Kdny" "Comp.1" "Comp.2"
## [13] "Comp.3" "Comp.4" "Comp.5" "Comp.6"
names(BRFSS19c)
## [1] "llcpwt" "ststr" "Gen" "Hispanic" "Hrt" "Strk"
## [7] "Cncr" "Air" "Dep" "Kdny" "Comp.1" "Comp.2"
## [13] "Comp.3" "Comp.4" "Comp.5" "Comp.6"
round(cor(BRFSS19c[,c(-1:-4, -11:-17)]), 3)
## Hrt Strk Cncr Air Dep Kdny
## Hrt 1.000 0.193 0.068 0.134 0.025 0.107
## Strk 0.193 1.000 0.066 0.106 0.050 0.085
## Cncr 0.068 0.066 1.000 0.084 0.019 0.088
## Air 0.134 0.106 0.084 1.000 0.132 0.087
## Dep 0.025 0.050 0.019 0.132 1.000 0.044
## Kdny 0.107 0.085 0.088 0.087 0.044 1.000
round(cor(BRFSS19c[,c(-1:-5, -13:-17)]),3)
## Strk Cncr Air Dep Kdny Comp.1 Comp.2
## Strk 1.000 0.066 0.106 0.050 0.085 0.564 0.256
## Cncr 0.066 1.000 0.084 0.019 0.088 0.381 0.182
## Air 0.106 0.084 1.000 0.132 0.087 0.563 -0.376
## Dep 0.050 0.019 0.132 1.000 0.044 0.321 -0.811
## Kdny 0.085 0.088 0.087 0.044 1.000 0.463 0.148
## Comp.1 0.564 0.381 0.563 0.321 0.463 1.000 0.000
## Comp.2 0.256 0.182 -0.376 -0.811 0.148 0.000 1.000
#Survey design object
options(survey.lonely.psu = "adjust")
BRFSS19c$pc1q<-cut(BRFSS19c$Comp.1,
breaks = unique(quantile(BRFSS19c$Comp.1,
probs = c(0,.25,.5,.75,1) ,
na.rm=T),
include.lowest=T))
des<-svydesign(ids=~1,
strata=~ststr,
weights=~llcpwt,
data=BRFSS19c)
fit.1<-svyglm(Comp.1~Gen+Hispanic,
des,
family=gaussian)
summary(fit.1)
##
## Call:
## svyglm(formula = Comp.1 ~ Gen + Hispanic, design = des, family = gaussian)
##
## Survey design:
## svydesign(ids = ~1, strata = ~ststr, weights = ~llcpwt, data = BRFSS19c)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.03645 0.01064 -3.424 0.000617 ***
## Gen -0.06012 0.01536 -3.915 9.04e-05 ***
## Hispanic -0.15000 0.02435 -6.160 7.31e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 1.265712)
##
## Number of Fisher Scoring iterations: 2
library(ggplot2)
ggplot(aes(as.factor(Hispanic), y=Comp.1),
data=BRFSS19c)+
geom_boxplot()
library(ggplot2)
ggplot(aes(as.factor(Gen), y=Comp.1),
data=BRFSS19c)+
geom_boxplot()
Six chronic health variables were selected (heart attack, stroke, cancer, COPD, depression, and Asthma) to create a chronic health index. I hypothesized that combined, these six variables had an underlying latent construct of chronic health disease or a chronic health index. The first component had positive loadings, accounting for 24% of the total variation with higher values on the component equating to chronic health issues. Component 1 was then analyzed across two factors: Gender (male = 1, women = 0) and Hispanic (Hispanic = 1, Non-Hispanic = 0). Results showed that the more respondents identified as a male, the less likely of chronic health diseases they had. Also, the more respondents identified as a Hispanics, the less likely of chronic health diseases they had.