I will be using the BRFSS data to look at several variables and how they relate to the number of healthy days. These variables are: binge drinking, smoking, drinking and driving, seat belt usage, sleep per night, and exercise. I expect some of these to have stronger associations with healthy days than others; in particular, I think smoking, binge drinking, and sleep per night will be the strongest.

library(car)
library(stargazer)
library(survey)
library(ggplot2)
library(pander)
library(dplyr)
library(knitr)
library(factoextra)
library(FactoMineR)
# Read the BRFSS extract (brfss20sm.rds) directly from its GitHub URL.
brfss20 <- readRDS(
  url("https://github.com/coreysparks/DEM7283/blob/master/data/brfss20sm.rds?raw=true")
)

# Normalise the column names: lower-case them and strip every underscore,
# so the variables can be referenced below as e.g. `drnk3ge5`.
names(brfss20) <- gsub("_", "", tolower(names(brfss20)), fixed = TRUE)
# Build the seven analysis variables from the raw survey items in one pass.
# In each recode the codes 7/9 (or 77/99) are set to NA -- presumably the
# "don't know" / "refused" responses; confirm against the BRFSS codebook.
brfss20 <- brfss20 %>%
  mutate(
    # binge drinking: code 88 is turned into a count of 0
    bingedrink = Recode(drnk3ge5, recodes = "88=0; 77=NA; 99=NA"),

    # smoking: 0/1 indicator (code 2 becomes 0)
    smoke = Recode(smoke100, recodes = "2=0; 7=NA; 9=NA"),

    # drinking and driving: code 88 is turned into a count of 0
    drinkdrive = Recode(drnkdri2, recodes = "88=0; 77=NA; 99=NA"),

    # seat belt usage: codes 1-2 -> 1, codes 3-5 -> 0; code 8 is also set to NA
    seatbeltdrive = Recode(seatbelt, recodes = "1:2 = 1; 3:5 = 0; 7=NA; 8  = NA; 9=NA"),

    # sleep per night: kept as reported, only 77/99 removed
    sleep = Recode(sleptim1, recodes = "77=NA; 99=NA"),

    # exercise: 0/1 indicator (code 2 becomes 0)
    exercise = Recode(exerany2, recodes = "2=0; 7=NA; 9=NA"),

    # healthy days: code 88 is turned into 0 days
    # NOTE(review): the BRFSS item PHYSHLTH appears to count days on which
    # physical health was NOT good -- confirm, since that would make higher
    # values of `healthdays` mean worse health.
    healthdays = Recode(physhlth, recodes = "88=0; 77=NA; 99=NA")
  )
# Keep only the seven analysis variables, dropping every respondent who has
# a missing value on any of them.
brfss20a <- brfss20 %>%
  filter(complete.cases(bingedrink, smoke, drinkdrive, seatbeltdrive, sleep, exercise, healthdays)) %>%
  select(healthdays, bingedrink, smoke, drinkdrive, seatbeltdrive, sleep, exercise)

# Draw a simple random subsample (without replacement) of at most 10,000 rows.
# The seed is fixed so that the subsample -- and therefore every PCA result
# computed from it -- is the same on each run. `min()` guards against the
# error `sample()` raises when fewer than 10,000 complete cases remain, and
# `seq_len()` stays correct even if no rows are left.
set.seed(7283)
samps <- sample(
  seq_len(nrow(brfss20a)),
  size = min(10000, nrow(brfss20a)),
  replace = FALSE
)
brfss20a <- brfss20a[samps, ]
# Principal components analysis of the seven selected variables.
#   scale.unit = TRUE  -> variables are scaled to unit variance before the
#                         analysis, so no variable dominates through its units
#   graph = FALSE      -> do not draw FactoMineR's default plots here
# TRUE/FALSE are spelled out because `T` and `F` are ordinary variables that
# can be reassigned.
brfss20.pc <- PCA(brfss20a,
                  scale.unit = TRUE,
                  graph = FALSE)

# Print the variable-level results: coordinates, correlations, squared
# cosines and contributions of each variable on each dimension.
brfss20.pc$var
## $coord
##                    Dim.1         Dim.2       Dim.3      Dim.4       Dim.5
## healthdays     0.5346010 -0.4490274041  0.07958710  0.2471372 -0.27680970
## bingedrink     0.4978492  0.5810861774 -0.03098299 -0.0146508  0.13132977
## smoke          0.5486866  0.0257584783 -0.10280339  0.2012490  0.70565038
## drinkdrive     0.2752267  0.6344575614  0.25788462  0.2352849 -0.46460739
## seatbeltdrive -0.3869399 -0.0187160299  0.05574803  0.8889061  0.09034944
## sleep         -0.1649942  0.0007673925  0.92737098 -0.1289180  0.27542481
## exercise      -0.5520079  0.4440355944 -0.24073855 -0.0410815  0.17446296
## 
## $cor
##                    Dim.1         Dim.2       Dim.3      Dim.4       Dim.5
## healthdays     0.5346010 -0.4490274041  0.07958710  0.2471372 -0.27680970
## bingedrink     0.4978492  0.5810861774 -0.03098299 -0.0146508  0.13132977
## smoke          0.5486866  0.0257584783 -0.10280339  0.2012490  0.70565038
## drinkdrive     0.2752267  0.6344575614  0.25788462  0.2352849 -0.46460739
## seatbeltdrive -0.3869399 -0.0187160299  0.05574803  0.8889061  0.09034944
## sleep         -0.1649942  0.0007673925  0.92737098 -0.1289180  0.27542481
## exercise      -0.5520079  0.4440355944 -0.24073855 -0.0410815  0.17446296
## 
## $cos2
##                    Dim.1        Dim.2        Dim.3        Dim.4       Dim.5
## healthdays    0.28579821 2.016256e-01 0.0063341072 0.0610768126 0.076623611
## bingedrink    0.24785387 3.376611e-01 0.0009599457 0.0002146461 0.017247508
## smoke         0.30105697 6.634992e-04 0.0105685363 0.0405011496 0.497942458
## drinkdrive    0.07574973 4.025364e-01 0.0665044785 0.0553589798 0.215860031
## seatbeltdrive 0.14972249 3.502898e-04 0.0031078434 0.7901540838 0.008163022
## sleep         0.02722308 5.888912e-07 0.8600169291 0.0166198511 0.075858828
## exercise      0.30471275 1.971676e-01 0.0579550515 0.0016876900 0.030437323
## 
## $contrib
##                   Dim.1        Dim.2       Dim.3       Dim.4      Dim.5
## healthdays    20.529753 1.768638e+01  0.62997929  6.32518402  8.3093902
## bingedrink    17.804096 2.961927e+01  0.09547453  0.02222899  1.8703932
## smoke         21.625837 5.820142e-02  1.05112825  4.19434501 53.9989975
## drinkdrive     5.441333 3.531005e+01  6.61441982  5.73303876 23.4087797
## seatbeltdrive 10.755021 3.072703e-02  0.30910070 81.82925349  0.8852328
## sleep          1.955517 5.165689e-05 85.53578874  1.72117064  8.2264539
## exercise      21.888442 1.729533e+01  5.76410867  0.17477909  3.3007528
# Pull the eigenvalue table out of the PCA result and show its first two
# columns (eigenvalue and percentage of variance) for the leading components.
eigenvalues <- brfss20.pc[["eig"]]
head(eigenvalues[, c(1, 2)])
##        eigenvalue percentage of variance
## comp 1  1.3921171               19.88739
## comp 2  1.1400051               16.28579
## comp 3  1.0054469               14.36353
## comp 4  0.9656132               13.79447
## comp 5  0.9221328               13.17333
## comp 6  0.7922595               11.31799
# Scree plot of the components (up to 10 requested).
fviz_screeplot(brfss20.pc, ncp = 10)

# Variable plot, with each variable coloured by its contribution.
fviz_pca_var(brfss20.pc, col.var = "contrib") +
  theme_minimal()

# Individual plot: points are unlabelled and coloured by cos2 on a
# blue-white-red gradient centred at 0.5.
fviz_pca_ind(brfss20.pc, col.ind = "cos2", label = "none") +
  scale_color_gradient2(
    midpoint = 0.5,
    low = "blue",
    mid = "white",
    high = "red"
  ) +
  theme_minimal()

We can see that several of the variables load in separate directions. Unsurprisingly, binge drinking and drinking and driving point in the same direction.

Three of the eigenvalues are greater than 1, which is what we are looking for, and a fourth (0.97) is quite close to 1. The largest eigenvalue explains about 20% of the variance.

# Describe each dimension by the variables most correlated with it,
# then show the description of the first dimension.
desc <- dimdesc(brfss20.pc)
desc[["Dim.1"]]
## $quanti
##               correlation       p.value
## smoke           0.5486866  0.000000e+00
## healthdays      0.5346010  0.000000e+00
## bingedrink      0.4978492  0.000000e+00
## drinkdrive      0.2752267 2.779070e-173
## sleep          -0.1649942  5.770773e-62
## seatbeltdrive  -0.3869399  0.000000e+00
## exercise       -0.5520079  0.000000e+00
## 
## attr(,"class")
## [1] "condes" "list"
# Same description for the second dimension.
desc[["Dim.2"]]
## $quanti
##            correlation     p.value
## drinkdrive  0.63445756 0.000000000
## bingedrink  0.58108618 0.000000000
## exercise    0.44403559 0.000000000
## smoke       0.02575848 0.009996418
## healthdays -0.44902740 0.000000000
## 
## attr(,"class")
## [1] "condes" "list"

Overall, we can see that some of the variables are relevant and could be used to perform variable reduction. However, some of the variables that I selected were not relevant or loaded in the opposite direction and would have to be removed for future use.