library(car)
## Loading required package: carData
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(ggplot2)
library(pander)
## Warning: package 'pander' was built under R version 4.0.4
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(knitr)


load("C:/Users/Steve/DEM7283/rsconnect/documents/homework7/data.RData")


summary(data)
##      STRATA           PSU           SAMPWEIGHT           HEALTH     
##  Min.   :100.0   Min.   :  1.00   Min.   :   652.1   Min.   :1.000  
##  1st Qu.:111.0   1st Qu.:  8.00   1st Qu.:  4074.0   1st Qu.:1.000  
##  Median :125.0   Median : 23.00   Median :  6493.9   Median :2.000  
##  Mean   :125.4   Mean   : 31.13   Mean   :  7867.2   Mean   :2.207  
##  3rd Qu.:139.0   3rd Qu.: 47.00   3rd Qu.:  9646.1   3rd Qu.:3.000  
##  Max.   :151.0   Max.   :153.00   Max.   :121704.8   Max.   :9.000  
##     ASTHMAEV       CHEARTDIEV       DIABETICEV       STROKEV      
##  Min.   :1.000   Min.   :0.0000   Min.   :1.000   Min.   :0.0000  
##  1st Qu.:1.000   1st Qu.:1.0000   1st Qu.:1.000   1st Qu.:1.0000  
##  Median :1.000   Median :1.0000   Median :1.000   Median :1.0000  
##  Mean   :1.138   Mean   :0.8399   Mean   :1.092   Mean   :0.8143  
##  3rd Qu.:1.000   3rd Qu.:1.0000   3rd Qu.:1.000   3rd Qu.:1.0000  
##  Max.   :9.000   Max.   :9.0000   Max.   :9.000   Max.   :9.0000  
##   SMOKFREQNOW    
##  Min.   :0.0000  
##  1st Qu.:0.0000  
##  Median :0.0000  
##  Mean   :0.4851  
##  3rd Qu.:1.0000  
##  Max.   :8.0000
#recode variables

data$ast<-Recode(data$ASTHMAEV,
              recodes="2=1; 1=0; else=NA")
#1 is good health while 0 is bad health
data$hlth<-Recode(data$HEALTH,
                    recodes="1=1; 2=1; 3=1; 4=0; 5=0; else=NA")
data$cohrt<-Recode(data$CHEARTDIEV,
                   recodes= "1=1; 2=0; else=NA")

data$dia<-Recode(data$DIABETICEV,
                 recodes= "1=1; 2=0; 3=0; else=NA")

data$strke<-Recode(data$STROKEV,
                   recodes= "1=1; 2=0; else=NA")

data$smke<-Recode(data$SMOKFREQNOW,
                   recodes= "1=1; 2=0; 3=0; else=NA")


data <- na.omit(data)
data1<-data%>%
  filter(complete.cases(SAMPWEIGHT, STRATA, HEALTH, ASTHMAEV,CHEARTDIEV,DIABETICEV, STROKEV, SMOKFREQNOW, ast, hlth, cohrt, dia, strke, smke))%>%
  select(SAMPWEIGHT, STRATA, HEALTH, ASTHMAEV,CHEARTDIEV,DIABETICEV, STROKEV, SMOKFREQNOW, ast, hlth, cohrt, dia, strke, smke)%>%
  mutate_at(vars(ast, hlth, cohrt, dia, strke, smke), scale)

data.pc<-princomp(~hlth+ast+cohrt+dia+strke+smke,
                   data=data1,
                   scores=T)

#Screeplot
screeplot(data.pc,
          type = "l",
          main = "Scree Plot")
abline(h=1)

summary(data.pc)
## Importance of components:
##                          Comp.1    Comp.2    Comp.3    Comp.4    Comp.5
## Standard deviation     1.254056 1.0455280 0.9839541 0.9347115 0.8864570
## Proportion of Variance 0.262131 0.1822030 0.1613742 0.1456262 0.1309784
## Cumulative Proportion  0.262131 0.4443341 0.6057082 0.7513344 0.8823128
##                           Comp.6
## Standard deviation     0.8402770
## Proportion of Variance 0.1176872
## Cumulative Proportion  1.0000000
loadings(data.pc )
## 
## Loadings:
##       Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## hlth   0.540  0.308         0.122         0.769
## ast   -0.168 -0.530  0.743  0.308         0.209
## cohrt  0.510 -0.233                0.771 -0.302
## dia    0.479 -0.144 -0.150  0.609 -0.491 -0.339
## strke  0.432         0.406 -0.675 -0.388 -0.202
## smke          0.739  0.504  0.252  0.114 -0.344
## 
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## SS loadings     1.000  1.000  1.000  1.000  1.000  1.000
## Proportion Var  0.167  0.167  0.167  0.167  0.167  0.167
## Cumulative Var  0.167  0.333  0.500  0.667  0.833  1.000
#There were no eigenvalues that were all positive or all negative

scores<-data.frame(data.pc$scores)
hist(scores$Comp.1)

hist(scores$Comp.2)

cor(scores[,1:2])
##               Comp.1        Comp.2
## Comp.1  1.000000e+00 -1.868843e-14
## Comp.2 -1.868843e-14  1.000000e+00
data2<-cbind(data1, scores)
names(data2)
##  [1] "SAMPWEIGHT"  "STRATA"      "HEALTH"      "ASTHMAEV"    "CHEARTDIEV" 
##  [6] "DIABETICEV"  "STROKEV"     "SMOKFREQNOW" "ast"         "hlth"       
## [11] "cohrt"       "dia"         "strke"       "smke"        "Comp.1"     
## [16] "Comp.2"      "Comp.3"      "Comp.4"      "Comp.5"      "Comp.6"
round(cor(data2[,c(-1:-8, -15:-20)]), 3)
##          ast   hlth  cohrt    dia  strke   smke
## ast    1.000 -0.121 -0.020 -0.038 -0.010 -0.031
## hlth  -0.121  1.000  0.214  0.215  0.190  0.072
## cohrt -0.020  0.214  1.000  0.194  0.173 -0.086
## dia   -0.038  0.215  0.194  1.000  0.112 -0.068
## strke -0.010  0.190  0.173  0.112  1.000 -0.016
## smke  -0.031  0.072 -0.086 -0.068 -0.016  1.000
round(cor(data2[,c(-1:-8, -17:-20)]),3)
##           ast   hlth  cohrt    dia  strke   smke Comp.1 Comp.2
## ast     1.000 -0.121 -0.020 -0.038 -0.010 -0.031 -0.211 -0.554
## hlth   -0.121  1.000  0.214  0.215  0.190  0.072  0.678  0.323
## cohrt  -0.020  0.214  1.000  0.194  0.173 -0.086  0.639 -0.244
## dia    -0.038  0.215  0.194  1.000  0.112 -0.068  0.600 -0.150
## strke  -0.010  0.190  0.173  0.112  1.000 -0.016  0.542 -0.044
## smke   -0.031  0.072 -0.086 -0.068 -0.016  1.000 -0.084  0.773
## Comp.1 -0.211  0.678  0.639  0.600  0.542 -0.084  1.000  0.000
## Comp.2 -0.554  0.323 -0.244 -0.150 -0.044  0.773  0.000  1.000
#The results show negative correlation more than likely due to health beahavior coded as 0
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1,
               strata=~STRATA, 
               weights=~SAMPWEIGHT,
               data = data2)


library(ggplot2)
ggplot(aes(x=hlth, y=Comp.1),
       data=data2)+
  geom_boxplot()
## Warning: Continuous x aesthetic -- did you forget aes(group=...)?

ggplot(aes(x=HEALTH, y=Comp.1),
       data=data2)+
  geom_boxplot()
## Don't know how to automatically pick scale for object of type haven_labelled/vctrs_vctr/integer. Defaulting to continuous.
## Warning: Continuous x aesthetic -- did you forget aes(group=...)?

fit.1<-svyglm(Comp.1~hlth+smke+cohrt,
              des,
              family=gaussian)
summary(fit.1)
## 
## Call:
## svyglm(formula = Comp.1 ~ hlth + smke + cohrt, design = des, 
##     family = gaussian)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~STRATA, weights = ~SAMPWEIGHT, 
##     data = data2)
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.014545   0.007067   2.058   0.0396 *  
## hlth         0.716224   0.009434  75.920   <2e-16 ***
## smke        -0.105647   0.006564 -16.095   <2e-16 ***
## cohrt        0.645263   0.011439  56.410   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.4041208)
## 
## Number of Fisher Scoring iterations: 2