Load Libraries, Import NHIS Data

library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
library(haven)
library(dummies)
## dummies-1.5.6 provided by Decision Patterns
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
## 
##     expand
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(data.table)
## -------------------------------------------------------------------------
## data.table + dplyr code now lives in dtplyr.
## Please library(dtplyr)!
## -------------------------------------------------------------------------
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
NHIS<-read_dta("NHIS_v2.dta")

NHIS.df<- as.data.frame(NHIS)
NHIS.dt<- as.data.table(NHIS)

Variables

NHIS.df$racenew<-as.factor(NHIS.df$racenew)
NHIS.df$racenew<-as.character(NHIS.df$racenew)
#height
NHIS.df$height<-as.factor(NHIS.df$height)
NHIS.df$height<-as.integer(NHIS.df$height)
#sex
NHIS.df$sex<-as.factor(NHIS.df$sex)
NHIS.df$sex<-as.integer(NHIS.df$sex)

Recode: Race

NHIS.df$racenew[NHIS.df$racenew=="10"] <- "White"
NHIS.df$racenew[NHIS.df$racenew=="20"] <- "Black/African American"
NHIS.df$racenew[NHIS.df$racenew=="30"] <- "American Indian/Alaskan Native"
NHIS.df$racenew[NHIS.df$racenew=="40"] <- "Asian"
NHIS.df$racenew[NHIS.df$racenew=="50"] <- "Multiple Race"
NHIS.df$racenew[NHIS.df$racenew=="60"] <- "Other Race"
NHIS.df$racenew[NHIS.df$racenew=="61"] <- "Race Group Not Releasable"
NHIS.df$racenew[NHIS.df$racenew=="97"] <- NA
NHIS.df$racenew[NHIS.df$racenew=="98"] <- NA
NHIS.df$racenew[NHIS.df$racenew=="99"] <- NA

Table: Race

table_race <- table(NHIS.df$racenew)

Recode: Height

NHIS.df$height[NHIS.df$height==1] <- 59
NHIS.df$height[NHIS.df$height==2] <- 60
NHIS.df$height[NHIS.df$height==3] <- 61
NHIS.df$height[NHIS.df$height==4] <- 62
NHIS.df$height[NHIS.df$height==5] <- 63
NHIS.df$height[NHIS.df$height==6] <- 64
NHIS.df$height[NHIS.df$height==7] <- 65
NHIS.df$height[NHIS.df$height==8] <- 66
NHIS.df$height[NHIS.df$height==9] <- 67
NHIS.df$height[NHIS.df$height==10] <- 68
NHIS.df$height[NHIS.df$height==11] <- 69
NHIS.df$height[NHIS.df$height==12] <- 70
NHIS.df$height[NHIS.df$height==13] <- 71
NHIS.df$height[NHIS.df$height==14] <- 72
NHIS.df$height[NHIS.df$height==15] <- 73
NHIS.df$height[NHIS.df$height==16] <- 74
NHIS.df$height[NHIS.df$height==17] <- 75
NHIS.df$height[NHIS.df$height==18] <- 76
NHIS.df$height[NHIS.df$height==19] <- NA
NHIS.df$height[NHIS.df$height==20] <- NA
NHIS.df$height[NHIS.df$height==21] <- NA
NHIS.df$height[NHIS.df$height==22] <- NA

Table: Height

table_height <- table(NHIS.df$height)

Table: Age

table_age <- table(NHIS.df$age)

Table: Sex

table_sex <- table(NHIS.df$sex)

Recode: Sex

NHIS.df$sex[NHIS.df$sex==1] <- "Male"
NHIS.df$sex[NHIS.df$sex==2] <- "Female"

Dummy Variables Black, Asian, Female

NHIS.df$Black <- as.numeric(NHIS.df$racenew == "Black/African American")

NHIS.df1 <- cbind(NHIS.df, dummy(NHIS.df$racenew, sep = "_"))

NHIS.df2 <- cbind(NHIS.df1, dummy(NHIS.df$sex, sep = "_"))

Table: Black

table(NHIS.df2$`NHIS.df_Black/African American`)
## < table of extent 0 >

Table: Asian

table(NHIS.df2$NHIS.df_Asian)
## < table of extent 0 >

Table: Female

table(NHIS.df2$NHIS.df1_Female)
## < table of extent 0 >

Table: Male

table(NHIS.df2$NHIS.df1_Male)
## < table of extent 0 >

Regression, No Weights

colnames(NHIS.df2)[312]="NHIS.df_Asian"
colnames(NHIS.df2)[313]="NHIS.df_Black"
colnames(NHIS.df2)[320]="NHIS.df1_Female"
mlm_NOWEIGHTS <- lm(height ~ age + NHIS.df_Black + NHIS.df_Asian + NHIS.df1_Female, data=NHIS.df2)
mlm_NOWEIGHTS
## 
## Call:
## lm(formula = height ~ age + NHIS.df_Black + NHIS.df_Asian + NHIS.df1_Female, 
##     data = NHIS.df2)
## 
## Coefficients:
##     (Intercept)              age    NHIS.df_Black    NHIS.df_Asian  
##        70.59734         -0.01689          0.18422         -1.73427  
## NHIS.df1_Female  
##        -5.63264
summary(mlm_NOWEIGHTS)
## 
## Call:
## lm(formula = height ~ age + NHIS.df_Black + NHIS.df_Asian + NHIS.df1_Female, 
##     data = NHIS.df2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.4775 -1.8880 -0.0062  1.8924  8.4036 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     70.5973408  0.0106047 6657.15   <2e-16 ***
## age             -0.0168893  0.0001929  -87.55   <2e-16 ***
## NHIS.df_Black    0.1842159  0.0104233   17.67   <2e-16 ***
## NHIS.df_Asian   -1.7342691  0.0178229  -97.31   <2e-16 ***
## NHIS.df1_Female -5.6326388  0.0069915 -805.64   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.625 on 573881 degrees of freedom
##   (45651 observations deleted due to missingness)
## Multiple R-squared:  0.5392, Adjusted R-squared:  0.5392 
## F-statistic: 1.679e+05 on 4 and 573881 DF,  p-value: < 2.2e-16

Mutate year_strata

NHIS.df3 <- NHIS.df2 %>% 
  mutate(year_strata = year+(strata/10000))
           
summary(NHIS.df3$year_strata)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1998    2002    2007    2007    2013    2017

Setting up Survey Design

ex_strata_weights <- svydesign(id = ~psu, data=NHIS.df3,strata = ~year_strata,weights = ~sampweight, nest = TRUE)
ex_strata_weights
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (12757) clusters.
## svydesign(id = ~psu, data = NHIS.df3, strata = ~year_strata, 
##     weights = ~sampweight, nest = TRUE)

Regression, Weights

mlm_WEIGHTS_CSD <- svyglm(formula = height ~ age + NHIS.df_Black + NHIS.df_Asian + NHIS.df1_Female, design=ex_strata_weights)
mlm_WEIGHTS_CSD
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (12757) clusters.
## svydesign(id = ~psu, data = NHIS.df3, strata = ~year_strata, 
##     weights = ~sampweight, nest = TRUE)
## 
## Call:  svyglm(formula = height ~ age + NHIS.df_Black + NHIS.df_Asian + 
##     NHIS.df1_Female, design = ex_strata_weights)
## 
## Coefficients:
##     (Intercept)              age    NHIS.df_Black    NHIS.df_Asian  
##        70.73150         -0.01697          0.04149         -1.86301  
## NHIS.df1_Female  
##        -5.66932  
## 
## Degrees of Freedom: 573885 Total (i.e. Null);  6650 Residual
##   (45651 observations deleted due to missingness)
## Null Deviance:       8737000 
## Residual Deviance: 3958000   AIC: 2879000
summary(mlm_WEIGHTS_CSD)
## 
## Call:
## svyglm(formula = height ~ age + NHIS.df_Black + NHIS.df_Asian + 
##     NHIS.df1_Female, design = ex_strata_weights)
## 
## Survey design:
## svydesign(id = ~psu, data = NHIS.df3, strata = ~year_strata, 
##     weights = ~sampweight, nest = TRUE)
## 
## Coefficients:
##                  Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)     70.731497   0.014544 4863.299  < 2e-16 ***
## age             -0.016965   0.000247  -68.686  < 2e-16 ***
## NHIS.df_Black    0.041489   0.014242    2.913  0.00359 ** 
## NHIS.df_Asian   -1.863009   0.020592  -90.472  < 2e-16 ***
## NHIS.df1_Female -5.669318   0.008855 -640.268  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 6.896197)
## 
## Number of Fisher Scoring iterations: 2