Load Libraries, Import NHIS Data
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
library(haven)
library(dummies)
## dummies-1.5.6 provided by Decision Patterns
library(survey)
## Loading required package: grid
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
##
## expand
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(data.table)
## -------------------------------------------------------------------------
## data.table + dplyr code now lives in dtplyr.
## Please library(dtplyr)!
## -------------------------------------------------------------------------
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
NHIS<-read_dta("NHIS_v2.dta")
NHIS.df<- as.data.frame(NHIS)
NHIS.dt<- as.data.table(NHIS)
Variables
NHIS.df$racenew<-as.factor(NHIS.df$racenew)
NHIS.df$racenew<-as.character(NHIS.df$racenew)
#height
NHIS.df$height<-as.factor(NHIS.df$height)
NHIS.df$height<-as.integer(NHIS.df$height)
#sex
NHIS.df$sex<-as.factor(NHIS.df$sex)
NHIS.df$sex<-as.integer(NHIS.df$sex)
Recode: Race
NHIS.df$racenew[NHIS.df$racenew=="10"] <- "White"
NHIS.df$racenew[NHIS.df$racenew=="20"] <- "Black/African American"
NHIS.df$racenew[NHIS.df$racenew=="30"] <- "American Indian/Alaskan Native"
NHIS.df$racenew[NHIS.df$racenew=="40"] <- "Asian"
NHIS.df$racenew[NHIS.df$racenew=="50"] <- "Multiple Race"
NHIS.df$racenew[NHIS.df$racenew=="60"] <- "Other Race"
NHIS.df$racenew[NHIS.df$racenew=="61"] <- "Race Group Not Releasable"
NHIS.df$racenew[NHIS.df$racenew=="97"] <- NA
NHIS.df$racenew[NHIS.df$racenew=="98"] <- NA
NHIS.df$racenew[NHIS.df$racenew=="99"] <- NA
Table: Race
table_race <- table(NHIS.df$racenew)
Recode: Height
NHIS.df$height[NHIS.df$height==1] <- 59
NHIS.df$height[NHIS.df$height==2] <- 60
NHIS.df$height[NHIS.df$height==3] <- 61
NHIS.df$height[NHIS.df$height==4] <- 62
NHIS.df$height[NHIS.df$height==5] <- 63
NHIS.df$height[NHIS.df$height==6] <- 64
NHIS.df$height[NHIS.df$height==7] <- 65
NHIS.df$height[NHIS.df$height==8] <- 66
NHIS.df$height[NHIS.df$height==9] <- 67
NHIS.df$height[NHIS.df$height==10] <- 68
NHIS.df$height[NHIS.df$height==11] <- 69
NHIS.df$height[NHIS.df$height==12] <- 70
NHIS.df$height[NHIS.df$height==13] <- 71
NHIS.df$height[NHIS.df$height==14] <- 72
NHIS.df$height[NHIS.df$height==15] <- 73
NHIS.df$height[NHIS.df$height==16] <- 74
NHIS.df$height[NHIS.df$height==17] <- 75
NHIS.df$height[NHIS.df$height==18] <- 76
NHIS.df$height[NHIS.df$height==19] <- NA
NHIS.df$height[NHIS.df$height==20] <- NA
NHIS.df$height[NHIS.df$height==21] <- NA
NHIS.df$height[NHIS.df$height==22] <- NA
Table: Height
table_height <- table(NHIS.df$height)
Table: Age
table_age <- table(NHIS.df$age)
Table: Sex
table_sex <- table(NHIS.df$sex)
Recode: Sex
NHIS.df$sex[NHIS.df$sex==1] <- "Male"
NHIS.df$sex[NHIS.df$sex==2] <- "Female"
Dummy Variables Black, Asian, Female
NHIS.df$Black <- as.numeric(NHIS.df$racenew == "Black/African American")
NHIS.df1 <- cbind(NHIS.df, dummy(NHIS.df$racenew, sep = "_"))
NHIS.df2 <- cbind(NHIS.df1, dummy(NHIS.df$sex, sep = "_"))
Table: Black
table(NHIS.df2$`NHIS.df_Black/African American`)
## < table of extent 0 >
Table: Asian
table(NHIS.df2$NHIS.df_Asian)
## < table of extent 0 >
Table: Female
table(NHIS.df2$NHIS.df1_Female)
## < table of extent 0 >
Table: Male
table(NHIS.df2$NHIS.df1_Male)
## < table of extent 0 >
Regression, No Weights
colnames(NHIS.df2)[312]="NHIS.df_Asian"
colnames(NHIS.df2)[313]="NHIS.df_Black"
colnames(NHIS.df2)[320]="NHIS.df1_Female"
mlm_NOWEIGHTS <- lm(height ~ age + NHIS.df_Black + NHIS.df_Asian + NHIS.df1_Female, data=NHIS.df2)
mlm_NOWEIGHTS
##
## Call:
## lm(formula = height ~ age + NHIS.df_Black + NHIS.df_Asian + NHIS.df1_Female,
## data = NHIS.df2)
##
## Coefficients:
## (Intercept) age NHIS.df_Black NHIS.df_Asian
## 70.59734 -0.01689 0.18422 -1.73427
## NHIS.df1_Female
## -5.63264
summary(mlm_NOWEIGHTS)
##
## Call:
## lm(formula = height ~ age + NHIS.df_Black + NHIS.df_Asian + NHIS.df1_Female,
## data = NHIS.df2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.4775 -1.8880 -0.0062 1.8924 8.4036
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 70.5973408 0.0106047 6657.15 <2e-16 ***
## age -0.0168893 0.0001929 -87.55 <2e-16 ***
## NHIS.df_Black 0.1842159 0.0104233 17.67 <2e-16 ***
## NHIS.df_Asian -1.7342691 0.0178229 -97.31 <2e-16 ***
## NHIS.df1_Female -5.6326388 0.0069915 -805.64 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.625 on 573881 degrees of freedom
## (45651 observations deleted due to missingness)
## Multiple R-squared: 0.5392, Adjusted R-squared: 0.5392
## F-statistic: 1.679e+05 on 4 and 573881 DF, p-value: < 2.2e-16
Mutate year_strata
NHIS.df3 <- NHIS.df2 %>%
mutate(year_strata = year+(strata/10000))
summary(NHIS.df3$year_strata)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1998 2002 2007 2007 2013 2017
Setting up Survey Design
ex_strata_weights <- svydesign(id = ~psu, data=NHIS.df3,strata = ~year_strata,weights = ~sampweight, nest = TRUE)
ex_strata_weights
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (12757) clusters.
## svydesign(id = ~psu, data = NHIS.df3, strata = ~year_strata,
## weights = ~sampweight, nest = TRUE)
Regression, Weights
mlm_WEIGHTS_CSD <- svyglm(formula = height ~ age + NHIS.df_Black + NHIS.df_Asian + NHIS.df1_Female, design=ex_strata_weights)
mlm_WEIGHTS_CSD
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (12757) clusters.
## svydesign(id = ~psu, data = NHIS.df3, strata = ~year_strata,
## weights = ~sampweight, nest = TRUE)
##
## Call: svyglm(formula = height ~ age + NHIS.df_Black + NHIS.df_Asian +
## NHIS.df1_Female, design = ex_strata_weights)
##
## Coefficients:
## (Intercept) age NHIS.df_Black NHIS.df_Asian
## 70.73150 -0.01697 0.04149 -1.86301
## NHIS.df1_Female
## -5.66932
##
## Degrees of Freedom: 573885 Total (i.e. Null); 6650 Residual
## (45651 observations deleted due to missingness)
## Null Deviance: 8737000
## Residual Deviance: 3958000 AIC: 2879000
summary(mlm_WEIGHTS_CSD)
##
## Call:
## svyglm(formula = height ~ age + NHIS.df_Black + NHIS.df_Asian +
## NHIS.df1_Female, design = ex_strata_weights)
##
## Survey design:
## svydesign(id = ~psu, data = NHIS.df3, strata = ~year_strata,
## weights = ~sampweight, nest = TRUE)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 70.731497 0.014544 4863.299 < 2e-16 ***
## age -0.016965 0.000247 -68.686 < 2e-16 ***
## NHIS.df_Black 0.041489 0.014242 2.913 0.00359 **
## NHIS.df_Asian -1.863009 0.020592 -90.472 < 2e-16 ***
## NHIS.df1_Female -5.669318 0.008855 -640.268 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 6.896197)
##
## Number of Fisher Scoring iterations: 2