Libraries

library(tidyverse)
## -- Attaching packages ------------------------------------------------ tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.3
## v tidyr   1.1.1     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts --------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(patchwork)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
library(ggsn)
## Loading required package: grid
library(foreign)
library(survey)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart

Import data

nhanes_18 <- read.xport("C:/Users/maman/OneDrive/DEM Spring 2021/DEM 7283 Stats 2/DEMO_J.XPT", 
    NULL)

Fix names

nams<-names(nhanes_18)
head(nams, n=10)
##  [1] "SEQN"     "SDDSRVYR" "RIDSTATR" "RIAGENDR" "RIDAGEYR" "RIDAGEMN"
##  [7] "RIDRETH1" "RIDRETH3" "RIDEXMON" "RIDEXAGM"
newnames<-tolower(gsub(pattern = "_",replacement =  "",x =  nams))
names(nhanes_18)<-newnames

Recode variables

#sex
nhanes_18$male <- recode(nhanes_18$riagendr, recodes = "2=0; 1=1; else=NA", as.factor=T)

#race/ethnicity
nhanes_18$hispanic <- Recode(nhanes_18$ridreth3, recodes = "1=1; 2=1; else=0")
nhanes_18$black<-Recode(nhanes_18$ridreth3, recodes="4=1; else=0")
nhanes_18$white<-Recode(nhanes_18$ridreth3, recodes="3=1; else=0")
nhanes_18$asian<-Recode(nhanes_18$ridreth3, recodes="6=1; else=0")
nhanes_18$other<-Recode(nhanes_18$ridreth3, recodes="7=1; else=0")

# at or below poverty threshold using income to poverty ratio
nhanes_18$pov<-ifelse(nhanes_18$indfmpir <= 1, "at or below poverty", 
                      ifelse(nhanes_18$indfmpir > 1, "above poverty", NA))

# education
nhanes_18$educ <- Recode(nhanes_18$dmdeduc2, recodes="1:2='< hs'; 3='highschool'; 4='some col'; 5='col grad'; 7:9=NA; else=NA", as.factor=T)

# marital status
nhanes_18$mar <- Recode(nhanes_18$dmdmartl, recodes="1='married'; 2='widowed'; 3='divorced'; 4='separated'; 5='nm'; 6='cohab'; else=NA", as.factor=T)

Simple table counts and percentages

table(nhanes_18$pov, nhanes_18$educ)
##                      
##                       < hs col grad highschool some col
##   above poverty        576     1099        869     1326
##   at or below poverty  319       64        287      234
100*prop.table(table(nhanes_18$pov, nhanes_18$educ), margin=2)
##                      
##                            < hs  col grad highschool  some col
##   above poverty       64.357542 94.496991  75.173010 85.000000
##   at or below poverty 35.642458  5.503009  24.826990 15.000000
chisq.test(table(nhanes_18$pov, nhanes_18$educ))
## 
##  Pearson's Chi-squared test
## 
## data:  table(nhanes_18$pov, nhanes_18$educ)
## X-squared = 341.33, df = 3, p-value < 2.2e-16

Create survey design object

options(survey.lonely.psu = "adjust")

dog<-svydesign(ids=~sdmvpsu, strata=~sdmvstra, weights=~wtint2yr, nest=TRUE, data = nhanes_18 )

Making my table one

library(tableone)

# without survey design
t1<-CreateTableOne(vars = c("educ", "mar"), strata = "pov", test = T, data = nhanes_18)
#t1<-print(t1, format="p")
print(t1,format="p")
##                Stratified by pov
##                 above poverty at or below poverty p      test
##   n             6140          1883                           
##   educ (%)                                        <0.001     
##      < hs       14.9          35.3                           
##      col grad   28.4           7.1                           
##      highschool 22.5          31.7                           
##      some col   34.3          25.9                           
##   mar (%)                                         <0.001     
##      cohab       8.2          14.3                           
##      divorced   11.2          15.2                           
##      married    54.1          31.2                           
##      nm         15.7          24.6                           
##      separated   2.7           6.5                           
##      widowed     8.2           8.2
#using survey design
st1<-svyCreateTableOne(vars = c("educ", "mar"), strata = "pov", test = T, data = dog)
#st1<-print(st1, format="p")
print(st1, format="p")
##                Stratified by pov
##                 above poverty at or below poverty p      test
##   n             238359161.5   45604858.6                     
##   educ (%)                                        <0.001     
##      < hs               8.1         25.9                     
##      col grad          34.7          6.5                     
##      highschool        25.4         40.9                     
##      some col          31.9         26.7                     
##   mar (%)                                         <0.001     
##      cohab              8.4         16.5                     
##      divorced          10.2         14.3                     
##      married           56.7         30.2                     
##      nm                17.1         27.0                     
##      separated          2.0          6.2                     
##      widowed            5.7          5.8

HW 2 Answers

# The binary outcome variable used was poverty status. This variable was recoded from the variable indfmpir, or income to poverty ratio, given in the 2017-18 NHANES wave. Any values of indfmpir greater than 1 were recoded as "above poverty", while values less than or equal to 1 were recoded as "at or below poverty". 
# Do factors like sex, race/ethnicity, educational attainment, and marital status influence the likelihood of individuals being below the poverty threshold?
# Educational attainment (educ) was defined as the maximum education level reached, categories included less than high school (< hs), high school diploma (highschool), attended college (some col), and college graduate (col grad). Marital status (mar) was defined as married, divorced, separated, never married (nm), widowed, and living together (cohab). 
# Descriptive statistics

# unweighted tableone 
print(t1,format="p")
##                Stratified by pov
##                 above poverty at or below poverty p      test
##   n             6140          1883                           
##   educ (%)                                        <0.001     
##      < hs       14.9          35.3                           
##      col grad   28.4           7.1                           
##      highschool 22.5          31.7                           
##      some col   34.3          25.9                           
##   mar (%)                                         <0.001     
##      cohab       8.2          14.3                           
##      divorced   11.2          15.2                           
##      married    54.1          31.2                           
##      nm         15.7          24.6                           
##      separated   2.7           6.5                           
##      widowed     8.2           8.2
# weighted tableone
print(st1, format="p")
##                Stratified by pov
##                 above poverty at or below poverty p      test
##   n             238359161.5   45604858.6                     
##   educ (%)                                        <0.001     
##      < hs               8.1         25.9                     
##      col grad          34.7          6.5                     
##      highschool        25.4         40.9                     
##      some col          31.9         26.7                     
##   mar (%)                                         <0.001     
##      cohab              8.4         16.5                     
##      divorced          10.2         14.3                     
##      married           56.7         30.2                     
##      nm                17.1         27.0                     
##      separated          2.0          6.2                     
##      widowed            5.7          5.8
# It's clear that discrepencies are present when comparing unweighted to weighted descriptive analyses. For example, the percentage of individuals at or below the poverty line with less than a high school education shifted by 10 percentage points! This illustrates the importance of using appropriate survey design analysis when working with data sets that utilize complex survey design.