Libraries
library(tidyverse)
## -- Attaching packages ------------------------------------------------ tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.3
## v tidyr 1.1.1 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts --------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(patchwork)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
library(ggsn)
## Loading required package: grid
library(foreign)
library(survey)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
Import data
nhanes_18 <- read.xport("C:/Users/maman/OneDrive/DEM Spring 2021/DEM 7283 Stats 2/DEMO_J.XPT",
NULL)
Fix names
nams<-names(nhanes_18)
head(nams, n=10)
## [1] "SEQN" "SDDSRVYR" "RIDSTATR" "RIAGENDR" "RIDAGEYR" "RIDAGEMN"
## [7] "RIDRETH1" "RIDRETH3" "RIDEXMON" "RIDEXAGM"
newnames<-tolower(gsub(pattern = "_",replacement = "",x = nams))
names(nhanes_18)<-newnames
Recode variables
#sex
nhanes_18$male <- recode(nhanes_18$riagendr, recodes = "2=0; 1=1; else=NA", as.factor=T)
#race/ethnicity
nhanes_18$hispanic <- Recode(nhanes_18$ridreth3, recodes = "1=1; 2=1; else=0")
nhanes_18$black<-Recode(nhanes_18$ridreth3, recodes="4=1; else=0")
nhanes_18$white<-Recode(nhanes_18$ridreth3, recodes="3=1; else=0")
nhanes_18$asian<-Recode(nhanes_18$ridreth3, recodes="6=1; else=0")
nhanes_18$other<-Recode(nhanes_18$ridreth3, recodes="7=1; else=0")
# at or below poverty threshold using income to poverty ratio
nhanes_18$pov<-ifelse(nhanes_18$indfmpir <= 1, "at or below poverty",
ifelse(nhanes_18$indfmpir > 1, "above poverty", NA))
# education
nhanes_18$educ <- Recode(nhanes_18$dmdeduc2, recodes="1:2='< hs'; 3='highschool'; 4='some col'; 5='col grad'; 7:9=NA; else=NA", as.factor=T)
# marital status
nhanes_18$mar <- Recode(nhanes_18$dmdmartl, recodes="1='married'; 2='widowed'; 3='divorced'; 4='separated'; 5='nm'; 6='cohab'; else=NA", as.factor=T)
Simple table counts and percentages
table(nhanes_18$pov, nhanes_18$educ)
##
## < hs col grad highschool some col
## above poverty 576 1099 869 1326
## at or below poverty 319 64 287 234
100*prop.table(table(nhanes_18$pov, nhanes_18$educ), margin=2)
##
## < hs col grad highschool some col
## above poverty 64.357542 94.496991 75.173010 85.000000
## at or below poverty 35.642458 5.503009 24.826990 15.000000
chisq.test(table(nhanes_18$pov, nhanes_18$educ))
##
## Pearson's Chi-squared test
##
## data: table(nhanes_18$pov, nhanes_18$educ)
## X-squared = 341.33, df = 3, p-value < 2.2e-16
Create survey design object
options(survey.lonely.psu = "adjust")
dog<-svydesign(ids=~sdmvpsu, strata=~sdmvstra, weights=~wtint2yr, nest=TRUE, data = nhanes_18 )
Making my table one
library(tableone)
# without survey design
t1<-CreateTableOne(vars = c("educ", "mar"), strata = "pov", test = T, data = nhanes_18)
#t1<-print(t1, format="p")
print(t1,format="p")
## Stratified by pov
## above poverty at or below poverty p test
## n 6140 1883
## educ (%) <0.001
## < hs 14.9 35.3
## col grad 28.4 7.1
## highschool 22.5 31.7
## some col 34.3 25.9
## mar (%) <0.001
## cohab 8.2 14.3
## divorced 11.2 15.2
## married 54.1 31.2
## nm 15.7 24.6
## separated 2.7 6.5
## widowed 8.2 8.2
#using survey design
st1<-svyCreateTableOne(vars = c("educ", "mar"), strata = "pov", test = T, data = dog)
#st1<-print(st1, format="p")
print(st1, format="p")
## Stratified by pov
## above poverty at or below poverty p test
## n 238359161.5 45604858.6
## educ (%) <0.001
## < hs 8.1 25.9
## col grad 34.7 6.5
## highschool 25.4 40.9
## some col 31.9 26.7
## mar (%) <0.001
## cohab 8.4 16.5
## divorced 10.2 14.3
## married 56.7 30.2
## nm 17.1 27.0
## separated 2.0 6.2
## widowed 5.7 5.8
# The binary outcome variable used was poverty status. This variable was recoded from the variable indfmpir, or income to poverty ratio, given in the 2017-18 NHANES wave. Any values of indfmpir greater than 1 were recoded as "above poverty", while values less than or equal to 1 were recoded as "at or below poverty".
# Do factors like sex, race/ethnicity, educational attainment, and marital status influence the likelihood of individuals being below the poverty threshold?
# Educational attainment (educ) was defined as the maximum education level reached, categories included less than high school (< hs), high school diploma (highschool), attended college (some col), and college graduate (col grad). Marital status (mar) was defined as married, divorced, separated, never married (nm), widowed, and living together (cohab).
# Descriptive statistics
# unweighted tableone
print(t1,format="p")
## Stratified by pov
## above poverty at or below poverty p test
## n 6140 1883
## educ (%) <0.001
## < hs 14.9 35.3
## col grad 28.4 7.1
## highschool 22.5 31.7
## some col 34.3 25.9
## mar (%) <0.001
## cohab 8.2 14.3
## divorced 11.2 15.2
## married 54.1 31.2
## nm 15.7 24.6
## separated 2.7 6.5
## widowed 8.2 8.2
# weighted tableone
print(st1, format="p")
## Stratified by pov
## above poverty at or below poverty p test
## n 238359161.5 45604858.6
## educ (%) <0.001
## < hs 8.1 25.9
## col grad 34.7 6.5
## highschool 25.4 40.9
## some col 31.9 26.7
## mar (%) <0.001
## cohab 8.4 16.5
## divorced 10.2 14.3
## married 56.7 30.2
## nm 17.1 27.0
## separated 2.0 6.2
## widowed 5.7 5.8
# It's clear that discrepencies are present when comparing unweighted to weighted descriptive analyses. For example, the percentage of individuals at or below the poverty line with less than a high school education shifted by 10 percentage points! This illustrates the importance of using appropriate survey design analysis when working with data sets that utilize complex survey design.