DEM 7283 HW2

Libraries

library(tidyverse)

## -- Attaching packages ------------------------------------------------ tidyverse 1.3.0 --

## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.3
## v tidyr   1.1.1     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0

## -- Conflicts --------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(patchwork)
library(car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

## The following object is masked from 'package:purrr':
## 
##     some

library(ggsn)

## Loading required package: grid

library(foreign)
library(survey)

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

## Loading required package: survival

## 
## Attaching package: 'survey'

## The following object is masked from 'package:graphics':
## 
##     dotchart

Import data

# Load the SAS transport (XPORT) file DEMO_J.XPT into `nhanes_18`.
# NOTE(review): the trailing NULL is forwarded through read.xport()'s `...`;
# presumably it is consumed as as.data.frame()'s row.names -- confirm.
nhanes_18 <- read.xport(
  file = "C:/Users/maman/OneDrive/DEM Spring 2021/DEM 7283 Stats 2/DEMO_J.XPT",
  NULL
)

Fix names

# Capture the column names as imported and preview the first ten of them.
nams <- names(nhanes_18)
head(nams, 10L)

##  [1] "SEQN"     "SDDSRVYR" "RIDSTATR" "RIAGENDR" "RIDAGEYR" "RIDAGEMN"
##  [7] "RIDRETH1" "RIDRETH3" "RIDEXMON" "RIDEXAGM"

# Normalise the column names: strip underscores first, then lower-case.
newnames <- gsub(pattern = "_", replacement = "", x = nams)
newnames <- tolower(newnames)
names(nhanes_18) <- newnames

Recode variables

# All recodes use car's Recode() alias rather than recode(): the lower-case
# name is also exported by dplyr, so which function runs would otherwise
# depend on the order in which the packages were attached.

# sex: factor indicator, 1 when riagendr == 1, 0 when riagendr == 2,
# NA for any other code
nhanes_18$male <- Recode(
  nhanes_18$riagendr,
  recodes = "2=0; 1=1; else=NA",
  as.factor = TRUE
)

# race/ethnicity: one 0/1 indicator per group, derived from ridreth3
# (codes 1 and 2 -> hispanic, 3 -> white, 4 -> black, 6 -> asian, 7 -> other)
nhanes_18$hispanic <- Recode(nhanes_18$ridreth3, recodes = "1=1; 2=1; else=0")
nhanes_18$black <- Recode(nhanes_18$ridreth3, recodes = "4=1; else=0")
nhanes_18$white <- Recode(nhanes_18$ridreth3, recodes = "3=1; else=0")
nhanes_18$asian <- Recode(nhanes_18$ridreth3, recodes = "6=1; else=0")
nhanes_18$other <- Recode(nhanes_18$ridreth3, recodes = "7=1; else=0")

# at or below poverty threshold using income to poverty ratio
# A single ifelse() is enough: a missing indfmpir makes the test NA, and
# ifelse() returns NA for those rows, so no explicit NA branch is needed.
nhanes_18$pov <- ifelse(
  nhanes_18$indfmpir <= 1,
  "at or below poverty",
  "above poverty"
)

# education: collapse dmdeduc2 into four labelled levels; every other code
# becomes NA
nhanes_18$educ <- Recode(
  nhanes_18$dmdeduc2,
  recodes = "1:2='< hs'; 3='highschool'; 4='some col'; 5='col grad'; 7:9=NA; else=NA",
  as.factor = TRUE
)

# marital status: label dmdmartl codes 1-6; every other code becomes NA
nhanes_18$mar <- Recode(
  nhanes_18$dmdmartl,
  recodes = "1='married'; 2='widowed'; 3='divorced'; 4='separated'; 5='nm'; 6='cohab'; else=NA",
  as.factor = TRUE
)

Simple table counts and percentages

# Unweighted cross-tabulation: poverty status (rows) by education (columns).
table(nhanes_18[["pov"]], nhanes_18[["educ"]])

##                      
##                       < hs col grad highschool some col
##   above poverty        576     1099        869     1326
##   at or below poverty  319       64        287      234

# Column percentages (margin = 2): within each education level, the share
# of respondents in each poverty category.
prop.table(
  table(nhanes_18[["pov"]], nhanes_18[["educ"]]),
  margin = 2
) * 100

##                      
##                            < hs  col grad highschool  some col
##   above poverty       64.357542 94.496991  75.173010 85.000000
##   at or below poverty 35.642458  5.503009  24.826990 15.000000

# Chi-squared test of independence between poverty status and education,
# run on the raw (unweighted) cross-tabulation; the survey design is not
# taken into account here.
chisq.test(table(nhanes_18$pov, nhanes_18$educ))

## 
##  Pearson's Chi-squared test
## 
## data:  table(nhanes_18$pov, nhanes_18$educ)
## X-squared = 341.33, df = 3, p-value < 2.2e-16

Create survey design object

# Tell the survey package to use its "adjust" method for strata that
# contain a single PSU.
options(survey.lonely.psu = "adjust")

# Survey design object used by the weighted analyses: PSUs from sdmvpsu,
# strata from sdmvstra, weights from wtint2yr. nest = TRUE because PSU ids
# are treated as nested within strata.
dog <- svydesign(
  ids = ~sdmvpsu,
  strata = ~sdmvstra,
  weights = ~wtint2yr,
  nest = TRUE,
  data = nhanes_18
)

Making my table one

library(tableone)

# Table 1 without the survey design: education and marital status,
# stratified by poverty status, computed on the raw data frame.
# test = TRUE requests the group-comparison p-values (spelled out as TRUE
# because T is an ordinary variable that can be reassigned).
t1 <- CreateTableOne(
  vars = c("educ", "mar"),
  strata = "pov",
  test = TRUE,
  data = nhanes_18
)
# format = "p" prints the categorical variables as percentages only.
print(t1, format = "p")

##                Stratified by pov
##                 above poverty at or below poverty p      test
##   n             6140          1883                           
##   educ (%)                                        <0.001     
##      < hs       14.9          35.3                           
##      col grad   28.4           7.1                           
##      highschool 22.5          31.7                           
##      some col   34.3          25.9                           
##   mar (%)                                         <0.001     
##      cohab       8.2          14.3                           
##      divorced   11.2          15.2                           
##      married    54.1          31.2                           
##      nm         15.7          24.6                           
##      separated   2.7           6.5                           
##      widowed     8.2           8.2

# Table 1 using the survey design object `dog`: same variables and
# stratification as the unweighted table, so the two can be compared.
# test = TRUE requests the group-comparison p-values (spelled out as TRUE
# because T is an ordinary variable that can be reassigned).
st1 <- svyCreateTableOne(
  vars = c("educ", "mar"),
  strata = "pov",
  test = TRUE,
  data = dog
)
# format = "p" prints the categorical variables as percentages only.
print(st1, format = "p")

##                Stratified by pov
##                 above poverty at or below poverty p      test
##   n             238359161.5   45604858.6                     
##   educ (%)                                        <0.001     
##      < hs               8.1         25.9                     
##      col grad          34.7          6.5                     
##      highschool        25.4         40.9                     
##      some col          31.9         26.7                     
##   mar (%)                                         <0.001     
##      cohab              8.4         16.5                     
##      divorced          10.2         14.3                     
##      married           56.7         30.2                     
##      nm                17.1         27.0                     
##      separated          2.0          6.2                     
##      widowed            5.7          5.8

HW 2 Answers

# The binary outcome variable used was poverty status. This variable was recoded from the variable indfmpir, or income to poverty ratio, given in the 2017-18 NHANES wave. Any values of indfmpir greater than 1 were recoded as "above poverty", while values less than or equal to 1 were recoded as "at or below poverty".

# Do factors like sex, race/ethnicity, educational attainment, and marital status influence the likelihood of individuals being below the poverty threshold?

# Educational attainment (educ) was defined as the maximum education level reached; categories included less than high school (< hs), high school diploma (highschool), attended college (some col), and college graduate (col grad). Marital status (mar) was categorized as married, divorced, separated, never married (nm), widowed, and living together (cohab).

# Descriptive statistics

# Table 1 computed without the survey design (percentages only).
print(x = t1, format = "p")

##                Stratified by pov
##                 above poverty at or below poverty p      test
##   n             6140          1883                           
##   educ (%)                                        <0.001     
##      < hs       14.9          35.3                           
##      col grad   28.4           7.1                           
##      highschool 22.5          31.7                           
##      some col   34.3          25.9                           
##   mar (%)                                         <0.001     
##      cohab       8.2          14.3                           
##      divorced   11.2          15.2                           
##      married    54.1          31.2                           
##      nm         15.7          24.6                           
##      separated   2.7           6.5                           
##      widowed     8.2           8.2

# Table 1 computed with the survey design (percentages only).
print(x = st1, format = "p")

##                Stratified by pov
##                 above poverty at or below poverty p      test
##   n             238359161.5   45604858.6                     
##   educ (%)                                        <0.001     
##      < hs               8.1         25.9                     
##      col grad          34.7          6.5                     
##      highschool        25.4         40.9                     
##      some col          31.9         26.7                     
##   mar (%)                                         <0.001     
##      cohab              8.4         16.5                     
##      divorced          10.2         14.3                     
##      married           56.7         30.2                     
##      nm                17.1         27.0                     
##      separated          2.0          6.2                     
##      widowed            5.7          5.8

# It's clear that discrepancies are present when comparing unweighted to weighted descriptive analyses. For example, the percentage of individuals at or below the poverty line with less than a high school education shifted by roughly 9 percentage points (35.3% unweighted vs. 25.9% weighted)! This illustrates the importance of using appropriate survey design analysis when working with data sets collected under a complex survey design.

DEM 7283 HW2

Daniel Mamani

2/4/2021

HW 2 Answers