library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(haven)
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(knitr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(broom)
library(ggeffects)
library(ggstats)
library(gtsummary)
library(GGally)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(corrplot)
## corrplot 0.95 loaded
wave4 <- read_sas("C:/Users/tahia/OneDrive/Desktop/UAlbany PhD/Epi 553/Project/Datasets/Wave 4 dataset/doi-10.15139-s3-11920/w4inhome.sas7bdat")
wave5 <- read_xpt("C:/Users/tahia/OneDrive/Desktop/UAlbany PhD/Epi 553/Project/Datasets/Wave 5 dataset/doi-10.15139-s3-zyrz5j/pwave5.xpt")
wave4_sub <- wave4 %>%
select(
AID,
BIO_SEX4,
H4IR4,
H4ED2,
H4EC1,
H4SE6,
H4TO3,
H4TO35,
H4DA4,
H4DA5,
H4DA6,
H4DA7,
H4DA8,
H4SP5,
H4SP6,
H4EC5,
H4EC8,
H4LM11,
H4ID5H
)
glimpse(wave4_sub)
## Rows: 5,114
## Columns: 19
## $ AID <chr> "57101310", "57103869", "57109625", "57111071", "57113943", "…
## $ BIO_SEX4 <dbl> 2, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2…
## $ H4IR4 <dbl> 2, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 4, 2, 1, 1, 1, 1, 2, 2…
## $ H4ED2 <dbl> 3, 2, 2, 6, 6, 1, 6, 12, 7, 6, 3, 9, 3, 6, 4, 6, 5, 5, 6, 2, …
## $ H4EC1 <dbl> 9, 1, 5, 6, 9, 9, 9, 11, 12, 7, 8, 10, 6, 9, 6, 9, 3, 5, 98, …
## $ H4SE6 <dbl> 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 1…
## $ H4TO3 <dbl> 1, 0, 0, 0, 7, 1, 0, 7, 7, 0, 0, 0, 7, 7, 7, 0, 1, 1, 0, 1, 7…
## $ H4TO35 <dbl> 2, 97, 5, 4, 1, 98, 2, 2, 4, 1, 2, 3, 97, 2, 4, 4, 97, 2, 0, …
## $ H4DA4 <dbl> 0, 1, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ H4DA5 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 0, 0…
## $ H4DA6 <dbl> 0, 0, 2, 0, 3, 0, 0, 0, 4, 0, 3, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ H4DA7 <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ H4DA8 <dbl> 2, 2, 5, 0, 0, 0, 0, 0, 0, 2, 4, 0, 4, 1, 0, 0, 0, 2, 2, 0, 0…
## $ H4SP5 <dbl> 4, 0, 1, 0, 2, 0, 0, 0, 2, 0, 4, 2, 3, 2, 0, 0, 3, 2, 0, 4, 0…
## $ H4SP6 <dbl> 4, 0, 1, 0, 3, 0, 1, 0, 0, 1, 4, 4, 2, 4, 2, 0, 2, 2, 1, 4, 0…
## $ H4EC5 <dbl> 22000, 9999997, 9999997, 9999997, 9999997, 9999997, 9999997, …
## $ H4EC8 <dbl> 8, 1, 3, 5, 5, 4, 4, 2, 5, 2, 4, 3, 2, 5, 4, 7, 4, 2, 98, 1, …
## $ H4LM11 <dbl> 0, 0, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0…
## $ H4ID5H <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
wave5_sub <- wave5 %>%
select(
AID,
H5OD2A,
H5OD4A,
H5OD4B,
H5OD4C,
H5OD4D,
H5OD4E,
H5OD4F,
H5OD4G,
H5OD11,
H5EC1,
H5SE1,
H5TO1,
H5TO12,
H5ID25,
H5ID26,
H5ID27,
H5ID28,
H5ID29,
H5ID16,
H5EC5A,
H5EC5B,
H5EC5C,
H5LM5,
H5ID6G
)
glimpse(wave5_sub)
## Rows: 4,196
## Columns: 25
## $ AID <chr> "57101310", "57111071", "57111786", "57113943", "57117997", "57…
## $ H5OD2A <dbl> 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 1, …
## $ H5OD4A <dbl> 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, …
## $ H5OD4B <dbl> 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, …
## $ H5OD4C <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, …
## $ H5OD4D <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ H5OD4E <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ H5OD4F <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ H5OD4G <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ H5OD11 <dbl> 4, 8, 3, 6, 3, 9, 16, 12, 3, 3, 4, 2, 7, 5, 3, 2, 8, 11, 10, 7,…
## $ H5EC1 <dbl> 5, 8, 11, 9, 7, 5, 10, 9, 1, 4, 3, 10, 3, 5, 1, 2, 10, 10, 9, 8…
## $ H5SE1 <dbl> 1, 1, 1, 0, 1, 1, 1, 1, 1, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ H5TO1 <dbl> 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, …
## $ H5TO12 <dbl> 1, 4, 97, 2, 3, 0, 2, 3, 2, 0, 2, 3, 97, 2, 2, 0, 3, 2, 1, 4, 4…
## $ H5ID25 <dbl> 0, 2, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 5, 0, 0, 0, 0, …
## $ H5ID26 <dbl> 0, 0, 0, 3, 3, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, …
## $ H5ID27 <dbl> 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, …
## $ H5ID28 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, …
## $ H5ID29 <dbl> 0, 0, 0, 3, 3, 1, 0, 0, 7, 7, 3, 0, 0, 0, 0, 4, 2, 3, 1, 2, 0, …
## $ H5ID16 <dbl> 4, 0, 2, 4, 0, 2, 0, 2, 3, 0, 0, 0, 4, 1, 4, 2, 2, 2, 2, 0, 0, …
## $ H5EC5A <dbl> 3, 6, 6, 1, 1, 1, 7, 6, 1, 2, 1, 6, 2, 7, 5, 1, 6, 7, 6, 6, 6, …
## $ H5EC5B <dbl> 1, 3, 1, 2, 1, 3, 6, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 4, 1, 1, …
## $ H5EC5C <dbl> 3, 3, 2, 2, 2, 4, 2, 1, 2, 1, 2, 2, 8, 1, 2, 2, 3, 1, 2, 3, 5, …
## $ H5LM5 <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 2, 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, …
## $ H5ID6G <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, …
Sex Sex was obtained from the biological sex variable in the dataset. In Wave IV, the variable BIO_SEX4 was used, and in Wave V the variable H5OD2A was used. Responses that indicated refusal, “don’t know,” or other non-substantive answers were recoded as missing values. The variable was kept as a binary variable representing male and female.
Race Race was constructed differently for the two waves because the datasets recorded race in different ways. In Wave IV, the interviewer’s observation of the respondent’s race was used (H4IR4). In Wave V, race was measured using several race indicator variables (H5OD4A–H5OD4G). These variables were combined to create one race variable. The final race categories were coded as White, Black or African American, American Indian or Alaska Native, and Asian or Pacific Islander. Any other race categories or unclear responses were treated as missing.
Education Education level was recategorized to create three broader groups of educational attainment. In Wave IV the variable H4ED2 was used, and in Wave V the variable H5OD11 was used. The original categories were grouped into three new categories: high school or less, some or completed college, and some or completed graduate education or higher. This grouping allowed the education variable to be comparable across both waves.
Household Income Household income was obtained from the income variable H4EC1 in Wave IV and H5EC1 in Wave V. The original income categories were grouped into two levels. Income values corresponding to $74,999 or less were coded as the lower income group, and income values representing $75,000 or more were coded as the higher income group. This recoding created a simplified measure of income for the analysis.
Sexual Activity Sexual activity status was measured using the variable H4SE6 in Wave IV and H5SE1 in Wave V. The responses were recoded into a binary variable indicating whether the respondent was currently sexually active or not. Any responses indicating refusal or “don’t know” were treated as missing values.
Smoking Status Smoking status was obtained from the smoking behavior variables H4TO3 for Wave IV and H5TO1 for Wave V. The variable was recoded into a binary smoking variable. Responses coded as 7 or 0 were treated as non-smokers, while all other valid smoking responses were classified as smokers. Non-substantive responses were recoded as missing.
Alcohol Consumption Alcohol use was measured using the variable H4TO35 in Wave IV and H5TO12 in Wave V. The variable was recoded into a binary indicator of alcohol consumption. Responses coded as 97 or 0 were considered as no alcohol consumption, while other valid responses indicating alcohol use were coded as alcohol consumption.
Physical Activity Physical activity was measured using five exercise-related variables in Wave IV (H4DA4, H4DA5, H4DA6, H4DA7, and H4DA8). These variables represented participation in different types of physical activities. The variables were combined into one physical activity variable. If a respondent reported participating in any of the activities, the variable was coded as physically active. If no activity was reported, it was coded as not physically active.The same method was applied for Wave V dataset, the variables for Wave V dataset were H5ID25-H5ID29.
Sleep Trouble Sleep disturbance was constructed by combining two variables related to sleep problems, H4SP5 and H4SP6, in Wave IV. These variables asked about difficulty falling asleep and difficulty staying asleep. The responses were combined to create one sleep trouble variable. A value of 0 indicated no sleep trouble, while values from 1 to 5 were recoded to indicate the presence of sleep trouble.
Financial Burden Financial burden was measured using variables related to mortgage. In Wave IV, the variables H4EC5 was used, while in Wave V the variable H5EC5A was used. These variables were recoded into three categories representing no debt, moderate debt, and high debt. This recoding allowed financial strain to be represented as an ordered categorical variable.
Employment Status Employment status was obtained from the variables H4LM11 in Wave IV and H5LM5 in Wave V. The responses were recoded into a binary variable indicating whether the respondent was employed or not employed. Responses coded as 7 or 0 were treated as not employed, while the remaining valid responses were classified as employed.
Depression Depression was used as the outcome variable in the study. In Wave IV, the variable H4ID5H was used, and in Wave V the variable H5ID6G was used. These variables asked whether a doctor or other health care provider had ever told the respondent that they had depression. The responses were treated as a binary variable, with 1 indicating the presence of depression and 0 indicating no depression.
# WAVE 4 SUBSET RECODING
# Recoding the variables as needed
wave4_sub <- wave4_sub %>%
mutate(across(
c(BIO_SEX4, H4SE6, H4SP5, H4SP6, H4ID5H), #missing data accumulation ALL GENERAL
~replace(., . %in% c(6,7,8,9,96,97,98,99), NA)
)) %>%
mutate(across(
c(H4ED2, H4EC1, H4EC5, H4EC8), #missing data accumulation EDUCATION, INCOME, FIN BURDEN
~replace(., . %in% c(96,97,98,99,9999996,9999998), NA)
)) %>%
mutate(across(
c(H4DA4,H4DA5, H4DA6, H4DA7, H4DA8), #missing data accumulation ALL EXCERCISE
~replace(., . %in% c(8,9,96,97,98,99), NA)
)) %>%
mutate(across(
c(H4TO3, H4LM11), #missing data accumulation SMOKING STAT, EMPLOYMENT
~replace(., . %in% c(6,8,9,96,97,98,99), NA)
)) %>%
mutate(across(
c(H4TO35), #missing data accumulation ALCOHOL STAT
~replace(., . %in% c(7,8,9,96,98,99), NA)
))
summary(wave4_sub)
## AID BIO_SEX4 H4IR4 H4ED2
## Length:5114 Min. :1.00 Min. :1.000 Min. : 1.000
## Class :character 1st Qu.:1.00 1st Qu.:1.000 1st Qu.: 4.000
## Mode :character Median :2.00 Median :1.000 Median : 6.000
## Mean :1.54 Mean :1.351 Mean : 5.706
## 3rd Qu.:2.00 3rd Qu.:2.000 3rd Qu.: 7.000
## Max. :2.00 Max. :4.000 Max. :13.000
## NA's :5 NA's :1
## H4EC1 H4SE6 H4TO3 H4TO35
## Min. : 1 Min. :0.0000 Min. :0.000 Min. : 0.00
## 1st Qu.: 7 1st Qu.:1.0000 1st Qu.:1.000 1st Qu.: 2.00
## Median : 9 Median :1.0000 Median :1.000 Median : 4.00
## Mean : 8 Mean :0.9446 Mean :2.905 Mean :21.75
## 3rd Qu.:10 3rd Qu.:1.0000 3rd Qu.:7.000 3rd Qu.: 5.00
## Max. :12 Max. :1.0000 Max. :7.000 Max. :97.00
## NA's :353 NA's :24 NA's :4 NA's :10
## H4DA4 H4DA5 H4DA6 H4DA7
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.2769 Mean :0.7008 Mean :0.8736 Mean :0.3166
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :7.0000 Max. :7.0000 Max. :7.0000 Max. :7.0000
## NA's :3 NA's :3 NA's :3 NA's :3
## H4DA8 H4SP5 H4SP6 H4EC5
## Min. :0.000 Min. :0.00 Min. :0.000 Min. : 0
## 1st Qu.:0.000 1st Qu.:0.00 1st Qu.:0.000 1st Qu.: 130000
## Median :1.000 Median :1.00 Median :1.000 Median :9999997
## Mean :1.975 Mean :1.14 Mean :1.314 Mean :6142953
## 3rd Qu.:3.000 3rd Qu.:2.00 3rd Qu.:2.000 3rd Qu.:9999997
## Max. :7.000 Max. :5.00 Max. :5.000 Max. :9999997
## NA's :3 NA's :1 NA's :1 NA's :145
## H4EC8 H4LM11 H4ID5H
## Min. :1.000 Min. :0.000 Min. :0.0000
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:0.0000
## Median :4.000 Median :1.000 Median :0.0000
## Mean :3.707 Mean :1.803 Mean :0.1617
## 3rd Qu.:5.000 3rd Qu.:1.000 3rd Qu.:0.0000
## Max. :8.000 Max. :7.000 Max. :1.0000
## NA's :384 NA's :3 NA's :1
# WAVE 5 SUBSET RECODING
# Recoding the variables as needed
wave5_sub <- wave5_sub %>%
mutate(across(
c(H5OD2A, H5SE1, H5ID16, H5ID6G), #missing data accumulation ALL GENERAL
~replace(., . %in% c(6,7,8,9,96,97,98,99), NA)
)) %>%
mutate(across(
c(H5OD11, H5EC1, H5EC5A, H5EC5C), #missing data accumulation EDUCATION, INCOME, FIN BURDEN
~replace(., . %in% c(96,97,98,99,9999996,9999998), NA)
)) %>%
mutate(across(
c(H5ID25,H5ID26,H5ID27,H5ID28,H5ID29), #missing data accumulation ALL EXCERCISE
~replace(., . %in% c(8,9,96,97,98,99), NA)
)) %>%
mutate(across(
c(H5TO1, H5LM5), #missing data accumulation SMOKING STAT, EMPLOYMENT
~replace(., . %in% c(6,8,9,96,97,98,99), NA)
)) %>%
mutate(across(
c(H5TO12), #missing data accumulation ALCOHOL STAT
~replace(., . %in% c(7,8,9,96,98,99), NA)
))
summary(wave5_sub)
## AID H5OD2A H5OD4A H5OD4B
## Length:4196 Min. :1.000 Min. :0.0000 Min. :0.0000
## Class :character 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:0.0000
## Mode :character Median :2.000 Median :1.0000 Median :0.0000
## Mean :1.571 Mean :0.6797 Mean :0.2175
## 3rd Qu.:2.000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :2.000 Max. :1.0000 Max. :1.0000
## NA's :12 NA's :12
## H5OD4C H5OD4D H5OD4E H5OD4F
## Min. :0.00000 Min. :0.00000 Min. :0.000000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.000000 Median :0.00000
## Mean :0.09704 Mean :0.03728 Mean :0.009082 Mean :0.02892
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.000000 Max. :1.00000
## NA's :12 NA's :12 NA's :12 NA's :12
## H5OD4G H5OD11 H5EC1 H5SE1
## Min. :0.000000 Min. : 2.000 Min. : 1.00 Min. :0.0000
## 1st Qu.:0.000000 1st Qu.: 6.000 1st Qu.: 5.00 1st Qu.:1.0000
## Median :0.000000 Median : 9.000 Median : 8.00 Median :1.0000
## Mean :0.006214 Mean : 8.156 Mean : 7.17 Mean :0.9565
## 3rd Qu.:0.000000 3rd Qu.:10.000 3rd Qu.: 9.00 3rd Qu.:1.0000
## Max. :1.000000 Max. :16.000 Max. :13.00 Max. :1.0000
## NA's :12 NA's :5 NA's :71 NA's :33
## H5TO1 H5TO12 H5ID25 H5ID26
## Min. :0.0000 Min. : 0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.: 2.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median : 3.000 Median :0.0000 Median :0.0000
## Mean :0.4294 Mean : 9.616 Mean :0.6541 Mean :0.9596
## 3rd Qu.:1.0000 3rd Qu.: 4.000 3rd Qu.:0.0000 3rd Qu.:2.0000
## Max. :1.0000 Max. :97.000 Max. :7.0000 Max. :7.0000
## NA's :11 NA's :143 NA's :138
## H5ID27 H5ID28 H5ID29 H5ID16
## Min. :0.0000 Min. :0.0000 Min. :0.00 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00 1st Qu.:1.000
## Median :0.0000 Median :0.0000 Median :2.00 Median :2.000
## Mean :0.6539 Mean :0.2696 Mean :2.34 Mean :1.727
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:4.00 3rd Qu.:3.000
## Max. :7.0000 Max. :7.0000 Max. :7.00 Max. :4.000
## NA's :125 NA's :134 NA's :130 NA's :6
## H5EC5A H5EC5B H5EC5C H5LM5
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:1.000
## Median :5.000 Median :1.000 Median :3.000 Median :1.000
## Mean :3.927 Mean :2.341 Mean :2.858 Mean :1.178
## 3rd Qu.:6.000 3rd Qu.:3.750 3rd Qu.:4.000 3rd Qu.:1.000
## Max. :9.000 Max. :8.000 Max. :9.000 Max. :3.000
## NA's :64 NA's :70 NA's :70 NA's :10
## H5ID6G
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.2516
## 3rd Qu.:1.0000
## Max. :1.0000
## NA's :14
# WAVE 4, SORTING CATEGORIES;
wave4_sub <- wave4_sub %>%
mutate(
H4TO3 = replace(H4TO3, H4TO3 == 7, 0), # smoking: 7 - 0
H4TO35 = replace(H4TO35, H4TO35 == 97, 0), # alcohol: 97 - 0
H4LM11 = replace(H4LM11, H4LM11 == 7, 0), # employment: 7 - 0
H4EC5 = replace(H4EC5, H4EC5 == 9999997, 0) # house mortgage 9999997 - 0
)
# WAVE 5, SORTING CATEGORIES;
wave5_sub <- wave5_sub %>%
mutate(
H5TO1 = replace(H5TO1, H5TO1 == 7, 0), # smoking: 7 - 0
H5TO12 = replace(H5TO12, H5TO12 == 97, 0), # alcohol: 97 - 0
H5LM5 = replace(H5LM5, H5LM5 == 7, 0) # employment: 7 - 0
)
# RECODE AND ALCOHOL
table(wave4_sub$H4TO3, useNA = "ifany")
##
## 0 1 <NA>
## 2798 2312 4
table(wave5_sub$H5TO1, useNA = "ifany")
##
## 0 1 <NA>
## 2388 1797 11
#WAVE 4 AND 5, MERGING VARIABLES;
wave4_sub <- wave4_sub %>%
mutate(
sleep_trouble = ifelse(H4SP5 >= 1 | H4SP6 >= 1, 1, 0)
)
wave4_sub <- wave4_sub %>%
mutate(
exercise = ifelse(H4DA4 >= 1 | H4DA5 >= 1 | H4DA6 >= 1 | H4DA7 >= 1 | H4DA8 >= 1, 1, 0))
wave5_sub <- wave5_sub %>%
mutate(
exercise = ifelse(H5ID25 >= 1 | H5ID26 >= 1 | H5ID27 >= 1 | H5ID28 >= 1 | H5ID29 >= 1, 1, 0))
#MAKING NEW CATEGORY;
wave4_sub <- wave4_sub %>%
mutate(
mortgage = case_when(
H4EC5 == 0 ~ 0,
H4EC5 >= 20 & H4EC5 < 100000 ~ 1,
H4EC5 >= 100000 ~ 2,
TRUE ~ NA_real_))
wave4_sub <- wave4_sub %>%
mutate(alcohol = case_when(
H4TO35 == 0 ~ 0,
H4TO35 %in% 1:6 ~ 1,
TRUE ~ NA_real_))
wave5_sub <- wave5_sub %>%
mutate(
mortgage = case_when(
H5EC5A == 1 ~ 0,
H5EC5A >= 2 & H5EC5A <= 5 ~ 1,
H5EC5A >= 6 & H5EC5A <= 9 ~ 2,
TRUE ~ NA_real_))
wave5_sub <- wave5_sub %>%
mutate(alcohol = case_when(
H5TO12 == 0 ~ 0,
H5TO12 %in% 1:6 ~ 1,
TRUE ~ NA_real_))
wave5_sub <- wave5_sub %>%
mutate(sleep_trouble = case_when(
H5ID16 == 0 ~ 0,
H5ID16 %in% 1:4 ~ 1,
TRUE ~ NA_real_))
wave5_sub <- wave5_sub %>%
mutate(employment = case_when(
H5LM5 == 2:3 ~ 0,
H5LM5 %in% 1 ~ 1,
TRUE ~ NA_real_))
AVARAGE INCOME According to National Average Wage Index. (n.d.). Social Security. https://www.ssa.gov/oact/cola/AWI.html, the average annual income of USA in 2024 is 69,846.57. To align with this number, I choose to divide the yearly income category as close to the average number possible, which is 75K. so the 2 categories are below US average and above US average.
# RECATEGORY OF YEARLY INCOME;
wave4_sub <- wave4_sub %>%
mutate(
income_cat = case_when(
H4EC1 >= 1 & H4EC1 <= 9 ~ 0,
H4EC1 >= 10 & H4EC1 <= 12 ~ 1,
TRUE ~ NA_real_))
wave5_sub <- wave5_sub %>%
mutate(
income_cat = case_when(
H5EC1 >= 1 & H5EC1 <= 9 ~ 0,
H5EC1 >= 10 & H5EC1 <= 12 ~ 1,
TRUE ~ NA_real_))
#RECATEGORY OF EDUCATION
wave4_sub <- wave4_sub %>%
mutate(
education_cat = case_when(
H4ED2 >= 1 & H4ED2 <= 2 ~ 0,
H4ED2 >= 4 & H4ED2 <= 7 ~ 1,
H4ED2 >= 8 & H4ED2 <= 13 ~ 2,
TRUE ~ NA_real_))
wave5_sub <- wave5_sub %>%
mutate(
education_cat = case_when(
H5OD11 >= 2 & H5OD11 <= 4 ~ 0,
H5OD11 >= 5 & H5OD11 <= 10 ~ 1,
H5OD11 >= 11 & H5OD11 <= 16 ~ 2,
TRUE ~ NA_real_))
# MERGING AD RECODING RACE VARIABLE, WAVE 5;
wave5_sub <- wave5_sub %>%
mutate(
race = case_when(
H5OD4A == 1 ~ 1, # White
H5OD4B == 1 ~ 2, # Black
H5OD4F == 1 ~ 3, # American Indian / Alaska Native
H5OD4D == 1 | H5OD4E == 1 ~ 4, # Asian or Pacific Islander
TRUE ~ NA_real_ # Other or missing
)
)
table(wave5_sub$race, useNA = "ifany")
##
## 1 2 3 4 <NA>
## 2844 863 38 135 316
#LABELING VARIABLES
wave4_sub <- wave4_sub %>%
rename(
id = AID,
sex = BIO_SEX4,
race = H4IR4,
education = H4ED2,
income = H4EC1,
sexually_active = H4SE6,
smoking = H4TO3,
financial_debt = H4EC8,
employment = H4LM11,
depression = H4ID5H
)
wave5_sub <- wave5_sub %>%
rename(
id = AID,
sex = H5OD2A,
education = H5OD11,
income = H5EC1,
sexually_active = H5SE1,
smoking = H5TO1,
financial_debt = H5EC5C,
depression = H5ID6G
)
# KEEPING THE NEWLY MADE AND NEWLY CODED VARIABLES ONLY,
wave4_work <- wave4_sub %>%
select(
sex,
race,
education_cat,
income_cat,
sexually_active,
smoking,
alcohol,
exercise,
sleep_trouble,
mortgage,
financial_debt,
employment,
depression
)
wave5_work <- wave5_sub %>%
select(
sex,
race,
education_cat,
income_cat,
sexually_active,
smoking,
alcohol,
exercise,
sleep_trouble,
mortgage,
financial_debt,
employment,
depression
)
#CHECKING DATASETS
summary(wave4_work)
## sex race education_cat income_cat
## Min. :1.00 Min. :1.000 Min. :0.00 Min. :0.0000
## 1st Qu.:1.00 1st Qu.:1.000 1st Qu.:1.00 1st Qu.:0.0000
## Median :2.00 Median :1.000 Median :1.00 Median :0.0000
## Mean :1.54 Mean :1.351 Mean :1.06 Mean :0.2985
## 3rd Qu.:2.00 3rd Qu.:2.000 3rd Qu.:1.00 3rd Qu.:1.0000
## Max. :2.00 Max. :4.000 Max. :2.00 Max. :1.0000
## NA's :5 NA's :836 NA's :353
## sexually_active smoking alcohol exercise
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.0000
## Median :1.0000 Median :0.0000 Median :1.0000 Median :1.0000
## Mean :0.9446 Mean :0.4524 Mean :0.7188 Mean :0.7601
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## NA's :24 NA's :4 NA's :10 NA's :3
## sleep_trouble mortgage financial_debt employment
## Min. :0.0000 Min. :0.0000 Min. :1.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :4.000 Median :1.0000
## Mean :0.6864 Mean :0.5478 Mean :3.707 Mean :0.6576
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:5.000 3rd Qu.:1.0000
## Max. :1.0000 Max. :2.0000 Max. :8.000 Max. :1.0000
## NA's :2 NA's :145 NA's :384 NA's :3
## depression
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.1617
## 3rd Qu.:0.0000
## Max. :1.0000
## NA's :1
summary(wave5_work)
## sex race education_cat income_cat
## Min. :1.000 Min. :1.000 Min. :0.000 Min. :0.0000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:0.0000
## Median :2.000 Median :1.000 Median :1.000 Median :0.0000
## Mean :1.571 Mean :1.346 Mean :1.002 Mean :0.2168
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:1.000 3rd Qu.:0.0000
## Max. :2.000 Max. :4.000 Max. :2.000 Max. :1.0000
## NA's :316 NA's :5 NA's :178
## sexually_active smoking alcohol exercise
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:1.0000 1st Qu.:1.0000
## Median :1.0000 Median :0.0000 Median :1.0000 Median :1.0000
## Mean :0.9565 Mean :0.4294 Mean :0.8449 Mean :0.7919
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## NA's :33 NA's :11 NA's :130
## sleep_trouble mortgage financial_debt employment
## Min. :0.0000 Min. :0.000 Min. :1.000 Min. :0.0000
## 1st Qu.:1.0000 1st Qu.:0.000 1st Qu.:2.000 1st Qu.:1.0000
## Median :1.0000 Median :1.000 Median :3.000 Median :1.0000
## Mean :0.7632 Mean :1.067 Mean :2.858 Mean :0.9091
## 3rd Qu.:1.0000 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.:1.0000
## Max. :1.0000 Max. :2.000 Max. :9.000 Max. :1.0000
## NA's :6 NA's :64 NA's :70 NA's :369
## depression
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.2516
## 3rd Qu.:1.0000
## Max. :1.0000
## NA's :14
sapply(wave4_work, function(x) table(x, useNA = "ifany"))
## $sex
## x
## 1 2
## 2353 2761
##
## $race
## x
## 1 2 3 4 <NA>
## 3671 1240 41 157 5
##
## $education_cat
## x
## 0 1 2 <NA>
## 399 3223 656 836
##
## $income_cat
## x
## 0 1 <NA>
## 3340 1421 353
##
## $sexually_active
## x
## 0 1 <NA>
## 282 4808 24
##
## $smoking
## x
## 0 1 <NA>
## 2798 2312 4
##
## $alcohol
## x
## 0 1 <NA>
## 1435 3669 10
##
## $exercise
## x
## 0 1 <NA>
## 1226 3885 3
##
## $sleep_trouble
## x
## 0 1 <NA>
## 1603 3509 2
##
## $mortgage
## x
## 0 1 2 <NA>
## 3253 710 1006 145
##
## $financial_debt
## x
## 1 2 3 4 5 6 7 8 <NA>
## 607 696 711 1170 856 439 200 51 384
##
## $employment
## x
## 0 1 <NA>
## 1750 3361 3
##
## $depression
## x
## 0 1 <NA>
## 4286 827 1
sapply(wave5_work, function(x) table(x, useNA = "ifany"))
## $sex
## x
## 1 2
## 1802 2394
##
## $race
## x
## 1 2 3 4 <NA>
## 2844 863 38 135 316
##
## $education_cat
## x
## 0 1 2 <NA>
## 826 2529 836 5
##
## $income_cat
## x
## 0 1 <NA>
## 3147 871 178
##
## $sexually_active
## x
## 0 1 <NA>
## 181 3982 33
##
## $smoking
## x
## 0 1 <NA>
## 2388 1797 11
##
## $alcohol
## x
## 0 1
## 651 3545
##
## $exercise
## x
## 0 1 <NA>
## 846 3220 130
##
## $sleep_trouble
## x
## 0 1 <NA>
## 992 3198 6
##
## $mortgage
## x
## 0 1 2 <NA>
## 1409 1037 1686 64
##
## $financial_debt
## x
## 1 2 3 4 5 6 7 8 9 <NA>
## 601 1400 962 627 333 126 51 16 10 70
##
## $employment
## x
## 0 1 <NA>
## 348 3479 369
##
## $depression
## x
## 0 1 <NA>
## 3130 1052 14
nrow(wave4_sub)
## [1] 5114
nrow(wave5_sub)
## [1] 4196
table(wave4_work$depression, useNA="ifany")
##
## 0 1 <NA>
## 4286 827 1
table(wave5_work$depression, useNA="ifany")
##
## 0 1 <NA>
## 3130 1052 14
# Create a summary table of the WAVE 4 WORK data
tbl_w4 <- wave4_work %>%
select(-financial_debt) %>%
tbl_summary(
type = list(
income_cat ~ "categorical",
sexually_active ~ "categorical",
smoking ~ "categorical",
alcohol ~ "categorical",
exercise ~ "categorical",
sleep_trouble ~ "categorical",
employment ~ "categorical",
depression ~ "categorical"
),
missing = "ifany"
)
tbl_w4
| Characteristic | N = 5,1141 |
|---|---|
| BIOLOGICAL SEX-W4 | |
| 1 | 2,353 (46%) |
| 2 | 2,761 (54%) |
| S28Q4 RACE OF RESPONDENT BY FI-W4 | |
| 1 | 3,671 (72%) |
| 2 | 1,240 (24%) |
| 3 | 41 (0.8%) |
| 4 | 157 (3.1%) |
| Unknown | 5 |
| education_cat | |
| 0 | 399 (9.3%) |
| 1 | 3,223 (75%) |
| 2 | 656 (15%) |
| Unknown | 836 |
| income_cat | |
| 0 | 3,340 (70%) |
| 1 | 1,421 (30%) |
| Unknown | 353 |
| S15Q6 EVER HAVE VAGINAL SEX-W4 | |
| 0 | 282 (5.5%) |
| 1 | 4,808 (94%) |
| Unknown | 24 |
| S23Q3 EVER SMOKED CIGS REGULARLY-W4 | |
| 0 | 2,798 (55%) |
| 1 | 2,312 (45%) |
| Unknown | 4 |
| alcohol | |
| 0 | 1,435 (28%) |
| 1 | 3,669 (72%) |
| Unknown | 10 |
| exercise | |
| 0 | 1,226 (24%) |
| 1 | 3,885 (76%) |
| Unknown | 3 |
| sleep_trouble | |
| 0 | 1,603 (31%) |
| 1 | 3,509 (69%) |
| Unknown | 2 |
| mortgage | |
| 0 | 3,253 (65%) |
| 1 | 710 (14%) |
| 2 | 1,006 (20%) |
| Unknown | 145 |
| S11Q11 CURRENTLY WORK/10 HRS WEEK-W4 | |
| 0 | 1,750 (34%) |
| 1 | 3,361 (66%) |
| Unknown | 3 |
| S06Q5H EVER BEEN DX WITH DEPRESSION-W4 | |
| 0 | 4,286 (84%) |
| 1 | 827 (16%) |
| Unknown | 1 |
| 1 n (%) | |
# Create a summary table of the WAVE 5 WORK data
tbl_w5 <- wave5_work %>%
select(-financial_debt) %>%
tbl_summary(
type = list(
income_cat ~ "categorical",
sexually_active ~ "categorical",
smoking ~ "categorical",
alcohol ~ "categorical",
exercise ~ "categorical",
sleep_trouble ~ "categorical",
employment ~ "categorical",
depression ~ "categorical"
),
missing = "ifany"
)
tbl_w5
| Characteristic | N = 4,1961 |
|---|---|
| S1Q2A SEX ASSIGNED AT BIRTH-W5 | |
| 1 | 1,802 (43%) |
| 2 | 2,394 (57%) |
| race | |
| 1 | 2,844 (73%) |
| 2 | 863 (22%) |
| 3 | 38 (1.0%) |
| 4 | 135 (3.5%) |
| Unknown | 316 |
| education_cat | |
| 0 | 826 (20%) |
| 1 | 2,529 (60%) |
| 2 | 836 (20%) |
| Unknown | 5 |
| income_cat | |
| 0 | 3,147 (78%) |
| 1 | 871 (22%) |
| Unknown | 178 |
| S6Q1 EVER HAVE VAGINAL SEX-W5 | |
| 0 | 181 (4.3%) |
| 1 | 3,982 (96%) |
| Unknown | 33 |
| S7Q1 EVER SMOKED CIGS REGULARLY-W5 | |
| 0 | 2,388 (57%) |
| 1 | 1,797 (43%) |
| Unknown | 11 |
| alcohol | |
| 0 | 651 (16%) |
| 1 | 3,545 (84%) |
| exercise | |
| 0 | 846 (21%) |
| 1 | 3,220 (79%) |
| Unknown | 130 |
| sleep_trouble | |
| 0 | 992 (24%) |
| 1 | 3,198 (76%) |
| Unknown | 6 |
| mortgage | |
| 0 | 1,409 (34%) |
| 1 | 1,037 (25%) |
| 2 | 1,686 (41%) |
| Unknown | 64 |
| employment | |
| 0 | 348 (9.1%) |
| 1 | 3,479 (91%) |
| Unknown | 369 |
| S5Q6G EVER BEEN DX WITH DEPRESSION-W5 | |
| 0 | 3,130 (75%) |
| 1 | 1,052 (25%) |
| Unknown | 14 |
| 1 n (%) | |
tbl_merge(
tbls = list(tbl_w4, tbl_w5),
tab_spanner = c("**Wave 4**", "**Wave 5**")
)
## The number rows in the tables to be merged do not match, which may result in
## rows appearing out of order.
## ℹ See `tbl_merge()` (`?gtsummary::tbl_merge()`) help file for details. Use
## `quiet=TRUE` to silence message.
| Characteristic |
Wave 4
|
Wave 5
|
|---|---|---|
| N = 5,1141 | N = 4,1961 | |
| BIOLOGICAL SEX-W4 | ||
| 1 | 2,353 (46%) | |
| 2 | 2,761 (54%) | |
| S28Q4 RACE OF RESPONDENT BY FI-W4 | ||
| 1 | 3,671 (72%) | |
| 2 | 1,240 (24%) | |
| 3 | 41 (0.8%) | |
| 4 | 157 (3.1%) | |
| Unknown | 5 | |
| education_cat | ||
| 0 | 399 (9.3%) | 826 (20%) |
| 1 | 3,223 (75%) | 2,529 (60%) |
| 2 | 656 (15%) | 836 (20%) |
| Unknown | 836 | 5 |
| income_cat | ||
| 0 | 3,340 (70%) | 3,147 (78%) |
| 1 | 1,421 (30%) | 871 (22%) |
| Unknown | 353 | 178 |
| S15Q6 EVER HAVE VAGINAL SEX-W4 | ||
| 0 | 282 (5.5%) | |
| 1 | 4,808 (94%) | |
| Unknown | 24 | |
| S23Q3 EVER SMOKED CIGS REGULARLY-W4 | ||
| 0 | 2,798 (55%) | |
| 1 | 2,312 (45%) | |
| Unknown | 4 | |
| alcohol | ||
| 0 | 1,435 (28%) | 651 (16%) |
| 1 | 3,669 (72%) | 3,545 (84%) |
| Unknown | 10 | |
| exercise | ||
| 0 | 1,226 (24%) | 846 (21%) |
| 1 | 3,885 (76%) | 3,220 (79%) |
| Unknown | 3 | 130 |
| sleep_trouble | ||
| 0 | 1,603 (31%) | 992 (24%) |
| 1 | 3,509 (69%) | 3,198 (76%) |
| Unknown | 2 | 6 |
| mortgage | ||
| 0 | 3,253 (65%) | 1,409 (34%) |
| 1 | 710 (14%) | 1,037 (25%) |
| 2 | 1,006 (20%) | 1,686 (41%) |
| Unknown | 145 | 64 |
| S11Q11 CURRENTLY WORK/10 HRS WEEK-W4 | ||
| 0 | 1,750 (34%) | |
| 1 | 3,361 (66%) | |
| Unknown | 3 | |
| S06Q5H EVER BEEN DX WITH DEPRESSION-W4 | ||
| 0 | 4,286 (84%) | |
| 1 | 827 (16%) | |
| Unknown | 1 | |
| S1Q2A SEX ASSIGNED AT BIRTH-W5 | ||
| 1 | 1,802 (43%) | |
| 2 | 2,394 (57%) | |
| race | ||
| 1 | 2,844 (73%) | |
| 2 | 863 (22%) | |
| 3 | 38 (1.0%) | |
| 4 | 135 (3.5%) | |
| Unknown | 316 | |
| S6Q1 EVER HAVE VAGINAL SEX-W5 | ||
| 0 | 181 (4.3%) | |
| 1 | 3,982 (96%) | |
| Unknown | 33 | |
| S7Q1 EVER SMOKED CIGS REGULARLY-W5 | ||
| 0 | 2,388 (57%) | |
| 1 | 1,797 (43%) | |
| Unknown | 11 | |
| employment | ||
| 0 | 348 (9.1%) | |
| 1 | 3,479 (91%) | |
| Unknown | 369 | |
| S5Q6G EVER BEEN DX WITH DEPRESSION-W5 | ||
| 0 | 3,130 (75%) | |
| 1 | 1,052 (25%) | |
| Unknown | 14 | |
| 1 n (%) | ||