Dissertation

Author

Jyoti Nepal

packages <- c("tidyverse", "knitr", "tibble", "kableExtra", "officer", "compareGroups", "car", "haven", "foreign", "tidyr", "magrittr", "labelled", "ggplot2", "foreign", "lme4", "lmerTest", "MuMIn", "MASS", "arm", "lattice", "pscl", "survival", "survey", "srvyr", "survminer", "effects", "gt", "gtsummary", "glue", "flextable", "huxtable", "modelsummary", "broom", "broom.mixed", "lattice", "cmprsk", "stargazer", "sjstats", "sjPlot", "sjmisc", "sjlabelled")
invisible(lapply(packages, function(x) require(x, character.only = T, quietly = T)))
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.2.1      ✔ stringr 1.4.1 
✔ readr   2.1.2      ✔ forcats 0.5.2 
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
Warning: package 'kableExtra' was built under R version 4.2.3

Attaching package: 'kableExtra'

The following object is masked from 'package:dplyr':

    group_rows
Warning: package 'officer' was built under R version 4.2.3

Attaching package: 'car'

The following object is masked from 'package:dplyr':

    recode

The following object is masked from 'package:purrr':

    some


Attaching package: 'magrittr'

The following object is masked from 'package:purrr':

    set_names

The following object is masked from 'package:tidyr':

    extract


Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack


Attaching package: 'MASS'

The following object is masked from 'package:dplyr':

    select


Attaching package: 'survey'

The following object is masked from 'package:graphics':

    dotchart


Attaching package: 'srvyr'

The following object is masked from 'package:MASS':

    select

The following object is masked from 'package:kableExtra':

    group_rows

The following object is masked from 'package:stats':

    filter


Attaching package: 'survminer'

The following object is masked from 'package:survival':

    myeloma
Warning: package 'gt' was built under R version 4.2.2

Attaching package: 'gtsummary'

The following object is masked from 'package:MASS':

    select
Warning: package 'flextable' was built under R version 4.2.3

Attaching package: 'flextable'

The following objects are masked from 'package:gtsummary':

    as_flextable, continuous_summary

The following objects are masked from 'package:ggpubr':

    border, font, rotate

The following objects are masked from 'package:kableExtra':

    as_image, footnote

The following object is masked from 'package:purrr':

    compose


Please cite as: 

 Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
 R package version 5.2.3. https://CRAN.R-project.org/package=stargazer 
library(eha)
library(survival)
library(muhaz)
library(dplyr)
library(psych)

Attaching package: 'psych'
The following object is masked from 'package:car':

    logit
The following objects are masked from 'package:ggplot2':

    %+%, alpha
library(reshape2)

Attaching package: 'reshape2'
The following object is masked from 'package:tidyr':

    smiths
library(xtable)
Warning: package 'xtable' was built under R version 4.2.3

Attaching package: 'xtable'
The following object is masked from 'package:flextable':

    align
library(plyr)
------------------------------------------------------------------------------
You have loaded plyr after dplyr - this is likely to cause problems.
If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
library(plyr); library(dplyr)
------------------------------------------------------------------------------

Attaching package: 'plyr'
The following object is masked from 'package:gtsummary':

    mutate
The following object is masked from 'package:ggpubr':

    mutate
The following objects are masked from 'package:srvyr':

    mutate, rename, summarise, summarize
The following objects are masked from 'package:dplyr':

    arrange, count, desc, failwith, id, mutate, rename, summarise,
    summarize
The following object is masked from 'package:purrr':

    compact
library(tidyr)
library(foreign)
library(car)
library(tableone)
Warning: package 'tableone' was built under R version 4.2.3
library(table1)
Warning: package 'table1' was built under R version 4.2.2

Attaching package: 'table1'
The following objects are masked from 'package:xtable':

    label, label<-
The following objects are masked from 'package:base':

    units, units<-
library(survey)
library(eha)
library(haven)
library(kableExtra)
library(janitor)
Warning: package 'janitor' was built under R version 4.2.3

Attaching package: 'janitor'
The following objects are masked from 'package:stats':

    chisq.test, fisher.test
options(survey.lonely.psu = "adjust")
 setwd("C:/Users/spara/OneDrive - University of Texas at San Antonio/Dissertation")

DATA

w3 <- as_tibble(read.delim("21600-0008-Data.tsv", sep = '\t', header = TRUE ))
w4 <- as_tibble(read.delim("21600-0022-Data.tsv", sep = '\t', header = TRUE ))
w5 <- as_tibble(read.delim("21600-0032-Data.tsv", sep = '\t', header = TRUE ))
wt <- as_tibble(read.delim("21600-0004-Data.tsv", sep = '\t', header = TRUE ))

names(w3)<-tolower(names(w3))
names(w4)<-tolower(names(w4))
names(w5)<-tolower(names(w5))
names(wt)<-tolower(names(wt))

Variable check

# List of variables to check
variables_to_check <- c("h4ma1","h3ma1","h3ma2","h3ma3","h4ma3","h3ma4","h4ma5","h3od38","h3od33","h3ma5","h3od33","h3ma6","h3id15","h4id5h"
)

# Check if variables exist in dataframe F

variables_exist <- variables_to_check %in% colnames(w3)

# Print the result
print(variables_exist)
 [1] FALSE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
[13]  TRUE FALSE
variables_exist <- variables_to_check %in% colnames(w4)

# Print the result
print(variables_exist)
 [1]  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE
[13] FALSE  TRUE
# Between W3 and W4 all variables are present
# Year of survey for wave 3
#summary(w3$iyear3)

#year of birth
#summary(w3$h3od1y)

# Current age: year of survey minus age at birth
w3$age <- (w3$iyear3 -w3$h3od1y)
summary(w3$age)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   18.0    21.0    22.0    22.2    24.0    28.0 
# Year of survey for wave 4
#summary(w4$iyear4)

#year of birth
#summary(w4$h4od1y)

# Current age is year of survey minus age at birth
w4$age <- (w4$iyear4 -w4$h4od1y)
summary(w4$age)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
     25      28      29      29      30      34 
#Current age: year of survey minus age at birth
w5$age <- (w5$iyear5 -w5$h5od1y)
summary(w5$age)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  33.00   36.00   37.00   37.32   39.00   42.00 

Maltreatment

## Childhood maltreatments

#Neglect

w3$neglect1<- car::Recode(w3$h3ma1,recodes="1:5='1'; 6='2'; else='3'")
w3$neglect2<- car::Recode(w3$h3ma2,recodes="1:5='1'; 6='2'; else='3'")
w4$neglect3<- car::Recode(w4$h4ma1,recodes="1:5='1'; 6='2'; else='3'")

w3$neg<- paste(w3$neglect1, w3$neglect2, sep = "")
table(w3$neg)

  11   12   13   21   22   23   31   32   33 
 320 1130   56   72 2256   17   13  142  127 
w3$neglect4<- car::Recode(w3$neg,"c(11,12,13,21,31)='1'; c(22,23,32)='2'; 33='3'")
table(w3$neglect4)

   1    2    3 
1591 2415  127 
#Physical

w3$physical2<- car::Recode(w3$h3ma3,recodes="1:5='1'; 6='2'; else='3'")
w4$physical1<- car::Recode(w4$h4ma3,recodes="1:5='1'; 6='2'; else='3'")


#Sexual

w3$sexual1<- car::Recode(w3$h3ma4,recodes="1:5='1'; 6='2'; else='3'")
w4$sexual3<- car::Recode(w4$h4ma5,recodes="1:5='1'; 6='2'; else='3'")

#CM_general

#w3$cm1 <- car::Recode(w3$h3od38 ,recodes="1='1'; 0='2'; else='3'")
#w3$cm2 <- car::Recode(w3$h3od33 ,recodes="1='1'; 0='2'; else='3'")

#w3$cm <- paste(w3$cm1,w3$cm2,sep = "")
#table(w3$cm)

#w3$cm_agg <- car::Recode(w3$cm,"c(11,12,13,21,31) = '1';c(22,23,32)='2'; 33='3'")
#table(w3$cm_agg)

SES

#Employment status

#h3da28: Do you currently have a job?
#w3$empstat<- car::Recode(w3$h3da28,recodes="1=1; 0=0; else=NA")

#H4LM11: Do you currently work 10 hours per week?
w4$empstat<- car::Recode(w4$h4lm11,recodes="1='Employed'; 0='Unemployed'; else=NA")


#Race/Ethnicity
w3$hisp <- car::Recode(w3$h3od2, recodes = "1= 'Hisp_' ; 0= 'NH_'; else=NA")
w3$race <- car::Recode(w3$h3ir4, recodes = "1= 'White'; 2= 'Black'; 3 = 'Asian'; 4='Other'; else=NA")
w3$race_eth<-ifelse(w3$hisp ==1, "hisp", w3$race)
w3$race_eth <- interaction(w3$hisp, w3$race)


  w3$race_ethr<- mutate(w3, ifelse(hisp==0 & race==1, 1,
                    ifelse(hisp==0 & race==2, 2,
                     ifelse(hisp==1, 3,
                    ifelse(hisp==0 & race==3, 4,
                     ifelse(hisp==0 & race==4, 5, "NA"))))))
w3$race_ethr<- ifelse(substr(w3$race_eth, 1, 4)=="hisp", "hisp", as.character(w3$race_eth))

#Education
#What is the highest grade or year of regular school you have completed?

w3$educ <- car::Recode(w3$h3ed1, recodes = "6:12= 'upto HS' ; 13:22= 'higher than HS'; else=NA")


w4$educ <- car:: Recode(w4$h4ed2, recodes = "1:3 =  'upto to HS'; 4:13 = 'higher than HS'; else = NA")

Depression

# Depression

## h3id15: Have you ever been diagnosed with depression
##h4id5h: Has a doctor, nurse or other health care provider ever told you that you have or had: depression?

w3$dep1 <-  car::Recode(w3$h3id15,recodes="1='1'; 0='2'; else='3'")

w4$dep2 <-  car::Recode(w4$h4id5h,recodes="1='1'; 0='2'; else='3'")

Merging all waves

# Combine all maltreatment
allwaves <- left_join(w4, w3, by="aid")
allwaves <- left_join(allwaves, w5, by="aid")
allwaves <- left_join(allwaves,wt, by = "aid")


#Neglect

allwaves$neglect4<-car::Recode(allwaves$neglect4, recodes= "1 = '1';2 ='2'; else = '3'")
allwaves$negjoin <- paste(allwaves$neglect3, allwaves$neglect4, sep = "")

allwaves$neglect<- car::Recode(allwaves$negjoin,"c(11,12,13,21,31)='Yes'; c(22,23,32)='No'; else = NA",as.factor = T)


#Sexual Abuse

allwaves$sexual1<-car::Recode(allwaves$sexual1, recodes= "1 = '1';2 ='2'; else = '3'")
allwaves$sexjoin<- paste(allwaves$sexual3, allwaves$sexual1, sep = "")

allwaves$sexual<- car::Recode(allwaves$sexjoin,"c(11,12,13,21,31)='Yes'; c(22,23,32)='No'; else = NA",as.factor = T)


#Physical Abuse
allwaves$physical2<-car::Recode(allwaves$physical2, recodes= "1 = '1';2 ='2'; else = '3'")
allwaves$phyjoin<- paste(allwaves$physical1, allwaves$physical2, sep = "")

allwaves$physicalc<- car::Recode(allwaves$phyjoin,"c(11,12,13,21,31)='Yes'; c(22,23,32)='No'; else = NA", as.factor = T)


#CM_general
allwaves$cm_agg1 <- car::Recode(allwaves$cm_agg , recodes= "1 = '1';2 ='2'; else = '3'")
Warning: Unknown or uninitialised column: `cm_agg`.
allwaves$cmjoin<- paste(allwaves$cm_agg1, allwaves$cm_agg, sep = "")
Warning: Unknown or uninitialised column: `cm_agg`.
allwaves$cm_gen<- car::Recode(allwaves$cmjoin,"c(11,12,13,21,31)='Yes'; c(22,23,32)='No'; else = NA", as.factor = T)


#Depression
allwaves$dep1<-car::Recode(allwaves$dep1, recodes= "1 = '1';2 ='2'; else = '3'")
allwaves$depjoin<- paste(allwaves$dep1, allwaves$dep2, sep = "")

allwaves$depression<- car::Recode(allwaves$depjoin,"c(11,12,13,21,31)='Yes'; c(22,23,32)='No'; else = NA", as.factor = T)




sub <- allwaves%>%
  select(depression,sexual,physicalc,neglect,educ.x,race_ethr, empstat,age.y,cluster2,gswgt1) 
  #filter(complete.cases(.))

options(survey.lonely.psu = "adjust")
des<-svydesign(ids = ~1,
               strata = ~cluster2,
               weights = ~gswgt1,
               data = sub)

Count data entries

#Neglect
countneglect <- sub%>%
  group_by(neglect)%>%
  dplyr::summarise(Neglect = n())
countneglect
# A tibble: 3 × 2
  neglect Neglect
  <fct>     <int>
1 No         2099
2 Yes        2982
3 <NA>         33
#Sexual abuse
countsexual <- sub%>%
  group_by(sexual)%>%
  dplyr::summarise(Sexual_Abuse = n())
countsexual
# A tibble: 3 × 2
  sexual Sexual_Abuse
  <fct>         <int>
1 No             4729
2 Yes             360
3 <NA>             25
#Physical abuse
countphysical <- sub%>%
  group_by(physicalc)%>%
  dplyr::summarise(Physical_Abuse = n())
countphysical
# A tibble: 3 × 2
  physicalc Physical_Abuse
  <fct>              <int>
1 No                  3603
2 Yes                 1481
3 <NA>                  30
##CM_General
#countcm <- sub%>%
 # group_by(cm_gen)%>%
  #dplyr::summarise(CM_General = n())
#countcm

##depression
countdepression <- sub%>%
  group_by(depression)%>%
  dplyr::summarise(Depression= n())
countdepression
# A tibble: 3 × 2
  depression Depression
  <fct>           <int>
1 No               4110
2 Yes              1003
3 <NA>                1
#label(sub$depression) <- "Depression"
#label(sub$sexual) <- "Sexual Abuse"
#label(sub$physicalc) <-"Physical Abuse"
#label(sub$neglect) <- "Neglect"
#label(data$empstat)<- "Employment Status"
#label(data$race)<- "Race"
#label(data$mars)<- "Marital Status"

Frequency Tables

result <- CreateTableOne(vars = c("neglect","physicalc","sexual"), strata ="depression",data = sub)
print(result)
                     Stratified by depression
                      No           Yes          p      test
  n                   4110         1003                    
  neglect = Yes (%)   2265 (55.5)   717 (71.7)  <0.001     
  physicalc = Yes (%) 1105 (27.0)   376 (37.7)  <0.001     
  sexual = Yes (%)     236 ( 5.8)   124 (12.4)  <0.001     
result_1 <- table1(
  ~sexual+physicalc+neglect+educ.x+empstat+age.y | depression, 
  data = sub, 
  overall = FALSE)

# Display the resulting cross-tabulation
result_1
No
(N=4110)
Yes
(N=1003)
sexual
No 3854 (93.8%) 874 (87.1%)
Yes 236 (5.7%) 124 (12.4%)
Missing 20 (0.5%) 5 (0.5%)
physicalc
No 2981 (72.5%) 621 (61.9%)
Yes 1105 (26.9%) 376 (37.5%)
Missing 24 (0.6%) 6 (0.6%)
neglect
No 1815 (44.2%) 283 (28.2%)
Yes 2265 (55.1%) 717 (71.5%)
Missing 30 (0.7%) 3 (0.3%)
educ.x
higher than HS 3111 (75.7%) 768 (76.6%)
upto to HS 998 (24.3%) 235 (23.4%)
Missing 1 (0.0%) 0 (0%)
empstat
Employed 2741 (66.7%) 619 (61.7%)
Unemployed 638 (15.5%) 276 (27.5%)
Missing 731 (17.8%) 108 (10.8%)
age.y
Mean (SD) 22.2 (1.88) 22.1 (1.85)
Median [Min, Max] 22.0 [18.0, 28.0] 22.0 [19.0, 27.0]
Missing 1300 (31.6%) 254 (25.3%)