library(tidyverse) # for the map() command
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(psych)
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(naniar)
library(expss)
## Loading required package: maditr
## 
## To modify variables or add new variables:
##              let(mtcars, new_var = 42, new_var2 = new_var*hp) %>% head()
## 
## 
## Attaching package: 'maditr'
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, coalesce, first, last
## 
## The following object is masked from 'package:purrr':
## 
##     transpose
## 
## The following object is masked from 'package:readr':
## 
##     cols
## 
## 
## Attaching package: 'expss'
## 
## The following object is masked from 'package:naniar':
## 
##     is_na
## 
## The following objects are masked from 'package:stringr':
## 
##     fixed, regex
## 
## The following objects are masked from 'package:dplyr':
## 
##     compute, contains, na_if, recode, vars, where
## 
## The following objects are masked from 'package:purrr':
## 
##     keep, modify, modify_if, when
## 
## The following objects are masked from 'package:tidyr':
## 
##     contains, nest
## 
## The following object is masked from 'package:ggplot2':
## 
##     vars

Data Checking:

# Load the processed survey export; the first row of the CSV supplies the
# column names. TRUE is spelled out because T is an ordinary variable that
# can be reassigned.
df <- read.csv(file = "ARC_processed_data (1).csv", header = TRUE)

# Subsetting: keep the five mental_health checkbox columns followed by the
# phq, rse, pss and gad item columns, in that order. Column names are built
# from their prefixes rather than listed one by one.
d <- df[c(
  paste0("mental_health___", c(1, 10:13)),
  paste0("phq_", 1:9),
  paste0("rse_", 1:9),
  paste0("pss_", 1:4),
  paste0("gad_", 1:7)
)]

# Confirm which columns survived the subset.
names(d)
##  [1] "mental_health___1"  "mental_health___10" "mental_health___11"
##  [4] "mental_health___12" "mental_health___13" "phq_1"             
##  [7] "phq_2"              "phq_3"              "phq_4"             
## [10] "phq_5"              "phq_6"              "phq_7"             
## [13] "phq_8"              "phq_9"              "rse_1"             
## [16] "rse_2"              "rse_3"              "rse_4"             
## [19] "rse_5"              "rse_6"              "rse_7"             
## [22] "rse_8"              "rse_9"              "pss_1"             
## [25] "pss_2"              "pss_3"              "pss_4"             
## [28] "gad_1"              "gad_2"              "gad_3"             
## [31] "gad_4"              "gad_5"              "gad_6"             
## [34] "gad_7"
# Preview the first rows to eyeball the raw item values.
head(d)
##   mental_health___1 mental_health___10 mental_health___11 mental_health___12
## 1                 0                  0                  0                  0
## 2                NA                 NA                 NA                 NA
## 3                NA                 NA                 NA                 NA
## 4                NA                 NA                 NA                 NA
## 5                NA                 NA                 NA                 NA
## 6                NA                 NA                 NA                 NA
##   mental_health___13 phq_1 phq_2 phq_3 phq_4 phq_5 phq_6 phq_7 phq_8 phq_9
## 1                  0     2     1     1     2     1     1     2     1     1
## 2                 NA     1     1     1     2     1     1     1     1     1
## 3                 NA     1     1     1     1     1     1     1     1     1
## 4                 NA     3     2     2     2     1     1     4     1     1
## 5                 NA     2     2     2     2     1     1     2     1     1
## 6                 NA     1     1     2     2     1     1     2     1     1
##   rse_1 rse_2 rse_3 rse_4 rse_5 rse_6 rse_7 rse_8 rse_9 pss_1 pss_2 pss_3 pss_4
## 1     2     4     4     3     2     4     3     4     2     3     4     3     5
## 2    NA    NA    NA    NA    NA    NA    NA    NA    NA     2     4     4     2
## 3    NA    NA    NA    NA    NA    NA    NA    NA    NA     3     4     5     1
## 4    NA    NA    NA    NA    NA    NA    NA    NA    NA     4     2     2     3
## 5    NA    NA    NA    NA    NA    NA    NA    NA    NA     3     3     3     2
## 6    NA    NA    NA    NA    NA    NA    NA    NA    NA     1     4     4     2
##   gad_1 gad_2 gad_3 gad_4 gad_5 gad_6 gad_7
## 1     2     2     2     2     2     2     1
## 2     2     2     2     2     1     2     1
## 3     2     2     2     2     1     2     1
## 4     4     4     2     4     4     4     1
## 5     2     2     2     2     2     3     1
## 6     2     2     1     2     1     2     1
# Show the dimensions of d and the storage type of each column.
str(d)
## 'data.frame':    8867 obs. of  34 variables:
##  $ mental_health___1 : int  0 NA NA NA NA NA NA NA NA NA ...
##  $ mental_health___10: int  0 NA NA NA NA NA NA NA NA NA ...
##  $ mental_health___11: int  0 NA NA NA NA NA NA NA NA NA ...
##  $ mental_health___12: int  0 NA NA NA NA NA NA NA NA NA ...
##  $ mental_health___13: int  0 NA NA NA NA NA NA NA NA NA ...
##  $ phq_1             : int  2 1 1 3 2 1 1 2 2 1 ...
##  $ phq_2             : int  1 1 1 2 2 1 1 3 2 1 ...
##  $ phq_3             : int  1 1 1 2 2 2 1 3 2 1 ...
##  $ phq_4             : int  2 2 1 2 2 2 1 3 2 1 ...
##  $ phq_5             : int  1 1 1 1 1 1 1 3 1 1 ...
##  $ phq_6             : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ phq_7             : int  2 1 1 4 2 2 2 2 1 1 ...
##  $ phq_8             : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ phq_9             : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ rse_1             : int  2 NA NA NA NA NA NA NA NA NA ...
##  $ rse_2             : int  4 NA NA NA NA NA NA NA NA NA ...
##  $ rse_3             : int  4 NA NA NA NA NA NA NA NA NA ...
##  $ rse_4             : int  3 NA NA NA NA NA NA NA NA NA ...
##  $ rse_5             : int  2 NA NA NA NA NA NA NA NA NA ...
##  $ rse_6             : int  4 NA NA NA NA NA NA NA NA NA ...
##  $ rse_7             : int  3 NA NA NA NA NA NA NA NA NA ...
##  $ rse_8             : int  4 NA NA NA NA NA NA NA NA NA ...
##  $ rse_9             : int  2 NA NA NA NA NA NA NA NA NA ...
##  $ pss_1             : int  3 2 3 4 3 1 2 3 3 2 ...
##  $ pss_2             : int  4 4 4 2 3 4 4 3 2 5 ...
##  $ pss_3             : int  3 4 5 2 3 4 4 3 3 5 ...
##  $ pss_4             : int  5 2 1 3 2 2 2 3 3 1 ...
##  $ gad_1             : int  2 2 2 4 2 2 2 4 2 2 ...
##  $ gad_2             : int  2 2 2 4 2 2 2 3 2 1 ...
##  $ gad_3             : int  2 2 2 2 2 1 1 3 2 1 ...
##  $ gad_4             : int  2 2 2 4 2 2 2 3 2 2 ...
##  $ gad_5             : int  2 1 1 4 2 1 1 2 1 1 ...
##  $ gad_6             : int  2 2 2 4 3 2 2 3 2 2 ...
##  $ gad_7             : int  1 1 1 1 1 1 1 1 1 1 ...
# Frequency table for every column of d, with the count of missing values
# always reported as its own <NA> cell.
purrr::map(.x = d, .f = table, useNA = "always")
## $mental_health___1
## 
##    0    1 <NA> 
## 2143  288 6436 
## 
## $mental_health___10
## 
##    0    1 <NA> 
## 2385   46 6436 
## 
## $mental_health___11
## 
##    0    1 <NA> 
## 2368   63 6436 
## 
## $mental_health___12
## 
##    0    1 <NA> 
## 2401   30 6436 
## 
## $mental_health___13
## 
##    0    1 <NA> 
## 2415   16 6436 
## 
## $phq_1
## 
##    1    2    3    4 <NA> 
## 3606 2441  965  704 1151 
## 
## $phq_2
## 
##    1    2    3    4 <NA> 
## 3773 2297  902  735 1160 
## 
## $phq_3
## 
##    1    2    3    4 <NA> 
## 3034 2286 1139 1251 1157 
## 
## $phq_4
## 
##    1    2    3    4 <NA> 
## 2358 2680 1301 1375 1153 
## 
## $phq_5
## 
##    1    2    3    4 <NA> 
## 3949 1864  974  913 1167 
## 
## $phq_6
## 
##    1    2    3    4 <NA> 
## 4315 1676  817  884 1175 
## 
## $phq_7
## 
##    1    2    3    4 <NA> 
## 4101 1948  850  804 1164 
## 
## $phq_8
## 
##    1    2    3    4 <NA> 
## 5994  926  430  345 1172 
## 
## $phq_9
## 
##    1    2    3    4 <NA> 
## 6213  733  332  417 1172 
## 
## $rse_1
## 
##    1    2    3    4 <NA> 
##  261  475  874  220 7037 
## 
## $rse_2
## 
##    1    2    3    4 <NA> 
##  262  428  668  472 7037 
## 
## $rse_3
## 
##    1    2    3    4 <NA> 
##   72  272 1073  413 7037 
## 
## $rse_4
## 
##    1    2    3    4 <NA> 
##  100  346 1002  377 7042 
## 
## $rse_5
## 
##    1    2    3    4 <NA> 
##  449  710  452  220 7036 
## 
## $rse_6
## 
##    1    2    3    4 <NA> 
##  286  421  674  446 7040 
## 
## $rse_7
## 
##    1    2    3    4 <NA> 
##  127  371  920  407 7042 
## 
## $rse_8
## 
##    1    2    3    4 <NA> 
##  224  470  695  441 7037 
## 
## $rse_9
## 
##    1    2    3    4 <NA> 
##  638  603  354  233 7039 
## 
## $pss_1
## 
##    1    2    3    4    5 <NA> 
## 1608 1805 2115 1505  772 1062 
## 
## $pss_2
## 
##    1    2    3    4    5 <NA> 
##  459 1144 2070 2672 1460 1062 
## 
## $pss_3
## 
##    1    2    3    4    5 <NA> 
##  407 1255 2717 2617  795 1076 
## 
## $pss_4
## 
##    1    2    3    4    5 <NA> 
## 2179 2060 1689 1094  788 1057 
## 
## $gad_1
## 
##    1    2    3    4 <NA> 
## 3212 2444  975 1041 1195 
## 
## $gad_2
## 
##    1    2    3    4 <NA> 
## 4089 1848  855  870 1205 
## 
## $gad_3
## 
##    1    2    3    4 <NA> 
## 3463 2299  941  957 1207 
## 
## $gad_4
## 
##    1    2    3    4 <NA> 
## 3506 2345  944  862 1210 
## 
## $gad_5
## 
##    1    2    3    4 <NA> 
## 5041 1549  587  486 1204 
## 
## $gad_6
## 
##    1    2    3    4 <NA> 
## 2823 2572 1251 1023 1198 
## 
## $gad_7
## 
##    1    2    3    4 <NA> 
## 4646 1680  684  645 1212

Recoding Variables:

# Recode the mental_health___1 checkbox into a "yes"/"no" label stored in
# dep: 1 becomes "yes", 0 becomes "no", and anything else (including a
# missing checkbox) becomes NA.
d$dep <- c("no", "yes")[match(d$mental_health___1, c(0L, 1L))]
table(d$dep)
## 
##   no  yes 
## 2143  288
# Collapse checkboxes 10-13 into one "yes"/"no" flag stored in ed: "yes"
# when at least one of the four is ticked, "no" when none is. rowSums()
# returns NA for a row with any missing checkbox, which makes the flag NA
# for that row as well.
d$ed <- c("no", "yes")[
  (rowSums(d[paste0("mental_health___", 10:13)] == "1") > 0) + 1L
]
table(d$ed)
## 
##   no  yes 
## 2300  131

Variable Composite

# Composite scores: the unweighted mean of each scale's items. The item
# columns are added element-wise and divided by the item count, so a row
# with any missing item gets NA for that scale.
# NOTE(review): items are averaged exactly as stored; if any scale has
# reverse-keyed items, confirm they were recoded before this step.
d$pss <- Reduce(`+`, d[paste0("pss_", 1:4)]) / 4
d$phq <- Reduce(`+`, d[paste0("phq_", 1:9)]) / 9
d$rse <- Reduce(`+`, d[paste0("rse_", 1:9)]) / 9
d$gad <- Reduce(`+`, d[paste0("gad_", 1:7)]) / 7

Univariate Normality:

# Descriptive statistics (n, mean, sd, skew, kurtosis, ...) for every column
# of d, used here to judge univariate normality.
describe(d)
##                    vars    n mean   sd median trimmed  mad min max range  skew
## mental_health___1     1 2431 0.12 0.32   0.00    0.02 0.00   0   1     1  2.36
## mental_health___10    2 2431 0.02 0.14   0.00    0.00 0.00   0   1     1  7.06
## mental_health___11    3 2431 0.03 0.16   0.00    0.00 0.00   0   1     1  5.96
## mental_health___12    4 2431 0.01 0.11   0.00    0.00 0.00   0   1     1  8.83
## mental_health___13    5 2431 0.01 0.08   0.00    0.00 0.00   0   1     1 12.20
## phq_1                 6 7716 1.84 0.97   2.00    1.69 1.48   1   4     3  0.93
## phq_2                 7 7707 1.82 0.98   2.00    1.65 1.48   1   4     3  0.98
## phq_3                 8 7710 2.08 1.09   2.00    1.97 1.48   1   4     3  0.60
## phq_4                 9 7714 2.22 1.07   2.00    2.15 1.48   1   4     3  0.43
## phq_5                10 7700 1.85 1.04   1.00    1.69 0.00   1   4     3  0.92
## phq_6                11 7692 1.78 1.04   1.00    1.59 0.00   1   4     3  1.08
## phq_7                12 7703 1.79 1.01   1.00    1.61 0.00   1   4     3  1.05
## phq_8                13 7695 1.37 0.78   1.00    1.15 0.00   1   4     3  2.20
## phq_9                14 7695 1.34 0.80   1.00    1.12 0.00   1   4     3  2.37
## rse_1                15 1830 2.58 0.88   3.00    2.59 1.48   1   4     3 -0.33
## rse_2                16 1830 2.74 1.00   3.00    2.80 1.48   1   4     3 -0.32
## rse_3                17 1830 3.00 0.73   3.00    3.05 0.00   1   4     3 -0.61
## rse_4                18 1825 2.91 0.78   3.00    2.95 0.00   1   4     3 -0.53
## rse_5                19 1831 2.24 0.96   2.00    2.18 1.48   1   4     3  0.33
## rse_6                20 1827 2.70 1.01   3.00    2.75 1.48   1   4     3 -0.30
## rse_7                21 1825 2.88 0.83   3.00    2.94 0.00   1   4     3 -0.50
## rse_8                22 1830 2.74 0.96   3.00    2.80 1.48   1   4     3 -0.29
## rse_9                23 1828 2.10 1.02   2.00    2.00 1.48   1   4     3  0.52
## pss_1                24 7805 2.75 1.26   3.00    2.69 1.48   1   5     4  0.16
## pss_2                25 7805 3.45 1.13   4.00    3.51 1.48   1   5     4 -0.41
## pss_3                26 7791 3.27 1.02   3.00    3.28 1.48   1   5     4 -0.29
## pss_4                27 7810 2.52 1.30   2.00    2.40 1.48   1   5     4  0.45
## gad_1                28 7672 1.98 1.04   2.00    1.85 1.48   1   4     3  0.76
## gad_2                29 7662 1.81 1.03   1.00    1.63 0.00   1   4     3  1.02
## gad_3                30 7660 1.92 1.03   2.00    1.78 1.48   1   4     3  0.84
## gad_4                31 7657 1.89 1.01   2.00    1.74 1.48   1   4     3  0.88
## gad_5                32 7663 1.55 0.88   1.00    1.35 0.00   1   4     3  1.57
## gad_6                33 7669 2.06 1.03   2.00    1.95 1.48   1   4     3  0.61
## gad_7                34 7655 1.65 0.95   1.00    1.46 0.00   1   4     3  1.33
## dep*                 35 2431 1.12 0.32   1.00    1.02 0.00   1   2     1  2.36
## ed*                  36 2431 1.05 0.23   1.00    1.00 0.00   1   2     1  3.95
## pss                  37 7754 3.00 0.49   3.00    3.01 0.37   1   5     4 -0.38
## phq                  38 7562 1.79 0.77   1.56    1.68 0.66   1   4     3  1.07
## rse                  39 1791 2.65 0.31   2.67    2.65 0.33   1   4     3 -0.05
## gad                  40 7553 1.83 0.84   1.57    1.71 0.85   1   4     3  0.99
##                    kurtosis   se
## mental_health___1      3.57 0.01
## mental_health___10    47.83 0.00
## mental_health___11    33.58 0.00
## mental_health___12    75.98 0.00
## mental_health___13   146.82 0.00
## phq_1                 -0.19 0.01
## phq_2                 -0.14 0.01
## phq_3                 -0.97 0.01
## phq_4                 -1.05 0.01
## phq_5                 -0.47 0.01
## phq_6                 -0.19 0.01
## phq_7                 -0.14 0.01
## phq_8                  3.88 0.01
## phq_9                  4.51 0.01
## rse_1                 -0.61 0.02
## rse_2                 -0.95 0.02
## rse_3                  0.54 0.02
## rse_4                  0.12 0.02
## rse_5                 -0.83 0.02
## rse_6                 -0.98 0.02
## rse_7                 -0.19 0.02
## rse_8                 -0.86 0.02
## rse_9                 -0.89 0.02
## pss_1                 -0.99 0.01
## pss_2                 -0.58 0.01
## pss_3                 -0.38 0.01
## pss_4                 -0.90 0.01
## gad_1                 -0.65 0.01
## gad_2                 -0.26 0.01
## gad_3                 -0.52 0.01
## gad_4                 -0.40 0.01
## gad_5                  1.44 0.01
## gad_6                 -0.80 0.01
## gad_7                  0.58 0.01
## dep*                   3.57 0.01
## ed*                   13.60 0.00
## pss                    2.60 0.01
## phq                    0.29 0.01
## rse                    1.09 0.01
## gad                   -0.05 0.01

Histograms:

# One histogram per composite score, drawn from the full data frame d.
# Each call is written out so the default plot title names the variable.
hist(d$phq)

hist(d$pss)

hist(d$gad)

hist(d$rse)

Missing Data

# UpSet plot of co-occurring missingness across the columns of d.
# nsets is a count of variables to display, so it is passed as a number
# rather than a string.
gg_miss_upset(d, nsets = 6)

# Keep only rows with no missing value in any column of d.
# NOTE(review): this is listwise deletion over every column, including the
# sparsely answered rse_* and mental_health___* items, so d2 may be far
# smaller than d -- confirm that is intended.
d2 <- na.omit(d)

Crosstabs and Scatterplots

# Cross-tabulate dep by ed on the complete cases. Bare column names are
# resolved inside d2; writing d$dep / d$ed would pull the columns from the
# unfiltered d and bypass the na.omit() step.
cross_cases(d2, dep, ed)
 d$ed 
 no   yes 
 d$dep 
   no  2092 51
   yes  208 80
   #Total cases  2300 131

Scatterplots

# Scatterplots of the composite scores on the complete cases: the first
# argument goes on the x axis and the second on the y axis.
with(d2, plot(phq, gad,
  main = "Scatterplot of PHQ and GAD", xlab = "PHQ", ylab = "GAD"
))

with(d2, plot(rse, pss,
  main = "Scatterplot of RSE and PSS", xlab = "RSE", ylab = "PSS"
))

Boxplots

# Boxplots of a composite score split by a yes/no flag, on the complete
# cases. In the formula `score ~ group` the grouping variable is drawn on
# the x axis and the score on the y axis, so the axis labels follow that
# order.
boxplot(phq ~ dep,
  data = d2, main = "PHQ and Depression Estimate",
  xlab = "Depression", ylab = "PHQ"
)

boxplot(gad ~ ed,
  data = d2, main = "GAD and Eating Disorder Estimate",
  xlab = "Eating Disorder", ylab = "GAD"
)

Reflection Questions

"My continuous variables appear to be kurtotic. This may be because there were other options to fill in for mental health diagnosis that likely left this particular section empty for many of the participants. 
There is missing data both from the way that the survey is designed and from lack of responses. "
## [1] "My continuous variables appear to be kurtotic. This may be because there were other options to fill in for mental health diagnosis that likely left this particular section empty for many of the participants. \nThere is missing data both from the way that the survey is designed and from lack of responses. "