library(tidyverse) # for the map() command
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(psych)
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(naniar)
library(expss)
## Loading required package: maditr
##
## To modify variables or add new variables:
## let(mtcars, new_var = 42, new_var2 = new_var*hp) %>% head()
##
##
## Attaching package: 'maditr'
##
## The following objects are masked from 'package:dplyr':
##
## between, coalesce, first, last
##
## The following object is masked from 'package:purrr':
##
## transpose
##
## The following object is masked from 'package:readr':
##
## cols
##
##
## Attaching package: 'expss'
##
## The following object is masked from 'package:naniar':
##
## is_na
##
## The following objects are masked from 'package:stringr':
##
## fixed, regex
##
## The following objects are masked from 'package:dplyr':
##
## compute, contains, na_if, recode, vars, where
##
## The following objects are masked from 'package:purrr':
##
## keep, modify, modify_if, when
##
## The following objects are masked from 'package:tidyr':
##
## contains, nest
##
## The following object is masked from 'package:ggplot2':
##
## vars
Data Checking:
# Read the processed ARC survey export; the first CSV row supplies the
# column names. `TRUE` is spelled out because `T` is an ordinary variable
# that can be reassigned.
df <- read.csv(file = "ARC_processed_data (1).csv", header = TRUE)
# Subsetting:
# Keep only the mental_health___* indicator columns and the PHQ, RSE, PSS and
# GAD item columns used below, in this order.
keep_cols <- c(
  paste0("mental_health___", c(1, 10:13)),
  paste0("phq_", 1:9),
  paste0("rse_", 1:9),
  paste0("pss_", 1:4),
  paste0("gad_", 1:7)
)
d <- df[, keep_cols, drop = FALSE]
# List the columns retained in the working data frame.
colnames(d)
## [1] "mental_health___1" "mental_health___10" "mental_health___11"
## [4] "mental_health___12" "mental_health___13" "phq_1"
## [7] "phq_2" "phq_3" "phq_4"
## [10] "phq_5" "phq_6" "phq_7"
## [13] "phq_8" "phq_9" "rse_1"
## [16] "rse_2" "rse_3" "rse_4"
## [19] "rse_5" "rse_6" "rse_7"
## [22] "rse_8" "rse_9" "pss_1"
## [25] "pss_2" "pss_3" "pss_4"
## [28] "gad_1" "gad_2" "gad_3"
## [31] "gad_4" "gad_5" "gad_6"
## [34] "gad_7"
# Preview the first six rows to eyeball the coding of each item.
head(d, n = 6L)
## mental_health___1 mental_health___10 mental_health___11 mental_health___12
## 1 0 0 0 0
## 2 NA NA NA NA
## 3 NA NA NA NA
## 4 NA NA NA NA
## 5 NA NA NA NA
## 6 NA NA NA NA
## mental_health___13 phq_1 phq_2 phq_3 phq_4 phq_5 phq_6 phq_7 phq_8 phq_9
## 1 0 2 1 1 2 1 1 2 1 1
## 2 NA 1 1 1 2 1 1 1 1 1
## 3 NA 1 1 1 1 1 1 1 1 1
## 4 NA 3 2 2 2 1 1 4 1 1
## 5 NA 2 2 2 2 1 1 2 1 1
## 6 NA 1 1 2 2 1 1 2 1 1
## rse_1 rse_2 rse_3 rse_4 rse_5 rse_6 rse_7 rse_8 rse_9 pss_1 pss_2 pss_3 pss_4
## 1 2 4 4 3 2 4 3 4 2 3 4 3 5
## 2 NA NA NA NA NA NA NA NA NA 2 4 4 2
## 3 NA NA NA NA NA NA NA NA NA 3 4 5 1
## 4 NA NA NA NA NA NA NA NA NA 4 2 2 3
## 5 NA NA NA NA NA NA NA NA NA 3 3 3 2
## 6 NA NA NA NA NA NA NA NA NA 1 4 4 2
## gad_1 gad_2 gad_3 gad_4 gad_5 gad_6 gad_7
## 1 2 2 2 2 2 2 1
## 2 2 2 2 2 1 2 1
## 3 2 2 2 2 1 2 1
## 4 4 4 2 4 4 4 1
## 5 2 2 2 2 2 3 1
## 6 2 2 1 2 1 2 1
# Show the dimensions and the storage type of every column.
utils::str(d)
## 'data.frame': 8867 obs. of 34 variables:
## $ mental_health___1 : int 0 NA NA NA NA NA NA NA NA NA ...
## $ mental_health___10: int 0 NA NA NA NA NA NA NA NA NA ...
## $ mental_health___11: int 0 NA NA NA NA NA NA NA NA NA ...
## $ mental_health___12: int 0 NA NA NA NA NA NA NA NA NA ...
## $ mental_health___13: int 0 NA NA NA NA NA NA NA NA NA ...
## $ phq_1 : int 2 1 1 3 2 1 1 2 2 1 ...
## $ phq_2 : int 1 1 1 2 2 1 1 3 2 1 ...
## $ phq_3 : int 1 1 1 2 2 2 1 3 2 1 ...
## $ phq_4 : int 2 2 1 2 2 2 1 3 2 1 ...
## $ phq_5 : int 1 1 1 1 1 1 1 3 1 1 ...
## $ phq_6 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ phq_7 : int 2 1 1 4 2 2 2 2 1 1 ...
## $ phq_8 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ phq_9 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ rse_1 : int 2 NA NA NA NA NA NA NA NA NA ...
## $ rse_2 : int 4 NA NA NA NA NA NA NA NA NA ...
## $ rse_3 : int 4 NA NA NA NA NA NA NA NA NA ...
## $ rse_4 : int 3 NA NA NA NA NA NA NA NA NA ...
## $ rse_5 : int 2 NA NA NA NA NA NA NA NA NA ...
## $ rse_6 : int 4 NA NA NA NA NA NA NA NA NA ...
## $ rse_7 : int 3 NA NA NA NA NA NA NA NA NA ...
## $ rse_8 : int 4 NA NA NA NA NA NA NA NA NA ...
## $ rse_9 : int 2 NA NA NA NA NA NA NA NA NA ...
## $ pss_1 : int 3 2 3 4 3 1 2 3 3 2 ...
## $ pss_2 : int 4 4 4 2 3 4 4 3 2 5 ...
## $ pss_3 : int 3 4 5 2 3 4 4 3 3 5 ...
## $ pss_4 : int 5 2 1 3 2 2 2 3 3 1 ...
## $ gad_1 : int 2 2 2 4 2 2 2 4 2 2 ...
## $ gad_2 : int 2 2 2 4 2 2 2 3 2 1 ...
## $ gad_3 : int 2 2 2 2 2 1 1 3 2 1 ...
## $ gad_4 : int 2 2 2 4 2 2 2 3 2 2 ...
## $ gad_5 : int 2 1 1 4 2 1 1 2 1 1 ...
## $ gad_6 : int 2 2 2 4 3 2 2 3 2 2 ...
## $ gad_7 : int 1 1 1 1 1 1 1 1 1 1 ...
# Frequency table for every column, with an explicit <NA> count so missing
# responses are visible alongside the valid categories.
map(d, table, useNA = "always")
## $mental_health___1
##
## 0 1 <NA>
## 2143 288 6436
##
## $mental_health___10
##
## 0 1 <NA>
## 2385 46 6436
##
## $mental_health___11
##
## 0 1 <NA>
## 2368 63 6436
##
## $mental_health___12
##
## 0 1 <NA>
## 2401 30 6436
##
## $mental_health___13
##
## 0 1 <NA>
## 2415 16 6436
##
## $phq_1
##
## 1 2 3 4 <NA>
## 3606 2441 965 704 1151
##
## $phq_2
##
## 1 2 3 4 <NA>
## 3773 2297 902 735 1160
##
## $phq_3
##
## 1 2 3 4 <NA>
## 3034 2286 1139 1251 1157
##
## $phq_4
##
## 1 2 3 4 <NA>
## 2358 2680 1301 1375 1153
##
## $phq_5
##
## 1 2 3 4 <NA>
## 3949 1864 974 913 1167
##
## $phq_6
##
## 1 2 3 4 <NA>
## 4315 1676 817 884 1175
##
## $phq_7
##
## 1 2 3 4 <NA>
## 4101 1948 850 804 1164
##
## $phq_8
##
## 1 2 3 4 <NA>
## 5994 926 430 345 1172
##
## $phq_9
##
## 1 2 3 4 <NA>
## 6213 733 332 417 1172
##
## $rse_1
##
## 1 2 3 4 <NA>
## 261 475 874 220 7037
##
## $rse_2
##
## 1 2 3 4 <NA>
## 262 428 668 472 7037
##
## $rse_3
##
## 1 2 3 4 <NA>
## 72 272 1073 413 7037
##
## $rse_4
##
## 1 2 3 4 <NA>
## 100 346 1002 377 7042
##
## $rse_5
##
## 1 2 3 4 <NA>
## 449 710 452 220 7036
##
## $rse_6
##
## 1 2 3 4 <NA>
## 286 421 674 446 7040
##
## $rse_7
##
## 1 2 3 4 <NA>
## 127 371 920 407 7042
##
## $rse_8
##
## 1 2 3 4 <NA>
## 224 470 695 441 7037
##
## $rse_9
##
## 1 2 3 4 <NA>
## 638 603 354 233 7039
##
## $pss_1
##
## 1 2 3 4 5 <NA>
## 1608 1805 2115 1505 772 1062
##
## $pss_2
##
## 1 2 3 4 5 <NA>
## 459 1144 2070 2672 1460 1062
##
## $pss_3
##
## 1 2 3 4 5 <NA>
## 407 1255 2717 2617 795 1076
##
## $pss_4
##
## 1 2 3 4 5 <NA>
## 2179 2060 1689 1094 788 1057
##
## $gad_1
##
## 1 2 3 4 <NA>
## 3212 2444 975 1041 1195
##
## $gad_2
##
## 1 2 3 4 <NA>
## 4089 1848 855 870 1205
##
## $gad_3
##
## 1 2 3 4 <NA>
## 3463 2299 941 957 1207
##
## $gad_4
##
## 1 2 3 4 <NA>
## 3506 2345 944 862 1210
##
## $gad_5
##
## 1 2 3 4 <NA>
## 5041 1549 587 486 1204
##
## $gad_6
##
## 1 2 3 4 <NA>
## 2823 2572 1251 1023 1198
##
## $gad_7
##
## 1 2 3 4 <NA>
## 4646 1680 684 645 1212
Recoding Variables:
# Recode the mental_health___1 indicator into a yes/no label:
# "1" becomes "yes", "0" becomes "no"; missing (or any other) values stay NA.
dep_labels <- c("0" = "no", "1" = "yes")
d$dep <- unname(dep_labels[as.character(d$mental_health___1)])
table(d$dep)
##
## no yes
## 2143 288
# Collapse mental_health___10 through mental_health___13 into one yes/no
# label: "yes" when any of the four indicators equals 1, otherwise "no".
# A row with a missing value on any of the four indicators is set to NA,
# even if another indicator in that row is 1.
ed_items <- d[, paste0("mental_health___", 10:13)]
ed_any_yes <- rowSums(ed_items == "1", na.rm = TRUE) > 0
ed_any_missing <- rowSums(is.na(ed_items)) > 0
d$ed <- ifelse(ed_any_yes, "yes", "no")
d$ed[ed_any_missing] <- NA
table(d$ed)
##
## no yes
## 2300 131
Variable Composite
# Composite score per scale = mean of that scale's items. rowSums() is left
# at its default na.rm = FALSE, so a respondent missing any item of a scale
# gets NA for that composite.
# NOTE(review): items are averaged exactly as stored; confirm that any
# reverse-keyed items were already reversed in the processed file.
d$pss <- unname(rowSums(d[, paste0("pss_", 1:4)]) / 4)
d$phq <- unname(rowSums(d[, paste0("phq_", 1:9)]) / 9)
d$rse <- unname(rowSums(d[, paste0("rse_", 1:9)]) / 9)
d$gad <- unname(rowSums(d[, paste0("gad_", 1:7)]) / 7)
Univariate Normality:
# Descriptive statistics (including skew and kurtosis) for every column;
# character columns are flagged with * in the output.
psych::describe(d)
## vars n mean sd median trimmed mad min max range skew
## mental_health___1 1 2431 0.12 0.32 0.00 0.02 0.00 0 1 1 2.36
## mental_health___10 2 2431 0.02 0.14 0.00 0.00 0.00 0 1 1 7.06
## mental_health___11 3 2431 0.03 0.16 0.00 0.00 0.00 0 1 1 5.96
## mental_health___12 4 2431 0.01 0.11 0.00 0.00 0.00 0 1 1 8.83
## mental_health___13 5 2431 0.01 0.08 0.00 0.00 0.00 0 1 1 12.20
## phq_1 6 7716 1.84 0.97 2.00 1.69 1.48 1 4 3 0.93
## phq_2 7 7707 1.82 0.98 2.00 1.65 1.48 1 4 3 0.98
## phq_3 8 7710 2.08 1.09 2.00 1.97 1.48 1 4 3 0.60
## phq_4 9 7714 2.22 1.07 2.00 2.15 1.48 1 4 3 0.43
## phq_5 10 7700 1.85 1.04 1.00 1.69 0.00 1 4 3 0.92
## phq_6 11 7692 1.78 1.04 1.00 1.59 0.00 1 4 3 1.08
## phq_7 12 7703 1.79 1.01 1.00 1.61 0.00 1 4 3 1.05
## phq_8 13 7695 1.37 0.78 1.00 1.15 0.00 1 4 3 2.20
## phq_9 14 7695 1.34 0.80 1.00 1.12 0.00 1 4 3 2.37
## rse_1 15 1830 2.58 0.88 3.00 2.59 1.48 1 4 3 -0.33
## rse_2 16 1830 2.74 1.00 3.00 2.80 1.48 1 4 3 -0.32
## rse_3 17 1830 3.00 0.73 3.00 3.05 0.00 1 4 3 -0.61
## rse_4 18 1825 2.91 0.78 3.00 2.95 0.00 1 4 3 -0.53
## rse_5 19 1831 2.24 0.96 2.00 2.18 1.48 1 4 3 0.33
## rse_6 20 1827 2.70 1.01 3.00 2.75 1.48 1 4 3 -0.30
## rse_7 21 1825 2.88 0.83 3.00 2.94 0.00 1 4 3 -0.50
## rse_8 22 1830 2.74 0.96 3.00 2.80 1.48 1 4 3 -0.29
## rse_9 23 1828 2.10 1.02 2.00 2.00 1.48 1 4 3 0.52
## pss_1 24 7805 2.75 1.26 3.00 2.69 1.48 1 5 4 0.16
## pss_2 25 7805 3.45 1.13 4.00 3.51 1.48 1 5 4 -0.41
## pss_3 26 7791 3.27 1.02 3.00 3.28 1.48 1 5 4 -0.29
## pss_4 27 7810 2.52 1.30 2.00 2.40 1.48 1 5 4 0.45
## gad_1 28 7672 1.98 1.04 2.00 1.85 1.48 1 4 3 0.76
## gad_2 29 7662 1.81 1.03 1.00 1.63 0.00 1 4 3 1.02
## gad_3 30 7660 1.92 1.03 2.00 1.78 1.48 1 4 3 0.84
## gad_4 31 7657 1.89 1.01 2.00 1.74 1.48 1 4 3 0.88
## gad_5 32 7663 1.55 0.88 1.00 1.35 0.00 1 4 3 1.57
## gad_6 33 7669 2.06 1.03 2.00 1.95 1.48 1 4 3 0.61
## gad_7 34 7655 1.65 0.95 1.00 1.46 0.00 1 4 3 1.33
## dep* 35 2431 1.12 0.32 1.00 1.02 0.00 1 2 1 2.36
## ed* 36 2431 1.05 0.23 1.00 1.00 0.00 1 2 1 3.95
## pss 37 7754 3.00 0.49 3.00 3.01 0.37 1 5 4 -0.38
## phq 38 7562 1.79 0.77 1.56 1.68 0.66 1 4 3 1.07
## rse 39 1791 2.65 0.31 2.67 2.65 0.33 1 4 3 -0.05
## gad 40 7553 1.83 0.84 1.57 1.71 0.85 1 4 3 0.99
## kurtosis se
## mental_health___1 3.57 0.01
## mental_health___10 47.83 0.00
## mental_health___11 33.58 0.00
## mental_health___12 75.98 0.00
## mental_health___13 146.82 0.00
## phq_1 -0.19 0.01
## phq_2 -0.14 0.01
## phq_3 -0.97 0.01
## phq_4 -1.05 0.01
## phq_5 -0.47 0.01
## phq_6 -0.19 0.01
## phq_7 -0.14 0.01
## phq_8 3.88 0.01
## phq_9 4.51 0.01
## rse_1 -0.61 0.02
## rse_2 -0.95 0.02
## rse_3 0.54 0.02
## rse_4 0.12 0.02
## rse_5 -0.83 0.02
## rse_6 -0.98 0.02
## rse_7 -0.19 0.02
## rse_8 -0.86 0.02
## rse_9 -0.89 0.02
## pss_1 -0.99 0.01
## pss_2 -0.58 0.01
## pss_3 -0.38 0.01
## pss_4 -0.90 0.01
## gad_1 -0.65 0.01
## gad_2 -0.26 0.01
## gad_3 -0.52 0.01
## gad_4 -0.40 0.01
## gad_5 1.44 0.01
## gad_6 -0.80 0.01
## gad_7 0.58 0.01
## dep* 3.57 0.01
## ed* 13.60 0.00
## pss 2.60 0.01
## phq 0.29 0.01
## rse 1.09 0.01
## gad -0.05 0.01
Histograms:
# One histogram per composite score, drawn in the order phq, pss, gad, rse.
# The title and x-axis label are spelled out so they match what hist() would
# derive from a `d$<name>` argument.
for (scale_name in c("phq", "pss", "gad", "rse")) {
  column_label <- paste0("d$", scale_name)
  hist(
    d[[scale_name]],
    main = paste("Histogram of", column_label),
    xlab = column_label
  )
}

Missing Data
# Upset plot of missing-value combinations across columns. `nsets` is the
# number of sets to display and must be numeric, not the string "6".
gg_miss_upset(d, nsets = 6)

# Listwise deletion: keep only rows with no missing value in any column.
d2 <- na.omit(d)
Crosstabs and Scatterplots
# Cross-tabulate dep (rows) by ed (columns) on the complete-case data.
# The variables are given as bare names so they are looked up inside d2;
# writing d$dep / d$ed would tabulate the unfiltered data frame instead.
cross_cases(d2, dep, ed)
|                | d$ed |      |
|                |  no  | yes  |
| d$dep          |      |      |
|   no           | 2092 |   51 |
|   yes          |  208 |   80 |
|   #Total cases | 2300 |  131 |
Scatterplots
# Scatterplots of composite scores on the complete-case data.
with(d2, plot(
  x = phq, y = gad,
  main = "Scatterplot of PHQ and GAD", xlab = "PHQ", ylab = "GAD"
))

with(d2, plot(
  x = rse, y = pss,
  main = "Scatterplot of RSE and PSS", xlab = "RSE", ylab = "PSS"
))

Boxplots
# Boxplots of a composite score split by a yes/no label. In `score ~ group`
# the groups are drawn along the x axis and the score on the y axis, so the
# axis labels are assigned accordingly.
boxplot(
  phq ~ dep,
  data = d2,
  main = "PHQ and Depression Estimate",
  xlab = "Depression",
  ylab = "PHQ"
)

boxplot(
  gad ~ ed,
  data = d2,
  main = "GAD and Eating Disorder Estimate",
  xlab = "Eating Disorder",
  ylab = "GAD"
)

Reflection Questions
"My continuous variables appear to be kurtotic. This may be because there were other options to fill in for mental health diagnosis, which likely left this particular section empty for many of the participants.
There is missing data both from the way the survey is designed and from a lack of responses."
## [1] "My continuous variables appear to be kurtotic. This may be because there were other options to fill in for mental health diagnosis, which likely left this particular section empty for many of the participants.\nThere is missing data both from the way the survey is designed and from a lack of responses."