library(rmarkdown); library(knitr)
library(dplyr)
library(psych); library(corrplot); library(GPArotation)
library(lavaan); #library(semPlot); library(moments)
HEALTHDATA <- read.csv("https://www.dropbox.com/s/sb8s1i4h1z7g7fh/?dl=1")
attach(HEALTHDATA)
set.seed(1337)
Use cbind() along with cov() to compute the (missing value) observed sample covariance matrix, S, for the interaction between ISOLATED, TROUBLED, SELFHARM, THOUGHTS, TRAUMA, and SLEEP
S <<- cov(cbind(ISOLATED, TROUBLED, SELFHARM, THOUGHTS, TRAUMA, SLEEP))
S
## ISOLATED TROUBLED SELFHARM THOUGHTS TRAUMA SLEEP
## ISOLATED 0.09203732 0.03292405 0.01803685 0.02675912 0.1619459 -0.01645363
## TROUBLED 0.03292405 0.08603026 0.01548007 0.01965926 0.1949260 -0.02363915
## SELFHARM 0.01803685 0.01548007 0.18335916 0.08414752 0.1707942 -0.01764608
## THOUGHTS 0.02675912 0.01965926 0.08414752 0.22723028 0.3108219 -0.03020436
## TRAUMA 0.16194593 0.19492597 0.17079416 0.31082189 7.3207109 -0.55705698
## SLEEP -0.01645363 -0.02363915 -0.01764608 -0.03020436 -0.5570570 0.24858473
Use function() to create a function that takes (as arguments) a single vector of all 18 path parameters and returns (as a scalar) the corresponding value of the difference metric, Q, under the maximum likelihood approach (hint: do NOT include the definition of S from above inside this function)
SEM1 = function(PAR){
PHI = matrix(0, nrow = 8, ncol = 8)
dimnames(PHI) = list(
c("Trauma", "Sleep", "E1", "E2", "E3", "E4", "D1", "D2"),
c("Trauma", "Sleep", "E1", "E2", "E3", "E4", "D1", "D2")
)
PHI[1,1] = PAR[1]
PHI[2,2] = PAR[2]
PHI[3,3] = PAR[3]
PHI[4,4] = PAR[4]
PHI[5,5] = PAR[5]
PHI[6,6] = PAR[6]
PHI[7,7] = 1
PHI[8,8] = 1
BETA = matrix(0, nrow = 6, ncol = 6)
dimnames(BETA) = list(
c("Isolated", "Troubled", "Self Harm", "Thoughts", "Psych Distress", "Suicidality"),
c("Isolated", "Troubled", "Self Harm", "Thoughts", "Psych Distress", "Suicidality")
)
BETA[1,5] = 1
BETA[2,5] = PAR[7]
BETA[3,6] = 1
BETA[4,6] = PAR[8]
GAMMA = matrix(0, nrow = 6, ncol = 8)
dimnames(GAMMA) = list(
c("Isolated", "Troubled", "Self Harm", "Thoughts", "Psych Distress", "Suicidality"),
c("Trauma", "Sleep", "E1", "E2", "E3", "E4", "D1", "D2")
)
GAMMA[1,3] = PAR[9]
GAMMA[2,4] = PAR[10]
GAMMA[3,5] = PAR[11]
GAMMA[4,6] = PAR[12]
GAMMA[5,1] = PAR[13]
GAMMA[5,2] = PAR[14]
GAMMA[5,7] = PAR[15]
GAMMA[6,1] = PAR[16]
GAMMA[6,2] = PAR[17]
GAMMA[6,8] = PAR[18]
G_yrep = rep(1, 4)
G_y = matrix(0, nrow = 4, ncol = 6)
dimnames(G_y) = list(
c("Isolated", "Troubled", "Self Harm", "Thoughts"),
c("Isolated", "Troubled", "Self Harm", "Thoughts", "Psych Distress", "Suicidality")
)
G_y[1,1] = G_yrep[1]
G_y[2,2] = G_yrep[2]
G_y[3,3] = G_yrep[3]
G_y[4,4] = G_yrep[4]
G_xrep = rep(1, 2)
G_x = matrix(0, nrow = 2, ncol = 8)
dimnames(G_x) = list(
c("Trauma", "Sleep"),
c("Trauma", "Sleep", "E1", "E2", "E3", "E4", "D1", "D2")
)
G_x[1,1] = G_xrep[1]
G_x[2,2] = G_xrep[2]
I = diag(1, nrow = 6, ncol = 6)
SH_yy = G_y %*% solve(I - BETA) %*% GAMMA %*% PHI %*% t(GAMMA) %*% t(solve(I - BETA)) %*% t(G_y)
SH_yx = G_y %*% solve(I - BETA) %*% GAMMA %*% PHI %*% t(G_x)
SH_xy = t(SH_yx)
SH_xx = G_x %*% PHI %*% t(G_x)
#print("GOT THIS FAR")
TOP = cbind(SH_yy, SH_yx)
BOTTOM = cbind(SH_xy, SH_xx)
SH = rbind(TOP, BOTTOM)
if (kappa(SH) > 1e12) {
return(20)
}
DIFF = log(det(SH)) - log(det(S)) + sum(diag(S %*% solve(SH))) - 6
return(DIFF)
}
Use optim() with the Conjugate Cradient (CG) method and a maximum of 10,000 iterations to find the optimum parameterization for the structural equation model above
optim(par = rep(0.5, 18),
fn = SEM1,
method = "CG",
control = list(maxit = 1e4))
## Warning in log(det(SH)): NaNs produced
## Warning in log(det(SH)): NaNs produced
## Warning in log(det(SH)): NaNs produced
## $par
## [1] 7.31991611 0.24858741 0.40266055 0.38013352 0.47811021 0.44990725
## [7] 1.23517434 1.81004220 0.40295921 0.34544827 0.53505168 0.40803777
## [13] 0.01944831 -0.03003108 0.15169694 0.02204822 -0.01827508 0.20591676
##
## $value
## [1] 0.2155537
##
## $counts
## function gradient
## 5464 2157
##
## $convergence
## [1] 0
##
## $message
## NULL
Explain what the regression coefficients above mean in terms of whether TRAUMA or SLEEP has a stronger effect on SUICIDALITY
TRAUMA has a stronger effect on SUICIDALITY because the regreesion coefficient's absolute value is greater than that of SLEEP.
Explain what the regression coefficients above mean in terms of whether TRAUMA has a positive or negative effect on PSYCHOLOGICAL DISTRESS
TRAUMA has a weak positive effect on PSYCHOLOGICAL DISTRESS.
Explain conceptually why SLEEP has a negative effect on PSYCHOLOGICAL DISTRESS
Getting less sleep is unhealthy and is proven to increase stress levels.
Use replicate() and slice_sample() to take a random sample, with replacement, of the sampled college students and find the optimum parameterization for the structural equation model a total of two times
BOOT = replicate(n = 2,
{
TEMP = slice_sample(HEALTHDATA, n = nrow(HEALTHDATA),
replace = TRUE)
S <<- cov(cbind(TEMP$ISOLATED, TEMP$TROUBLED, TEMP$SELFHARM, TEMP$THOUGHTS, TEMP$TRAUMA, TEMP$SLEEP))
FIT = optim(par = rep(0.5, 18),
fn = SEM1,
method = "CG",
control = list(maxit = 1e3))
FIT$par
}
)
BOOT
## [,1] [,2]
## [1,] 7.40489638 7.50501439
## [2,] 0.24894780 0.24881806
## [3,] 0.42028932 0.40637416
## [4,] 0.37432188 0.37006495
## [5,] 0.49736179 0.49102859
## [6,] 0.44710868 0.41995949
## [7,] 1.37141296 1.08972965
## [8,] 1.80357987 2.00380855
## [9,] 0.40073687 0.38175694
## [10,] 0.36759836 0.38307685
## [11,] 0.52992016 0.53097657
## [12,] 0.39876682 0.33945095
## [13,] 0.01633290 0.01684466
## [14,] -0.02750766 -0.03464115
## [15,] 0.13068919 0.15179351
## [16,] 0.01974731 0.01473724
## [17,] -0.01112961 -0.04446682
## [18,] 0.21378359 0.20718283
Use replicate() and slice_sample() to take a random sample, with replacement, of the sampled college students and find the optimum parameterization for the structural equation model a total of twenty times
set.seed(69)
BOOT = replicate(n = 20,
{
TEMP = slice_sample(HEALTHDATA, n = nrow(HEALTHDATA),
replace = TRUE)
S <<- cov(cbind(TEMP$ISOLATED, TEMP$TROUBLED, TEMP$SELFHARM, TEMP$THOUGHTS, TEMP$TRAUMA, TEMP$SLEEP))
FIT = optim(par = rep(0.5, 18),
fn = SEM1,
method = "CG",
control = list(maxit = 1e3))
FIT$par
}
)
Use quantile() to find a 95% confidence interval for the effect of SLEEP on PSYCHOLOGICAL DISTRESS
quantile(BOOT[14,-4], probs = c(0.025, 0.975))
## 2.5% 97.5%
## -0.04625486 -0.01200469
Explain if SLEEP has a significant effect on PSYCHOLOGICAL DISTRESS
SLEEP has a significant effect on PSYCHOLOGICAL DISTRESS because its confidence interval does not include 0.