library(rmarkdown); library(knitr)
library(dplyr)
library(psych); library(corrplot); library(GPArotation)
library(lavaan);# library(semPlot); library(moments)
HEALTHDATA <- read.csv("https://www.dropbox.com/s/sb8s1i4h1z7g7fh/?dl=1")
attach(HEALTHDATA)
S = cov(cbind(ISOLATED, TROUBLED, SELFHARM, THOUGHTS, TRAUMA, SLEEP), use = "pairwise.complete.obs")
set.seed(37)
Explain what the path diagram and corresponding parameter matrices calculated earlier mean in terms of the total number of parameters to be estimated for the hypothesized structural equation model
22, the sum of all the parameter vectors.
Use the number of measured variables to compute the number of unique data points provided by the original data
(points <- 6*(6+1)/2)
## [1] 21
Explain what the number of parameters to be estimated and the number of unique data points above mean in terms of whether the hypothesized model is over- or under-parameterized
Over-parameterized, more parameters than data points.
Explain what the information above means in terms of the minimum number of path parameters that should be fixed in order to properly parameterized the structural equation model
2, so that there are more data-points than parameters.
First, fix the variance of both disturbance terms, the path parameter from PSYDISTRESS to ISOLATED, and the path parameter from SUICIDALITY to SELFHARM Second, use function() to create a function that takes (as arguments) a single vector of all 18 path parameters and returns (as a scalar) the corresponding chi-square test statistic
SEM1 <- function(PAR){
S = cov(cbind(ISOLATED, TROUBLED, SELFHARM, THOUGHTS, TRAUMA, SLEEP), use = "pairwise.complete.obs")
PHI <- matrix(0, nrow = 8, ncol = 8)
dimnames(PHI) <- list(
c("Trauma", "Sleep", "E1", "E2", "E3", "E4", "D1", "D2"),
c("Trauma", "Sleep", "E1", "E2", "E3", "E4", "D1", "D2")
)
PHI[1,1] <- PAR[1]
PHI[2,2] <- PAR[2]
PHI[3,3] <- PAR[3]
PHI[4,4] <- PAR[4]
PHI[5,5] <- PAR[5]
PHI[6,6] <- PAR[6]
PHI[7,7] <- 1
PHI[8,8] <- 1
BETA <- matrix(0, nrow = 6, ncol = 6)
dimnames(BETA) <- list(
c("Isolated", "Troubled", "Self Harm", "Thoughts", "Psych Distress", "Suicidality"),
c("Isolated", "Troubled", "Self Harm", "Thoughts", "Psych Distress", "Suicidality")
)
BETA[1,5] <- 1
BETA[2,5] <- PAR[7]
BETA[3,6] <- 1
BETA[4,6] <- PAR[8]
GAMMA <- matrix(0, nrow = 6, ncol = 8)
dimnames(GAMMA) <- list(
c("Isolated", "Troubled", "Self Harm", "Thoughts", "Psych Distress", "Suicidality"),
c("Trauma", "Sleep", "E1", "E2", "E3", "E4", "D1", "D2")
)
GAMMA[1,3] <- PAR[9]
GAMMA[2,4] <- PAR[10]
GAMMA[3,5] <- PAR[11]
GAMMA[4,6] <- PAR[12]
GAMMA[5,1] <- PAR[13]
GAMMA[5,2] <- PAR[14]
GAMMA[5,7] <- PAR[15]
GAMMA[6,1] <- PAR[16]
GAMMA[6,2] <- PAR[17]
GAMMA[6,8] <- PAR[18]
G_yrep <- rep(1, 4)
G_y <- matrix(0, nrow = 4, ncol = 6)
dimnames(G_y) <- list(
c("Isolated", "Troubled", "Self Harm", "Thoughts"),
c("Isolated", "Troubled", "Self Harm", "Thoughts", "Psych Distress", "Suicidality")
)
G_y[1,1] <- G_yrep[1]
G_y[2,2] <- G_yrep[2]
G_y[3,3] <- G_yrep[3]
G_y[4,4] <- G_yrep[4]
G_xrep <- rep(1, 2)
G_x <- matrix(0, nrow = 2, ncol = 8)
dimnames(G_x) <- list(
c("Trauma", "Sleep"),
c("Trauma", "Sleep", "E1", "E2", "E3", "E4", "D1", "D2")
)
G_x[1,1] <- G_xrep[1]
G_x[2,2] <- G_xrep[2]
I <- diag(1, nrow = 6, ncol = 6)
SH_yy <- G_y %*% solve(I - BETA) %*% GAMMA %*% PHI %*% t(GAMMA) %*% t(solve(I - BETA)) %*% t(G_y)
SH_yx <- G_y %*% solve(I - BETA) %*% GAMMA %*% PHI %*% t(G_x)
SH_xy <- t(SH_yx)
SH_xx <- G_x %*% PHI %*% t(G_x)
#print("GOT THIS FAR")
TOP <- cbind(SH_yy, SH_yx)
BOTTOM <- cbind(SH_xy, SH_xx)
SH <- rbind(TOP, BOTTOM)
testStat <- log(det(SH)) - log(det(S)) + sum(diag(S %*% solve(SH))) - 6
return(testStat)
}
Use the function above to compute the chi-square test statistic for the hypothesized structural equation model based on (unoptimized) values of 0.5 for all 18 path parameters
vec <- rep(0.5, 18)
(chi <- SEM1(vec))
## [1] 20.63124
chi*(nrow(HEALTHDATA) - 1)
## [1] 27563.34
Use the function above to compute the chi-square statistic for the same (unoptimized) parameterization above but where the path parameter between TRAUMA and PSYDISTRESS is changed first to 0.51 and then to 0.49 (while the rest are maintained at 0.5)
n <- (nrow(HEALTHDATA) - 1)
vec[13] <- 0.51
SEM1(vec)*n
## [1] 27808.47
vec[13] <- 0.49
SEM1(vec)*n
## [1] 27323.8
Explain what the statistics above means in terms of whether increasing or decreasing the effect of TRAUMA on PSYDISTRESS (from a starting value of 0.5) improved model fit
Decreasing the value increased model fit (decreased the test statistic).
Use optim() with the Conjugate Cradient (CG) method and a maximum of 10,000 iterations to find the optimum parameterization for the structural equation model and the corresponding (optimized) chi-square test statistic
set.seed(37)
(bestfit <- optim(vec, SEM1, method = "CG", control = list(maxit = 1000)))
## $par
## [1] 7.23456409 0.24858742 0.38117816 0.38006057 0.47633762 0.45512571
## [7] 1.23504101 1.76521183 0.41414088 0.34534695 0.53403784 0.41453053
## [13] 0.01943434 -0.03001615 0.15169163 0.02255412 -0.01888536 0.20860628
##
## $value
## [1] 0.2156419
##
## $counts
## function gradient
## 2497 1001
##
## $convergence
## [1] 1
##
## $message
## NULL
Calculate the degrees of freedom for the structural equation model above
df <- 18 - 1 # = number of non-zero, non-fixed parameters -1
df
## [1] 17
Use the (optimized) chi-square statistic and the corresponding degrees of freedom to compute the root mean square error of approximation, RMSEA, for the (optimized) structural equation model
ts <- bestfit$value*n
(RMSEA <- sqrt(((ts-df)/(n+1)))/df)
## [1] 0.02648794
Explain what the RMSEA above means in terms of whether or not the hypothesized model provides a good fit to the original covariance structures between the variables in the data
The hypothesized model does not provide a good fit to the original structures
Compute the McDonald Fit Index (MFI) for the (optimized) structural equation model
(exp(-0.5*(ts-df)/(n+1)))
## [1] 0.9035871
Explain what the MFI above means in terms of whether or not the hypothesized model provides a good fit to the original covariance structures between the variables in the data
The MFI value suggests that the hypothesized model does not provide a good fit because it is less than 0.95.
Compute the (optimized) reconstructed covariance matrix for the (optimized) structural equation model
SEM2 <- function(PAR){
S = cov(cbind(ISOLATED, TROUBLED, SELFHARM, THOUGHTS, TRAUMA, SLEEP), use = "pairwise.complete.obs")
PHI <- matrix(0, nrow = 8, ncol = 8)
dimnames(PHI) <- list(
c("Trauma", "Sleep", "E1", "E2", "E3", "E4", "D1", "D2"),
c("Trauma", "Sleep", "E1", "E2", "E3", "E4", "D1", "D2")
)
PHI[1,1] <- PAR[1]
PHI[2,2] <- PAR[2]
PHI[3,3] <- PAR[3]
PHI[4,4] <- PAR[4]
PHI[5,5] <- PAR[5]
PHI[6,6] <- PAR[6]
PHI[7,7] <- 1
PHI[8,8] <- 1
BETA <- matrix(0, nrow = 6, ncol = 6)
dimnames(BETA) <- list(
c("Isolated", "Troubled", "Self Harm", "Thoughts", "Psych Distress", "Suicidality"),
c("Isolated", "Troubled", "Self Harm", "Thoughts", "Psych Distress", "Suicidality")
)
BETA[1,5] <- 1
BETA[2,5] <- PAR[7]
BETA[3,6] <- 1
BETA[4,6] <- PAR[8]
GAMMA <- matrix(0, nrow = 6, ncol = 8)
dimnames(GAMMA) <- list(
c("Isolated", "Troubled", "Self Harm", "Thoughts", "Psych Distress", "Suicidality"),
c("Trauma", "Sleep", "E1", "E2", "E3", "E4", "D1", "D2")
)
GAMMA[1,3] <- PAR[9]
GAMMA[2,4] <- PAR[10]
GAMMA[3,5] <- PAR[11]
GAMMA[4,6] <- PAR[12]
GAMMA[5,1] <- PAR[13]
GAMMA[5,2] <- PAR[14]
GAMMA[5,7] <- PAR[15]
GAMMA[6,1] <- PAR[16]
GAMMA[6,2] <- PAR[17]
GAMMA[6,8] <- PAR[18]
G_yrep <- rep(1, 4)
G_y <- matrix(0, nrow = 4, ncol = 6)
dimnames(G_y) <- list(
c("Isolated", "Troubled", "Self Harm", "Thoughts"),
c("Isolated", "Troubled", "Self Harm", "Thoughts", "Psych Distress", "Suicidality")
)
G_y[1,1] <- G_yrep[1]
G_y[2,2] <- G_yrep[2]
G_y[3,3] <- G_yrep[3]
G_y[4,4] <- G_yrep[4]
G_xrep <- rep(1, 2)
G_x <- matrix(0, nrow = 2, ncol = 8)
dimnames(G_x) <- list(
c("Trauma", "Sleep"),
c("Trauma", "Sleep", "E1", "E2", "E3", "E4", "D1", "D2")
)
G_x[1,1] <- G_xrep[1]
G_x[2,2] <- G_xrep[2]
I <- diag(1, nrow = 6, ncol = 6)
SH_yy <- G_y %*% solve(I - BETA) %*% GAMMA %*% PHI %*% t(GAMMA) %*% t(solve(I - BETA)) %*% t(G_y)
SH_yx <- G_y %*% solve(I - BETA) %*% GAMMA %*% PHI %*% t(G_x)
SH_xy <- t(SH_yx)
SH_xx <- G_x %*% PHI %*% t(G_x)
#print("GOT THIS FAR")
TOP <- cbind(SH_yy, SH_yx)
BOTTOM <- cbind(SH_xy, SH_xx)
SH <- rbind(TOP, BOTTOM)
# testStat <- log(det(SH)) - log(det(S)) + sum(diag(S %*% solve(SH))) - 6
return(SH)
}
(opt <- SEM2(bestfit$par))
## Isolated Troubled Self Harm Thoughts Trauma
## Isolated 0.091343651 0.032070024 0.003312001 0.005846384 0.1405990
## Troubled 0.032070024 0.084935535 0.004090458 0.007220524 0.1736455
## Self Harm 0.003312001 0.004090458 0.183135156 0.083468707 0.1631692
## Thoughts 0.005846384 0.007220524 0.083468707 0.225546731 0.2880282
## Trauma 0.140598980 0.173645506 0.163169197 0.288028196 7.2345641
## Sleep -0.007461637 -0.009215428 -0.004694664 -0.008287077 0.0000000
## Sleep
## Isolated -0.007461637
## Troubled -0.009215428
## Self Harm -0.004694664
## Thoughts -0.008287077
## Trauma 0.000000000
## Sleep 0.248587424
Compute the goodness-of-fit index (GFI) for the (optimized) structural equation model
(GFI <- sum(diag(t(opt) %*% solve(opt) %*% opt))/sum(diag(t(S) %*% solve(opt) %*% S)))
## [1] 0.8443864
Explain what the GFI above means in terms of whether or not the hypothesized model provides a good fit to the original covariance structures between the variables in the data
The GFI above is decent, but not significant (>0.95) in showing good fit.
Compute the Akaike Information Criterion, AIC, for the (optimized) structural equation model specified above
(AIC <- ts + 2*df)
## [1] 322.0976
Explain why the AIC does not provide a good indication of whether or not the hypothesized model provides a good fit to the original covariance structures between the variables in the data
The AIC does not provide a good indication of whether or not the hypothesized model provides a good fit because we don't have another model to compare it to.