STAT 360: Computational Methods in STAT

Lab 11: Assessing Fit of Structural Equation Models

Load R Libraries, Import and Attach Relevant Data, and Specify Seed

library(rmarkdown); library(knitr)

library(dplyr)
library(psych); library(corrplot); library(GPArotation)
library(lavaan); #library(semPlot); library(moments)

HEALTHDATA <- read.csv("https://www.dropbox.com/s/sb8s1i4h1z7g7fh/?dl=1")
attach(HEALTHDATA)

set.seed(1337)

Use all available variables to investigate how the characteristics of cavity trees and the surrounding vegetation influence the use of these trees by fishers

Exercise 01:

Use cbind() along with cov() to compute the (missing value) observed sample covariance matrix, S, for the interaction between ISOLATED, TROUBLED, SELFHARM, THOUGHTS, TRAUMA, and SLEEP

S <<- cov(cbind(ISOLATED, TROUBLED, SELFHARM, THOUGHTS, TRAUMA, SLEEP))
S

##             ISOLATED    TROUBLED    SELFHARM    THOUGHTS     TRAUMA       SLEEP
## ISOLATED  0.09203732  0.03292405  0.01803685  0.02675912  0.1619459 -0.01645363
## TROUBLED  0.03292405  0.08603026  0.01548007  0.01965926  0.1949260 -0.02363915
## SELFHARM  0.01803685  0.01548007  0.18335916  0.08414752  0.1707942 -0.01764608
## THOUGHTS  0.02675912  0.01965926  0.08414752  0.22723028  0.3108219 -0.03020436
## TRAUMA    0.16194593  0.19492597  0.17079416  0.31082189  7.3207109 -0.55705698
## SLEEP    -0.01645363 -0.02363915 -0.01764608 -0.03020436 -0.5570570  0.24858473

Exercise 02:

Use function() to create a function that takes (as arguments) a single vector of all 18 path parameters and returns (as a scalar) the corresponding value of the difference metric, Q, under the maximum likelihood approach (hint: do NOT include the definition of S from above inside this function)

SEM1 = function(PAR){
  PHI = matrix(0, nrow = 8, ncol = 8)
  dimnames(PHI) = list(
    c("Trauma", "Sleep", "E1", "E2", "E3", "E4", "D1", "D2"),
    c("Trauma", "Sleep", "E1", "E2", "E3", "E4", "D1", "D2")
  )
  PHI[1,1] = PAR[1]
  PHI[2,2] = PAR[2]
  PHI[3,3] = PAR[3]
  PHI[4,4] = PAR[4]
  PHI[5,5] = PAR[5]
  PHI[6,6] = PAR[6]
  PHI[7,7] = 1
  PHI[8,8] = 1

  BETA = matrix(0, nrow = 6, ncol = 6)
  dimnames(BETA) = list(
    c("Isolated", "Troubled", "Self Harm", "Thoughts", "Psych Distress", "Suicidality"),
    c("Isolated", "Troubled", "Self Harm", "Thoughts", "Psych Distress", "Suicidality")
  )
  BETA[1,5] = 1
  BETA[2,5] = PAR[7]
  BETA[3,6] = 1
  BETA[4,6] = PAR[8]


  GAMMA = matrix(0, nrow = 6, ncol = 8)
  dimnames(GAMMA) = list(
    c("Isolated", "Troubled", "Self Harm", "Thoughts", "Psych Distress", "Suicidality"),
    c("Trauma", "Sleep", "E1", "E2", "E3", "E4", "D1", "D2")
  )
  GAMMA[1,3] = PAR[9]
  GAMMA[2,4] = PAR[10]
  GAMMA[3,5] = PAR[11]
  GAMMA[4,6] = PAR[12]
  GAMMA[5,1] = PAR[13]
  GAMMA[5,2] = PAR[14]
  GAMMA[5,7] = PAR[15]
  GAMMA[6,1] = PAR[16]
  GAMMA[6,2] = PAR[17]
  GAMMA[6,8] = PAR[18]
  
  G_yrep = rep(1, 4)
  G_y = matrix(0, nrow = 4, ncol = 6)
  dimnames(G_y) = list(
    c("Isolated", "Troubled", "Self Harm", "Thoughts"),
    c("Isolated", "Troubled", "Self Harm", "Thoughts", "Psych Distress", "Suicidality")
  )
  G_y[1,1] = G_yrep[1]
  G_y[2,2] = G_yrep[2]
  G_y[3,3] = G_yrep[3]
  G_y[4,4] = G_yrep[4]

  G_xrep = rep(1, 2)
  G_x = matrix(0, nrow = 2, ncol = 8)
  dimnames(G_x) = list(
    c("Trauma", "Sleep"),
    c("Trauma", "Sleep", "E1", "E2", "E3", "E4", "D1", "D2")
  )
  G_x[1,1] = G_xrep[1]
  G_x[2,2] = G_xrep[2]
  
  I = diag(1, nrow = 6, ncol = 6)
  
  
  SH_yy = G_y %*% solve(I - BETA) %*% GAMMA %*% PHI %*% t(GAMMA) %*% t(solve(I - BETA)) %*% t(G_y)
  SH_yx = G_y %*% solve(I - BETA) %*% GAMMA %*% PHI %*% t(G_x)
  SH_xy = t(SH_yx)
  SH_xx = G_x %*% PHI %*% t(G_x)
  
  #print("GOT THIS FAR")
  TOP = cbind(SH_yy, SH_yx)
  BOTTOM = cbind(SH_xy, SH_xx)
  SH = rbind(TOP, BOTTOM)

  if (kappa(SH) > 1e12) {
    return(20)
  }
  
  DIFF = log(det(SH)) - log(det(S)) + sum(diag(S %*% solve(SH))) - 6
  
  return(DIFF)
}

Exercise 03:

Use optim() with the Conjugate Cradient (CG) method and a maximum of 10,000 iterations to find the optimum parameterization for the structural equation model above

optim(par = rep(0.5, 18),
        fn = SEM1,
        method = "CG",
        control = list(maxit = 1e4))

## Warning in log(det(SH)): NaNs produced

## Warning in log(det(SH)): NaNs produced

## Warning in log(det(SH)): NaNs produced

## $par
##  [1]  7.31991611  0.24858741  0.40266055  0.38013352  0.47811021  0.44990725
##  [7]  1.23517434  1.81004220  0.40295921  0.34544827  0.53505168  0.40803777
## [13]  0.01944831 -0.03003108  0.15169694  0.02204822 -0.01827508  0.20591676
## 
## $value
## [1] 0.2155537
## 
## $counts
## function gradient 
##     5464     2157 
## 
## $convergence
## [1] 0
## 
## $message
## NULL

Exercise 04:

Explain what the regression coefficients above mean in terms of whether TRAUMA or SLEEP has a stronger effect on SUICIDALITY

TRAUMA has a stronger effect on SUICIDALITY because the regreesion coefficient's absolute value is greater than that of SLEEP.

Exercise 05:

Explain what the regression coefficients above mean in terms of whether TRAUMA has a positive or negative effect on PSYCHOLOGICAL DISTRESS

TRAUMA has a weak positive effect on PSYCHOLOGICAL DISTRESS.

Exercise 06:

Explain conceptually why SLEEP has a negative effect on PSYCHOLOGICAL DISTRESS

Getting less sleep is unhealthy and is proven to increase stress levels.

Exercise 07:

Use replicate() and slice_sample() to take a random sample, with replacement, of the sampled college students and find the optimum parameterization for the structural equation model a total of two times

BOOT = replicate(n = 2,
{
  TEMP = slice_sample(HEALTHDATA, n = nrow(HEALTHDATA),
                      replace = TRUE)
  S <<- cov(cbind(TEMP$ISOLATED, TEMP$TROUBLED, TEMP$SELFHARM, TEMP$THOUGHTS, TEMP$TRAUMA, TEMP$SLEEP))
  FIT = optim(par = rep(0.5, 18),
              fn = SEM1,
              method = "CG",
              control = list(maxit = 1e3))
  FIT$par
}
)
BOOT

##              [,1]        [,2]
##  [1,]  7.40489638  7.50501439
##  [2,]  0.24894780  0.24881806
##  [3,]  0.42028932  0.40637416
##  [4,]  0.37432188  0.37006495
##  [5,]  0.49736179  0.49102859
##  [6,]  0.44710868  0.41995949
##  [7,]  1.37141296  1.08972965
##  [8,]  1.80357987  2.00380855
##  [9,]  0.40073687  0.38175694
## [10,]  0.36759836  0.38307685
## [11,]  0.52992016  0.53097657
## [12,]  0.39876682  0.33945095
## [13,]  0.01633290  0.01684466
## [14,] -0.02750766 -0.03464115
## [15,]  0.13068919  0.15179351
## [16,]  0.01974731  0.01473724
## [17,] -0.01112961 -0.04446682
## [18,]  0.21378359  0.20718283

Exercise 08:

set.seed(69)
BOOT = replicate(n = 20,
{
  TEMP = slice_sample(HEALTHDATA, n = nrow(HEALTHDATA),
                      replace = TRUE)
  S <<- cov(cbind(TEMP$ISOLATED, TEMP$TROUBLED, TEMP$SELFHARM, TEMP$THOUGHTS, TEMP$TRAUMA, TEMP$SLEEP))
  FIT = optim(par = rep(0.5, 18),
              fn = SEM1,
              method = "CG",
              control = list(maxit = 1e3))
  FIT$par
}
)

Exercise 09:

Use quantile() to find a 95% confidence interval for the effect of SLEEP on PSYCHOLOGICAL DISTRESS

quantile(BOOT[14,-4], probs = c(0.025, 0.975))

##        2.5%       97.5% 
## -0.04625486 -0.01200469

Exercise 10:

Explain if SLEEP has a significant effect on PSYCHOLOGICAL DISTRESS

SLEEP has a significant effect on PSYCHOLOGICAL DISTRESS because its confidence interval does not include 0.