STAT 360: Computational Statistics and Data Analysis

Load R Libraries, Import and Attach Relevant Data, and Specify Seed

library(rmarkdown); library(knitr); library(readxl)
set.seed(42069)

EXERCISE 01

sigma <- matrix(c(.165, .031, .055, .038, .031, .205, 0.089, 0.040, .055, 0.089, .165, .053, .038, 0.040, .053, .172), nrow = 4, ncol = 4)
rownames(sigma)<- c("Comfortable", "Benefits", "Anonymity", "Discussed")
colnames(sigma)<- c("Comfortable", "Benefits", "Anonymity", "Discussed")
sigma
##             Comfortable Benefits Anonymity Discussed
## Comfortable       0.165    0.031     0.055     0.038
## Benefits          0.031    0.205     0.089     0.040
## Anonymity         0.055    0.089     0.165     0.053
## Discussed         0.038    0.040     0.053     0.172
sigmahat <- matrix(c(.171, .093, .036, .000, .093, .171, 0.050, 0.000, .036, 0.050, .179, .033, .000, 0.000, .033, .226), nrow = 4, ncol = 4)
rownames(sigmahat)<- c("Comfortable", "Benefits", "Anonymity", "Discussed")
colnames(sigmahat)<- c("Comfortable", "Benefits", "Anonymity", "Discussed")
sigmahat
##             Comfortable Benefits Anonymity Discussed
## Comfortable       0.171    0.093     0.036     0.000
## Benefits          0.093    0.171     0.050     0.000
## Anonymity         0.036    0.050     0.179     0.033
## Discussed         0.000    0.000     0.033     0.226

Part (a)

Just eyeballing these two matrices, the reconstructed matrix doesn't look half bad: most entries agree with the observed matrix to within a few hundredths, with the largest gap at the Comfortable-Benefits covariance (0.031 observed versus 0.093 reconstructed). The mildly concerning part is that the Discussed-Comfortable and Discussed-Benefits covariances (0.038 and 0.040 observed) have been driven to exactly zero in the reconstruction, so those paths are not being captured at all.
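To make that comparison concrete, here is a quick sketch (my addition, using only the two matrices already defined above) that looks at the element-wise residuals:

# Element-wise residuals between the observed and reconstructed covariance
# matrices; large absolute values flag entries the model reproduces poorly.
resid <- sigma - sigmahat
round(resid, 3)
max(abs(resid))   # size of the single worst discrepancy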

Part (b)

Hey look! The tr() function from the psych package does work: it returns the same Q as the sum(diag()) version.
library(psych)
## Warning: package 'psych' was built under R version 4.0.5
Q <- log(det(sigmahat)) - log(det(sigma)) + sum(diag((sigma %*% solve(sigmahat)))) - 4
Q
## [1] 0.5544598
Q <- log(det(sigmahat)) - log(det(sigma)) + tr(sigma %*% solve(sigmahat)) - 4
Q
## [1] 0.5544598
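For reference, the quantity computed both ways above is the usual maximum-likelihood discrepancy function, where Sigma is the observed matrix (sigma in the code), Sigma-hat is the reconstructed matrix (sigmahat), and p = 4 is the number of observed variables:

$$Q = \log\lvert\hat{\Sigma}\rvert - \log\lvert\Sigma\rvert + \operatorname{tr}\!\left(\Sigma\,\hat{\Sigma}^{-1}\right) - p$$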

EXERCISE 02

Part (a)

sigma <- matrix(c(.07, .03, .03, .02, -.01, 0.00, .03, .07, .04, .03, .01, .00, .03, .04, .22, .12, .09, -.01, .02, .03, .12, 1.65, .96, .06, -.01, .01, .09, .96, 1.38, .02, .00, .00, -.01, .06, .02, .24), nrow = 6, ncol = 6)
rownames(sigma)<- c("Threats", "Assaults", "Fights", "Depression", "Anxiety", "Housing")
colnames(sigma)<- c("Threats", "Assaults", "Fights", "Depression", "Anxiety", "Housing")
sigma
##            Threats Assaults Fights Depression Anxiety Housing
## Threats       0.07     0.03   0.03       0.02   -0.01    0.00
## Assaults      0.03     0.07   0.04       0.03    0.01    0.00
## Fights        0.03     0.04   0.22       0.12    0.09   -0.01
## Depression    0.02     0.03   0.12       1.65    0.96    0.06
## Anxiety      -0.01     0.01   0.09       0.96    1.38    0.02
## Housing       0.00     0.00  -0.01       0.06    0.02    0.24
The selection matrices Gy and Gx (and the 6 x 6 identity) never change as the parameters vary, so they are built once here, outside the fit function.
Gy <- matrix(0, nrow = 5, ncol = 6)
rownames(Gy)<- c("Threats", "Assaults", "Fights", "Depression", "Anxiety")
colnames(Gy)<- c("Threats", "Assaults", "Fights", "Depression", "Anxiety", "Psychological Distress")
Gy[1,1] = 1
Gy[2,2] = 1
Gy[3,3] = 1
Gy[4,4] = 1
Gy[5,5] = 1

Gx <- matrix(0, nrow = 1, ncol = 8)
rownames(Gx)<- c("Housing")
colnames(Gx)<- c("Housing", "Violence", "E1", "E2", "E3", "E4", "E5", "D1")
Gx[1,1] = 1

identity <- diag(6)   # 6 x 6 identity matrix
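The sem() function below assembles the model-implied covariance matrix block by block; written out (my notation, matching the code, with B = beta, Gamma = gamma, Phi = phi), the blocks it computes are

$$
\hat{\Sigma} =
\begin{pmatrix} \hat{\Sigma}_{yy} & \hat{\Sigma}_{yx} \\ \hat{\Sigma}_{xy} & \hat{\Sigma}_{xx} \end{pmatrix},
\qquad
\begin{aligned}
\hat{\Sigma}_{yy} &= G_y (I - B)^{-1} \Gamma \Phi \Gamma^{\top} \{(I - B)^{-1}\}^{\top} G_y^{\top} \\
\hat{\Sigma}_{yx} &= \hat{\Sigma}_{xy}^{\top} = G_y (I - B)^{-1} \Gamma \Phi G_x^{\top} \\
\hat{\Sigma}_{xx} &= G_x \Phi G_x^{\top}
\end{aligned}
$$

and Q is then the same discrepancy function as in Exercise 01, with p = 6.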
sem <- function(coeffs = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))
      {
        # coeffs[1:5] are the free path coefficients; coeffs[6:13] fill the
        # diagonal of phi (the variances of Housing, Violence, E1-E5, and D1)
        beta <- matrix(0, nrow = 6, ncol = 6)
        rownames(beta)<- c("Threats", "Assaults", "Fights", "Depression", "Anxiety", "Psychological Distress")
        colnames(beta)<- c("Threats", "Assaults", "Fights", "Depression", "Anxiety", "Psychological Distress")
        beta["Depression","Psychological Distress"] = 1
        beta["Anxiety","Psychological Distress"] = coeffs[1]
      
        gamma <- matrix(0, nrow = 6, ncol = 8)
        colnames(gamma)<- c("Housing", "Violence", "E1", "E2", "E3", "E4", "E5", "D1")
        rownames(gamma)<- c("Threats", "Assaults", "Fights", "Depression", "Anxiety", "Psychological Distress")
        gamma[6,1] = coeffs[2]
        gamma[6,2] = coeffs[3]
        gamma[6,8] = 1
        gamma[5,7] = 1
        gamma[4,6] = 1
        gamma[3,2] = coeffs[4]
        gamma[3,5] = 1
        gamma[2,2] = coeffs[5]
        gamma[2,4] = 1
        gamma[1,2] = 1
        gamma[1,3] = 1
        
        phi <- diag(rep(.5,8))   # the .5 values are placeholders, overwritten below by coeffs[6:13]
        rownames(phi)<- c("Housing", "Violence", "E1", "E2", "E3", "E4", "E5", "D1")
        colnames(phi)<- c("Housing", "Violence", "E1", "E2", "E3", "E4", "E5", "D1")
        phi[1,1] = coeffs[6]
        phi[2,2] = coeffs[7]
        phi[3,3] = coeffs[8]
        phi[4,4] = coeffs[9]
        phi[5,5] = coeffs[10]
        phi[6,6] = coeffs[11]
        phi[7,7] = coeffs[12]
        phi[8,8] = coeffs[13]
        
        Sigmayy <- Gy %*% solve(identity - beta) %*% gamma %*% phi %*% t(gamma) %*% t(solve(identity - beta)) %*% t(Gy)
        
        Sigmayx <- Gy %*% solve(identity - beta) %*% gamma %*% phi %*% t(Gx)
        
        Sigmaxy <- t(Sigmayx)
        
        Sigmaxx <- Gx %*% phi %*% t(Gx)
        
        yyandyx <- cbind(Sigmayy, Sigmayx)
        xyandxx <- cbind(Sigmaxy, Sigmaxx)
        sigmahat <- rbind(yyandyx, xyandxx)
        
        Q <- log(det(sigmahat)) - log(det(sigma)) + tr(sigma %*% solve(sigmahat)) - 6
        return(Q)
}
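
As a quick sanity check (my addition; this call and its result are not part of the original output), the fit function can be evaluated once at the starting values used in part (b) to confirm it returns a finite Q before handing it to the optimizer:

# Evaluate the discrepancy function at the Nelder-Mead starting values
sem(coeffs = c(.3, .7, .7, .7, .7, .5, .5, .5, .5, .5, .5, .5, .5))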

Part (b)

optim(par = c(.3, .7, .7, .7, .7, .5, .5, .5, .5, .5, .5, .5, .5), fn = sem, method = "Nelder-Mead")
## Warning in log(det(sigmahat)): NaNs produced

## Warning in log(det(sigmahat)): NaNs produced

## Warning in log(det(sigmahat)): NaNs produced

## Warning in log(det(sigmahat)): NaNs produced
## $par
##  [1] 0.50912159 0.26476099 0.83472696 0.78987211 0.60700390 0.33766515
##  [7] 0.04903258 0.01716037 0.04892867 0.47510576 0.57167685 0.95970398
## [13] 1.03228490
## 
## $value
## [1] 0.5772616
## 
## $counts
## function gradient 
##      502       NA 
## 
## $convergence
## [1] 1
## 
## $message
## NULL

Part (c)

I think this set of parameters is supposed to be the minimizer of Q, but the output gives a concrete reason to be skeptical: the convergence code of 1 means Nelder-Mead stopped because it hit its iteration limit, not because it actually converged. (The NaN warnings are another hint that the optimizer wandered into parameter values where the implied covariance matrix has a non-positive determinant.) If the optimizer had converged, these values would at least be a local minimum of Q; as it stands, there is very likely another set of parameter values that gives a lower Q.
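
One way to probe this (a sketch, not run as part of the original write-up) is to raise the iteration limit and then restart Nelder-Mead from the parameters returned above; if the value drops further, the first run clearly had not converged:

# Larger iteration budget, then a warm restart from the first solution
fit1 <- optim(par = c(.3, .7, .7, .7, .7, .5, .5, .5, .5, .5, .5, .5, .5),
              fn = sem, method = "Nelder-Mead", control = list(maxit = 5000))
fit2 <- optim(par = fit1$par, fn = sem, method = "Nelder-Mead",
              control = list(maxit = 5000))
c(fit1$value, fit2$value)              # did Q go down?
c(fit1$convergence, fit2$convergence)  # 0 means the run actually converged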

Part (d)

library(cats)
set.seed(420)
here_kitty()

## chirrr