STAT 360: Computational Statistics and Data Analysis

Load R Libraries, Import and Attach Relevant Data, and Specify Seed

library(rmarkdown); library(knitr); library(readxl); library(cats)
set.seed(37)

EXERCISE 01

Part (a)

toxic <- read.csv("C:/Users/Sarah Chock/OneDrive - University of St. Thomas/Senior Year/STAT 360 Comp Stat and Data Analysis/Structural Equation Models/ToxoInSouthernChile.csv")
library(corrplot)
## corrplot 0.92 loaded
dim(toxic)
## [1] 733  17
toxic <- toxic[,2:17]
R <- cor(toxic, use = "pairwise.complete.obs")
corrplot(R)

here_kitty()

## meow

Part (b)

EQN <- '
        level: 1
        SexBased =~ I_Sex + I_Occupation
        AgeBased =~ I_CleanBarns + I_WorksinHuerta + I_Age
        OD1 ~ SexBased + AgeBased + I_CleanDrain
        level: 2
        Meats =~ HH_BeefSource + HH_PorkSource + HH_PoultrySource + HH_SheepSource
        Fruit =~ HH_FruitSource + HH_FruitConsumption
        OD1 ~ Meats + Fruit + HH_BoilWater
        '
here_kitty()

## purrr

Part (c)

library(lavaan)
## Warning: package 'lavaan' was built under R version 4.0.5
## This is lavaan 0.6-11
## lavaan is FREE software! Please report any bugs.
M1 <- sem(EQN, toxic, cluster = "HH")
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
##     Level-1 variable "I_Sex" has no variance within some clusters. The
##     cluster ids with zero within variance are: 23 41 51 54 76 83 85 91
##     134 136 139 145 152 153 156 159 164 170 175 177 184 204 227 229
##     230 235 302 315 318 346 348 350 352 368
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
##     Level-1 variable "I_Occupation" has no variance within some
##     clusters. The cluster ids with zero within variance are: 2 3 4 8 9
##     11 12 14 19 23 24 25 29 33 35 41 43 46 47 49 51 54 61 65 68 72 76
##     77 80 82 83 85 88 90 91 95 134 136 139 140 145 152 153 156 159 170
##     173 180 184 187 188 189 192 198 202 204 214 215 218 222 223 229
##     235 239 275 286 290 296 298 302 310 311 315 328 330 337 339 340
##     346 348 350 352 354 355 361 364 367 368 371 376
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
##     Level-1 variable "I_CleanBarns" has no variance within some
##     clusters. The cluster ids with zero within variance are: 2 17 23
##     25 26 37 41 44 50 54 60 62 69 76 77 79 80 82 83 84 88 90 92 132
##     136 143 146 147 148 151 152 156 159 160 166 170 173 174 177 178
##     180 184 187 188 189 192 198 199 200 202 204 215 224 226 227 229
##     230 232 239 285 296 300 302 328 335 337 349 350 354 355 359 360
##     362 367 370 372 373 377 380
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
##     Level-1 variable "I_WorksinHuerta" has no variance within some
##     clusters. The cluster ids with zero within variance are: 2 3 6 8 9
##     14 17 18 19 23 25 29 31 34 37 41 42 43 44 46 47 50 54 60 61 62 67
##     69 72 74 79 80 82 84 86 88 90 91 93 95 132 136 139 143 147 152 153
##     156 157 162 164 166 170 172 174 175 180 181 183 184 185 186 188
##     189 192 198 199 200 205 210 211 220 226 229 231 235 275 285 286
##     290 293 296 298 300 302 310 311 313 315 316 318 326 328 330 335
##     337 339 340 346 348 349 350 352 354 355 359 360 362 364 367 370
##     371 372 373 376 377 379 380
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
##     Level-1 variable "I_Age" has no variance within some clusters. The
##     cluster ids with zero within variance are: 84 143 207 208 335 364
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING:
##     Level-1 variable "I_CleanDrain" has no variance within some
##     clusters. The cluster ids with zero within variance are: 1 2 3 4 6
##     7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 28 29 30
##     31 33 34 35 37 38 39 41 42 43 44 45 46 47 49 50 56 60 61 62 65 68
##     69 71 72 74 76 77 80 81 82 83 84 85 86 88 89 90 91 92 93 95 131
##     132 134 135 136 137 139 140 141 143 144 146 147 148 149 151 152
##     153 156 157 159 160 161 162 164 165 166 168 170 171 172 173 174
##     175 177 178 180 181 182 183 184 185 186 187 188 189 192 196 197
##     198 199 200 202 204 205 207 208 210 211 213 214 215 216 217 218
##     220 221 222 223 224 225 226 227 229 230 231 232 234 235 237 239
##     275 285 286 290 293 296 302 310 313 315 318 326 328 330 335 337
##     339 340 348 349 350 352 354 355 361 364 368 370 371 372 373 376
##     377 379
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated lv
## variances are negative
here_kitty()

## chirrr

Part (d)

parameterEstimates(M1)
##                    lhs op                 rhs block level    est    se      z
## 1             SexBased =~               I_Sex     1     1  1.000 0.000     NA
## 2             SexBased =~        I_Occupation     1     1  2.617 0.970  2.697
## 3             AgeBased =~        I_CleanBarns     1     1  1.000 0.000     NA
## 4             AgeBased =~     I_WorksinHuerta     1     1  1.198 0.210  5.711
## 5             AgeBased =~               I_Age     1     1  0.820 0.157  5.230
## 6                  OD1  ~            SexBased     1     1 -0.177 0.115 -1.542
## 7                  OD1  ~            AgeBased     1     1  0.738 0.185  3.989
## 8                  OD1  ~        I_CleanDrain     1     1  0.043 0.046  0.930
## 9                I_Sex ~~               I_Sex     1     1  0.819 0.080 10.180
## 10        I_Occupation ~~        I_Occupation     1     1 -0.244 0.454 -0.539
## 11        I_CleanBarns ~~        I_CleanBarns     1     1  0.793 0.061 13.007
## 12     I_WorksinHuerta ~~     I_WorksinHuerta     1     1  0.717 0.073  9.772
## 13               I_Age ~~               I_Age     1     1  0.860 0.058 14.948
## 14                 OD1 ~~                 OD1     1     1  0.946 0.077 12.304
## 15            SexBased ~~            SexBased     1     1  0.184 0.074  2.497
## 16            AgeBased ~~            AgeBased     1     1  0.230 0.055  4.160
## 17            SexBased ~~            AgeBased     1     1  0.074 0.035  2.144
## 18        I_CleanDrain ~~        I_CleanDrain     1     1  0.890 0.000     NA
## 19               I_Sex ~1                         1     1  0.015 0.040  0.390
## 20        I_Occupation ~1                         1     1  0.020 0.040  0.504
## 21        I_CleanBarns ~1                         1     1  0.046 0.040  1.158
## 22     I_WorksinHuerta ~1                         1     1  0.061 0.040  1.499
## 23               I_Age ~1                         1     1  0.039 0.040  0.969
## 24                 OD1 ~1                         1     1  0.000 0.000     NA
## 25        I_CleanDrain ~1                         1     1 -0.026 0.000     NA
## 26            SexBased ~1                         1     1  0.000 0.000     NA
## 27            AgeBased ~1                         1     1  0.000 0.000     NA
## 28               Meats =~       HH_BeefSource     2     2  1.000 0.000     NA
## 29               Meats =~       HH_PorkSource     2     2  1.160 0.094 12.369
## 30               Meats =~    HH_PoultrySource     2     2  0.930 0.085 10.904
## 31               Meats =~      HH_SheepSource     2     2  0.648 0.091  7.126
## 32               Fruit =~      HH_FruitSource     2     2  1.000 0.000     NA
## 33               Fruit =~ HH_FruitConsumption     2     2  0.216 0.216  0.999
## 34                 OD1  ~               Meats     2     2  0.028 0.206  0.138
## 35                 OD1  ~               Fruit     2     2  0.020 0.474  0.042
## 36                 OD1  ~        HH_BoilWater     2     2 -0.082 0.049 -1.652
## 37       HH_BeefSource ~~       HH_BeefSource     2     2  0.490 0.054  9.022
## 38       HH_PorkSource ~~       HH_PorkSource     2     2  0.216 0.050  4.344
## 39    HH_PoultrySource ~~    HH_PoultrySource     2     2  0.517 0.055  9.347
## 40      HH_SheepSource ~~      HH_SheepSource     2     2  0.911 0.081 11.177
## 41      HH_FruitSource ~~      HH_FruitSource     2     2  1.173 0.317  3.703
## 42 HH_FruitConsumption ~~ HH_FruitConsumption     2     2  0.977 0.085 11.543
## 43                 OD1 ~~                 OD1     2     2  0.187 0.060  3.124
## 44               Meats ~~               Meats     2     2  0.591 0.089  6.621
## 45               Fruit ~~               Fruit     2     2 -0.007 0.300 -0.023
## 46               Meats ~~               Fruit     2     2  0.243 0.059  4.150
## 47        HH_BoilWater ~~        HH_BoilWater     2     2  1.055 0.000     NA
## 48       HH_BeefSource ~1                         2     2 -0.089 0.063 -1.421
## 49       HH_PorkSource ~1                         2     2 -0.041 0.061 -0.675
## 50    HH_PoultrySource ~1                         2     2 -0.046 0.061 -0.751
## 51      HH_SheepSource ~1                         2     2  0.187 0.065  2.879
## 52      HH_FruitSource ~1                         2     2  0.050 0.065  0.766
## 53 HH_FruitConsumption ~1                         2     2  0.032 0.060  0.535
## 54                 OD1 ~1                         2     2  1.560 0.050 31.355
## 55        HH_BoilWater ~1                         2     2  0.047 0.000     NA
## 56               Meats ~1                         2     2  0.000 0.000     NA
## 57               Fruit ~1                         2     2  0.000 0.000     NA
##    pvalue ci.lower ci.upper
## 1      NA    1.000    1.000
## 2   0.007    0.716    4.519
## 3      NA    1.000    1.000
## 4   0.000    0.787    1.610
## 5   0.000    0.513    1.128
## 6   0.123   -0.401    0.048
## 7   0.000    0.375    1.100
## 8   0.352   -0.048    0.133
## 9   0.000    0.662    0.977
## 10  0.590   -1.133    0.645
## 11  0.000    0.673    0.912
## 12  0.000    0.573    0.861
## 13  0.000    0.747    0.973
## 14  0.000    0.795    1.096
## 15  0.013    0.040    0.328
## 16  0.000    0.122    0.338
## 17  0.032    0.006    0.142
## 18     NA    0.890    0.890
## 19  0.697   -0.062    0.093
## 20  0.614   -0.058    0.098
## 21  0.247   -0.032    0.125
## 22  0.134   -0.019    0.140
## 23  0.333   -0.039    0.117
## 24     NA    0.000    0.000
## 25     NA   -0.026   -0.026
## 26     NA    0.000    0.000
## 27     NA    0.000    0.000
## 28     NA    1.000    1.000
## 29  0.000    0.976    1.344
## 30  0.000    0.763    1.097
## 31  0.000    0.470    0.826
## 32     NA    1.000    1.000
## 33  0.318   -0.208    0.639
## 34  0.890   -0.376    0.433
## 35  0.967   -0.909    0.949
## 36  0.099   -0.178    0.015
## 37  0.000    0.383    0.596
## 38  0.000    0.119    0.313
## 39  0.000    0.408    0.625
## 40  0.000    0.751    1.071
## 41  0.000    0.552    1.794
## 42  0.000    0.811    1.143
## 43  0.002    0.070    0.304
## 44  0.000    0.416    0.766
## 45  0.982   -0.595    0.582
## 46  0.000    0.128    0.358
## 47     NA    1.055    1.055
## 48  0.155   -0.212    0.034
## 49  0.499   -0.160    0.078
## 50  0.453   -0.166    0.074
## 51  0.004    0.060    0.315
## 52  0.444   -0.078    0.178
## 53  0.593   -0.085    0.149
## 54  0.000    1.463    1.658
## 55     NA    0.047    0.047
## 56     NA    0.000    0.000
## 57     NA    0.000    0.000
here_kitty()

## meow

Part (e)

On the individual level, only AgeBased is statistically significant at an alpha level of .10.
On the household level, only HH_BoilWater is statistically significant at an alpha level of .10.
I don't think this really matches our corrplot. Like there is a small circle there for HH_BoilWater, and I guess the three variables that make up AgeBased also have circles. I change my mind, it does match up.
here_kitty()

## mrrrrowwww