`— title: “Arbuckle SEM Revised Final Project” output: html_notebook —
library(psfmi)
data(hipstudy)
# select variables
myvars_revised<- c("Age", "Comorbidity", "ASA", "Hemoglobine", "Leucocytes", "Thrombocytes", "CRP", "Creatinine", "Urea", "Albumine", "Delay")
datasubset_revised<- hipstudy[myvars_revised]
Let’s install/load a few more helpful packages.
#install.packages("MVN")
library(psych)
library(MVN)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
library(nortest)
Before running EFA, it’s recommended to check that several assumptions/conditions are met.
#Checking for missing data
colSums(is.na(datasubset_revised))
## Age Comorbidity ASA Hemoglobine Leucocytes Thrombocytes
## 0 0 0 0 0 0
## CRP Creatinine Urea Albumine Delay
## 0 0 0 0 0
#It looks like there is no missing data.
#Time to assess sample size:
n <- nrow(datasubset_revised)
p <- ncol(datasubset_revised)
cat("Sample size:", n, "\nVariables:", p, "\nRatio (n/p):", n/p, "\n")
## Sample size: 426
## Variables: 11
## Ratio (n/p): 38.72727
#The ratio aimed for was 5-10 at least, ideally. 38.727 exceeds this ratio, which is desired.
#Now, I will check for multivariate normality.
mvn(datasubset_revised, mvnTest = "mardia")
## $multivariateNormality
## Test Statistic p value Result
## 1 Mardia Skewness 6133.36050708347 0 NO
## 2 Mardia Kurtosis 67.9676630136493 0 NO
## 3 MVN <NA> <NA> NO
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Anderson-Darling Age 7.6348 <0.001 NO
## 2 Anderson-Darling Comorbidity 21.0843 <0.001 NO
## 3 Anderson-Darling ASA 43.2942 <0.001 NO
## 4 Anderson-Darling Hemoglobine 2.5352 <0.001 NO
## 5 Anderson-Darling Leucocytes 2.9584 <0.001 NO
## 6 Anderson-Darling Thrombocytes 7.8847 <0.001 NO
## 7 Anderson-Darling CRP 63.9900 <0.001 NO
## 8 Anderson-Darling Creatinine 12.0004 <0.001 NO
## 9 Anderson-Darling Urea 16.8956 <0.001 NO
## 10 Anderson-Darling Albumine 2.6460 <0.001 NO
## 11 Anderson-Darling Delay 92.1011 <0.001 NO
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th
## Age 426 79.701878 11.0045059 81.00 29.00 99.00 75.000 88.00
## Comorbidity 426 1.185446 1.0675423 1.00 0.00 4.00 0.000 2.00
## ASA 426 2.061033 0.6728572 2.00 1.00 4.00 2.000 2.00
## Hemoglobine 426 7.864554 0.9652245 7.90 3.50 10.50 7.300 8.40
## Leucocytes 426 10.888967 3.3007800 10.45 3.10 22.10 8.400 12.90
## Thrombocytes 426 235.394366 67.5793172 221.00 91.00 495.00 189.000 271.00
## CRP 426 20.880282 34.9368134 6.00 1.00 186.00 3.000 18.00
## Creatinine 426 89.380282 29.9264351 85.00 36.00 275.00 71.000 99.00
## Urea 426 7.463850 3.5140201 6.60 1.20 29.60 5.225 8.60
## Albumine 426 36.943662 4.0926470 37.00 21.00 53.00 34.250 40.00
## Delay 426 1.682676 4.0820019 0.85 0.04 48.66 0.590 1.21
## Skew Kurtosis
## Age -1.2903000 2.4964032
## Comorbidity 0.7039065 -0.1444506
## ASA 0.4367115 0.5257324
## Hemoglobine -0.6462986 1.4666297
## Leucocytes 0.6286738 0.1761800
## Thrombocytes 1.1158181 1.4666517
## CRP 2.6944097 7.2831436
## Creatinine 2.0680682 7.4887391
## Urea 2.1934118 7.6158605
## Albumine -0.3610894 1.5714318
## Delay 7.7333950 72.5910706
#Hmm, it doesn’t look like multivariate normality was met.
#Checking for linearity
pairs(datasubset_revised, main = "Scatterplot Matrix")
#Now, let’s check the correlation matrix
cor_matrix_revised<- cor(datasubset_revised, use = "pairwise.complete.obs")
print(round(cor_matrix_revised, 2))
## Age Comorbidity ASA Hemoglobine Leucocytes Thrombocytes CRP
## Age 1.00 0.25 0.42 -0.22 0.03 -0.02 0.13
## Comorbidity 0.25 1.00 0.66 0.02 0.08 -0.02 0.03
## ASA 0.42 0.66 1.00 -0.13 0.01 -0.06 0.12
## Hemoglobine -0.22 0.02 -0.13 1.00 0.14 -0.16 -0.10
## Leucocytes 0.03 0.08 0.01 0.14 1.00 0.23 -0.05
## Thrombocytes -0.02 -0.02 -0.06 -0.16 0.23 1.00 0.03
## CRP 0.13 0.03 0.12 -0.10 -0.05 0.03 1.00
## Creatinine 0.25 0.20 0.16 -0.07 -0.05 -0.17 0.12
## Urea 0.32 0.21 0.26 -0.21 -0.04 -0.12 0.17
## Albumine -0.28 -0.13 -0.23 0.26 0.06 -0.14 -0.34
## Delay 0.09 0.13 0.31 -0.02 0.03 0.03 0.15
## Creatinine Urea Albumine Delay
## Age 0.25 0.32 -0.28 0.09
## Comorbidity 0.20 0.21 -0.13 0.13
## ASA 0.16 0.26 -0.23 0.31
## Hemoglobine -0.07 -0.21 0.26 -0.02
## Leucocytes -0.05 -0.04 0.06 0.03
## Thrombocytes -0.17 -0.12 -0.14 0.03
## CRP 0.12 0.17 -0.34 0.15
## Creatinine 1.00 0.73 -0.13 0.07
## Urea 0.73 1.00 -0.17 0.05
## Albumine -0.13 -0.17 1.00 -0.04
## Delay 0.07 0.05 -0.04 1.00
#It looks like we have a mixture of moderate correlations between observed variables (.3-.8)
#Bartlett’s Test of Sphericity. This checks that the correlation matrix is significantly different from the identity matrix.
cortest.bartlett(cor_matrix_revised, n = nrow(datasubset_revised))
## $chisq
## [1] 1021.555
##
## $p.value
## [1] 4.406373e-178
##
## $df
## [1] 55
#P<.05, just what we want. This indicates sufficient correlation.
#Kaiser–Meyer–Olkin (KMO) test to see if the proportion of variance in my observed variables might be caused by underlying factors. I want values close to 1. This is a test of sampling adequacy.
KMO(datasubset_revised)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = datasubset_revised)
## Overall MSA = 0.6
## MSA for each item =
## Age Comorbidity ASA Hemoglobine Leucocytes Thrombocytes
## 0.79 0.58 0.58 0.58 0.43 0.48
## CRP Creatinine Urea Albumine Delay
## 0.64 0.56 0.60 0.69 0.51
#For the most part, I want values >.60. Most of these values are close to or exceed this. #Only two of my values are under .5 and both are very close, so I won’t remove them. #This test suggests that factor analysis may be useful for this data.
#Now, let’s check for multicollinearity.
# Use VIFs as a proxy for multicollinearity
# This requires a placeholder outcome just to run VIF
fake_outcome <- rnorm(nrow(datasubset_revised)) # Dummy DV
vif_model <- lm(fake_outcome ~ ., data = datasubset_revised)
vif(vif_model)
## Age Comorbidity ASA Hemoglobine Leucocytes Thrombocytes
## 1.367090 1.891557 2.331604 1.229845 1.117378 1.178738
## CRP Creatinine Urea Albumine Delay
## 1.182125 2.277489 2.431080 1.313917 1.151702
#Fortunately, none of my VIFS >5. Multicollinearity does not seem to be a problem.
#Checking for outliers visually using Box Plot:
While there are some more extreme values, they are generally grouped together, not isolated or singular. Therefore, I will not remove them.
boxplot(datasubset_revised)
#Now, I am going to split my data into two parts. Part 1 will undergo EFA. Part 2 will be undergo CFA. This will help me avoid conflating my model fit.
set.seed(123)
# Calculate number of rows
n <- nrow(hipstudy)
# Create a random sample of row indices for the EFA subset
efa_indices <- sample(1:n, size = floor(n / 2)) # Use floor to ensure integer
# Split the data
efa_full <- hipstudy[efa_indices, ] # First half for EFA
cfa_full <- hipstudy[-efa_indices, ] # Remaining for CFA
# Define your EFA variable names
efa_vars <- c("Age", "Comorbidity", "ASA", "Hemoglobine", "Leucocytes", "Thrombocytes", "CRP", "Creatinine", "Urea", "Albumine", "Delay")
# Extract just the variables of interest for EFA
efa_data <- efa_full[, efa_vars]
#Time for my EFA! #I need to figure out how many components best fit the data. #I will do this using eigenvalues #But first, let’s do a correlation matrix.
cordata_revised=cor(efa_data)
cordata_revised
## Age Comorbidity ASA Hemoglobine Leucocytes
## Age 1.00000000 0.22316090 0.46295154 -0.22301628 -0.01445548
## Comorbidity 0.22316090 1.00000000 0.65662374 0.05004786 0.07564360
## ASA 0.46295154 0.65662374 1.00000000 -0.13337100 -0.03041199
## Hemoglobine -0.22301628 0.05004786 -0.13337100 1.00000000 0.19810061
## Leucocytes -0.01445548 0.07564360 -0.03041199 0.19810061 1.00000000
## Thrombocytes 0.05611413 0.04265665 -0.09034920 -0.09587613 0.26400629
## CRP 0.16617834 0.03016959 0.06319467 -0.06881975 -0.06398498
## Creatinine 0.25989392 0.21675602 0.22743288 -0.02188167 -0.13164368
## Urea 0.35252980 0.31870910 0.38936152 -0.15837217 -0.05947338
## Albumine -0.32579487 -0.11354707 -0.20772364 0.27839220 0.04857372
## Delay 0.11486655 0.06816184 0.25895950 -0.03981696 -0.01223695
## Thrombocytes CRP Creatinine Urea Albumine
## Age 0.05611413 0.166178341 0.25989392 0.35252980 -0.32579487
## Comorbidity 0.04265665 0.030169590 0.21675602 0.31870910 -0.11354707
## ASA -0.09034920 0.063194674 0.22743288 0.38936152 -0.20772364
## Hemoglobine -0.09587613 -0.068819746 -0.02188167 -0.15837217 0.27839220
## Leucocytes 0.26400629 -0.063984980 -0.13164368 -0.05947338 0.04857372
## Thrombocytes 1.00000000 0.076120256 -0.12427223 -0.08715152 -0.09588076
## CRP 0.07612026 1.000000000 0.06211138 0.11224029 -0.30130571
## Creatinine -0.12427223 0.062111379 1.00000000 0.70411674 -0.08196506
## Urea -0.08715152 0.112240294 0.70411674 1.00000000 -0.16018943
## Albumine -0.09588076 -0.301305706 -0.08196506 -0.16018943 1.00000000
## Delay -0.08730885 -0.004278872 0.02976056 0.05808406 0.02218599
## Delay
## Age 0.114866550
## Comorbidity 0.068161840
## ASA 0.258959500
## Hemoglobine -0.039816960
## Leucocytes -0.012236950
## Thrombocytes -0.087308849
## CRP -0.004278872
## Creatinine 0.029760565
## Urea 0.058084062
## Albumine 0.022185990
## Delay 1.000000000
#I’ll also look at data descriptives:
describe(efa_data)
## vars n mean sd median trimmed mad min max range
## Age 1 213 79.02 10.57 80.00 80.18 8.90 39.00 97.00 58.00
## Comorbidity 2 213 1.10 1.08 1.00 0.95 1.48 0.00 4.00 4.00
## ASA 3 213 2.02 0.67 2.00 2.00 0.00 1.00 4.00 3.00
## Hemoglobine 4 213 7.96 0.87 8.00 7.97 0.74 5.10 10.50 5.40
## Leucocytes 5 213 10.61 3.33 9.90 10.35 3.26 3.10 22.10 19.00
## Thrombocytes 6 213 236.38 68.35 220.00 228.35 53.37 91.00 481.00 390.00
## CRP 7 213 20.72 34.74 6.00 12.13 5.93 1.00 181.00 180.00
## Creatinine 8 213 87.54 27.41 84.00 84.60 20.76 36.00 253.00 217.00
## Urea 9 213 7.17 3.15 6.50 6.74 2.22 2.20 25.70 23.50
## Albumine 10 213 37.04 4.11 37.00 37.05 4.45 22.00 48.00 26.00
## Delay 11 213 1.51 3.83 0.83 0.86 0.40 0.04 48.66 48.62
## skew kurtosis se
## Age -1.11 1.59 0.72
## Comorbidity 0.84 0.08 0.07
## ASA 0.44 0.56 0.05
## Hemoglobine -0.22 0.75 0.06
## Leucocytes 0.70 0.30 0.23
## Thrombocytes 1.15 1.41 4.68
## CRP 2.71 7.39 2.38
## Creatinine 2.22 9.65 1.88
## Urea 2.02 6.60 0.22
## Albumine -0.26 0.87 0.28
## Delay 9.43 107.21 0.26
#This will let me check sample size and number of observed items
dim(efa_data)
## [1] 213 11
#Most of the participants had normal Creatine, low hemoglobine, normal Leucocyte levels (though on the higher range of normal), and normal Thrombocyte levels, low Urea levels, and normal Albumine levels. #Ok, now let’s see how many factors fit our data.
eigen(cordata_revised)
## eigen() decomposition
## $values
## [1] 2.7718723 1.4830265 1.3778339 1.1514246 0.9441017 0.8845681 0.7081779
## [8] 0.6068945 0.5650293 0.2700076 0.2370637
##
## $vectors
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] -0.40376346 0.19833503 -0.06270095 0.07439643 0.098088625 -0.09245348
## [2,] -0.36365124 -0.19978668 -0.41001534 0.05909386 -0.134718949 0.42477764
## [3,] -0.45977247 -0.12746320 -0.24909389 0.31074436 -0.053573436 0.23764001
## [4,] 0.17705474 -0.41913690 -0.27208436 -0.22928149 -0.556975051 -0.08542448
## [5,] 0.07773252 0.02232018 -0.63508876 -0.24393983 0.073203276 -0.30949276
## [6,] 0.03800457 0.41926453 -0.43666239 -0.24774916 0.323494804 -0.13036012
## [7,] -0.15456979 0.40524133 0.09112876 -0.12190958 -0.698725554 -0.24217574
## [8,] -0.37319022 -0.26889792 0.24486854 -0.45096686 0.099930916 -0.23786393
## [9,] -0.45317870 -0.17833540 0.14637268 -0.35687897 0.149164366 -0.17966148
## [10,] 0.26221321 -0.50972913 -0.04937923 -0.01962885 0.169213406 -0.10525677
## [11,] -0.13121577 -0.15617627 -0.07575120 0.61235105 0.006612604 -0.68984135
## [,7] [,8] [,9] [,10] [,11]
## [1,] 0.42350303 0.72249118 -0.03790267 -0.15456964 0.2081497458
## [2,] -0.28744863 -0.16004823 0.00673229 -0.38740571 0.4464106836
## [3,] -0.03009517 0.03456686 -0.11448439 0.34935247 -0.6458755970
## [4,] 0.13380808 0.26781255 0.46759739 0.20144946 0.0002473773
## [5,] 0.42536244 -0.30994680 -0.37297025 -0.09081111 -0.0715325763
## [6,] -0.52465533 0.21003417 0.32756381 0.11535686 -0.0953610971
## [7,] -0.30090548 0.04371443 -0.38304021 -0.04033514 -0.0324496753
## [8,] -0.06892136 -0.05732254 0.17128557 -0.51369919 -0.3965654962
## [9,] -0.04932511 -0.15632134 -0.09227769 0.60928830 0.3910153419
## [10,] -0.37680720 0.43636499 -0.54395248 -0.02404716 -0.0049046346
## [11,] -0.15264612 -0.13872154 0.19975285 -0.06721507 0.1216180961
#I will use both Kaiser’s Rule and a Scree Plot to determine how many components #to keep. Kaiser’s Rule suggests retaining factors with eignevalues greater than or equal to 1. Factors 1-4 meet this rule. I will, though, also use a scree plot.
eig_vals <- eigen(cordata_revised)$values
plot(eig_vals, type = "b",
xlab = "Component Number",
ylab = "Eigenvalue",
main = "Scree Plot of Eigenvalues")
abline(h = 1, col = "red", lty = 2) # Optional: Kaiser criterion line
#Factors 1-4 fall above the criterion line. We will retain 4
factors.
#Now, let’s conduct EFA using PFA.
res.paf_revised=fa(cordata_revised, nfactors=4, fm="pa", max.iter = 100)
## maximum iteration exceeded
## Loading required namespace: GPArotation
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
#I am going to try increasing the iterations.
res.paf_revised_1=fa(cordata_revised, nfactors=4, fm="pa", max.iter = 1000)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
#That didn’t help. Since the data isn’t normal, I’m going to try to use Minimum Residual (Minres). This is a robust method good for non-normal data.
res.paf_revised2<- fa(cordata_revised, nfactors = 4, fm = "minres")
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
#That didn’t work either. Let’s take a look and see which variables might be causing us issues. In particular, variables with low communality (<.20) or high factor loadings (>.90). These often lead to Heywood cases.
print(res.paf_revised2$communality, digits = 3)
## Age Comorbidity ASA Hemoglobine Leucocytes Thrombocytes
## 0.3686 0.4461 1.0243 0.2289 0.4334 0.2498
## CRP Creatinine Urea Albumine Delay
## 0.1329 0.7958 0.6851 0.4365 0.0659
print(res.paf_revised2$loadings)
##
## Loadings:
## MR1 MR2 MR3 MR4
## Age 0.266 0.172 -0.392
## Comorbidity 0.603 0.121 0.196
## ASA 1.014
## Hemoglobine 0.440 0.189
## Leucocytes 0.647
## Thrombocytes -0.127 -0.280 0.416
## CRP -0.368
## Creatinine 0.916
## Urea 0.109 0.758 -0.101
## Albumine 0.657
## Delay 0.265
##
## MR1 MR2 MR3 MR4
## SS loadings 1.569 1.472 1.018 0.678
## Proportion Var 0.143 0.134 0.093 0.062
## Cumulative Var 0.143 0.276 0.369 0.431
#Hmm. I see a few problemmatic variables but I’m hesitant to remove them just yet. #I will run parallel analysis to make sure I’m not overfactoring.
fa.parallel(cordata_revised, fm = "minres", fa = "fa")
## Warning in fa.parallel(cordata_revised, fm = "minres", fa = "fa"): It seems as
## if you are using a correlation matrix, but have not specified the number of
## cases. The number of subjects is arbitrarily set to be 100
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Parallel analysis suggests that the number of factors = 4 and the number of components = NA
#Hmm. Not super helpful. Let’s try removing ASA, CRP, and Delay, our problemmatic variables and run a 3 factor model.
vars_to_keep <- c("Age", "Comorbidity", "Hemoglobine", "Leucocytes",
"Thrombocytes", "Creatinine", "Urea", "Albumine")
cordata_reduced <- cordata_revised[, vars_to_keep]
res.paf_clean <- fa(cordata_reduced, nfactors = 3, fm = "minres", max.iter = 1000)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
#Still some issues. Let’s take a closer look. In particular, variables with low communality (<.20) or high factor loadings (>.90) often lead to Heywood cases.
print(res.paf_clean$communality, digits = 3)
## Age Comorbidity Hemoglobine Leucocytes Thrombocytes Creatinine
## 0.891 0.252 0.552 0.329 1.009 0.913
## Urea Albumine
## 0.953 0.678
print(res.paf_clean$loadings)
##
## Loadings:
## MR3 MR1 MR2
## Age 0.968 -0.118
## Comorbidity 0.412 0.111 -0.135
## Hemoglobine -0.556 -0.245 -0.239
## Leucocytes -0.120 -0.371 0.258
## Thrombocytes 0.987
## Creatinine -0.101 0.992
## Urea 0.167 0.873
## Albumine -0.759 -0.176
##
## MR3 MR1 MR2
## SS loadings 2.045 1.966 1.163
## Proportion Var 0.256 0.246 0.145
## Cumulative Var 0.256 0.501 0.647
fa.parallel(cordata_reduced, fa = "fa", fm = "minres")
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Parallel analysis suggests that the number of factors = 1 and the number of components = NA
#We may be overfactoring. Let’s try a two factor model.
res.paf_cleaner<- fa(cordata_reduced, nfactors = 2, fm = "minres", max.iter = 1000)
res.paf_cleaner
## Factor Analysis using method = minres
## Call: fa(r = cordata_reduced, nfactors = 2, max.iter = 1000, fm = "minres")
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 MR2 h2 u2 com
## Age 0.76 0.20 0.73 0.274 1.1
## Comorbidity 0.33 0.27 0.24 0.762 1.9
## Hemoglobine -0.73 -0.05 0.56 0.435 1.0
## Leucocytes -0.03 -0.59 0.36 0.640 1.0
## Thrombocytes 0.43 -0.75 0.52 0.475 1.6
## Creatinine 0.14 0.83 0.78 0.221 1.1
## Urea 0.36 0.76 0.90 0.096 1.4
## Albumine -0.86 0.03 0.73 0.271 1.0
##
## MR1 MR2
## SS loadings 2.42 2.41
## Proportion Var 0.30 0.30
## Cumulative Var 0.30 0.60
## Proportion Explained 0.50 0.50
## Cumulative Proportion 0.50 1.00
##
## With factor correlations of
## MR1 MR2
## MR1 1.00 0.34
## MR2 0.34 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 2 factors are sufficient.
##
## df null model = 28 with the objective function = 5.4 with Chi Square = 35.09
## df of the model are 13 and the objective function was 1.02
##
## The root mean square of the residuals (RMSR) is 0.06
## The df corrected root mean square of the residuals is 0.08
##
## The harmonic n.obs is 11 with the empirical chi square 1.97 with prob < 1
## The total n.obs was 11 with Likelihood Chi Square = 5.26 with prob < 0.97
##
## Tucker Lewis Index of factoring reliability = -158.72
## RMSEA index = 0 and the 90 % confidence intervals are 0 0
## BIC = -25.91
## Fit based upon off diagonal values = 0.98
## Measures of factor score adequacy
## MR1 MR2
## Correlation of (regression) scores with factors 0.94 0.95
## Multiple R square of scores with factors 0.89 0.91
## Minimum correlation of possible factor scores 0.78 0.82
#It looks like the two factor model, removing three problemmatic variables, plus more iterations, and minres helped us avoid the Heywood case.
print(res.paf_cleaner$loadings, cutoff = 0.3)
##
## Loadings:
## MR1 MR2
## Age 0.762
## Comorbidity 0.328
## Hemoglobine -0.732
## Leucocytes -0.589
## Thrombocytes 0.429 -0.748
## Creatinine 0.827
## Urea 0.361 0.765
## Albumine -0.865
##
## MR1 MR2
## SS loadings 2.305 2.290
## Proportion Var 0.288 0.286
## Cumulative Var 0.288 0.574
print(res.paf_cleaner$communality, digits = 3)
## Age Comorbidity Hemoglobine Leucocytes Thrombocytes Creatinine
## 0.726 0.238 0.565 0.360 0.525 0.779
## Urea Albumine
## 0.904 0.729
fa.diagram(res.paf_cleaner)
#To simplify interpretation, I am going to use an oblimin rotation. This type of rotation allows for some correlation between my factors. Factors 1 and 2 are correlated (.3), hence my decision to use this.
res.paf_cleaner_rotated<- fa(cordata_reduced, nfactors = 2, fm = "minres", rotate = "oblimin")
res.paf_cleaner_rotated
## Factor Analysis using method = minres
## Call: fa(r = cordata_reduced, nfactors = 2, rotate = "oblimin", fm = "minres")
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 MR2 h2 u2 com
## Age 0.76 0.20 0.73 0.274 1.1
## Comorbidity 0.33 0.27 0.24 0.762 1.9
## Hemoglobine -0.73 -0.05 0.56 0.435 1.0
## Leucocytes -0.03 -0.59 0.36 0.640 1.0
## Thrombocytes 0.43 -0.75 0.52 0.475 1.6
## Creatinine 0.14 0.83 0.78 0.221 1.1
## Urea 0.36 0.76 0.90 0.096 1.4
## Albumine -0.86 0.03 0.73 0.271 1.0
##
## MR1 MR2
## SS loadings 2.42 2.41
## Proportion Var 0.30 0.30
## Cumulative Var 0.30 0.60
## Proportion Explained 0.50 0.50
## Cumulative Proportion 0.50 1.00
##
## With factor correlations of
## MR1 MR2
## MR1 1.00 0.34
## MR2 0.34 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 2 factors are sufficient.
##
## df null model = 28 with the objective function = 5.4 with Chi Square = 35.09
## df of the model are 13 and the objective function was 1.02
##
## The root mean square of the residuals (RMSR) is 0.06
## The df corrected root mean square of the residuals is 0.08
##
## The harmonic n.obs is 11 with the empirical chi square 1.97 with prob < 1
## The total n.obs was 11 with Likelihood Chi Square = 5.26 with prob < 0.97
##
## Tucker Lewis Index of factoring reliability = -158.72
## RMSEA index = 0 and the 90 % confidence intervals are 0 0
## BIC = -25.91
## Fit based upon off diagonal values = 0.98
## Measures of factor score adequacy
## MR1 MR2
## Correlation of (regression) scores with factors 0.94 0.95
## Multiple R square of scores with factors 0.89 0.91
## Minimum correlation of possible factor scores 0.78 0.82
print(res.paf_cleaner_rotated$loadings, cutoff = 0.3)
##
## Loadings:
## MR1 MR2
## Age 0.762
## Comorbidity 0.328
## Hemoglobine -0.732
## Leucocytes -0.589
## Thrombocytes 0.429 -0.748
## Creatinine 0.827
## Urea 0.361 0.765
## Albumine -0.865
##
## MR1 MR2
## SS loadings 2.305 2.290
## Proportion Var 0.288 0.286
## Cumulative Var 0.288 0.574
print(res.paf_cleaner_rotated$communality, digits = 3)
## Age Comorbidity Hemoglobine Leucocytes Thrombocytes Creatinine
## 0.726 0.238 0.565 0.360 0.525 0.779
## Urea Albumine
## 0.904 0.729
fa.diagram(res.paf_cleaner_rotated)
#In Oblimin rotation, loadings of .4 or .5 and above are considered
strong loadings
#Based on my results, I will call Factor 1 Vulnerability since it loads positively and strongly with age and negatively and strongly with Albumine and Hemoglobine levels. It also loads less strongly though positively with Comorbidity. Low Albumine levels often indicate underlying liver disease or kidney disease, or even malnutrition. They are also associated with greater mortality after Total Hip Arthroplasty or Hemiarthroplasty (Thomas et al., 2023). Low levels of hemoglobine pre-surgery are associated with chronic disease (Clevenger and Richards, 2014). Higher comorbidities are associated with increased morbidity post surgery, particularly in older adults (Ho et al., 2020). #I will call Factor 2 Waste Accumulation because Waste Products because Creatinine and Urea strongly and positively loaded onto it, while Thrombocytes and Leucocytes strongly but negatively loaded onto it. Note: Most of the participants had normal Creatine, low hemoglobine, normal Leucocyte levels (though on the higher range of normal), and normal Thrombocyte levels, low Urea levels, and normal Albumine levels. Higher creatinine levels pre-surgery have been found to be associated with decreased risk of short-term mortality (Chen et al., 2022). Low BUN scores (Urea scores) were generally associated with decreased mortality (Liu et al., 2022). Pre-surgery, it is generally better to have higher Thrombocyte levels (a higher platelet count) (Estcourt et al., 2017). It is been found that it is better for mortality rates to have lower leuococyte levels pre-surgery (Connolly et al., 2013)
#Time for a CFA based on a two-factor model and using the second half of the data. #t-rule: Observed pieces of information > unknown parameters to be estimated See PSYC 7302 Week 5_CFA.pdf #We have p means, so 8 means [p(p+1)]/2 or 22.5 variances/covariances t<less than the above to be overidentified. We have 16 parameters to estimate. #The 8 residuals of each of our 8 variables, and the residuals of the variances of the factors (2).Plus 6 loadings since we will hold constant the first one of each factor loading. In short our model is over-identified.
library(lavaan)
## This is lavaan 0.6-19
## lavaan is FREE software! Please report any bugs.
##
## Attaching package: 'lavaan'
## The following object is masked from 'package:psych':
##
## cor2cov
cfa_model<-'
Vulnerability=~Age+Comorbidity+Albumine+Hemoglobine
Waste_Accumulation =~ Creatinine + Urea + Thrombocytes+Leucocytes
Vulnerability~~Waste_Accumulation
'
fit.cfa_model=cfa(cfa_model, data=cfa_full, estimator="MLMV")
## Warning: lavaan->lav_data_full():
## some observed variances are (at least) a factor 1000 times larger than
## others; use varTable(fit) to investigate
summary(fit.cfa_model, fit.measures=TRUE, standardized=TRUE, rsquare=TRUE)
## lavaan 0.6-19 ended normally after 287 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 17
##
## Number of observations 213
##
## Model Test User Model:
## Standard Scaled
## Test Statistic 71.023 53.128
## Degrees of freedom 19 19
## P-value (Chi-square) 0.000 0.000
## Scaling correction factor 1.463
## Shift parameter 4.596
## simple second-order correction
##
## Model Test Baseline Model:
##
## Test statistic 321.363 146.004
## Degrees of freedom 28 28
## P-value 0.000 0.000
## Scaling correction factor 2.413
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.823 0.711
## Tucker-Lewis Index (TLI) 0.739 0.574
##
## Robust Comparative Fit Index (CFI) 0.825
## Robust Tucker-Lewis Index (TLI) 0.742
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -5299.686 -5299.686
## Loglikelihood unrestricted model (H1) -5264.175 -5264.175
##
## Akaike (AIC) 10633.372 10633.372
## Bayesian (BIC) 10690.514 10690.514
## Sample-size adjusted Bayesian (SABIC) 10636.646 10636.646
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.113 0.092
## 90 Percent confidence interval - lower 0.086 0.063
## 90 Percent confidence interval - upper 0.142 0.122
## P-value H_0: RMSEA <= 0.050 0.000 0.010
## P-value H_0: RMSEA >= 0.080 0.976 0.768
##
## Robust RMSEA 0.111
## 90 Percent confidence interval - lower 0.076
## 90 Percent confidence interval - upper 0.147
## P-value H_0: Robust RMSEA <= 0.050 0.003
## P-value H_0: Robust RMSEA >= 0.080 0.931
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.080 0.080
##
## Parameter Estimates:
##
## Standard errors Robust.sem
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## Vulnerability =~
## Age 1.000 7.055 0.620
## Comorbidity 0.050 0.016 3.149 0.002 0.350 0.333
## Albumine -0.245 0.062 -3.957 0.000 -1.728 -0.425
## Hemoglobine -0.055 0.017 -3.194 0.001 -0.390 -0.373
## Waste_Accumulation =~
## Creatinine 1.000 26.326 0.819
## Urea 0.133 0.019 7.047 0.000 3.494 0.915
## Thrombocytes -0.451 0.158 -2.855 0.004 -11.867 -0.178
## Leucocytes -0.003 0.009 -0.343 0.731 -0.082 -0.025
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## Vulnerability ~~
## Waste_Accumltn 93.280 20.317 4.591 0.000 0.502 0.502
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .Age 79.753 17.224 4.630 0.000 79.753 0.616
## .Comorbidity 0.977 0.106 9.235 0.000 0.977 0.889
## .Albumine 13.569 2.404 5.643 0.000 13.569 0.820
## .Hemoglobine 0.943 0.134 7.025 0.000 0.943 0.861
## .Creatinine 339.168 88.005 3.854 0.000 339.168 0.329
## .Urea 2.365 1.601 1.477 0.140 2.365 0.162
## .Thrombocytes 4319.538 574.254 7.522 0.000 4319.538 0.968
## .Leucocytes 10.543 1.044 10.100 0.000 10.543 0.999
## Vulnerability 49.780 15.487 3.214 0.001 1.000 1.000
## Waste_Accumltn 693.067 201.759 3.435 0.001 1.000 1.000
##
## R-Square:
## Estimate
## Age 0.384
## Comorbidity 0.111
## Albumine 0.180
## Hemoglobine 0.139
## Creatinine 0.671
## Urea 0.838
## Thrombocytes 0.032
## Leucocytes 0.001
#Because Thrombocytes and Leucocytes were poor indicators, I removed them from my model.
cfa_model_refined <- '
Vulnerability =~ Age + Comorbidity + Albumine + Hemoglobine
Waste_Accumulation =~ Creatinine + Urea
Vulnerability ~~ Waste_Accumulation
'
fit_refined <- cfa(cfa_model_refined, data = cfa_full, estimator = "MLMV")
summary(fit_refined, fit.measures = TRUE, standardized = TRUE, rsquare = TRUE)
## lavaan 0.6-19 ended normally after 232 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 13
##
## Number of observations 213
##
## Model Test User Model:
## Standard Scaled
## Test Statistic 18.322 15.260
## Degrees of freedom 8 8
## P-value (Chi-square) 0.019 0.054
## Scaling correction factor 1.280
## Shift parameter 0.946
## simple second-order correction
##
## Model Test Baseline Model:
##
## Test statistic 263.018 102.168
## Degrees of freedom 15 15
## P-value 0.000 0.000
## Scaling correction factor 2.753
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.958 0.917
## Tucker-Lewis Index (TLI) 0.922 0.844
##
## Robust Comparative Fit Index (CFI) 0.961
## Robust Tucker-Lewis Index (TLI) 0.927
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -3552.203 -3552.203
## Loglikelihood unrestricted model (H1) -3543.041 -3543.041
##
## Akaike (AIC) 7130.405 7130.405
## Bayesian (BIC) 7174.102 7174.102
## Sample-size adjusted Bayesian (SABIC) 7132.909 7132.909
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.078 0.065
## 90 Percent confidence interval - lower 0.030 0.000
## 90 Percent confidence interval - upper 0.125 0.115
## P-value H_0: RMSEA <= 0.050 0.144 0.265
## P-value H_0: RMSEA >= 0.080 0.517 0.353
##
## Robust RMSEA 0.074
## 90 Percent confidence interval - lower 0.000
## 90 Percent confidence interval - upper 0.130
## P-value H_0: Robust RMSEA <= 0.050 0.210
## P-value H_0: Robust RMSEA >= 0.080 0.479
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.044 0.044
##
## Parameter Estimates:
##
## Standard errors Robust.sem
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## Vulnerability =~
## Age 1.000 6.942 0.610
## Comorbidity 0.048 0.016 3.083 0.002 0.335 0.320
## Albumine -0.251 0.062 -4.021 0.000 -1.743 -0.428
## Hemoglobine -0.059 0.018 -3.325 0.001 -0.409 -0.391
## Waste_Accumulation =~
## Creatinine 1.000 25.037 0.779
## Urea 0.147 0.024 6.196 0.000 3.672 0.962
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## Vulnerability ~~
## Waste_Accumltn 86.570 20.086 4.310 0.000 0.498 0.498
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .Age 81.347 17.418 4.670 0.000 81.347 0.628
## .Comorbidity 0.987 0.106 9.353 0.000 0.987 0.898
## .Albumine 13.514 2.396 5.641 0.000 13.514 0.816
## .Hemoglobine 0.928 0.134 6.947 0.000 0.928 0.847
## .Creatinine 405.406 93.584 4.332 0.000 405.406 0.393
## .Urea 1.095 2.057 0.532 0.594 1.095 0.075
## Vulnerability 48.185 15.048 3.202 0.001 1.000 1.000
## Waste_Accumltn 626.829 195.060 3.214 0.001 1.000 1.000
##
## R-Square:
## Estimate
## Age 0.372
## Comorbidity 0.102
## Albumine 0.184
## Hemoglobine 0.153
## Creatinine 0.607
## Urea 0.925
#This model demonstrates good fit! Yay!
#Multi-group comparison using this model:
cfa_full$Dementia<- as.factor(cfa_full$Dementia)
table(cfa_full$Dementia)
##
## 1 2
## 158 55
#Configural invariance
fit_config <- cfa(cfa_model_refined, data = cfa_full, group = "Dementia", estimator = "MLMV")
## Warning: lavaan->lav_object_post_check():
## some estimated ov variances are negative
summary(fit_config, fit.measures = TRUE, standardized = TRUE, rsquare = TRUE)
## lavaan 0.6-19 ended normally after 1007 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 38
##
## Number of observations per group:
## 1 158
## 2 55
##
## Model Test User Model:
## Standard Scaled
## Test Statistic 30.074 26.177
## Degrees of freedom 16 16
## P-value (Chi-square) 0.018 0.052
## Scaling correction factor 1.320
## Shift parameter 3.394
## simple second-order correction
## Test statistic for each group:
## 1 16.015 16.015
## 2 10.162 10.162
##
## Model Test Baseline Model:
##
## Test statistic 290.300 115.520
## Degrees of freedom 30 30
## P-value 0.000 0.000
## Scaling correction factor 2.868
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.946 0.881
## Tucker-Lewis Index (TLI) 0.899 0.777
##
## Robust Comparative Fit Index (CFI) 0.945
## Robust Tucker-Lewis Index (TLI) 0.897
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -3529.080 -3529.080
## Loglikelihood unrestricted model (H1) -3514.043 -3514.043
##
## Akaike (AIC) 7134.161 7134.161
## Bayesian (BIC) 7261.890 7261.890
## Sample-size adjusted Bayesian (SABIC) 7141.479 7141.479
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.091 0.077
## 90 Percent confidence interval - lower 0.037 0.000
## 90 Percent confidence interval - upper 0.140 0.129
## P-value H_0: RMSEA <= 0.050 0.091 0.188
## P-value H_0: RMSEA >= 0.080 0.673 0.506
##
## Robust RMSEA 0.089
## 90 Percent confidence interval - lower 0.000
## 90 Percent confidence interval - upper 0.148
## P-value H_0: Robust RMSEA <= 0.050 0.148
## P-value H_0: Robust RMSEA >= 0.080 0.631
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.057 0.057
##
## Parameter Estimates:
##
## Standard errors Robust.sem
## Information Expected
## Information saturated (h1) model Structured
##
##
## Group 1 [1]:
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## Vulnerability =~
## Age 1.000 7.367 0.661
## Comorbidity 0.056 0.018 3.024 0.002 0.409 0.383
## Albumine -0.246 0.070 -3.518 0.000 -1.814 -0.421
## Hemoglobine -0.048 0.018 -2.671 0.008 -0.356 -0.334
## Waste_Accumulation =~
## Creatinine 1.000 26.357 0.802
## Urea 0.143 0.023 6.272 0.000 3.780 0.998
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## Vulnerability ~~
## Waste_Accumltn 94.135 23.446 4.015 0.000 0.485 0.485
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .Age 78.791 0.886 88.903 0.000 78.791 7.073
## .Comorbidity 1.234 0.085 14.521 0.000 1.234 1.155
## .Albumine 37.076 0.343 108.239 0.000 37.076 8.611
## .Hemoglobine 7.868 0.085 92.988 0.000 7.868 7.398
## .Creatinine 90.487 2.613 34.628 0.000 90.487 2.755
## .Urea 7.589 0.301 25.197 0.000 7.589 2.005
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .Age 69.835 16.042 4.353 0.000 69.835 0.563
## .Comorbidity 0.974 0.116 8.410 0.000 0.974 0.853
## .Albumine 15.247 3.176 4.801 0.000 15.247 0.822
## .Hemoglobine 1.005 0.164 6.117 0.000 1.005 0.888
## .Creatinine 384.203 111.762 3.438 0.001 384.203 0.356
## .Urea 0.044 2.072 0.021 0.983 0.044 0.003
## Vulnerability 54.267 18.871 2.876 0.004 1.000 1.000
## Waste_Accumltn 694.680 253.115 2.745 0.006 1.000 1.000
##
## R-Square:
## Estimate
## Age 0.437
## Comorbidity 0.147
## Albumine 0.178
## Hemoglobine 0.112
## Creatinine 0.644
## Urea 0.997
##
##
## Group 2 [2]:
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## Vulnerability =~
## Age 1.000 0.357 0.033
## Comorbidity -0.277 1.571 -0.176 0.860 -0.099 -0.101
## Albumine -2.268 12.102 -0.187 0.851 -0.809 -0.253
## Hemoglobine -3.266 18.106 -0.180 0.857 -1.166 -1.235
## Waste_Accumulation =~
## Creatinine 1.000 20.367 0.682
## Urea 0.165 0.078 2.127 0.033 3.361 0.869
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## Vulnerability ~~
## Waste_Accumltn 2.747 15.948 0.172 0.863 0.378 0.378
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .Age 84.964 1.458 58.285 0.000 84.964 7.859
## .Comorbidity 1.382 0.132 10.441 0.000 1.382 1.408
## .Albumine 36.182 0.432 83.781 0.000 36.182 11.297
## .Hemoglobine 7.498 0.127 58.896 0.000 7.498 7.942
## .Creatinine 93.309 4.028 23.166 0.000 93.309 3.124
## .Urea 8.225 0.522 15.764 0.000 8.225 2.126
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .Age 116.744 54.786 2.131 0.033 116.744 0.999
## .Comorbidity 0.954 0.185 5.164 0.000 0.954 0.990
## .Albumine 9.603 2.056 4.671 0.000 9.603 0.936
## .Hemoglobine -0.468 1.471 -0.318 0.750 -0.468 -0.525
## .Creatinine 477.507 118.817 4.019 0.000 477.507 0.535
## .Urea 3.676 5.081 0.723 0.469 3.676 0.245
## Vulnerability 0.127 1.369 0.093 0.926 1.000 1.000
## Waste_Accumltn 414.815 245.935 1.687 0.092 1.000 1.000
##
## R-Square:
## Estimate
## Age 0.001
## Comorbidity 0.010
## Albumine 0.064
## Hemoglobine NA
## Creatinine 0.465
## Urea 0.755
#I tried removing the worst indicators.
cfa_model_trimmed <- '
Vulnerability =~ Age + Comorbidity + Albumine
Waste_Accumulation =~ Creatinine
Vulnerability ~~ Waste_Accumulation
'
fit_trimmed <- cfa(cfa_model_trimmed, data = cfa_full, group = "Dementia", estimator = "MLMV")
## Warning: lavaan->lav_lavaan_step11_estoptim():
## Model estimation FAILED! Returning starting values.
summary(fit_trimmed, fit.measures = TRUE, standardized = TRUE, rsquare = TRUE)
## Warning: lavaan->lav_object_summary():
## fit measures not available if model did not converge
## lavaan 0.6-19 did NOT end normally after 761 iterations
## ** WARNING ** Estimates below are most likely unreliable
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 24
##
## Number of observations per group:
## 1 158
## 2 55
##
##
## Parameter Estimates:
##
## Standard errors Robust.sem
## Information Expected
## Information saturated (h1) model Structured
##
##
## Group 1 [1]:
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## Vulnerability =~
## Age 1.000 0.000 0.000
## Comorbidity -2812.426 NA -0.383 -0.361
## Albumine 10957.010 NA 1.492 0.344
## Waste_Accumulation =~
## Creatinine 1.000 32.822 1.000
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## Vulnerability ~~
## Waste_Accumltn -0.003 NA -0.688 -0.688
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .Age 78.791 NA 78.791 7.073
## .Comorbidity 1.234 NA 1.234 1.163
## .Albumine 37.076 NA 37.076 8.555
## .Creatinine 90.487 NA 90.487 2.757
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .Age 124.103 NA 124.103 1.000
## .Comorbidity 0.980 NA 0.980 0.870
## .Albumine 16.556 NA 16.556 0.882
## .Creatinine 0.000 0.000 0.000
## Vulnerability 0.000 NA 1.000 1.000
## Waste_Accumltn 1077.274 NA 1.000 1.000
##
## R-Square:
## Estimate
## Age 0.000
## Comorbidity 0.130
## Albumine 0.118
## Creatinine 1.000
##
##
## Group 2 [2]:
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## Vulnerability =~
## Age 1.000 0.012 0.001
## Comorbidity -5.060 NA -0.059 -0.060
## Albumine 938.711 NA 10.986 3.426
## Waste_Accumulation =~
## Creatinine 1.000 29.941 1.000
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## Vulnerability ~~
## Waste_Accumltn -0.004 NA -0.011 -0.011
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .Age 84.964 NA 84.964 7.859
## .Comorbidity 1.382 NA 1.382 1.408
## .Albumine 36.182 NA 36.182 11.285
## .Creatinine 93.309 NA 93.309 3.116
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .Age 116.874 NA 116.874 1.000
## .Comorbidity 0.960 NA 0.960 0.996
## .Albumine -110.408 NA -110.408 -10.740
## .Creatinine 0.000 0.000 0.000
## Vulnerability 0.000 NA 1.000 1.000
## Waste_Accumltn 896.434 NA 1.000 1.000
##
## R-Square:
## Estimate
## Age 0.000
## Comorbidity 0.004
## Albumine NA
## Creatinine 1.000
#install.packages("semPlot") # If not already installed
library(semPlot)
semPaths(fit_refined,
what = "std", # standardized estimates
layout = "tree", # layout style: "tree", "circle", etc.
edge.label.cex = .8,# text size for edge labels
sizeMan = 8, # size of manifest (observed) variables
sizeLat = 12, # size of latent variables
residuals = TRUE, # show residual variances
intercepts = FALSE, # usually not shown
nCharNodes = 0 # do not truncate variable names
)