`— title: “Arbuckle SEM Revised Final Project” output: html_notebook —

library(psfmi)
data(hipstudy)
# select variables
myvars_revised<- c("Age", "Comorbidity", "ASA", "Hemoglobine", "Leucocytes", "Thrombocytes", "CRP", "Creatinine", "Urea", "Albumine", "Delay")
datasubset_revised<- hipstudy[myvars_revised]

Let’s install/load a few more helpful packages.

#install.packages("MVN")
library(psych)
library(MVN)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
library(nortest)

Before running EFA, it’s recommended to check that several assumptions/conditions are met.

#Checking for missing data

colSums(is.na(datasubset_revised))
##          Age  Comorbidity          ASA  Hemoglobine   Leucocytes Thrombocytes 
##            0            0            0            0            0            0 
##          CRP   Creatinine         Urea     Albumine        Delay 
##            0            0            0            0            0

#It looks like there is no missing data.

#Time to assess sample size:

n <- nrow(datasubset_revised)
p <- ncol(datasubset_revised)
cat("Sample size:", n, "\nVariables:", p, "\nRatio (n/p):", n/p, "\n")
## Sample size: 426 
## Variables: 11 
## Ratio (n/p): 38.72727

#The ratio aimed for was 5-10 at least, ideally. 38.727 exceeds this ratio, which is desired.

#Now, I will check for multivariate normality.

mvn(datasubset_revised, mvnTest = "mardia")
## $multivariateNormality
##              Test        Statistic p value Result
## 1 Mardia Skewness 6133.36050708347       0     NO
## 2 Mardia Kurtosis 67.9676630136493       0     NO
## 3             MVN             <NA>    <NA>     NO
## 
## $univariateNormality
##                Test     Variable Statistic   p value Normality
## 1  Anderson-Darling     Age         7.6348  <0.001      NO    
## 2  Anderson-Darling Comorbidity    21.0843  <0.001      NO    
## 3  Anderson-Darling     ASA        43.2942  <0.001      NO    
## 4  Anderson-Darling Hemoglobine     2.5352  <0.001      NO    
## 5  Anderson-Darling  Leucocytes     2.9584  <0.001      NO    
## 6  Anderson-Darling Thrombocytes    7.8847  <0.001      NO    
## 7  Anderson-Darling     CRP        63.9900  <0.001      NO    
## 8  Anderson-Darling  Creatinine    12.0004  <0.001      NO    
## 9  Anderson-Darling     Urea       16.8956  <0.001      NO    
## 10 Anderson-Darling   Albumine      2.6460  <0.001      NO    
## 11 Anderson-Darling    Delay       92.1011  <0.001      NO    
## 
## $Descriptives
##                n       Mean    Std.Dev Median   Min    Max    25th   75th
## Age          426  79.701878 11.0045059  81.00 29.00  99.00  75.000  88.00
## Comorbidity  426   1.185446  1.0675423   1.00  0.00   4.00   0.000   2.00
## ASA          426   2.061033  0.6728572   2.00  1.00   4.00   2.000   2.00
## Hemoglobine  426   7.864554  0.9652245   7.90  3.50  10.50   7.300   8.40
## Leucocytes   426  10.888967  3.3007800  10.45  3.10  22.10   8.400  12.90
## Thrombocytes 426 235.394366 67.5793172 221.00 91.00 495.00 189.000 271.00
## CRP          426  20.880282 34.9368134   6.00  1.00 186.00   3.000  18.00
## Creatinine   426  89.380282 29.9264351  85.00 36.00 275.00  71.000  99.00
## Urea         426   7.463850  3.5140201   6.60  1.20  29.60   5.225   8.60
## Albumine     426  36.943662  4.0926470  37.00 21.00  53.00  34.250  40.00
## Delay        426   1.682676  4.0820019   0.85  0.04  48.66   0.590   1.21
##                    Skew   Kurtosis
## Age          -1.2903000  2.4964032
## Comorbidity   0.7039065 -0.1444506
## ASA           0.4367115  0.5257324
## Hemoglobine  -0.6462986  1.4666297
## Leucocytes    0.6286738  0.1761800
## Thrombocytes  1.1158181  1.4666517
## CRP           2.6944097  7.2831436
## Creatinine    2.0680682  7.4887391
## Urea          2.1934118  7.6158605
## Albumine     -0.3610894  1.5714318
## Delay         7.7333950 72.5910706

#Hmm, it doesn’t look like multivariate normality was met.

#Checking for linearity

pairs(datasubset_revised, main = "Scatterplot Matrix")

#Now, let’s check the correlation matrix

cor_matrix_revised<- cor(datasubset_revised, use = "pairwise.complete.obs")
print(round(cor_matrix_revised, 2))
##                Age Comorbidity   ASA Hemoglobine Leucocytes Thrombocytes   CRP
## Age           1.00        0.25  0.42       -0.22       0.03        -0.02  0.13
## Comorbidity   0.25        1.00  0.66        0.02       0.08        -0.02  0.03
## ASA           0.42        0.66  1.00       -0.13       0.01        -0.06  0.12
## Hemoglobine  -0.22        0.02 -0.13        1.00       0.14        -0.16 -0.10
## Leucocytes    0.03        0.08  0.01        0.14       1.00         0.23 -0.05
## Thrombocytes -0.02       -0.02 -0.06       -0.16       0.23         1.00  0.03
## CRP           0.13        0.03  0.12       -0.10      -0.05         0.03  1.00
## Creatinine    0.25        0.20  0.16       -0.07      -0.05        -0.17  0.12
## Urea          0.32        0.21  0.26       -0.21      -0.04        -0.12  0.17
## Albumine     -0.28       -0.13 -0.23        0.26       0.06        -0.14 -0.34
## Delay         0.09        0.13  0.31       -0.02       0.03         0.03  0.15
##              Creatinine  Urea Albumine Delay
## Age                0.25  0.32    -0.28  0.09
## Comorbidity        0.20  0.21    -0.13  0.13
## ASA                0.16  0.26    -0.23  0.31
## Hemoglobine       -0.07 -0.21     0.26 -0.02
## Leucocytes        -0.05 -0.04     0.06  0.03
## Thrombocytes      -0.17 -0.12    -0.14  0.03
## CRP                0.12  0.17    -0.34  0.15
## Creatinine         1.00  0.73    -0.13  0.07
## Urea               0.73  1.00    -0.17  0.05
## Albumine          -0.13 -0.17     1.00 -0.04
## Delay              0.07  0.05    -0.04  1.00

#It looks like we have a mixture of moderate correlations between observed variables (.3-.8)

#Bartlett’s Test of Sphericity. This checks that the correlation matrix is significantly different from the identity matrix.

cortest.bartlett(cor_matrix_revised, n = nrow(datasubset_revised))
## $chisq
## [1] 1021.555
## 
## $p.value
## [1] 4.406373e-178
## 
## $df
## [1] 55

#P<.05, just what we want. This indicates sufficient correlation.

#Kaiser–Meyer–Olkin (KMO) test to see if the proportion of variance in my observed variables might be caused by underlying factors. I want values close to 1. This is a test of sampling adequacy.

KMO(datasubset_revised)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = datasubset_revised)
## Overall MSA =  0.6
## MSA for each item = 
##          Age  Comorbidity          ASA  Hemoglobine   Leucocytes Thrombocytes 
##         0.79         0.58         0.58         0.58         0.43         0.48 
##          CRP   Creatinine         Urea     Albumine        Delay 
##         0.64         0.56         0.60         0.69         0.51

#For the most part, I want values >.60. Most of these values are close to or exceed this. #Only two of my values are under .5 and both are very close, so I won’t remove them. #This test suggests that factor analysis may be useful for this data.

#Now, let’s check for multicollinearity.

# Use VIFs as a proxy for multicollinearity
# This requires a placeholder outcome just to run VIF
fake_outcome <- rnorm(nrow(datasubset_revised))  # Dummy DV
vif_model <- lm(fake_outcome ~ ., data = datasubset_revised)
vif(vif_model)
##          Age  Comorbidity          ASA  Hemoglobine   Leucocytes Thrombocytes 
##     1.367090     1.891557     2.331604     1.229845     1.117378     1.178738 
##          CRP   Creatinine         Urea     Albumine        Delay 
##     1.182125     2.277489     2.431080     1.313917     1.151702

#Fortunately, none of my VIFS >5. Multicollinearity does not seem to be a problem.

#Checking for outliers visually using Box Plot:

While there are some more extreme values, they are generally grouped together, not isolated or singular. Therefore, I will not remove them.

boxplot(datasubset_revised)

#Now, I am going to split my data into two parts. Part 1 will undergo EFA. Part 2 will be undergo CFA. This will help me avoid conflating my model fit. 

set.seed(123) 
# Calculate number of rows
n <- nrow(hipstudy)

# Create a random sample of row indices for the EFA subset
efa_indices <- sample(1:n, size = floor(n / 2))  # Use floor to ensure integer

# Split the data
efa_full <- hipstudy[efa_indices, ]           # First half for EFA
cfa_full <- hipstudy[-efa_indices, ]          # Remaining for CFA
# Define your EFA variable names
efa_vars <- c("Age", "Comorbidity", "ASA", "Hemoglobine", "Leucocytes", "Thrombocytes", "CRP", "Creatinine", "Urea", "Albumine", "Delay")

# Extract just the variables of interest for EFA
efa_data <- efa_full[, efa_vars]

#Time for my EFA! #I need to figure out how many components best fit the data. #I will do this using eigenvalues #But first, let’s do a correlation matrix.

cordata_revised=cor(efa_data)
cordata_revised
##                      Age Comorbidity         ASA Hemoglobine  Leucocytes
## Age           1.00000000  0.22316090  0.46295154 -0.22301628 -0.01445548
## Comorbidity   0.22316090  1.00000000  0.65662374  0.05004786  0.07564360
## ASA           0.46295154  0.65662374  1.00000000 -0.13337100 -0.03041199
## Hemoglobine  -0.22301628  0.05004786 -0.13337100  1.00000000  0.19810061
## Leucocytes   -0.01445548  0.07564360 -0.03041199  0.19810061  1.00000000
## Thrombocytes  0.05611413  0.04265665 -0.09034920 -0.09587613  0.26400629
## CRP           0.16617834  0.03016959  0.06319467 -0.06881975 -0.06398498
## Creatinine    0.25989392  0.21675602  0.22743288 -0.02188167 -0.13164368
## Urea          0.35252980  0.31870910  0.38936152 -0.15837217 -0.05947338
## Albumine     -0.32579487 -0.11354707 -0.20772364  0.27839220  0.04857372
## Delay         0.11486655  0.06816184  0.25895950 -0.03981696 -0.01223695
##              Thrombocytes          CRP  Creatinine        Urea    Albumine
## Age            0.05611413  0.166178341  0.25989392  0.35252980 -0.32579487
## Comorbidity    0.04265665  0.030169590  0.21675602  0.31870910 -0.11354707
## ASA           -0.09034920  0.063194674  0.22743288  0.38936152 -0.20772364
## Hemoglobine   -0.09587613 -0.068819746 -0.02188167 -0.15837217  0.27839220
## Leucocytes     0.26400629 -0.063984980 -0.13164368 -0.05947338  0.04857372
## Thrombocytes   1.00000000  0.076120256 -0.12427223 -0.08715152 -0.09588076
## CRP            0.07612026  1.000000000  0.06211138  0.11224029 -0.30130571
## Creatinine    -0.12427223  0.062111379  1.00000000  0.70411674 -0.08196506
## Urea          -0.08715152  0.112240294  0.70411674  1.00000000 -0.16018943
## Albumine      -0.09588076 -0.301305706 -0.08196506 -0.16018943  1.00000000
## Delay         -0.08730885 -0.004278872  0.02976056  0.05808406  0.02218599
##                     Delay
## Age           0.114866550
## Comorbidity   0.068161840
## ASA           0.258959500
## Hemoglobine  -0.039816960
## Leucocytes   -0.012236950
## Thrombocytes -0.087308849
## CRP          -0.004278872
## Creatinine    0.029760565
## Urea          0.058084062
## Albumine      0.022185990
## Delay         1.000000000

#I’ll also look at data descriptives:

describe(efa_data)
##              vars   n   mean    sd median trimmed   mad   min    max  range
## Age             1 213  79.02 10.57  80.00   80.18  8.90 39.00  97.00  58.00
## Comorbidity     2 213   1.10  1.08   1.00    0.95  1.48  0.00   4.00   4.00
## ASA             3 213   2.02  0.67   2.00    2.00  0.00  1.00   4.00   3.00
## Hemoglobine     4 213   7.96  0.87   8.00    7.97  0.74  5.10  10.50   5.40
## Leucocytes      5 213  10.61  3.33   9.90   10.35  3.26  3.10  22.10  19.00
## Thrombocytes    6 213 236.38 68.35 220.00  228.35 53.37 91.00 481.00 390.00
## CRP             7 213  20.72 34.74   6.00   12.13  5.93  1.00 181.00 180.00
## Creatinine      8 213  87.54 27.41  84.00   84.60 20.76 36.00 253.00 217.00
## Urea            9 213   7.17  3.15   6.50    6.74  2.22  2.20  25.70  23.50
## Albumine       10 213  37.04  4.11  37.00   37.05  4.45 22.00  48.00  26.00
## Delay          11 213   1.51  3.83   0.83    0.86  0.40  0.04  48.66  48.62
##               skew kurtosis   se
## Age          -1.11     1.59 0.72
## Comorbidity   0.84     0.08 0.07
## ASA           0.44     0.56 0.05
## Hemoglobine  -0.22     0.75 0.06
## Leucocytes    0.70     0.30 0.23
## Thrombocytes  1.15     1.41 4.68
## CRP           2.71     7.39 2.38
## Creatinine    2.22     9.65 1.88
## Urea          2.02     6.60 0.22
## Albumine     -0.26     0.87 0.28
## Delay         9.43   107.21 0.26
#This will let me check sample size and number of observed items
dim(efa_data)
## [1] 213  11

#Most of the participants had normal Creatine, low hemoglobine, normal Leucocyte levels (though on the higher range of normal), and normal Thrombocyte levels, low Urea levels, and normal Albumine levels. #Ok, now let’s see how many factors fit our data.

eigen(cordata_revised)
## eigen() decomposition
## $values
##  [1] 2.7718723 1.4830265 1.3778339 1.1514246 0.9441017 0.8845681 0.7081779
##  [8] 0.6068945 0.5650293 0.2700076 0.2370637
## 
## $vectors
##              [,1]        [,2]        [,3]        [,4]         [,5]        [,6]
##  [1,] -0.40376346  0.19833503 -0.06270095  0.07439643  0.098088625 -0.09245348
##  [2,] -0.36365124 -0.19978668 -0.41001534  0.05909386 -0.134718949  0.42477764
##  [3,] -0.45977247 -0.12746320 -0.24909389  0.31074436 -0.053573436  0.23764001
##  [4,]  0.17705474 -0.41913690 -0.27208436 -0.22928149 -0.556975051 -0.08542448
##  [5,]  0.07773252  0.02232018 -0.63508876 -0.24393983  0.073203276 -0.30949276
##  [6,]  0.03800457  0.41926453 -0.43666239 -0.24774916  0.323494804 -0.13036012
##  [7,] -0.15456979  0.40524133  0.09112876 -0.12190958 -0.698725554 -0.24217574
##  [8,] -0.37319022 -0.26889792  0.24486854 -0.45096686  0.099930916 -0.23786393
##  [9,] -0.45317870 -0.17833540  0.14637268 -0.35687897  0.149164366 -0.17966148
## [10,]  0.26221321 -0.50972913 -0.04937923 -0.01962885  0.169213406 -0.10525677
## [11,] -0.13121577 -0.15617627 -0.07575120  0.61235105  0.006612604 -0.68984135
##              [,7]        [,8]        [,9]       [,10]         [,11]
##  [1,]  0.42350303  0.72249118 -0.03790267 -0.15456964  0.2081497458
##  [2,] -0.28744863 -0.16004823  0.00673229 -0.38740571  0.4464106836
##  [3,] -0.03009517  0.03456686 -0.11448439  0.34935247 -0.6458755970
##  [4,]  0.13380808  0.26781255  0.46759739  0.20144946  0.0002473773
##  [5,]  0.42536244 -0.30994680 -0.37297025 -0.09081111 -0.0715325763
##  [6,] -0.52465533  0.21003417  0.32756381  0.11535686 -0.0953610971
##  [7,] -0.30090548  0.04371443 -0.38304021 -0.04033514 -0.0324496753
##  [8,] -0.06892136 -0.05732254  0.17128557 -0.51369919 -0.3965654962
##  [9,] -0.04932511 -0.15632134 -0.09227769  0.60928830  0.3910153419
## [10,] -0.37680720  0.43636499 -0.54395248 -0.02404716 -0.0049046346
## [11,] -0.15264612 -0.13872154  0.19975285 -0.06721507  0.1216180961

#I will use both Kaiser’s Rule and a Scree Plot to determine how many components #to keep. Kaiser’s Rule suggests retaining factors with eignevalues greater than or equal to 1. Factors 1-4 meet this rule. I will, though, also use a scree plot.

eig_vals <- eigen(cordata_revised)$values
plot(eig_vals, type = "b",
     xlab = "Component Number",
     ylab = "Eigenvalue",
     main = "Scree Plot of Eigenvalues")
abline(h = 1, col = "red", lty = 2)  # Optional: Kaiser criterion line

#Factors 1-4 fall above the criterion line. We will retain 4 factors.

#Now, let’s conduct EFA using PFA.

res.paf_revised=fa(cordata_revised, nfactors=4, fm="pa", max.iter = 100)
## maximum iteration exceeded
## Loading required namespace: GPArotation
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully

#I am going to try increasing the iterations.

res.paf_revised_1=fa(cordata_revised, nfactors=4, fm="pa", max.iter = 1000)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully

#That didn’t help. Since the data isn’t normal, I’m going to try to use Minimum Residual (Minres). This is a robust method good for non-normal data.

res.paf_revised2<- fa(cordata_revised, nfactors = 4, fm = "minres")
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully

#That didn’t work either. Let’s take a look and see which variables might be causing us issues. In particular, variables with low communality (<.20) or high factor loadings (>.90). These often lead to Heywood cases.

print(res.paf_revised2$communality, digits = 3)
##          Age  Comorbidity          ASA  Hemoglobine   Leucocytes Thrombocytes 
##       0.3686       0.4461       1.0243       0.2289       0.4334       0.2498 
##          CRP   Creatinine         Urea     Albumine        Delay 
##       0.1329       0.7958       0.6851       0.4365       0.0659
print(res.paf_revised2$loadings)
## 
## Loadings:
##              MR1    MR2    MR3    MR4   
## Age           0.266  0.172 -0.392       
## Comorbidity   0.603  0.121         0.196
## ASA           1.014                     
## Hemoglobine                 0.440  0.189
## Leucocytes                         0.647
## Thrombocytes -0.127        -0.280  0.416
## CRP                        -0.368       
## Creatinine           0.916              
## Urea          0.109  0.758 -0.101       
## Albumine                    0.657       
## Delay         0.265                     
## 
##                  MR1   MR2   MR3   MR4
## SS loadings    1.569 1.472 1.018 0.678
## Proportion Var 0.143 0.134 0.093 0.062
## Cumulative Var 0.143 0.276 0.369 0.431

#Hmm. I see a few problemmatic variables but I’m hesitant to remove them just yet. #I will run parallel analysis to make sure I’m not overfactoring.

fa.parallel(cordata_revised, fm = "minres", fa = "fa")
## Warning in fa.parallel(cordata_revised, fm = "minres", fa = "fa"): It seems as
## if you are using a correlation matrix, but have not specified the number of
## cases. The number of subjects is arbitrarily set to be 100
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully

## Parallel analysis suggests that the number of factors =  4  and the number of components =  NA

#Hmm. Not super helpful. Let’s try removing ASA, CRP, and Delay, our problemmatic variables and run a 3 factor model.

vars_to_keep <- c("Age", "Comorbidity", "Hemoglobine", "Leucocytes", 
                  "Thrombocytes", "Creatinine", "Urea", "Albumine")

cordata_reduced <- cordata_revised[, vars_to_keep]

res.paf_clean <- fa(cordata_reduced, nfactors = 3, fm = "minres", max.iter = 1000)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully

#Still some issues. Let’s take a closer look. In particular, variables with low communality (<.20) or high factor loadings (>.90) often lead to Heywood cases.

print(res.paf_clean$communality, digits = 3)
##          Age  Comorbidity  Hemoglobine   Leucocytes Thrombocytes   Creatinine 
##        0.891        0.252        0.552        0.329        1.009        0.913 
##         Urea     Albumine 
##        0.953        0.678
print(res.paf_clean$loadings)
## 
## Loadings:
##              MR3    MR1    MR2   
## Age           0.968        -0.118
## Comorbidity   0.412  0.111 -0.135
## Hemoglobine  -0.556 -0.245 -0.239
## Leucocytes   -0.120 -0.371  0.258
## Thrombocytes                0.987
## Creatinine   -0.101  0.992       
## Urea          0.167  0.873       
## Albumine     -0.759        -0.176
## 
##                  MR3   MR1   MR2
## SS loadings    2.045 1.966 1.163
## Proportion Var 0.256 0.246 0.145
## Cumulative Var 0.256 0.501 0.647
fa.parallel(cordata_reduced, fa = "fa", fm = "minres")
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.

## Parallel analysis suggests that the number of factors =  1  and the number of components =  NA

#We may be overfactoring. Let’s try a two factor model.

res.paf_cleaner<- fa(cordata_reduced, nfactors = 2, fm = "minres", max.iter = 1000)
res.paf_cleaner
## Factor Analysis using method =  minres
## Call: fa(r = cordata_reduced, nfactors = 2, max.iter = 1000, fm = "minres")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                MR1   MR2   h2    u2 com
## Age           0.76  0.20 0.73 0.274 1.1
## Comorbidity   0.33  0.27 0.24 0.762 1.9
## Hemoglobine  -0.73 -0.05 0.56 0.435 1.0
## Leucocytes   -0.03 -0.59 0.36 0.640 1.0
## Thrombocytes  0.43 -0.75 0.52 0.475 1.6
## Creatinine    0.14  0.83 0.78 0.221 1.1
## Urea          0.36  0.76 0.90 0.096 1.4
## Albumine     -0.86  0.03 0.73 0.271 1.0
## 
##                        MR1  MR2
## SS loadings           2.42 2.41
## Proportion Var        0.30 0.30
## Cumulative Var        0.30 0.60
## Proportion Explained  0.50 0.50
## Cumulative Proportion 0.50 1.00
## 
##  With factor correlations of 
##      MR1  MR2
## MR1 1.00 0.34
## MR2 0.34 1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 2 factors are sufficient.
## 
## df null model =  28  with the objective function =  5.4 with Chi Square =  35.09
## df of  the model are 13  and the objective function was  1.02 
## 
## The root mean square of the residuals (RMSR) is  0.06 
## The df corrected root mean square of the residuals is  0.08 
## 
## The harmonic n.obs is  11 with the empirical chi square  1.97  with prob <  1 
## The total n.obs was  11  with Likelihood Chi Square =  5.26  with prob <  0.97 
## 
## Tucker Lewis Index of factoring reliability =  -158.72
## RMSEA index =  0  and the 90 % confidence intervals are  0 0
## BIC =  -25.91
## Fit based upon off diagonal values = 0.98
## Measures of factor score adequacy             
##                                                    MR1  MR2
## Correlation of (regression) scores with factors   0.94 0.95
## Multiple R square of scores with factors          0.89 0.91
## Minimum correlation of possible factor scores     0.78 0.82

#It looks like the two factor model, removing three problemmatic variables, plus more iterations, and minres helped us avoid the Heywood case.

print(res.paf_cleaner$loadings, cutoff = 0.3)
## 
## Loadings:
##              MR1    MR2   
## Age           0.762       
## Comorbidity   0.328       
## Hemoglobine  -0.732       
## Leucocytes          -0.589
## Thrombocytes  0.429 -0.748
## Creatinine           0.827
## Urea          0.361  0.765
## Albumine     -0.865       
## 
##                  MR1   MR2
## SS loadings    2.305 2.290
## Proportion Var 0.288 0.286
## Cumulative Var 0.288 0.574
print(res.paf_cleaner$communality, digits = 3)
##          Age  Comorbidity  Hemoglobine   Leucocytes Thrombocytes   Creatinine 
##        0.726        0.238        0.565        0.360        0.525        0.779 
##         Urea     Albumine 
##        0.904        0.729
fa.diagram(res.paf_cleaner)

#To simplify interpretation, I am going to use an oblimin rotation. This type of rotation allows for some correlation between my factors. Factors 1 and 2 are correlated (.3), hence my decision to use this.

res.paf_cleaner_rotated<- fa(cordata_reduced, nfactors = 2, fm = "minres", rotate = "oblimin")
res.paf_cleaner_rotated
## Factor Analysis using method =  minres
## Call: fa(r = cordata_reduced, nfactors = 2, rotate = "oblimin", fm = "minres")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                MR1   MR2   h2    u2 com
## Age           0.76  0.20 0.73 0.274 1.1
## Comorbidity   0.33  0.27 0.24 0.762 1.9
## Hemoglobine  -0.73 -0.05 0.56 0.435 1.0
## Leucocytes   -0.03 -0.59 0.36 0.640 1.0
## Thrombocytes  0.43 -0.75 0.52 0.475 1.6
## Creatinine    0.14  0.83 0.78 0.221 1.1
## Urea          0.36  0.76 0.90 0.096 1.4
## Albumine     -0.86  0.03 0.73 0.271 1.0
## 
##                        MR1  MR2
## SS loadings           2.42 2.41
## Proportion Var        0.30 0.30
## Cumulative Var        0.30 0.60
## Proportion Explained  0.50 0.50
## Cumulative Proportion 0.50 1.00
## 
##  With factor correlations of 
##      MR1  MR2
## MR1 1.00 0.34
## MR2 0.34 1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 2 factors are sufficient.
## 
## df null model =  28  with the objective function =  5.4 with Chi Square =  35.09
## df of  the model are 13  and the objective function was  1.02 
## 
## The root mean square of the residuals (RMSR) is  0.06 
## The df corrected root mean square of the residuals is  0.08 
## 
## The harmonic n.obs is  11 with the empirical chi square  1.97  with prob <  1 
## The total n.obs was  11  with Likelihood Chi Square =  5.26  with prob <  0.97 
## 
## Tucker Lewis Index of factoring reliability =  -158.72
## RMSEA index =  0  and the 90 % confidence intervals are  0 0
## BIC =  -25.91
## Fit based upon off diagonal values = 0.98
## Measures of factor score adequacy             
##                                                    MR1  MR2
## Correlation of (regression) scores with factors   0.94 0.95
## Multiple R square of scores with factors          0.89 0.91
## Minimum correlation of possible factor scores     0.78 0.82
print(res.paf_cleaner_rotated$loadings, cutoff = 0.3)
## 
## Loadings:
##              MR1    MR2   
## Age           0.762       
## Comorbidity   0.328       
## Hemoglobine  -0.732       
## Leucocytes          -0.589
## Thrombocytes  0.429 -0.748
## Creatinine           0.827
## Urea          0.361  0.765
## Albumine     -0.865       
## 
##                  MR1   MR2
## SS loadings    2.305 2.290
## Proportion Var 0.288 0.286
## Cumulative Var 0.288 0.574
print(res.paf_cleaner_rotated$communality, digits = 3)
##          Age  Comorbidity  Hemoglobine   Leucocytes Thrombocytes   Creatinine 
##        0.726        0.238        0.565        0.360        0.525        0.779 
##         Urea     Albumine 
##        0.904        0.729
fa.diagram(res.paf_cleaner_rotated)

#In Oblimin rotation, loadings of .4 or .5 and above are considered strong loadings

#Based on my results, I will call Factor 1 Vulnerability since it loads positively and strongly with age and negatively and strongly with Albumine and Hemoglobine levels. It also loads less strongly though positively with Comorbidity. Low Albumine levels often indicate underlying liver disease or kidney disease, or even malnutrition. They are also associated with greater mortality after Total Hip Arthroplasty or Hemiarthroplasty (Thomas et al., 2023). Low levels of hemoglobine pre-surgery are associated with chronic disease (Clevenger and Richards, 2014). Higher comorbidities are associated with increased morbidity post surgery, particularly in older adults (Ho et al., 2020). #I will call Factor 2 Waste Accumulation because Waste Products because Creatinine and Urea strongly and positively loaded onto it, while Thrombocytes and Leucocytes strongly but negatively loaded onto it. Note: Most of the participants had normal Creatine, low hemoglobine, normal Leucocyte levels (though on the higher range of normal), and normal Thrombocyte levels, low Urea levels, and normal Albumine levels. Higher creatinine levels pre-surgery have been found to be associated with decreased risk of short-term mortality (Chen et al., 2022). Low BUN scores (Urea scores) were generally associated with decreased mortality (Liu et al., 2022). Pre-surgery, it is generally better to have higher Thrombocyte levels (a higher platelet count) (Estcourt et al., 2017). It is been found that it is better for mortality rates to have lower leuococyte levels pre-surgery (Connolly et al., 2013)

#Time for a CFA based on a two-factor model and using the second half of the data. #t-rule: Observed pieces of information > unknown parameters to be estimated See PSYC 7302 Week 5_CFA.pdf #We have p means, so 8 means [p(p+1)]/2 or 22.5 variances/covariances t<less than the above to be overidentified. We have 16 parameters to estimate. #The 8 residuals of each of our 8 variables, and the residuals of the variances of the factors (2).Plus 6 loadings since we will hold constant the first one of each factor loading. In short our model is over-identified.

library(lavaan)
## This is lavaan 0.6-19
## lavaan is FREE software! Please report any bugs.
## 
## Attaching package: 'lavaan'
## The following object is masked from 'package:psych':
## 
##     cor2cov
cfa_model<-'
Vulnerability=~Age+Comorbidity+Albumine+Hemoglobine
Waste_Accumulation =~ Creatinine + Urea + Thrombocytes+Leucocytes
Vulnerability~~Waste_Accumulation
'
fit.cfa_model=cfa(cfa_model, data=cfa_full, estimator="MLMV")
## Warning: lavaan->lav_data_full():  
##    some observed variances are (at least) a factor 1000 times larger than 
##    others; use varTable(fit) to investigate
summary(fit.cfa_model, fit.measures=TRUE, standardized=TRUE, rsquare=TRUE)
## lavaan 0.6-19 ended normally after 287 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        17
## 
##   Number of observations                           213
## 
## Model Test User Model:
##                                               Standard      Scaled
##   Test Statistic                                71.023      53.128
##   Degrees of freedom                                19          19
##   P-value (Chi-square)                           0.000       0.000
##   Scaling correction factor                                  1.463
##   Shift parameter                                            4.596
##     simple second-order correction                                
## 
## Model Test Baseline Model:
## 
##   Test statistic                               321.363     146.004
##   Degrees of freedom                                28          28
##   P-value                                        0.000       0.000
##   Scaling correction factor                                  2.413
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.823       0.711
##   Tucker-Lewis Index (TLI)                       0.739       0.574
##                                                                   
##   Robust Comparative Fit Index (CFI)                         0.825
##   Robust Tucker-Lewis Index (TLI)                            0.742
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -5299.686   -5299.686
##   Loglikelihood unrestricted model (H1)      -5264.175   -5264.175
##                                                                   
##   Akaike (AIC)                               10633.372   10633.372
##   Bayesian (BIC)                             10690.514   10690.514
##   Sample-size adjusted Bayesian (SABIC)      10636.646   10636.646
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.113       0.092
##   90 Percent confidence interval - lower         0.086       0.063
##   90 Percent confidence interval - upper         0.142       0.122
##   P-value H_0: RMSEA <= 0.050                    0.000       0.010
##   P-value H_0: RMSEA >= 0.080                    0.976       0.768
##                                                                   
##   Robust RMSEA                                               0.111
##   90 Percent confidence interval - lower                     0.076
##   90 Percent confidence interval - upper                     0.147
##   P-value H_0: Robust RMSEA <= 0.050                         0.003
##   P-value H_0: Robust RMSEA >= 0.080                         0.931
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.080       0.080
## 
## Parameter Estimates:
## 
##   Standard errors                           Robust.sem
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                         Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Vulnerability =~                                                           
##     Age                    1.000                               7.055    0.620
##     Comorbidity            0.050    0.016    3.149    0.002    0.350    0.333
##     Albumine              -0.245    0.062   -3.957    0.000   -1.728   -0.425
##     Hemoglobine           -0.055    0.017   -3.194    0.001   -0.390   -0.373
##   Waste_Accumulation =~                                                      
##     Creatinine             1.000                              26.326    0.819
##     Urea                   0.133    0.019    7.047    0.000    3.494    0.915
##     Thrombocytes          -0.451    0.158   -2.855    0.004  -11.867   -0.178
##     Leucocytes            -0.003    0.009   -0.343    0.731   -0.082   -0.025
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Vulnerability ~~                                                      
##     Waste_Accumltn   93.280   20.317    4.591    0.000    0.502    0.502
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .Age              79.753   17.224    4.630    0.000   79.753    0.616
##    .Comorbidity       0.977    0.106    9.235    0.000    0.977    0.889
##    .Albumine         13.569    2.404    5.643    0.000   13.569    0.820
##    .Hemoglobine       0.943    0.134    7.025    0.000    0.943    0.861
##    .Creatinine      339.168   88.005    3.854    0.000  339.168    0.329
##    .Urea              2.365    1.601    1.477    0.140    2.365    0.162
##    .Thrombocytes   4319.538  574.254    7.522    0.000 4319.538    0.968
##    .Leucocytes       10.543    1.044   10.100    0.000   10.543    0.999
##     Vulnerability    49.780   15.487    3.214    0.001    1.000    1.000
##     Waste_Accumltn  693.067  201.759    3.435    0.001    1.000    1.000
## 
## R-Square:
##                    Estimate
##     Age               0.384
##     Comorbidity       0.111
##     Albumine          0.180
##     Hemoglobine       0.139
##     Creatinine        0.671
##     Urea              0.838
##     Thrombocytes      0.032
##     Leucocytes        0.001

#Because Thrombocytes and Leucocytes were poor indicators, I removed them from my model.

cfa_model_refined <- '
  Vulnerability =~ Age + Comorbidity + Albumine + Hemoglobine
  Waste_Accumulation =~ Creatinine + Urea
  Vulnerability ~~ Waste_Accumulation
'

fit_refined <- cfa(cfa_model_refined, data = cfa_full, estimator = "MLMV")
summary(fit_refined, fit.measures = TRUE, standardized = TRUE, rsquare = TRUE)
## lavaan 0.6-19 ended normally after 232 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        13
## 
##   Number of observations                           213
## 
## Model Test User Model:
##                                               Standard      Scaled
##   Test Statistic                                18.322      15.260
##   Degrees of freedom                                 8           8
##   P-value (Chi-square)                           0.019       0.054
##   Scaling correction factor                                  1.280
##   Shift parameter                                            0.946
##     simple second-order correction                                
## 
## Model Test Baseline Model:
## 
##   Test statistic                               263.018     102.168
##   Degrees of freedom                                15          15
##   P-value                                        0.000       0.000
##   Scaling correction factor                                  2.753
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.958       0.917
##   Tucker-Lewis Index (TLI)                       0.922       0.844
##                                                                   
##   Robust Comparative Fit Index (CFI)                         0.961
##   Robust Tucker-Lewis Index (TLI)                            0.927
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -3552.203   -3552.203
##   Loglikelihood unrestricted model (H1)      -3543.041   -3543.041
##                                                                   
##   Akaike (AIC)                                7130.405    7130.405
##   Bayesian (BIC)                              7174.102    7174.102
##   Sample-size adjusted Bayesian (SABIC)       7132.909    7132.909
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.078       0.065
##   90 Percent confidence interval - lower         0.030       0.000
##   90 Percent confidence interval - upper         0.125       0.115
##   P-value H_0: RMSEA <= 0.050                    0.144       0.265
##   P-value H_0: RMSEA >= 0.080                    0.517       0.353
##                                                                   
##   Robust RMSEA                                               0.074
##   90 Percent confidence interval - lower                     0.000
##   90 Percent confidence interval - upper                     0.130
##   P-value H_0: Robust RMSEA <= 0.050                         0.210
##   P-value H_0: Robust RMSEA >= 0.080                         0.479
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.044       0.044
## 
## Parameter Estimates:
## 
##   Standard errors                           Robust.sem
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                         Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Vulnerability =~                                                           
##     Age                    1.000                               6.942    0.610
##     Comorbidity            0.048    0.016    3.083    0.002    0.335    0.320
##     Albumine              -0.251    0.062   -4.021    0.000   -1.743   -0.428
##     Hemoglobine           -0.059    0.018   -3.325    0.001   -0.409   -0.391
##   Waste_Accumulation =~                                                      
##     Creatinine             1.000                              25.037    0.779
##     Urea                   0.147    0.024    6.196    0.000    3.672    0.962
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Vulnerability ~~                                                      
##     Waste_Accumltn   86.570   20.086    4.310    0.000    0.498    0.498
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .Age              81.347   17.418    4.670    0.000   81.347    0.628
##    .Comorbidity       0.987    0.106    9.353    0.000    0.987    0.898
##    .Albumine         13.514    2.396    5.641    0.000   13.514    0.816
##    .Hemoglobine       0.928    0.134    6.947    0.000    0.928    0.847
##    .Creatinine      405.406   93.584    4.332    0.000  405.406    0.393
##    .Urea              1.095    2.057    0.532    0.594    1.095    0.075
##     Vulnerability    48.185   15.048    3.202    0.001    1.000    1.000
##     Waste_Accumltn  626.829  195.060    3.214    0.001    1.000    1.000
## 
## R-Square:
##                    Estimate
##     Age               0.372
##     Comorbidity       0.102
##     Albumine          0.184
##     Hemoglobine       0.153
##     Creatinine        0.607
##     Urea              0.925

#This model demonstrates good fit! Yay!

#Multi-group comparison using this model:

cfa_full$Dementia<- as.factor(cfa_full$Dementia)
table(cfa_full$Dementia)
## 
##   1   2 
## 158  55

#Configural invariance

fit_config <- cfa(cfa_model_refined, data = cfa_full, group = "Dementia", estimator = "MLMV")
## Warning: lavaan->lav_object_post_check():  
##    some estimated ov variances are negative
summary(fit_config, fit.measures = TRUE, standardized = TRUE, rsquare = TRUE)
## lavaan 0.6-19 ended normally after 1007 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        38
## 
##   Number of observations per group:                   
##     1                                              158
##     2                                               55
## 
## Model Test User Model:
##                                               Standard      Scaled
##   Test Statistic                                30.074      26.177
##   Degrees of freedom                                16          16
##   P-value (Chi-square)                           0.018       0.052
##   Scaling correction factor                                  1.320
##   Shift parameter                                            3.394
##     simple second-order correction                                
##   Test statistic for each group:
##     1                                           16.015      16.015
##     2                                           10.162      10.162
## 
## Model Test Baseline Model:
## 
##   Test statistic                               290.300     115.520
##   Degrees of freedom                                30          30
##   P-value                                        0.000       0.000
##   Scaling correction factor                                  2.868
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.946       0.881
##   Tucker-Lewis Index (TLI)                       0.899       0.777
##                                                                   
##   Robust Comparative Fit Index (CFI)                         0.945
##   Robust Tucker-Lewis Index (TLI)                            0.897
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -3529.080   -3529.080
##   Loglikelihood unrestricted model (H1)      -3514.043   -3514.043
##                                                                   
##   Akaike (AIC)                                7134.161    7134.161
##   Bayesian (BIC)                              7261.890    7261.890
##   Sample-size adjusted Bayesian (SABIC)       7141.479    7141.479
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.091       0.077
##   90 Percent confidence interval - lower         0.037       0.000
##   90 Percent confidence interval - upper         0.140       0.129
##   P-value H_0: RMSEA <= 0.050                    0.091       0.188
##   P-value H_0: RMSEA >= 0.080                    0.673       0.506
##                                                                   
##   Robust RMSEA                                               0.089
##   90 Percent confidence interval - lower                     0.000
##   90 Percent confidence interval - upper                     0.148
##   P-value H_0: Robust RMSEA <= 0.050                         0.148
##   P-value H_0: Robust RMSEA >= 0.080                         0.631
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.057       0.057
## 
## Parameter Estimates:
## 
##   Standard errors                           Robust.sem
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## 
## Group 1 [1]:
## 
## Latent Variables:
##                         Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Vulnerability =~                                                           
##     Age                    1.000                               7.367    0.661
##     Comorbidity            0.056    0.018    3.024    0.002    0.409    0.383
##     Albumine              -0.246    0.070   -3.518    0.000   -1.814   -0.421
##     Hemoglobine           -0.048    0.018   -2.671    0.008   -0.356   -0.334
##   Waste_Accumulation =~                                                      
##     Creatinine             1.000                              26.357    0.802
##     Urea                   0.143    0.023    6.272    0.000    3.780    0.998
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Vulnerability ~~                                                      
##     Waste_Accumltn   94.135   23.446    4.015    0.000    0.485    0.485
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .Age              78.791    0.886   88.903    0.000   78.791    7.073
##    .Comorbidity       1.234    0.085   14.521    0.000    1.234    1.155
##    .Albumine         37.076    0.343  108.239    0.000   37.076    8.611
##    .Hemoglobine       7.868    0.085   92.988    0.000    7.868    7.398
##    .Creatinine       90.487    2.613   34.628    0.000   90.487    2.755
##    .Urea              7.589    0.301   25.197    0.000    7.589    2.005
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .Age              69.835   16.042    4.353    0.000   69.835    0.563
##    .Comorbidity       0.974    0.116    8.410    0.000    0.974    0.853
##    .Albumine         15.247    3.176    4.801    0.000   15.247    0.822
##    .Hemoglobine       1.005    0.164    6.117    0.000    1.005    0.888
##    .Creatinine      384.203  111.762    3.438    0.001  384.203    0.356
##    .Urea              0.044    2.072    0.021    0.983    0.044    0.003
##     Vulnerability    54.267   18.871    2.876    0.004    1.000    1.000
##     Waste_Accumltn  694.680  253.115    2.745    0.006    1.000    1.000
## 
## R-Square:
##                    Estimate
##     Age               0.437
##     Comorbidity       0.147
##     Albumine          0.178
##     Hemoglobine       0.112
##     Creatinine        0.644
##     Urea              0.997
## 
## 
## Group 2 [2]:
## 
## Latent Variables:
##                         Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Vulnerability =~                                                           
##     Age                    1.000                               0.357    0.033
##     Comorbidity           -0.277    1.571   -0.176    0.860   -0.099   -0.101
##     Albumine              -2.268   12.102   -0.187    0.851   -0.809   -0.253
##     Hemoglobine           -3.266   18.106   -0.180    0.857   -1.166   -1.235
##   Waste_Accumulation =~                                                      
##     Creatinine             1.000                              20.367    0.682
##     Urea                   0.165    0.078    2.127    0.033    3.361    0.869
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Vulnerability ~~                                                      
##     Waste_Accumltn    2.747   15.948    0.172    0.863    0.378    0.378
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .Age              84.964    1.458   58.285    0.000   84.964    7.859
##    .Comorbidity       1.382    0.132   10.441    0.000    1.382    1.408
##    .Albumine         36.182    0.432   83.781    0.000   36.182   11.297
##    .Hemoglobine       7.498    0.127   58.896    0.000    7.498    7.942
##    .Creatinine       93.309    4.028   23.166    0.000   93.309    3.124
##    .Urea              8.225    0.522   15.764    0.000    8.225    2.126
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .Age             116.744   54.786    2.131    0.033  116.744    0.999
##    .Comorbidity       0.954    0.185    5.164    0.000    0.954    0.990
##    .Albumine          9.603    2.056    4.671    0.000    9.603    0.936
##    .Hemoglobine      -0.468    1.471   -0.318    0.750   -0.468   -0.525
##    .Creatinine      477.507  118.817    4.019    0.000  477.507    0.535
##    .Urea              3.676    5.081    0.723    0.469    3.676    0.245
##     Vulnerability     0.127    1.369    0.093    0.926    1.000    1.000
##     Waste_Accumltn  414.815  245.935    1.687    0.092    1.000    1.000
## 
## R-Square:
##                    Estimate
##     Age               0.001
##     Comorbidity       0.010
##     Albumine          0.064
##     Hemoglobine          NA
##     Creatinine        0.465
##     Urea              0.755

#I tried removing the worst indicators.

cfa_model_trimmed <- '
  Vulnerability =~ Age + Comorbidity + Albumine
  Waste_Accumulation =~ Creatinine
  Vulnerability ~~ Waste_Accumulation
'

fit_trimmed <- cfa(cfa_model_trimmed, data = cfa_full, group = "Dementia", estimator = "MLMV")
## Warning: lavaan->lav_lavaan_step11_estoptim():  
##    Model estimation FAILED! Returning starting values.
summary(fit_trimmed, fit.measures = TRUE, standardized = TRUE, rsquare = TRUE)
## Warning: lavaan->lav_object_summary():  
##    fit measures not available if model did not converge
## lavaan 0.6-19 did NOT end normally after 761 iterations
## ** WARNING ** Estimates below are most likely unreliable
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        24
## 
##   Number of observations per group:                   
##     1                                              158
##     2                                               55
## 
## 
## Parameter Estimates:
## 
##   Standard errors                           Robust.sem
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## 
## Group 1 [1]:
## 
## Latent Variables:
##                         Estimate   Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Vulnerability =~                                                            
##     Age                     1.000                               0.000    0.000
##     Comorbidity         -2812.426       NA                     -0.383   -0.361
##     Albumine            10957.010       NA                      1.492    0.344
##   Waste_Accumulation =~                                                       
##     Creatinine              1.000                              32.822    1.000
## 
## Covariances:
##                    Estimate   Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Vulnerability ~~                                                       
##     Waste_Accumltn    -0.003       NA                     -0.688   -0.688
## 
## Intercepts:
##                    Estimate   Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .Age               78.791       NA                     78.791    7.073
##    .Comorbidity        1.234       NA                      1.234    1.163
##    .Albumine          37.076       NA                     37.076    8.555
##    .Creatinine        90.487       NA                     90.487    2.757
## 
## Variances:
##                    Estimate   Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .Age              124.103       NA                    124.103    1.000
##    .Comorbidity        0.980       NA                      0.980    0.870
##    .Albumine          16.556       NA                     16.556    0.882
##    .Creatinine         0.000                               0.000    0.000
##     Vulnerability      0.000       NA                      1.000    1.000
##     Waste_Accumltn  1077.274       NA                      1.000    1.000
## 
## R-Square:
##                    Estimate 
##     Age                0.000
##     Comorbidity        0.130
##     Albumine           0.118
##     Creatinine         1.000
## 
## 
## Group 2 [2]:
## 
## Latent Variables:
##                         Estimate   Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Vulnerability =~                                                            
##     Age                     1.000                               0.012    0.001
##     Comorbidity            -5.060       NA                     -0.059   -0.060
##     Albumine              938.711       NA                     10.986    3.426
##   Waste_Accumulation =~                                                       
##     Creatinine              1.000                              29.941    1.000
## 
## Covariances:
##                    Estimate   Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Vulnerability ~~                                                       
##     Waste_Accumltn    -0.004       NA                     -0.011   -0.011
## 
## Intercepts:
##                    Estimate   Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .Age               84.964       NA                     84.964    7.859
##    .Comorbidity        1.382       NA                      1.382    1.408
##    .Albumine          36.182       NA                     36.182   11.285
##    .Creatinine        93.309       NA                     93.309    3.116
## 
## Variances:
##                    Estimate   Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .Age              116.874       NA                    116.874    1.000
##    .Comorbidity        0.960       NA                      0.960    0.996
##    .Albumine        -110.408       NA                   -110.408  -10.740
##    .Creatinine         0.000                               0.000    0.000
##     Vulnerability      0.000       NA                      1.000    1.000
##     Waste_Accumltn   896.434       NA                      1.000    1.000
## 
## R-Square:
##                    Estimate 
##     Age                0.000
##     Comorbidity        0.004
##     Albumine              NA
##     Creatinine         1.000
#install.packages("semPlot")   # If not already installed
library(semPlot)

semPaths(fit_refined, 
         what = "std",        # standardized estimates
         layout = "tree",     # layout style: "tree", "circle", etc.
         edge.label.cex = .8,# text size for edge labels
         sizeMan = 8,         # size of manifest (observed) variables
         sizeLat = 12,         # size of latent variables
         residuals = TRUE,    # show residual variances
         intercepts = FALSE,  # usually not shown
         nCharNodes = 0       # do not truncate variable names
)