David Flora (Article, 2012)

library(faoutlier)

## Warning: package 'faoutlier' was built under R version 4.1.3

## Loading required package: sem

## Warning: package 'sem' was built under R version 4.1.3

## Loading required package: mvtnorm

## Loading required package: parallel

### read in data file with no missing values
library(rio)

## Warning: package 'rio' was built under R version 4.1.3

holzinger <- import("holzing_transformed.sav")
holzinger <- round(holzinger,0)

flora <- '
 1
 0.75 1
 0.78 0.72 1
 0.44 0.52 0.47 1
 0.45 0.53 0.48 0.82 1
 0.51 0.58 0.54 0.82 0.74 1
 0.21 0.23 0.28 0.33 0.37 0.35 1
 0.30 0.32 0.37 0.33 0.36 0.38 0.45 1
 0.31 0.30 0.37 0.31 0.36 0.38 0.52 0.67 1'


flora <- lavaan::getCov(flora,diagonal = T,  names = c("WrdMean", "SntComp", 
                "OddWrds", "MxdArit","Remndrs", "MissNum", "Gloves","Boots","Hatchts")) 

fa.flora <- psych::fa(flora, nfactors = 3, n.obs = 696, fm="mle", gam=0)

## Loading required namespace: GPArotation

print(fa.flora, cut=.30, sort=T)

## Factor Analysis using method =  ml
## Call: psych::fa(r = flora, nfactors = 3, n.obs = 696, fm = "mle", gam = 0)
## Standardized loadings (pattern matrix) based upon correlation matrix
##         item   ML1   ML2   ML3   h2    u2 com
## MxdArit    4  1.01             0.93 0.073 1.0
## Remndrs    5  0.81             0.74 0.260 1.0
## MissNum    6  0.75             0.76 0.241 1.1
## WrdMean    1        0.94       0.82 0.181 1.0
## OddWrds    3        0.83       0.75 0.248 1.0
## SntComp    2        0.77       0.71 0.290 1.1
## Hatchts    9              0.90 0.78 0.222 1.0
## Boots      8              0.73 0.58 0.420 1.0
## Gloves     7              0.55 0.37 0.629 1.2
## 
##                        ML1  ML2  ML3
## SS loadings           2.42 2.28 1.73
## Proportion Var        0.27 0.25 0.19
## Cumulative Var        0.27 0.52 0.72
## Proportion Explained  0.38 0.35 0.27
## Cumulative Proportion 0.38 0.73 1.00
## 
##  With factor correlations of 
##      ML1  ML2  ML3
## ML1 1.00 0.59 0.45
## ML2 0.59 1.00 0.43
## ML3 0.45 0.43 1.00
## 
## Mean item complexity =  1
## Test of the hypothesis that 3 factors are sufficient.
## 
## The degrees of freedom for the null model are  36  and the objective function was  5.91 with Chi Square of  4083.04
## The degrees of freedom for the model are 12  and the objective function was  0.02 
## 
## The root mean square of the residuals (RMSR) is  0.01 
## The df corrected root mean square of the residuals is  0.01 
## 
## The harmonic number of observations is  696 with the empirical chi square  2.1  with prob <  1 
## The total number of observations was  696  with Likelihood Chi Square =  11.27  with prob <  0.51 
## 
## Tucker Lewis Index of factoring reliability =  1.001
## RMSEA index =  0  and the 90 % confidence intervals are  0 0.037
## BIC =  -67.27
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    ML1  ML2  ML3
## Correlation of (regression) scores with factors   0.98 0.96 0.92
## Multiple R square of scores with factors          0.95 0.91 0.85
## Minimum correlation of possible factor scores     0.90 0.82 0.71

Data Screening and Testing of Assumptions

################## Data Screening and Testing of Assumptions ################

# 1- Accuracy: are all the data in range?
# 2- Outliers: are there outliers, and what should I do with them?
# 3- Statistical assumptions: additivity or linearity, normality, homogeneity or        
#                             homoscedasticity.
# 4- Missing Data: are there missing data, and what should I do with them?
#
# 5- Conceptual assumptions(EFA & CFA): 
#   A-There are some underlying structure.
#   B-The relationship between measured variables and factors are linear.
#   C-The linear relationships are invariant across participants.
#   D- Effect indicators

##############################################################################

1.Data Accuracy

############################## 1.Data Accuracy ###############################
#summary statistics
psych::describe(holzinger[,2:10])

##         vars   n   mean    sd median trimmed   mad min max range  skew kurtosis
## WrdMean    1 101 100.01 15.08     99   99.96 11.86  59 144    85  0.10     0.50
## SntComp    2 101 100.00 14.99     99  100.11 14.83  61 135    74 -0.10    -0.09
## OddWrds    3 101 100.00 14.98     99   99.85 13.34  57 147    90  0.19     0.61
## MxdArit    4 101 100.03 14.96     99  100.14 16.31  66 129    63 -0.03    -0.72
## Remndrs    5 101  99.98 15.02     99   99.95 14.83  59 155    96  0.24     1.02
## MissNum    6 101  99.96 14.98    100  100.11 14.83  56 134    78 -0.13    -0.06
## Gloves     7 101  99.99 14.99    101   99.44 11.86  66 153    87  0.40     0.69
## Boots      8 101 100.01 15.01    101  100.05 16.31  60 134    74 -0.04    -0.38
## Hatchts    9 101 100.03 15.04     97   99.01 16.31  72 157    85  0.76     1.03
##           se
## WrdMean 1.50
## SntComp 1.49
## OddWrds 1.49
## MxdArit 1.49
## Remndrs 1.49
## MissNum 1.49
## Gloves  1.49
## Boots   1.49
## Hatchts 1.50

2.Univariate Outliers

############################### 2.Univariate  Outliers ######################

# compute Z scores for identifying univariate outliers
holzinger$z.Remndrs <- scale(holzinger$Remndrs)
holzinger$z.SntComp <- scale(holzinger$SntComp)
holzinger$z.WrdMean <- scale(holzinger$WrdMean)
holzinger$z.MissNum <- scale(holzinger$MissNum)
holzinger$z.OddWrds <- scale(holzinger$OddWrds)
holzinger$z.Boots <- scale(holzinger$Boots)
holzinger$z.Gloves <- scale(holzinger$Gloves)
holzinger$z.Hatches <- scale(holzinger$Hatchts)


# compute summary statistics for Z scores
psych::describe(holzinger[, 11:18])

##           vars   n mean sd median trimmed  mad   min  max range  skew kurtosis
## z.Remndrs    1 101    0  1  -0.07    0.00 0.99 -2.73 3.66  6.39  0.24     1.02
## z.SntComp    2 101    0  1  -0.07    0.01 0.99 -2.60 2.34  4.94 -0.10    -0.09
## z.WrdMean    3 101    0  1  -0.07    0.00 0.79 -2.72 2.92  5.64  0.10     0.50
## z.MissNum    4 101    0  1   0.00    0.01 0.99 -2.93 2.27  5.21 -0.13    -0.06
## z.OddWrds    5 101    0  1  -0.07   -0.01 0.89 -2.87 3.14  6.01  0.19     0.61
## z.Boots      6 101    0  1   0.07    0.00 1.09 -2.67 2.26  4.93 -0.04    -0.38
## z.Gloves     7 101    0  1   0.07   -0.04 0.79 -2.27 3.54  5.80  0.40     0.69
## z.Hatches    8 101    0  1  -0.20   -0.07 1.08 -1.86 3.79  5.65  0.76     1.03
##            se
## z.Remndrs 0.1
## z.SntComp 0.1
## z.WrdMean 0.1
## z.MissNum 0.1
## z.OddWrds 0.1
## z.Boots   0.1
## z.Gloves  0.1
## z.Hatches 0.1

which(abs(holzinger$z.Remndrs) > 3.29)

## [1] 1

which(abs(holzinger$z.Gloves) > 3.29)

## [1] 1

which(abs(holzinger$z.Hatches) > 3.29)

## [1] 1

# Grubbs tests for one or two univariate Outliers in R
library(outliers)

## Warning: package 'outliers' was built under R version 4.2.0

apply(holzinger[,2:10], 2, grubbs.test)

## $WrdMean
## 
##  Grubbs test for one outlier
## 
## data:  newX[, i]
## G.1 = 2.91645, U = 0.91409, p-value = 0.1482
## alternative hypothesis: highest value 144 is an outlier
## 
## 
## $SntComp
## 
##  Grubbs test for one outlier
## 
## data:  newX[, i]
## G.46 = 2.60208, U = 0.93161, p-value = 0.4168
## alternative hypothesis: lowest value 61 is an outlier
## 
## 
## $OddWrds
## 
##  Grubbs test for one outlier
## 
## data:  newX[, i]
## G.1 = 3.13696, U = 0.90061, p-value = 0.06675
## alternative hypothesis: highest value 147 is an outlier
## 
## 
## $MxdArit
## 
##  Grubbs test for one outlier
## 
## data:  newX[, i]
## G.74 = 2.27437, U = 0.94776, p-value = 1
## alternative hypothesis: lowest value 66 is an outlier
## 
## 
## $Remndrs
## 
##  Grubbs test for one outlier
## 
## data:  newX[, i]
## G.1 = 3.66214, U = 0.86455, p-value = 0.007715
## alternative hypothesis: highest value 155 is an outlier
## 
## 
## $MissNum
## 
##  Grubbs test for one outlier
## 
## data:  newX[, i]
## G.74 = 2.93397, U = 0.91306, p-value = 0.1394
## alternative hypothesis: lowest value 56 is an outlier
## 
## 
## $Gloves
## 
##  Grubbs test for one outlier
## 
## data:  newX[, i]
## G.1 = 3.53580, U = 0.87373, p-value = 0.01343
## alternative hypothesis: highest value 153 is an outlier
## 
## 
## $Boots
## 
##  Grubbs test for one outlier
## 
## data:  newX[, i]
## G.87 = 2.66585, U = 0.92822, p-value = 0.3411
## alternative hypothesis: lowest value 60 is an outlier
## 
## 
## $Hatchts
## 
##  Grubbs test for one outlier
## 
## data:  newX[, i]
## G.1 = 3.78886, U = 0.85501, p-value = 0.004323
## alternative hypothesis: highest value 157 is an outlier

# Boxplot with 1.5 time interquartile range, some experts suggest 2.2 
# interquartile range be used
boxplot(holzinger[,2:10], notch = TRUE, boxfill="royalblue", whiskcol="blue",
         pch=16, outcol="firebrick", plot = TRUE)

boxplot(holzinger[,2:10], notch = TRUE, boxfill="royalblue", whiskcol="blue",
         pch=16, outcol="firebrick", range = 2.2,
         border = "blue", col = "red", plot = TRUE)

3.Multivariate Outliers

############################### 3.Multivariate  Outliers ######################

##### calculate Mahalanobis Distance(MD) with two methods

### method 1: Traditional MD using psych package
library(psych)

## 
## Attaching package: 'psych'

## The following object is masked from 'package:outliers':
## 
##     outlier

holzinger[,11:18] <- NULL # Removing unnecessary variables

holzinger$mahal <- psych::outlier(holzinger[,2:10], plot = TRUE)

holzinger$leverage <- holzinger$mahal/(nrow(holzinger)-1)
m.leverage <- mean(holzinger$leverage)
holzinger$leve.outlier <- holzinger$leverage > 2*m.leverage


# compute significant level of Mahalanobis statistics
holzinger$p.value <- 1-pchisq(holzinger$mahal, ncol(holzinger[,2:10]))
holzinger$case.outlier <- holzinger$p.value < 0.001
pairs.panels(holzinger[,2:10],bg=c("yellow","blue")[(holzinger$mahal > 25)+1],pch=21)

boxplot(holzinger$mahal,boxfill="royalblue", whiskcol="blue",
         pch=16, outcol="firebrick", border = "blue", col = "red", plot = TRUE)

boxplot(holzinger$mahal)

hist(holzinger$mahal)

# Unfortunately, extreme outliers may negatively influence the accuracy of MD values so  
# robust MD estimation techniques have been recommended.

# method 2: Robust MD using faoutlier package
library(faoutlier)
robustmd <- robustMD(holzinger[,2:10], method="mcd")
robustmd

##          mah       p  sig
## 1  124.09615 0.00000 ****
## 76  35.52189 0.00005 ****
## 67  30.07464 0.00043  ***
## 62  26.95243 0.00142   **
## 93  24.52184 0.00355   **
## 73  20.67727 0.01416    *
## 75  20.41133 0.01554    *
## 68  19.88229 0.01865    *
## 74  18.86854 0.02633    *
## 66  18.52316 0.02957    *

plot(robustmd)

boxplot(robustmd$mah, main="Raw Mahalanobis Distance", range = 1.5,
        border = "blue", col = "red")

##    vars   n mean    sd median trimmed   mad    min    max  range skew kurtosis
## X1    1 101    0 56.03  -2.68   -0.17 45.34 -151.2 163.86 315.06 0.11      0.5
##      se
## X1 5.58

# par(mfrow=c(5,2))

# histogram of residuals for WrdMean
plot(h1,freq=FALSE,ylab = "Percent of Total", xlab = "Standardized residuals(WrdMean)",  
     main="Histogram of standardized residuals")

# histogram of residuals for SntComp
plot(h2,freq=FALSE,ylab = "Percent of Total", xlab = "Standardized residuals (SntComp)",
      main="Histogram of standardized residuals")

# histogram of residuals for OddWrds
plot(h3,freq=FALSE,ylab = "Percent of Total", xlab = "Standardized residuals (OddWrds)",
      main="Histogram of standardized residuals")

# histogram of residuals for MxdArit
plot(h4,freq=FALSE,ylab = "Percent of Total", xlab = "Standardized residuals (MxdArit)",
      main="Histogram of standardized residuals")

# histogram of residuals for Remndrs
plot(h5,freq=FALSE,ylab = "Percent of Total", xlab = "Standardized residuals (Remndrs)",
      main="Histogram of standardized residuals")

# histogram of residuals for MissNum
plot(h6,freq=FALSE,ylab = "Percent of Total", xlab = "Standardized residuals (MissNum)",
      main="Histogram of standardized residuals")

# histogram of residuals for Gloves
plot(h7,freq=FALSE,ylab = "Percent of Total", xlab = "Standardized residuals (Gloves)",
      main="Histogram of standardized residuals")

# histogram of residuals for Boots
plot(h8,freq=FALSE,ylab = "Percent of Total", xlab = "Standardized residuals (Boots)",
      main="Histogram of standardized residuals")

# histogram of residuals for Hatchts
plot(h8,freq=FALSE,ylab = "Percent of Total", xlab = "Standardized residuals (Hatchts)",
      main="Histogram of standardized residuals")

INFLUENCE

cases with large residuals are not necessarily influential and cases with high MD are not necessarily bad leverage points(Yuan and Zhong,2008). A key heuristic in regression diagnostics is that case influence is a product of both leverage and the discrepancy of predicted values from observed values as measured by residuals. Influence statistics known as deletion statistics summarize the extent to which parameter estimates (e.g.,regression slopes or factor loadings) change when an observation is deleted from a data set. A common deletion statistic used in multiple regression is Cook’s distance, which can be broadened to generalized Cook’s distance(gCD) to measure the influence of a case on a set of parameter estimates from a factor analysis model.

gCDresult <- gCD(holzinger[,2:10],3, DFBETAS=T)
plot(gCDresult)

hist(gCDresult$gCD)

plot(gCDresult$gCD)

boxplot(gCDresult$gCD)

holzinger$gCD <- gCDresult$gCD
holzinger$dfBetas <- gCDresult$dfbetas

# Based on results of robustMD and gCD, case 1 is potential outlier and influential 
# point.

Because gCD is calculated by deleting only a single case i from the complete dataset,it is susceptible to masking errors, which occur when an influential case is not identified as such because it is located in the multivariate space close to one or more similarly influential cases. Instead, a local influence or forward search approach can be used to identify groups of influential cases.

FS <- forward.search(holzinger[,2:10],3, print.messages = FALSE, stat="mah")
plot(FS)

4.Statistical assumptions

########################## 4.Statistical assumptions #######################

# 4.1. Additivity or linearity
pairs.panels(holzinger[,2:10])

# 4.2. Normality
boxplot(holzinger[,2:10], notch = TRUE, boxfill="royalblue", whiskcol="blue",
         pch=16, outcol="firebrick", range = 2.2, border = "blue",
         col = "red", plot = TRUE)

# Skew > 2.0 or kurtosis > 7.0 would indicate severe univariate non-normality
describe(holzinger[,2:10])

##         vars   n   mean    sd median trimmed   mad min max range  skew kurtosis
## WrdMean    1 101 100.01 15.08     99   99.96 11.86  59 144    85  0.10     0.50
## SntComp    2 101 100.00 14.99     99  100.11 14.83  61 135    74 -0.10    -0.09
## OddWrds    3 101 100.00 14.98     99   99.85 13.34  57 147    90  0.19     0.61
## MxdArit    4 101 100.03 14.96     99  100.14 16.31  66 129    63 -0.03    -0.72
## Remndrs    5 101  99.98 15.02     99   99.95 14.83  59 155    96  0.24     1.02
## MissNum    6 101  99.96 14.98    100  100.11 14.83  56 134    78 -0.13    -0.06
## Gloves     7 101  99.99 14.99    101   99.44 11.86  66 153    87  0.40     0.69
## Boots      8 101 100.01 15.01    101  100.05 16.31  60 134    74 -0.04    -0.38
## Hatchts    9 101 100.03 15.04     97   99.01 16.31  72 157    85  0.76     1.03
##           se
## WrdMean 1.50
## SntComp 1.49
## OddWrds 1.49
## MxdArit 1.49
## Remndrs 1.49
## MissNum 1.49
## Gloves  1.49
## Boots   1.49
## Hatchts 1.50

# multivariate normality
QuantPsyc::mult.norm(holzinger[, 2:10])[1]

## $mult.test
##           Beta-hat      kappa        p-val
## Skewness  22.57354 379.987894 0.000000e+00
## Kurtosis 114.34581   5.480092 4.251058e-08

# Collinearity or Multicolinearity

# 1. eigen values
# If any eigen values of  zero or are negative, R is non-positive definite and collinearity is present

eigen(cor(holzinger[,2:10]))$values

## [1] 4.5166687 1.3776051 1.0570865 0.8379775 0.4204236 0.2725291 0.2356054
## [8] 0.1656584 0.1164456

# 2. condition index
model <- lm(ID ~ WrdMean + SntComp + OddWrds + MxdArit + Remndrs + MissNum +
               Gloves + Boots + Hatchts, data = holzinger[,1:10])

round(olsrr::ols_eigen_cindex(model),3)

##    Eigenvalue Condition Index intercept WrdMean SntComp OddWrds MxdArit Remndrs
## 1       9.891           1.000     0.000   0.000   0.000   0.000   0.000   0.000
## 2       0.031          17.847     0.004   0.009   0.042   0.006   0.010   0.002
## 3       0.023          20.698     0.000   0.065   0.015   0.055   0.054   0.017
## 4       0.019          22.716     0.018   0.006   0.049   0.022   0.000   0.068
## 5       0.010          31.577     0.460   0.006   0.003   0.005   0.002   0.015
## 6       0.009          33.793     0.425   0.000   0.007   0.016   0.000   0.023
## 7       0.006          40.792     0.003   0.048   0.002   0.135   0.003   0.414
## 8       0.005          44.146     0.043   0.057   0.273   0.631   0.028   0.004
## 9       0.004          52.276     0.006   0.724   0.425   0.104   0.169   0.252
## 10      0.003          62.731     0.041   0.085   0.183   0.026   0.734   0.207
##    MissNum Gloves Boots Hatchts
## 1    0.000  0.000 0.000   0.000
## 2    0.011  0.066 0.057   0.081
## 3    0.041  0.001 0.000   0.003
## 4    0.001  0.072 0.262   0.005
## 5    0.005  0.242 0.052   0.236
## 6    0.009  0.417 0.110   0.191
## 7    0.245  0.003 0.192   0.143
## 8    0.015  0.134 0.202   0.173
## 9    0.005  0.001 0.005   0.016
## 10   0.669  0.062 0.121   0.151

# 3. Tolerance and VIF
olsrr::ols_coll_diag(model)$vif_t

##   Variables Tolerance      VIF
## 1   WrdMean 0.2711809 3.687576
## 2   SntComp 0.3109453 3.216000
## 3   OddWrds 0.3325070 3.007456
## 4   MxdArit 0.2144834 4.662366
## 5   Remndrs 0.3046720 3.282219
## 6   MissNum 0.2380165 4.201390
## 7    Gloves 0.5284617 1.892285
## 8     Boots 0.5475818 1.826211
## 9   Hatchts 0.4489071 2.227632

Factor analysis with and without oulier case

holz.with.outlier <- fa(holzinger[,2:10], nfactors = 3, fm = "ml", gam=0)
print(holz.with.outlier, sort = TRUE)

## Factor Analysis using method =  ml
## Call: fa(r = holzinger[, 2:10], nfactors = 3, fm = "ml", gam = 0)
## Standardized loadings (pattern matrix) based upon correlation matrix
##         item   ML1   ML2   ML3   h2    u2 com
## MxdArit    4  1.01 -0.06  0.00 0.95 0.045 1.0
## MissNum    6  0.82  0.08  0.02 0.76 0.242 1.0
## Remndrs    5  0.56  0.17  0.33 0.68 0.319 1.8
## WrdMean    1 -0.08  0.94  0.08 0.87 0.132 1.0
## OddWrds    3  0.01  0.76  0.20 0.72 0.275 1.1
## SntComp    2  0.26  0.73 -0.29 0.69 0.312 1.6
## Hatchts    9  0.02  0.08  0.75 0.63 0.370 1.0
## Gloves     7  0.14  0.02  0.69 0.57 0.430 1.1
## Boots      8  0.19  0.05  0.38 0.25 0.753 1.5
## 
##                        ML1  ML2  ML3
## SS loadings           2.33 2.22 1.58
## Proportion Var        0.26 0.25 0.18
## Cumulative Var        0.26 0.50 0.68
## Proportion Explained  0.38 0.36 0.26
## Cumulative Proportion 0.38 0.74 1.00
## 
##  With factor correlations of 
##      ML1  ML2  ML3
## ML1 1.00 0.51 0.28
## ML2 0.51 1.00 0.34
## ML3 0.28 0.34 1.00
## 
## Mean item complexity =  1.2
## Test of the hypothesis that 3 factors are sufficient.
## 
## The degrees of freedom for the null model are  36  and the objective function was  5.85 with Chi Square of  562.9
## The degrees of freedom for the model are 12  and the objective function was  0.53 
## 
## The root mean square of the residuals (RMSR) is  0.05 
## The df corrected root mean square of the residuals is  0.09 
## 
## The harmonic number of observations is  101 with the empirical chi square  19.11  with prob <  0.086 
## The total number of observations was  101  with Likelihood Chi Square =  49.5  with prob <  1.7e-06 
## 
## Tucker Lewis Index of factoring reliability =  0.782
## RMSEA index =  0.176  and the 90 % confidence intervals are  0.127 0.229
## BIC =  -5.88
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                    ML1  ML2  ML3
## Correlation of (regression) scores with factors   0.98 0.96 0.89
## Multiple R square of scores with factors          0.96 0.92 0.79
## Minimum correlation of possible factor scores     0.93 0.83 0.58

# replacing outlier case 
holzinger <- psych::scrub(holzinger, where=c("Remndrs", "Gloves", "Hatchts"),
                      isvalue=c(155,153,157), newvalue=c(132,134,143))

holz.without.outlier <- fa(holzinger[,2:10], nfactors = 3, fm = "ml", gam=0)
print(holz.without.outlier, cut= .25, sort = TRUE)

## Factor Analysis using method =  ml
## Call: fa(r = holzinger[, 2:10], nfactors = 3, fm = "ml", gam = 0)
## Standardized loadings (pattern matrix) based upon correlation matrix
##         item   ML1   ML2   ML3   h2    u2 com
## MxdArit    4  1.00             0.94 0.057 1.0
## MissNum    6  0.83             0.77 0.234 1.0
## Remndrs    5  0.65             0.68 0.318 1.3
## WrdMean    1        0.98       0.91 0.090 1.0
## OddWrds    3        0.76       0.70 0.305 1.1
## SntComp    2  0.28  0.66       0.61 0.393 1.6
## Hatchts    9              0.92 0.85 0.145 1.0
## Boots      8              0.52 0.35 0.645 1.2
## Gloves     7              0.51 0.39 0.607 1.3
## 
##                        ML1  ML2  ML3
## SS loadings           2.48 2.15 1.57
## Proportion Var        0.28 0.24 0.17
## Cumulative Var        0.28 0.51 0.69
## Proportion Explained  0.40 0.35 0.25
## Cumulative Proportion 0.40 0.75 1.00
## 
##  With factor correlations of 
##      ML1  ML2  ML3
## ML1 1.00 0.51 0.33
## ML2 0.51 1.00 0.37
## ML3 0.33 0.37 1.00
## 
## Mean item complexity =  1.2
## Test of the hypothesis that 3 factors are sufficient.
## 
## The degrees of freedom for the null model are  36  and the objective function was  5.81 with Chi Square of  558.42
## The degrees of freedom for the model are 12  and the objective function was  0.45 
## 
## The root mean square of the residuals (RMSR) is  0.05 
## The df corrected root mean square of the residuals is  0.08 
## 
## The harmonic number of observations is  101 with the empirical chi square  14.96  with prob <  0.24 
## The total number of observations was  101  with Likelihood Chi Square =  42.73  with prob <  2.5e-05 
## 
## Tucker Lewis Index of factoring reliability =  0.819
## RMSEA index =  0.159  and the 90 % confidence intervals are  0.11 0.213
## BIC =  -12.65
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                    ML1  ML2  ML3
## Correlation of (regression) scores with factors   0.98 0.97 0.94
## Multiple R square of scores with factors          0.96 0.93 0.88
## Minimum correlation of possible factor scores     0.92 0.87 0.75

4.Dealing With Missing Data

# Generate randomly missing values
set.seed(1234)

library(missForest)
holzinger.miss <- prodNA(holzinger[,2:10], noNA = 0.10) # generate missing values

# Frequencies and percentages of missing data 
colSums(is.na((holzinger.miss)))

## WrdMean SntComp OddWrds MxdArit Remndrs MissNum  Gloves   Boots Hatchts 
##       6      10      12      10       6      14      12       9      11

round(colSums(is.na(holzinger.miss))/nrow(holzinger.miss) * 100, 2)

## WrdMean SntComp OddWrds MxdArit Remndrs MissNum  Gloves   Boots Hatchts 
##    5.94    9.90   11.88    9.90    5.94   13.86   11.88    8.91   10.89

(sum(is.na(holzinger.miss))/(nrow(holzinger.miss)*ncol(holzinger.miss)))*100

## [1] 9.90099

# Number and Proportion of Usable Cases
p <- mice::md.pairs(holzinger.miss)
p$mr

##         WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## WrdMean       0       6       6       4       6       6      6     6       6
## SntComp      10       0       9       9       8       9      8    10      10
## OddWrds      12      11       0      11      11       9     12    10      11
## MxdArit       8       9       9       0      10       9      7    10       7
## Remndrs       6       4       5       6       0       5      5     5       6
## MissNum      14      13      11      13      13       0     12    12      12
## Gloves       12      10      12       9      11      10      0    12      12
## Boots         9       9       7       9       8       7      9     0       9
## Hatchts      11      11      10       8      11       9     11    11       0

round(p$mr/colSums(is.na(holzinger.miss)),2)

##         WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## WrdMean     0.0    1.00    1.00    0.67    1.00    1.00   1.00  1.00    1.00
## SntComp     1.0    0.00    0.90    0.90    0.80    0.90   0.80  1.00    1.00
## OddWrds     1.0    0.92    0.00    0.92    0.92    0.75   1.00  0.83    0.92
## MxdArit     0.8    0.90    0.90    0.00    1.00    0.90   0.70  1.00    0.70
## Remndrs     1.0    0.67    0.83    1.00    0.00    0.83   0.83  0.83    1.00
## MissNum     1.0    0.93    0.79    0.93    0.93    0.00   0.86  0.86    0.86
## Gloves      1.0    0.83    1.00    0.75    0.92    0.83   0.00  1.00    1.00
## Boots       1.0    1.00    0.78    1.00    0.89    0.78   1.00  0.00    1.00
## Hatchts     1.0    1.00    0.91    0.73    1.00    0.82   1.00  1.00    0.00

# correlations between the variables in the data set and the missing data indicators
R <- is.na(holzinger.miss)
R

##     WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 1     FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 2     FALSE    TRUE   FALSE    TRUE   FALSE   FALSE   TRUE FALSE   FALSE
## 3     FALSE   FALSE   FALSE   FALSE   FALSE    TRUE  FALSE FALSE    TRUE
## 4      TRUE   FALSE   FALSE    TRUE   FALSE   FALSE  FALSE FALSE   FALSE
## 5     FALSE   FALSE   FALSE   FALSE   FALSE    TRUE  FALSE FALSE   FALSE
## 6     FALSE   FALSE    TRUE   FALSE   FALSE    TRUE  FALSE FALSE   FALSE
## 7     FALSE    TRUE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 8     FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE    TRUE
## 9     FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 10    FALSE   FALSE    TRUE    TRUE   FALSE   FALSE  FALSE FALSE   FALSE
## 11    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 12    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 13    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 14    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 15    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 16    FALSE   FALSE    TRUE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 17    FALSE   FALSE   FALSE   FALSE   FALSE    TRUE   TRUE FALSE   FALSE
## 18    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 19    FALSE   FALSE    TRUE   FALSE   FALSE   FALSE  FALSE  TRUE   FALSE
## 20    FALSE   FALSE   FALSE   FALSE    TRUE   FALSE  FALSE  TRUE   FALSE
## 21    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE   TRUE FALSE   FALSE
## 22    FALSE    TRUE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 23    FALSE   FALSE   FALSE    TRUE   FALSE   FALSE   TRUE FALSE   FALSE
## 24    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 25    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 26    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 27    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 28    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE   TRUE FALSE   FALSE
## 29    FALSE   FALSE   FALSE   FALSE   FALSE    TRUE  FALSE  TRUE   FALSE
## 30    FALSE    TRUE   FALSE   FALSE    TRUE   FALSE  FALSE FALSE   FALSE
## 31    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 32    FALSE   FALSE    TRUE   FALSE   FALSE   FALSE  FALSE FALSE    TRUE
## 33    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE  TRUE   FALSE
## 34    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 35    FALSE    TRUE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 36    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 37    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE   TRUE FALSE   FALSE
## 38    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 39    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE   TRUE FALSE   FALSE
## 40    FALSE   FALSE   FALSE    TRUE   FALSE   FALSE  FALSE FALSE    TRUE
## 41     TRUE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 42    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 43    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 44    FALSE    TRUE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 45    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 46    FALSE   FALSE    TRUE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 47    FALSE   FALSE   FALSE   FALSE   FALSE    TRUE  FALSE FALSE   FALSE
## 48    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 49    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 50    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE  TRUE   FALSE
## 51    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 52    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 53    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE  TRUE   FALSE
## 54    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE    TRUE
## 55    FALSE   FALSE   FALSE    TRUE   FALSE   FALSE   TRUE FALSE   FALSE
## 56    FALSE   FALSE    TRUE   FALSE    TRUE    TRUE  FALSE FALSE   FALSE
## 57    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 58    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 59    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE   TRUE FALSE   FALSE
## 60    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 61    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE   TRUE FALSE   FALSE
## 62    FALSE   FALSE   FALSE    TRUE   FALSE   FALSE  FALSE FALSE   FALSE
## 63    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 64    FALSE   FALSE   FALSE   FALSE   FALSE    TRUE  FALSE FALSE   FALSE
## 65    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 66    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE    TRUE
## 67    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 68    FALSE    TRUE    TRUE   FALSE   FALSE    TRUE  FALSE FALSE   FALSE
## 69    FALSE   FALSE   FALSE   FALSE   FALSE    TRUE  FALSE  TRUE   FALSE
## 70    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 71     TRUE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 72    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 73    FALSE   FALSE   FALSE   FALSE   FALSE    TRUE  FALSE FALSE   FALSE
## 74    FALSE   FALSE    TRUE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 75    FALSE   FALSE   FALSE   FALSE    TRUE   FALSE  FALSE FALSE   FALSE
## 76    FALSE   FALSE   FALSE    TRUE   FALSE   FALSE  FALSE FALSE    TRUE
## 77    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 78    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 79     TRUE   FALSE   FALSE    TRUE   FALSE   FALSE  FALSE FALSE   FALSE
## 80    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 81    FALSE    TRUE   FALSE   FALSE    TRUE   FALSE   TRUE FALSE   FALSE
## 82    FALSE   FALSE    TRUE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 83    FALSE    TRUE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 84    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 85    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 86    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 87    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE  TRUE   FALSE
## 88    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE    TRUE
## 89    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE    TRUE
## 90    FALSE   FALSE   FALSE   FALSE   FALSE    TRUE   TRUE FALSE   FALSE
## 91    FALSE   FALSE   FALSE   FALSE    TRUE   FALSE  FALSE FALSE   FALSE
## 92    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE    TRUE
## 93    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 94    FALSE    TRUE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 95    FALSE   FALSE    TRUE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 96    FALSE   FALSE    TRUE   FALSE   FALSE   FALSE  FALSE  TRUE   FALSE
## 97    FALSE   FALSE   FALSE    TRUE   FALSE    TRUE  FALSE FALSE    TRUE
## 98     TRUE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 99    FALSE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE
## 100   FALSE   FALSE   FALSE   FALSE   FALSE    TRUE  FALSE FALSE   FALSE
## 101    TRUE   FALSE   FALSE   FALSE   FALSE   FALSE  FALSE FALSE   FALSE

colnames(R) <- paste("R.", colnames(holzinger.miss), sep = "")
dat <- data.frame(holzinger.miss, R)

# non-zero correlations with the missing data indicators imply a deviation from MCAR
round(cor(dat[, 1:9], data.matrix(dat[, 10:18]), use = "pairwise.complete.obs"), 2)

## Warning in cor(dat[, 1:9], data.matrix(dat[, 10:18]), use =
## "pairwise.complete.obs"): the standard deviation is zero

##         R.WrdMean R.SntComp R.OddWrds R.MxdArit R.Remndrs R.MissNum R.Gloves
## WrdMean        NA      0.01      0.00      0.11      0.07      0.03     0.19
## SntComp      0.14        NA     -0.15      0.11     -0.09     -0.09     0.19
## OddWrds      0.08      0.01        NA      0.06      0.11     -0.16     0.11
## MxdArit      0.08      0.00     -0.11        NA      0.12      0.10     0.02
## Remndrs      0.13      0.04      0.04      0.07        NA     -0.05     0.06
## MissNum      0.19      0.02     -0.34      0.25      0.12        NA     0.11
## Gloves       0.07     -0.04     -0.03      0.06      0.20     -0.13       NA
## Boots       -0.23     -0.05      0.03      0.20      0.07     -0.04     0.03
## Hatchts      0.00     -0.13     -0.07      0.16      0.07     -0.16     0.07
##         R.Boots R.Hatchts
## WrdMean   -0.16     -0.12
## SntComp   -0.18     -0.12
## OddWrds   -0.17     -0.10
## MxdArit    0.04      0.00
## Remndrs   -0.02     -0.12
## MissNum    0.00      0.05
## Gloves    -0.03     -0.01
## Boots        NA     -0.04
## Hatchts   -0.13        NA

# missing values pattern
mice::md.pattern(holzinger.miss[ , 1:9], plot = TRUE) # pattern of missingness

##    WrdMean Remndrs Boots SntComp MxdArit Hatchts OddWrds Gloves MissNum   
## 39       1       1     1       1       1       1       1      1       1  0
## 5        1       1     1       1       1       1       1      1       0  1
## 6        1       1     1       1       1       1       1      0       1  1
## 2        1       1     1       1       1       1       1      0       0  2
## 5        1       1     1       1       1       1       0      1       1  1
## 1        1       1     1       1       1       1       0      1       0  2
## 6        1       1     1       1       1       0       1      1       1  1
## 1        1       1     1       1       1       0       1      1       0  2
## 1        1       1     1       1       1       0       0      1       1  2
## 1        1       1     1       1       0       1       1      1       1  1
## 2        1       1     1       1       0       1       1      0       1  2
## 1        1       1     1       1       0       1       0      1       1  2
## 2        1       1     1       1       0       0       1      1       1  2
## 1        1       1     1       1       0       0       1      1       0  3
## 6        1       1     1       0       1       1       1      1       1  1
## 1        1       1     1       0       1       1       0      1       0  3
## 1        1       1     1       0       0       1       1      0       1  3
## 4        1       1     0       1       1       1       1      1       1  1
## 2        1       1     0       1       1       1       1      1       0  2
## 2        1       1     0       1       1       1       0      1       1  2
## 2        1       0     1       1       1       1       1      1       1  1
## 1        1       0     1       1       1       1       0      1       0  3
## 1        1       0     1       0       1       1       1      1       1  2
## 1        1       0     1       0       1       1       1      0       1  3
## 1        1       0     0       1       1       1       1      1       1  2
## 4        0       1     1       1       1       1       1      1       1  1
## 2        0       1     1       1       0       1       1      1       1  2
##          6       6     9      10      10      11      12     12      14 90

# Imputing missing values using hot deck method
set.seed(1234)
holzinger.miss.hot.deck <- hot.deck::hot.deck(holzinger.miss[,1:9], method = "p.draw")


# Extract the first imputed data set
holzinger.miss.hot.deck.1 <- as.data.frame(holzinger.miss.hot.deck$data[1])


# Imputing missing values using EM method with missMethods package
holzinger.miss.EM <- missMethods::impute_EM(holzinger.miss[,1:9], stochastic = TRUE)
View(holzinger.miss.EM)


# Multiple imputation using EM and MCMC methods with norm2 package
emResult <-  norm2::emNorm(holzinger.miss[,1:9])

## Note: Eigen power method failed to converge
## OCCURRED IN: estimate_worst_frac in MOD norm_engine

summary(emResult)

## Predictor (X) variables:
##       Mean SD Observed Missing Pct.Missing
## CONST    1  0      101       0           0
## 
## Response (Y) variables:
##              Mean       SD Observed Missing Pct.Missing
## WrdMean  99.64211 15.29978       95       6    5.940594
## SntComp  99.48352 14.51617       91      10    9.900990
## OddWrds 100.28090 14.10350       89      12   11.881188
## MxdArit  99.39560 14.95243       91      10    9.900990
## Remndrs  99.61053 14.62168       95       6    5.940594
## MissNum  99.66667 15.10403       87      14   13.861386
## Gloves  100.30337 14.72506       89      12   11.881188
## Boots   100.71739 15.02549       92       9    8.910891
## Hatchts  99.32222 14.20621       90      11   10.891089
## 
## Missingness patterns for response (Y) variables
##    (. denotes observed value, m denotes missing value)
##    (variable names are displayed vertically)
##    (rightmost column is the frequency):
## WSOMRM  H
## rndxeiG a
## dtddmslBt
## MCWAnsooc
## eorrdNvoh
## amdiruett
## npstsmsss
## ......... 39
## ........m  6
## .......m.  4
## ......m..  6
## .....m...  5
## .....m..m  1
## .....m.m.  2
## .....mm..  2
## ....m....  2
## ....m..m.  1
## ...m.....  1
## ...m....m  2
## ...m..m..  2
## ...m.m..m  1
## ..m......  5
## ..m.....m  1
## ..m....m.  2
## ..m..m...  1
## ..m.mm...  1
## ..mm.....  1
## .m.......  6
## .m..m....  1
## .m..m.m..  1
## .m.m..m..  1
## .mm..m...  1
## m........  4
## m..m.....  2
## 
## Method:                             EM
## Prior:                              "uniform"
## Convergence criterion:              1e-05
## Iterations:                         16
## Converged:                          TRUE
## Max. rel. difference:               6.4619e-06
## -2 Loglikelihood:                   4720.574
## -2 Log-posterior density:           4720.574
## Worst fraction missing information: NA
## 
## Eigen power method failed to converge
## OCCURRED IN: estimate_worst_frac in MOD norm_engine
## 
## Estimated coefficients (beta):
##       WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum   Gloves    Boots
## CONST 100.168 99.71267 100.2361 99.95225 100.0323 99.60731 100.5306 100.2666
##        Hatchts
## CONST 99.03079
## 
## Estimated covariance matrix (sigma):
##           WrdMean   SntComp   OddWrds   MxdArit   Remndrs   MissNum    Gloves
## WrdMean 235.13673 144.77946 170.58483  87.99104 118.59161 102.54104  84.22429
## SntComp 144.77946 202.72683 106.97100 102.86199  79.62614 123.74094  31.84955
## OddWrds 170.58483 106.97100 201.12780  76.55909 109.45757  83.79457  79.97757
## MxdArit  87.99104 102.86199  76.55909 225.18505 160.07526 190.79802  76.35147
## Remndrs 118.59161  79.62614 109.45757 160.07526 208.17272 149.13254 104.48805
## MissNum 102.54104 123.74094  83.79457 190.79802 149.13254 228.34206  83.72404
## Gloves   84.22429  31.84955  79.97757  76.35147 104.48805  83.72404 211.96934
## Boots    43.35593  66.84546  41.14095  67.62947  53.69282  85.23938  83.75301
## Hatchts  85.22380  21.10028  82.06294  59.35629  98.69285  69.59804 109.01125
##             Boots   Hatchts
## WrdMean  43.35593  85.22380
## SntComp  66.84546  21.10028
## OddWrds  41.14095  82.06294
## MxdArit  67.62947  59.35629
## Remndrs  53.69282  98.69285
## MissNum  85.23938  69.59804
## Gloves   83.75301 109.01125
## Boots   222.70261 117.23885
## Hatchts 117.23885 201.76040

mcmcResult <- norm2::mcmcNorm(emResult, iter = 5000, impute.every = 1000,multicycle=10)
summary(mcmcResult)

## 
## Method:                        MCMC
## Prior:                         "uniform"
## Iterations:                    5000
## Cycles per iteration:          10
## Impute every k iterations, k = 1000
## No. of imputations created:    5
## series.worst present:          FALSE
## series.beta  present:          TRUE
## series.sigma present:          TRUE

set.seed(123456)
par(mfrow=c(4,2))
for(i in 1:8){
  acf(mcmcResult$series.beta[,i],lag.max=1000, main=colnames(holzinger.miss[,1:9])[i])}

par(mfrow=c(4,2))
for(i in 1:8)
  plot(mcmcResult$series.beta[,i],ylab="", main=colnames(holzinger.miss[,1:9])[i])

# Combine imputed data sets
imp.list <- as.list(NULL)
 for(m in 1:50){
 mcmcResult <- norm2:: mcmcNorm(emResult, iter=100)
 imp.list[[m]] <- norm2::impNorm(mcmcResult) }

# Let use the third imputed data set
# holzinger.completeData_3 <- imp.list[[3]]
# head(holzinger.completeData_3)

# Imputing missing values using MI method in mice package
holzinger.miss.MI <- mice::mice(holzinger.miss[,1:9])

## 
##  iter imp variable
##   1   1  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts
##   1   2  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts
##   1   3  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts
##   1   4  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts
##   1   5  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts
##   2   1  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts
##   2   2  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts
##   2   3  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts
##   2   4  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts
##   2   5  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts
##   3   1  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts
##   3   2  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts
##   3   3  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts
##   3   4  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts
##   3   5  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts
##   4   1  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts
##   4   2  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts
##   4   3  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts
##   4   4  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts
##   4   5  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts
##   5   1  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts
##   5   2  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts
##   5   3  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts
##   5   4  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts
##   5   5  WrdMean  SntComp  OddWrds  MxdArit  Remndrs  MissNum  Gloves  Boots  Hatchts

plot(holzinger.miss.MI)

holzinger.long <- mice::complete(holzinger.miss.MI, "long", include = F)

# Let use the third iteration
 holzinger.completeData_3 <- mice::complete(holzinger.miss.MI, 3)
 head(holzinger.completeData_3)

##   WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 1     144      76     147      89     132      83    134    86     143
## 2     102      99     105     128     117     116    121   124     120
## 3      72      88      84      84      71      81     70   101      86
## 4     126     135     115     123     117     133    121   117     119
## 5     117     117     113     118     111     125     94   114      95
## 6     120     124     103     125     122     113    106   120      92

################### Doing Exploratory Factor Analysis ######################
#loading required packages
library(psych)
library(GPArotation)

## Warning: package 'GPArotation' was built under R version 4.1.3

library(stats4)
library(MBESS)

## Warning: package 'MBESS' was built under R version 4.1.3

## 
## Attaching package: 'MBESS'

## The following object is masked from 'package:psych':
## 
##     cor2cov

library(plyr)

## Warning: package 'plyr' was built under R version 4.1.3

library(nlme)

## Warning: package 'nlme' was built under R version 4.1.3

library(plyr)
library(FactoMineR)

## Warning: package 'FactoMineR' was built under R version 4.1.3

library(factoextra)

## Loading required package: ggplot2

## 
## Attaching package: 'ggplot2'

## The following objects are masked from 'package:psych':
## 
##     %+%, alpha

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

library(parameters)

## Warning: package 'parameters' was built under R version 4.1.3

## Registered S3 method overwritten by 'parameters':
##   method                         from      
##   format.parameters_distribution datawizard

library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## v purrr   0.3.4

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x ggplot2::%+%()     masks psych::%+%()
## x ggplot2::alpha()   masks psych::alpha()
## x dplyr::arrange()   masks plyr::arrange()
## x dplyr::collapse()  masks nlme::collapse()
## x purrr::compact()   masks plyr::compact()
## x dplyr::count()     masks plyr::count()
## x dplyr::failwith()  masks plyr::failwith()
## x dplyr::filter()    masks stats::filter()
## x dplyr::id()        masks plyr::id()
## x dplyr::lag()       masks stats::lag()
## x dplyr::mutate()    masks plyr::mutate()
## x dplyr::rename()    masks plyr::rename()
## x dplyr::summarise() masks plyr::summarise()
## x dplyr::summarize() masks plyr::summarize()

library(see)

## Warning: package 'see' was built under R version 4.1.3

library(performance)

## Warning: package 'performance' was built under R version 4.1.3

library(semPlot)

## Warning: package 'semPlot' was built under R version 4.1.3

library(psych)
library(PCDimension)

## Loading required package: ClassDiscovery

## Loading required package: cluster

## Loading required package: oompaBase

library(datawizard)

## Warning: package 'datawizard' was built under R version 4.1.3

## 
## Attaching package: 'datawizard'

## The following object is masked from 'package:tidyr':
## 
##     extract

library(tidymodels)

## Warning: package 'tidymodels' was built under R version 4.1.3

## -- Attaching packages -------------------------------------- tidymodels 0.2.0 --

## v broom        0.8.0     v rsample      0.1.1
## v dials        0.1.1     v tune         0.2.0
## v infer        1.0.0     v workflows    0.2.6
## v modeldata    0.1.1     v workflowsets 0.2.1
## v parsnip      0.2.1     v yardstick    0.0.9
## v recipes      0.2.0

## Warning: package 'broom' was built under R version 4.1.3

## Warning: package 'dials' was built under R version 4.1.3

## Warning: package 'scales' was built under R version 4.1.3

## Warning: package 'parsnip' was built under R version 4.1.3

## Warning: package 'tune' was built under R version 4.1.3

## Warning: package 'workflows' was built under R version 4.1.3

## Warning: package 'workflowsets' was built under R version 4.1.3

## -- Conflicts ----------------------------------------- tidymodels_conflicts() --
## x ggplot2::%+%()        masks psych::%+%()
## x scales::alpha()       masks ggplot2::alpha(), psych::alpha()
## x dplyr::arrange()      masks plyr::arrange()
## x dplyr::collapse()     masks nlme::collapse()
## x purrr::compact()      masks plyr::compact()
## x dplyr::count()        masks plyr::count()
## x scales::discard()     masks purrr::discard()
## x datawizard::extract() masks tidyr::extract()
## x dplyr::failwith()     masks plyr::failwith()
## x dplyr::filter()       masks stats::filter()
## x recipes::fixed()      masks stringr::fixed()
## x dplyr::id()           masks plyr::id()
## x dplyr::lag()          masks stats::lag()
## x yardstick::mae()      masks performance::mae()
## x dplyr::mutate()       masks plyr::mutate()
## x infer::p_value()      masks parameters::p_value()
## x tune::parameters()    masks dials::parameters(), parameters::parameters()
## x dplyr::rename()       masks plyr::rename()
## x yardstick::rmse()     masks performance::rmse()
## x dials::smoothness()   masks datawizard::smoothness(), parameters::smoothness()
## x yardstick::spec()     masks readr::spec()
## x recipes::step()       masks stats::step()
## x dplyr::summarise()    masks plyr::summarise()
## x dplyr::summarize()    masks plyr::summarize()
## x recipes::update()     masks stats4::update(), stats::update()
## * Use suppressPackageStartupMessages() to eliminate package startup messages

library(missForest)
library(missMDA)

## Warning: package 'missMDA' was built under R version 4.1.3

library(MASS)

## Warning: package 'MASS' was built under R version 4.1.3

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

library(softImpute)

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

## Loaded softImpute 1.4-1

## 
## Attaching package: 'softImpute'

## The following object is masked from 'package:tidyr':
## 
##     complete

library(dplyr)
library(tidyr)
library(ggplot2)
library(devtools)

## Loading required package: usethis

## 
## Attaching package: 'devtools'

## The following object is masked from 'package:recipes':
## 
##     check

library(VIM)

## Loading required package: colorspace

## Loading required package: grid

## VIM is ready to use.

## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues

## 
## Attaching package: 'VIM'

## The following object is masked from 'package:recipes':
## 
##     prepare

## The following object is masked from 'package:missForest':
## 
##     nrmse

## The following object is masked from 'package:datasets':
## 
##     sleep

library(corrplot)

## corrplot 0.92 loaded

library(qgraph)

## Warning: package 'qgraph' was built under R version 4.1.3

library(EFA.MRFA)

## 
## Attaching package: 'EFA.MRFA'

## The following object is masked from 'package:purrr':
## 
##     transpose

library(EFAtools)

## Warning: package 'EFAtools' was built under R version 4.1.3

## 
## Attaching package: 'EFAtools'

## The following object is masked from 'package:psych':
## 
##     KMO

library(EFA.dimensions)

## 
## Attaching package: 'EFA.dimensions'

## The following objects are masked from 'package:EFAtools':
## 
##     PARALLEL, SMT

## The following object is masked from 'package:FactoMineR':
## 
##     PCA

library(RGenData)
library(nFactors)

## Warning: package 'nFactors' was built under R version 4.1.3

## Loading required package: lattice

## 
## Attaching package: 'nFactors'

## The following object is masked from 'package:lattice':
## 
##     parallel

library(paran)

## Warning: package 'paran' was built under R version 4.1.3

# Is the data suitable for Factor Analysis
parameters::check_factorstructure(holzinger.completeData_3)

## # Is the data suitable for Factor Analysis?
## 
##   - KMO: The Kaiser, Meyer, Olkin (KMO) measure of sampling adequacy suggests that data seems appropriate for factor analysis (KMO = 0.73).
##   - Sphericity: Bartlett's test of sphericity suggests that there is sufficient significant correlation in the data for factor analysis (Chisq(36) = 568.93, p < .001).

corrplot::corrplot(cor(holzinger.completeData_3), type = "lower", method = "circle")

correlations <- cor(holzinger.completeData_3)
round(correlations,2)

##         WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## WrdMean    1.00    0.68    0.78    0.40    0.54    0.44   0.36  0.21    0.37
## SntComp    0.68    1.00    0.51    0.48    0.38    0.60   0.17  0.30    0.08
## OddWrds    0.78    0.51    1.00    0.34    0.54    0.36   0.36  0.20    0.38
## MxdArit    0.40    0.48    0.34    1.00    0.75    0.83   0.35  0.33    0.27
## Remndrs    0.54    0.38    0.54    0.75    1.00    0.66   0.52  0.26    0.44
## MissNum    0.44    0.60    0.36    0.83    0.66    1.00   0.37  0.35    0.30
## Gloves     0.36    0.17    0.36    0.35    0.52    0.37   1.00  0.41    0.49
## Boots      0.21    0.30    0.20    0.33    0.26    0.35   0.41  1.00    0.58
## Hatchts    0.37    0.08    0.38    0.27    0.44    0.30   0.49  0.58    1.00

round(cor(holzinger[,2:10]),2)

##         WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## WrdMean    1.00    0.68    0.78    0.37    0.51    0.41   0.32  0.25    0.36
## SntComp    0.68    1.00    0.58    0.50    0.43    0.58   0.12  0.32    0.12
## OddWrds    0.78    0.58    1.00    0.40    0.53    0.46   0.40  0.25    0.40
## MxdArit    0.37    0.50    0.40    1.00    0.76    0.84   0.35  0.31    0.27
## Remndrs    0.51    0.43    0.53    0.76    1.00    0.68   0.47  0.24    0.41
## MissNum    0.41    0.58    0.46    0.84    0.68    1.00   0.38  0.34    0.32
## Gloves     0.32    0.12    0.40    0.35    0.47    0.38   1.00  0.42    0.53
## Boots      0.25    0.32    0.25    0.31    0.24    0.34   0.42  1.00    0.55
## Hatchts    0.36    0.12    0.40    0.27    0.41    0.32   0.53  0.55    1.00

symnum(correlations)

##         W S O MA R MN G B H
## WrdMean 1                  
## SntComp , 1                
## OddWrds , . 1              
## MxdArit . . . 1            
## Remndrs . . . ,  1         
## MissNum . . . +  , 1       
## Gloves  .   . .  . .  1    
## Boots     .   .    .  . 1  
## Hatchts .   .    . .  . . 1
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1

# determining number of correlations above .30
correlations <- cor(holzinger.completeData_3)

#number of coefficients => .30 off-diagonal
BigR <- sum(correlations >= abs(.30) & correlations < abs(1), na.rm = T)/2
BigR

## [1] 30

# total number of off-diagonal elements
totR <- length(holzinger.completeData_3)*(length(holzinger.completeData_3)-1)/2
totR

## [1] 36

# percent of off-diagonal elements > .30
(BigR/totR)*100

## [1] 83.33333

# determinant of a correlation matrix
det(cor(holzinger.completeData_3))

## [1] 0.002695812

psych::KMO(holzinger.completeData_3)

## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = holzinger.completeData_3)
## Overall MSA =  0.73
## MSA for each item = 
## WrdMean SntComp OddWrds MxdArit Remndrs MissNum  Gloves   Boots Hatchts 
##    0.71    0.63    0.84    0.74    0.82    0.77    0.86    0.58    0.61

psych::cortest.bartlett(cor(holzinger.completeData_3), n=nrow(holzinger.completeData_3))

## $chisq
## [1] 568.9274
## 
## $p.value
## [1] 4.498225e-97
## 
## $df
## [1] 36

# CD works with complete data. no missing
EFA.dimensions::DIMTESTS(holzinger.completeData_3, corkind = "pearson",
         tests = c('CD','EMPKC','HULL','RAWPAR','NEVALSGT1'), display=2)

## 
## 
## COMPARISON DATA

## 
## The number of factors according to the Comparison Data test (from EFAtools) = 5

## 
## 
## EMPIRICAL KAISER CRITERION

## 
## Kind of correlations analyzed: from user

##     Nfactors    Eigenvalue    Reference Values
##            1         4.482               1.686
##            2         1.362               1.542
##            3         1.101               1.390
##            4         0.797               1.231
##            5         0.501               1.062
##            6         0.280               1.000
##            7         0.215               1.000
##            8         0.152               1.000
##            9         0.110               1.000

## 
## The number of factors according to the Empirical Kaiser Criterion = 1

## 
## 
## HULL METHOD

## 
## The number of factors according to the Hull method test (from EFAtools) = 1

## 
## 
## PARALLEL ANALYSIS

## 
## The entered data is a correlation matrix.

## 
## Randomization method: generated data

## 
## Type of correlations specified for the real data eigenvalues: from user

## 
## Type of correlations specified for the random data eigenvalues: pearson

## 
## Extraction Method: Principal Components

## 
## Variables = 9

## 
## Cases = 101

## 
## Ndatasets = 100

## 
## Percentile = 95

## 
## Compare the Real Data eigenvalues below to the corresponding

## random data Mean and Percentile eigenvalues

##    Root   Real Data    Mean   Percentile
##       1       4.482   1.486        1.625
##       2       1.362   1.325        1.436
##       3       1.101   1.190        1.270
##       4       0.797   1.080        1.155
##       5       0.501   0.981        1.058
##       6       0.280   0.882        0.947
##       7       0.215   0.786        0.862
##       8       0.152   0.689        0.757
##       9       0.110   0.581        0.660

## 
## The number of factors according to the parallel analysis = 1

##

## 
## 
## NUMBER OF EIGENVALUES > 1

## 
## The entered data is a correlation matrix.

## 
## Number of cases in the data file = 101

## 
## Number of variables in the data file = 9

## 
## Specified kind of correlations for this analysis: from user

## 
## 
## Total Variance Explained (Initial Eigenvalues):

##             Eigenvalues    Proportion of Variance    Cumulative Prop. Variance
## Factor 1           4.48                      0.50                         0.50
## Factor 2           1.36                      0.15                         0.65
## Factor 3           1.10                      0.12                         0.77
## Factor 4           0.80                      0.09                         0.86
## Factor 5           0.50                      0.06                         0.92
## Factor 6           0.28                      0.03                         0.95
## Factor 7           0.21                      0.02                         0.97
## Factor 8           0.15                      0.02                         0.99
## Factor 9           0.11                      0.01                         1.00

## 
## The number of eigenvalues greater than one = 3

## 
## 
## DIMTESTS results:

##           # of Factors:
## CD                    5
## EMPKC                 1
## HULL                  1
## RAWPAR                1
## NEVALSGT1             3

#Factorability of a correlation matrix
EFA.dimensions::FACTORABILITY(holzinger.completeData_3, corkind='pearson', Ncases=NULL, verbose=TRUE)

## 
## 
## Three methods of assessing the factorability of a correlation matrix or raw data set:

## 
## Specified kind of correlations for this analysis: Pearson

## 
## 
## The determinant of the correlation matrix should be > 0.00001 for factorability.

## 
## The determinant is 0.0026958 which is > 0.00001, indicating factorability.

## 
## 
## The Bartlett test of whether a correlation matrix is significantly different

## from an identity matrix (wherein all of the off-diagonal elements are zero):

## 
## chisq = 568.927356923569    df= 36     p = 4.49822548177856e-97

## 
## A significant difference is required for factorability.

## 
## 
## The Kaiser-Meyer-Olkin measure of sampling adequacy (MSA):

##         Variable MSA
## WrdMean         0.71
## SntComp         0.63
## OddWrds         0.84
## MxdArit         0.74
## Remndrs         0.82
## MissNum         0.77
## Gloves          0.86
## Boots           0.58
## Hatchts         0.61

## 
## The overall measure of sampling adequacy (MSA) = 0.73

## 
## The Kaiser & Rice (1974) interpretation guidelines for MSA values:

## 
##     KMO >= .9 is marvelous
##     KMO in the .80s is mertitorious
##     KMO in the .70s is middling
##     KMO in the .60s is medicore
##     KMO in the .50s is miserable
##     KMO < .5 is unacceptable

## 
## Consider excluding items with KMO values < .5 and then re-run the FACTORABILITY analyses.

## 
## The overall KMO coefficient indicates the proportion of

## variance in the variables that might be caused by underlying

## factors. If the variables share common factors, then the

## overall KMO coefficient should be close to 1.0. The overall

## KMO indicates the extent to which there is at least one

## latent factor underlying the variables. The overall KMO

## index is considered particularly meaningful when the cases

## to variables ratio is less than 1:5. The KMO coefficient for

## a variable is a kind of summary index of how much a

## variable overlaps with the other variables.

# Local dependence
EFA.dimensions::LOCALDEP(holzinger.completeData_3, corkind = 'pearson', verbose=TRUE)

## 
## 
## Eigenvalues & residuals correlations after partialling out the first component

## 
## Number of cases = 101

## 
## Number of variables = 9

## 
## Specified kind of correlations for this analysis: Pearson

## 
## 
## Total Variance Explained (Initial Eigenvalues):

##             Eigenvalues    Proportion of Variance    Cumulative Prop. Variance
## Factor 1           4.48                      0.50                         0.50
## Factor 2           1.36                      0.15                         0.65
## Factor 3           1.10                      0.12                         0.77
## Factor 4           0.80                      0.09                         0.86
## Factor 5           0.50                      0.06                         0.92
## Factor 6           0.28                      0.03                         0.95
## Factor 7           0.21                      0.02                         0.97
## Factor 8           0.15                      0.02                         0.99
## Factor 9           0.11                      0.01                         1.00

## 
## Ratio of the 1st to the 2nd eigenvalue = 3.3

## 
## 
## Original correlation matrix:

##         WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## WrdMean    1.00    0.68    0.78    0.40    0.54    0.44   0.36  0.21    0.37
## SntComp    0.68    1.00    0.51    0.48    0.38    0.60   0.17  0.30    0.08
## OddWrds    0.78    0.51    1.00    0.34    0.54    0.36   0.36  0.20    0.38
## MxdArit    0.40    0.48    0.34    1.00    0.75    0.83   0.35  0.33    0.27
## Remndrs    0.54    0.38    0.54    0.75    1.00    0.66   0.52  0.26    0.44
## MissNum    0.44    0.60    0.36    0.83    0.66    1.00   0.37  0.35    0.30
## Gloves     0.36    0.17    0.36    0.35    0.52    0.37   1.00  0.41    0.49
## Boots      0.21    0.30    0.20    0.33    0.26    0.35   0.41  1.00    0.58
## Hatchts    0.37    0.08    0.38    0.27    0.44    0.30   0.49  0.58    1.00

## 
## Residual correlations after partialling out the first component:

##         WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## WrdMean    1.00    0.34    0.51   -0.49   -0.26   -0.45  -0.23 -0.37   -0.14
## SntComp    0.34    1.00    0.06   -0.10   -0.44    0.12  -0.43 -0.09   -0.53
## OddWrds    0.51    0.06    1.00   -0.49   -0.13   -0.49  -0.15 -0.31   -0.07
## MxdArit   -0.49   -0.10   -0.49    1.00    0.30    0.56  -0.25 -0.15   -0.36
## Remndrs   -0.26   -0.44   -0.13    0.30    1.00    0.01   0.02 -0.37   -0.10
## MissNum   -0.45    0.12   -0.49    0.56    0.01    1.00  -0.26 -0.14   -0.33
## Gloves    -0.23   -0.43   -0.15   -0.25    0.02   -0.26   1.00  0.12    0.21
## Boots     -0.37   -0.09   -0.31   -0.15   -0.37   -0.14   0.12  1.00    0.39
## Hatchts   -0.14   -0.53   -0.07   -0.36   -0.10   -0.33   0.21  0.39    1.00

## 
##  The # of residual correlations >= .1 is  8   percentage = 0.22
##  The # of residual correlations >= .2 is  6   percentage = 0.17
##  The # of residual correlations >= .3 is  4   percentage = 0.11
##  The # of residual correlations >= .4 is  2   percentage = 0.06
##  The # of residual correlations >= .5 is  2   percentage = 0.06
##  The # of residual correlations >= .6 is  0   percentage = 0
##  The # of residual correlations >= .7 is  0   percentage = 0
##  The # of residual correlations >= .8 is  0   percentage = 0

# How many factors to retain
evalues<-eigen(cor(holzinger.completeData_3))$values
evalues

## [1] 4.4821617 1.3617792 1.1013649 0.7971958 0.5011669 0.2799404 0.2145597
## [8] 0.1516015 0.1102300

scree(holzinger.completeData_3,factors=FALSE,pc=TRUE)

# percent of variance explained by each factors
(evalues/sum(evalues))*100

## [1] 49.801797 15.130880 12.237388  8.857731  5.568521  3.110448  2.383997
## [8]  1.684462  1.224777

plot(nFactors::nScree(cor(holzinger.completeData_3),model="factors"))

paran::paran(holzinger.completeData_3, iterations = 500, centile = 95)

## 
## Using eigendecomposition of correlation matrix.
## Computing: 10%  20%  30%  40%  50%  60%  70%  80%  90%  100%
## 
## 
## Results of Horn's Parallel Analysis for component retention
## 500 iterations, using the 95 centile estimate
## 
## -------------------------------------------------- 
## Component   Adjusted    Unadjusted    Estimated 
##             Eigenvalue  Eigenvalue    Bias 
## -------------------------------------------------- 
## 1           3.843800    4.482161      0.638361
## -------------------------------------------------- 
## 
## Adjusted eigenvalues > 1 indicate dimensions to retain.
## (1 components retained)

EFA.MRFA::hullEFA(holzinger.completeData_3, extr = "ML", index_hull = 'CFI')

## HULL METHOD - CFI INDEX
## 
##         q      f          g       st
##         0      0.0000    36       0.0000 
##         1      0.6947    27       4.1668 
##         2      0.8429    19       1.4814 
##         3      0.9304    12       0.0000 
## 
## Number of advised dimensions: 1 
## 
## -----------------------------------------------

EFA.MRFA::parallelMRFA(holzinger.completeData_3,Ndatsets = 500, display = FALSE)

EGAnet::EGA(holzinger.completeData_3, model='glasso')

## Number of communities: 3 
## 
## WrdMean SntComp OddWrds MxdArit Remndrs MissNum  Gloves   Boots Hatchts 
##       2       2       2       1       1       1       3       3       3 
## 
## Methods:
##                                                          
## Correlations =          auto (from qgraph)               
## Model =                 glasso                           
## Algorithm =             walktrap                         
## Unidimensional Method = louvain with consensus clustering

qgraph::qgraph(cor(holzinger.completeData_3), cut=.300, details=F, posCol="orangered3",
       negCol="green", labels=names(holzinger.completeData_3))

RGenData::EFACompData(holzinger.completeData_3, f.max = 6, graph=T)

## Number of factors to retain:  4

nfactor <- nFactors::nScree(cor(holzinger.completeData_3), model="components")
summary(nfactor)

## Report For a nScree Class 
## 
## Details: components 
## 
##   Eigenvalues Prop Cumu Par.Analysis Pred.eig     OC Acc.factor     AF
## 1           4    0    0            1        2                NA (< AF)
## 2           1    0    1            1        1                 3       
## 3           1    0    1            1        1 (< OC)          0       
## 4           1    0    1            1        1                 0       
## 5           1    0    1            1        0                 0       
## 6           0    0    1            1        0                 0       
## 7           0    0    1            1        0                 0       
## 8           0    0    1            1       NA                 0       
## 9           0    0    1            1       NA                NA       
## 
## 
##  Number of factors retained by index 
## 
##   noc naf nparallel nkaiser
## 1   3   1         3       3

n <- parameters::n_factors(holzinger.completeData_3, type = "FA", package = "all")
n

## # Method Agreement Procedure:
## 
## The choice of 3 dimensions is supported by 8 (36.36%) methods out of 22 (CNG, t, p, Optimal coordinates, Parallel analysis, Kaiser criterion, Scree (SE), EGA (glasso)).

summary(n)

##   n_Factors n_Methods
## 1         1         2
## 2         2         1
## 3         3         8
## 4         4         7
## 5         6         2
## 6         7         2

qgraph::qgraph(cor(holzinger.completeData_3), cut=.300, details=F, posCol="orangered3",
       negCol="green", labels=names(holzinger.completeData_3))

# Three factor model

holzinger.long[,1:2] <- NULL
datasets.list <- list(holzinger.long[1:101,],holzinger.long[102:202,],
        holzinger.long[203:303,],holzinger.long[304:404,],holzinger.long[405:505,]) 
holzinger.f3 <- fa.pooled(datasets.list,3, gam=0, fm="ml", residuals =TRUE, SMC=TRUE) 


# off-diagonal residuals
resd <- round(holzinger.f3$residual,3)
resd <- lowerMat(resd)

##         WrdMn SntCm OddWr MxdAr Rmndr MssNm Glovs Boots Htcht
## WrdMean  0.01                                                
## SntComp  0.00  0.47                                          
## OddWrds  0.00  0.01  0.41                                    
## MxdArit  0.00 -0.01  0.00  0.07                              
## Remndrs  0.00 -0.08  0.06  0.01  0.31                        
## MissNum  0.00  0.06 -0.02  0.00 -0.02  0.19                  
## Gloves   0.00 -0.14  0.05 -0.01  0.09  0.00  0.64            
## Boots    0.00  0.25 -0.08 -0.02 -0.11  0.06  0.03  0.66      
## Hatchts  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00

#largest residual
max(abs(resd),na.rm = TRUE, diag=FALSE)

## [1] 0.247

min(abs(resd),na.rm = TRUE, diag=FALSE)

## [1] 0

# display a histogram of residuals
hist(resd, col = "red", main = "residuals", xlab = "Residuals")

# print structure matrix
print(sort=T, digits=3, cut= 0, holzinger.f3$Structure)

## 
## Loadings:
##         ML3   ML2   ML1  
## MxdArit 0.960 0.404 0.218
## Remndrs 0.772 0.559 0.475
## MissNum 0.896 0.497 0.276
## WrdMean 0.463 0.996 0.365
## SntComp 0.508 0.664 0.073
## OddWrds 0.362 0.762 0.371
## Gloves  0.407 0.396 0.518
## Boots   0.324 0.160 0.536
## Hatchts 0.270 0.374 0.997
## 
##                  ML3   ML2   ML1
## SS loadings    3.267 3.058 2.174
## Proportion Var 0.363 0.340 0.242
## Cumulative Var 0.363 0.703 0.944

# display names
names(holzinger.f3)

##  [1] "residual"      "dof"           "chi"           "nh"           
##  [5] "rms"           "EPVAL"         "crms"          "EBIC"         
##  [9] "ESABIC"        "fit"           "fit.off"       "sd"           
## [13] "factors"       "complexity"    "n.obs"         "objective"    
## [17] "criteria"      "STATISTIC"     "PVAL"          "Call"         
## [21] "null.model"    "null.dof"      "null.chisq"    "TLI"          
## [25] "RMSEA"         "BIC"           "SABIC"         "r.scores"     
## [29] "R2"            "valid"         "score.cor"     "rotation"     
## [33] "hyperplane"    "communality"   "communalities" "uniquenesses" 
## [37] "values"        "e.values"      "loadings"      "model"        
## [41] "fm"            "rot.mat"       "Phi"           "Structure"    
## [45] "method"        "R2.scores"     "r"             "np.obs"       
## [49] "fn"            "Vaccounted"    "cis"

print(holzinger.f3$rms)

## [1] 0.05996106

# path diagram
par(mfrow=c(1,2))
fa.diagram(holzinger.f3, sort = T, cut = 0, digits = 3, cex=.75)

5. Reliability analysis

# subscale alpha reliability
CTT::reliability(holzinger.completeData_3[,c("WrdMean","SntComp","OddWrds")], itemal = F)

## You will find additional options and better formatting using itemAnalysis().

## 
##  Number of Items 
##  3 
## 
##  Number of Examinees 
##  101 
## 
##  Coefficient Alpha 
##  0.852

CTT::reliability(holzinger.completeData_3[,c("MxdArit","Remndrs","MissNum")], itemal = F)

## You will find additional options and better formatting using itemAnalysis().

## 
##  Number of Items 
##  3 
## 
##  Number of Examinees 
##  101 
## 
##  Coefficient Alpha 
##  0.9

CTT::reliability(holzinger.completeData_3[,c("Gloves","Boots","Hatchts")], itemal = F)

## You will find additional options and better formatting using itemAnalysis().

## 
##  Number of Items 
##  3 
## 
##  Number of Examinees 
##  101 
## 
##  Coefficient Alpha 
##  0.744

# bootstrapped confidence interval for alpha
library(userfriendlyscience)

## 
## Attaching package: 'userfriendlyscience'

## The following object is masked from 'package:lattice':
## 
##     oneway

## The following object is masked from 'package:nlme':
## 
##     getData

## The following object is masked from 'package:psych':
## 
##     reliability

library(MBESS)
MBESS::ci.reliability(holzinger.completeData_3[,c("WrdMean","SntComp","OddWrds")],
               interval.type = "31", B=1000, conf.level = .99, type = "omega")

## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative

## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative

## $est
## [1] 0.8723179
## 
## $se
## [1] 0.02088919
## 
## $ci.lower
## [1] 0.8185109
## 
## $ci.upper
## [1] 0.9261249
## 
## $conf.level
## [1] 0.99
## 
## $type
## [1] "omega"
## 
## $interval.type
## [1] "maximum likelihood (wald ci)"

MBESS::ci.reliability(holzinger.completeData_3[,c("MxdArit","Remndrs","MissNum")],
               interval.type = "31", B=1000, conf.level = .99, type = "omega")

## $est
## [1] 0.9050276
## 
## $se
## [1] 0.01625001
## 
## $ci.lower
## [1] 0.8631703
## 
## $ci.upper
## [1] 0.9468848
## 
## $conf.level
## [1] 0.99
## 
## $type
## [1] "omega"
## 
## $interval.type
## [1] "maximum likelihood (wald ci)"

MBESS::ci.reliability(holzinger.completeData_3[,c("Gloves","Boots","Hatchts")],
               interval.type = "31", B=1000, conf.level = .99, type = "omega")

## $est
## [1] 0.7513082
## 
## $se
## [1] 0.04217824
## 
## $ci.lower
## [1] 0.6426643
## 
## $ci.upper
## [1] 0.8599522
## 
## $conf.level
## [1] 0.99
## 
## $type
## [1] "omega"
## 
## $interval.type
## [1] "maximum likelihood (wald ci)"

scaleReliability(holzinger[, c("WrdMean","SntComp","OddWrds")])

## 
## Information about this analysis:
## 
##                  Dataframe: holzinger[, c("WrdMean", "SntComp", "OddWrds")]
##                      Items: all
##               Observations: 101
##      Positive correlations: 3 out of 3 (100%)
## 
## Estimates assuming interval level:
## 
##              Omega (total): 0.87
##       Omega (hierarchical): 0.86
##    Revelle's omega (total): 0.88
## Greatest Lower Bound (GLB): 0.9
##              Coefficient H: 0.93
##           Cronbach's alpha: 0.86
## Confidence intervals:
##              Omega (total): [0.83, 0.91]
##           Cronbach's alpha: [0.82, 0.91]
## 
## (Estimates assuming ordinal level not computed, as at least one item seems to have more than 8 levels; the highest number of distinct levels is 51 and the highest range is 91. This last number needs to be lower than 9 for the polychoric function to work. If this is unexpected, you may want to check for outliers.)
## 
## Note: the normal point estimate and confidence interval for omega are based on the procedure suggested by Dunn, Baguley & Brunsden (2013) using the MBESS function ci.reliability, whereas the psych package point estimate was suggested in Revelle & Zinbarg (2008). See the help ('?scaleStructure') for more information.

scaleReliability(holzinger[, c("MxdArit","Remndrs","MissNum")])

## 
## Information about this analysis:
## 
##                  Dataframe: holzinger[, c("MxdArit", "Remndrs", "MissNum")]
##                      Items: all
##               Observations: 101
##      Positive correlations: 3 out of 3 (100%)
## 
## Estimates assuming interval level:
## 
##              Omega (total): 0.91
##       Omega (hierarchical): 0.01
##    Revelle's omega (total): 0.91
## Greatest Lower Bound (GLB): 0.93
##              Coefficient H: 0.95
##           Cronbach's alpha: 0.91
## Confidence intervals:
##              Omega (total): [0.88, 0.94]
##           Cronbach's alpha: [0.87, 0.94]
## 
## (Estimates assuming ordinal level not computed, as at least one item seems to have more than 8 levels; the highest number of distinct levels is 53 and the highest range is 79. This last number needs to be lower than 9 for the polychoric function to work. If this is unexpected, you may want to check for outliers.)
## 
## Note: the normal point estimate and confidence interval for omega are based on the procedure suggested by Dunn, Baguley & Brunsden (2013) using the MBESS function ci.reliability, whereas the psych package point estimate was suggested in Revelle & Zinbarg (2008). See the help ('?scaleStructure') for more information.

scaleReliability(holzinger[, c("Gloves","Boots","Hatchts")])

## 
## Information about this analysis:
## 
##                  Dataframe: holzinger[, c("Gloves", "Boots", "Hatchts")]
##                      Items: all
##               Observations: 101
##      Positive correlations: 3 out of 3 (100%)
## 
## Estimates assuming interval level:
## 
##              Omega (total): 0.75
##       Omega (hierarchical): 0.73
##    Revelle's omega (total): 0.76
## Greatest Lower Bound (GLB): 0.78
##              Coefficient H: 0.79
##           Cronbach's alpha: 0.75
## Confidence intervals:
##              Omega (total): [0.67, 0.84]
##           Cronbach's alpha: [0.66, 0.83]
## 
## (Estimates assuming ordinal level not computed, as at least one item seems to have more than 8 levels; the highest number of distinct levels is 50 and the highest range is 84. This last number needs to be lower than 9 for the polychoric function to work. If this is unexpected, you may want to check for outliers.)
## 
## Note: the normal point estimate and confidence interval for omega are based on the procedure suggested by Dunn, Baguley & Brunsden (2013) using the MBESS function ci.reliability, whereas the psych package point estimate was suggested in Revelle & Zinbarg (2008). See the help ('?scaleStructure') for more information.

factorscores <- psych::factor.scores(holzinger.completeData_3, 
                holzinger.f3, Phi = NULL, method="Thurstone",impute="none") 
(View(factor.scores <- factorscores$scores))

## NULL

cor(factorscores$scores)

##           ML3       ML2       ML1
## ML3 1.0000000 0.4978971 0.2335568
## ML2 0.4978971 1.0000000 0.3676876
## ML1 0.2335568 0.3676876 1.0000000

David Flora (Article, 2012)

Dr. Asghar Minaei

2022-07-12

Data Screening and Testing of Assumptions

1.Data Accuracy

2.Univariate Outliers

3.Multivariate Outliers

INFLUENCE

4.Statistical assumptions

Factor analysis with and without oulier case

4.Dealing With Missing Data

5. Reliability analysis