4.Dealing With Missing Data
# Generate randomly missing values
set.seed(1234)
library(missForest)
holzinger.miss <- prodNA(holzinger[,2:10], noNA = 0.10) # generate missing values
# Frequencies and percentages of missing data
colSums(is.na((holzinger.miss)))
## WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 6 10 12 10 6 14 12 9 11
round(colSums(is.na(holzinger.miss))/nrow(holzinger.miss) * 100, 2)
## WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 5.94 9.90 11.88 9.90 5.94 13.86 11.88 8.91 10.89
(sum(is.na(holzinger.miss))/(nrow(holzinger.miss)*ncol(holzinger.miss)))*100
## [1] 9.90099
# Number and Proportion of Usable Cases
p <- mice::md.pairs(holzinger.miss)
p$mr
## WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## WrdMean 0 6 6 4 6 6 6 6 6
## SntComp 10 0 9 9 8 9 8 10 10
## OddWrds 12 11 0 11 11 9 12 10 11
## MxdArit 8 9 9 0 10 9 7 10 7
## Remndrs 6 4 5 6 0 5 5 5 6
## MissNum 14 13 11 13 13 0 12 12 12
## Gloves 12 10 12 9 11 10 0 12 12
## Boots 9 9 7 9 8 7 9 0 9
## Hatchts 11 11 10 8 11 9 11 11 0
round(p$mr/colSums(is.na(holzinger.miss)),2)
## WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## WrdMean 0.0 1.00 1.00 0.67 1.00 1.00 1.00 1.00 1.00
## SntComp 1.0 0.00 0.90 0.90 0.80 0.90 0.80 1.00 1.00
## OddWrds 1.0 0.92 0.00 0.92 0.92 0.75 1.00 0.83 0.92
## MxdArit 0.8 0.90 0.90 0.00 1.00 0.90 0.70 1.00 0.70
## Remndrs 1.0 0.67 0.83 1.00 0.00 0.83 0.83 0.83 1.00
## MissNum 1.0 0.93 0.79 0.93 0.93 0.00 0.86 0.86 0.86
## Gloves 1.0 0.83 1.00 0.75 0.92 0.83 0.00 1.00 1.00
## Boots 1.0 1.00 0.78 1.00 0.89 0.78 1.00 0.00 1.00
## Hatchts 1.0 1.00 0.91 0.73 1.00 0.82 1.00 1.00 0.00
# correlations between the variables in the data set and the missing data indicators
R <- is.na(holzinger.miss)
R
## WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 1 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 2 FALSE TRUE FALSE TRUE FALSE FALSE TRUE FALSE FALSE
## 3 FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE TRUE
## 4 TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE
## 5 FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
## 6 FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE
## 7 FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 8 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
## 9 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 10 FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE FALSE
## 11 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 12 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 13 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 14 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 15 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 16 FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
## 17 FALSE FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE
## 18 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 19 FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE FALSE
## 20 FALSE FALSE FALSE FALSE TRUE FALSE FALSE TRUE FALSE
## 21 FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
## 22 FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 23 FALSE FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE
## 24 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 25 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 26 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 27 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 28 FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
## 29 FALSE FALSE FALSE FALSE FALSE TRUE FALSE TRUE FALSE
## 30 FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
## 31 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 32 FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE
## 33 FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
## 34 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 35 FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 36 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 37 FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
## 38 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 39 FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
## 40 FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE
## 41 TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 42 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 43 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 44 FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 45 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 46 FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
## 47 FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
## 48 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 49 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 50 FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
## 51 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 52 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 53 FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
## 54 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
## 55 FALSE FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE
## 56 FALSE FALSE TRUE FALSE TRUE TRUE FALSE FALSE FALSE
## 57 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 58 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 59 FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
## 60 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 61 FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
## 62 FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE
## 63 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 64 FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
## 65 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 66 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
## 67 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 68 FALSE TRUE TRUE FALSE FALSE TRUE FALSE FALSE FALSE
## 69 FALSE FALSE FALSE FALSE FALSE TRUE FALSE TRUE FALSE
## 70 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 71 TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 72 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 73 FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
## 74 FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
## 75 FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
## 76 FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE
## 77 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 78 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 79 TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE
## 80 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 81 FALSE TRUE FALSE FALSE TRUE FALSE TRUE FALSE FALSE
## 82 FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
## 83 FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 84 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 85 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 86 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 87 FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
## 88 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
## 89 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
## 90 FALSE FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE
## 91 FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
## 92 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
## 93 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 94 FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 95 FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
## 96 FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE FALSE
## 97 FALSE FALSE FALSE TRUE FALSE TRUE FALSE FALSE TRUE
## 98 TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 99 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 100 FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
## 101 TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
colnames(R) <- paste("R.", colnames(holzinger.miss), sep = "")
dat <- data.frame(holzinger.miss, R)
# non-zero correlations with the missing data indicators imply a deviation from MCAR
round(cor(dat[, 1:9], data.matrix(dat[, 10:18]), use = "pairwise.complete.obs"), 2)
## Warning in cor(dat[, 1:9], data.matrix(dat[, 10:18]), use =
## "pairwise.complete.obs"): the standard deviation is zero
## R.WrdMean R.SntComp R.OddWrds R.MxdArit R.Remndrs R.MissNum R.Gloves
## WrdMean NA 0.01 0.00 0.11 0.07 0.03 0.19
## SntComp 0.14 NA -0.15 0.11 -0.09 -0.09 0.19
## OddWrds 0.08 0.01 NA 0.06 0.11 -0.16 0.11
## MxdArit 0.08 0.00 -0.11 NA 0.12 0.10 0.02
## Remndrs 0.13 0.04 0.04 0.07 NA -0.05 0.06
## MissNum 0.19 0.02 -0.34 0.25 0.12 NA 0.11
## Gloves 0.07 -0.04 -0.03 0.06 0.20 -0.13 NA
## Boots -0.23 -0.05 0.03 0.20 0.07 -0.04 0.03
## Hatchts 0.00 -0.13 -0.07 0.16 0.07 -0.16 0.07
## R.Boots R.Hatchts
## WrdMean -0.16 -0.12
## SntComp -0.18 -0.12
## OddWrds -0.17 -0.10
## MxdArit 0.04 0.00
## Remndrs -0.02 -0.12
## MissNum 0.00 0.05
## Gloves -0.03 -0.01
## Boots NA -0.04
## Hatchts -0.13 NA
# missing values pattern
mice::md.pattern(holzinger.miss[ , 1:9], plot = TRUE) # pattern of missingness

## WrdMean Remndrs Boots SntComp MxdArit Hatchts OddWrds Gloves MissNum
## 39 1 1 1 1 1 1 1 1 1 0
## 5 1 1 1 1 1 1 1 1 0 1
## 6 1 1 1 1 1 1 1 0 1 1
## 2 1 1 1 1 1 1 1 0 0 2
## 5 1 1 1 1 1 1 0 1 1 1
## 1 1 1 1 1 1 1 0 1 0 2
## 6 1 1 1 1 1 0 1 1 1 1
## 1 1 1 1 1 1 0 1 1 0 2
## 1 1 1 1 1 1 0 0 1 1 2
## 1 1 1 1 1 0 1 1 1 1 1
## 2 1 1 1 1 0 1 1 0 1 2
## 1 1 1 1 1 0 1 0 1 1 2
## 2 1 1 1 1 0 0 1 1 1 2
## 1 1 1 1 1 0 0 1 1 0 3
## 6 1 1 1 0 1 1 1 1 1 1
## 1 1 1 1 0 1 1 0 1 0 3
## 1 1 1 1 0 0 1 1 0 1 3
## 4 1 1 0 1 1 1 1 1 1 1
## 2 1 1 0 1 1 1 1 1 0 2
## 2 1 1 0 1 1 1 0 1 1 2
## 2 1 0 1 1 1 1 1 1 1 1
## 1 1 0 1 1 1 1 0 1 0 3
## 1 1 0 1 0 1 1 1 1 1 2
## 1 1 0 1 0 1 1 1 0 1 3
## 1 1 0 0 1 1 1 1 1 1 2
## 4 0 1 1 1 1 1 1 1 1 1
## 2 0 1 1 1 0 1 1 1 1 2
## 6 6 9 10 10 11 12 12 14 90
# Imputing missing values using hot deck method
set.seed(1234)
holzinger.miss.hot.deck <- hot.deck::hot.deck(holzinger.miss[,1:9], method = "p.draw")
# Extract the first imputed data set
holzinger.miss.hot.deck.1 <- as.data.frame(holzinger.miss.hot.deck$data[1])
# Imputing missing values using EM method with missMethods package
holzinger.miss.EM <- missMethods::impute_EM(holzinger.miss[,1:9], stochastic = TRUE)
View(holzinger.miss.EM)
# Multiple imputation using EM and MCMC methods with norm2 package
emResult <- norm2::emNorm(holzinger.miss[,1:9])
## Note: Eigen power method failed to converge
## OCCURRED IN: estimate_worst_frac in MOD norm_engine
summary(emResult)
## Predictor (X) variables:
## Mean SD Observed Missing Pct.Missing
## CONST 1 0 101 0 0
##
## Response (Y) variables:
## Mean SD Observed Missing Pct.Missing
## WrdMean 99.64211 15.29978 95 6 5.940594
## SntComp 99.48352 14.51617 91 10 9.900990
## OddWrds 100.28090 14.10350 89 12 11.881188
## MxdArit 99.39560 14.95243 91 10 9.900990
## Remndrs 99.61053 14.62168 95 6 5.940594
## MissNum 99.66667 15.10403 87 14 13.861386
## Gloves 100.30337 14.72506 89 12 11.881188
## Boots 100.71739 15.02549 92 9 8.910891
## Hatchts 99.32222 14.20621 90 11 10.891089
##
## Missingness patterns for response (Y) variables
## (. denotes observed value, m denotes missing value)
## (variable names are displayed vertically)
## (rightmost column is the frequency):
## WSOMRM H
## rndxeiG a
## dtddmslBt
## MCWAnsooc
## eorrdNvoh
## amdiruett
## npstsmsss
## ......... 39
## ........m 6
## .......m. 4
## ......m.. 6
## .....m... 5
## .....m..m 1
## .....m.m. 2
## .....mm.. 2
## ....m.... 2
## ....m..m. 1
## ...m..... 1
## ...m....m 2
## ...m..m.. 2
## ...m.m..m 1
## ..m...... 5
## ..m.....m 1
## ..m....m. 2
## ..m..m... 1
## ..m.mm... 1
## ..mm..... 1
## .m....... 6
## .m..m.... 1
## .m..m.m.. 1
## .m.m..m.. 1
## .mm..m... 1
## m........ 4
## m..m..... 2
##
## Method: EM
## Prior: "uniform"
## Convergence criterion: 1e-05
## Iterations: 16
## Converged: TRUE
## Max. rel. difference: 6.4619e-06
## -2 Loglikelihood: 4720.574
## -2 Log-posterior density: 4720.574
## Worst fraction missing information: NA
##
## Eigen power method failed to converge
## OCCURRED IN: estimate_worst_frac in MOD norm_engine
##
## Estimated coefficients (beta):
## WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots
## CONST 100.168 99.71267 100.2361 99.95225 100.0323 99.60731 100.5306 100.2666
## Hatchts
## CONST 99.03079
##
## Estimated covariance matrix (sigma):
## WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves
## WrdMean 235.13673 144.77946 170.58483 87.99104 118.59161 102.54104 84.22429
## SntComp 144.77946 202.72683 106.97100 102.86199 79.62614 123.74094 31.84955
## OddWrds 170.58483 106.97100 201.12780 76.55909 109.45757 83.79457 79.97757
## MxdArit 87.99104 102.86199 76.55909 225.18505 160.07526 190.79802 76.35147
## Remndrs 118.59161 79.62614 109.45757 160.07526 208.17272 149.13254 104.48805
## MissNum 102.54104 123.74094 83.79457 190.79802 149.13254 228.34206 83.72404
## Gloves 84.22429 31.84955 79.97757 76.35147 104.48805 83.72404 211.96934
## Boots 43.35593 66.84546 41.14095 67.62947 53.69282 85.23938 83.75301
## Hatchts 85.22380 21.10028 82.06294 59.35629 98.69285 69.59804 109.01125
## Boots Hatchts
## WrdMean 43.35593 85.22380
## SntComp 66.84546 21.10028
## OddWrds 41.14095 82.06294
## MxdArit 67.62947 59.35629
## Remndrs 53.69282 98.69285
## MissNum 85.23938 69.59804
## Gloves 83.75301 109.01125
## Boots 222.70261 117.23885
## Hatchts 117.23885 201.76040
mcmcResult <- norm2::mcmcNorm(emResult, iter = 5000, impute.every = 1000,multicycle=10)
summary(mcmcResult)
##
## Method: MCMC
## Prior: "uniform"
## Iterations: 5000
## Cycles per iteration: 10
## Impute every k iterations, k = 1000
## No. of imputations created: 5
## series.worst present: FALSE
## series.beta present: TRUE
## series.sigma present: TRUE
set.seed(123456)
par(mfrow=c(4,2))
for(i in 1:8){
acf(mcmcResult$series.beta[,i],lag.max=1000, main=colnames(holzinger.miss[,1:9])[i])}

par(mfrow=c(4,2))
for(i in 1:8)
plot(mcmcResult$series.beta[,i],ylab="", main=colnames(holzinger.miss[,1:9])[i])

# Combine imputed data sets
imp.list <- as.list(NULL)
for(m in 1:50){
mcmcResult <- norm2:: mcmcNorm(emResult, iter=100)
imp.list[[m]] <- norm2::impNorm(mcmcResult) }
# Let use the third imputed data set
# holzinger.completeData_3 <- imp.list[[3]]
# head(holzinger.completeData_3)
# Imputing missing values using MI method in mice package
holzinger.miss.MI <- mice::mice(holzinger.miss[,1:9])
##
## iter imp variable
## 1 1 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 1 2 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 1 3 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 1 4 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 1 5 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 2 1 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 2 2 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 2 3 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 2 4 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 2 5 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 3 1 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 3 2 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 3 3 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 3 4 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 3 5 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 4 1 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 4 2 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 4 3 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 4 4 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 4 5 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 5 1 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 5 2 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 5 3 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 5 4 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 5 5 WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
plot(holzinger.miss.MI)



holzinger.long <- mice::complete(holzinger.miss.MI, "long", include = F)
# Let use the third iteration
holzinger.completeData_3 <- mice::complete(holzinger.miss.MI, 3)
head(holzinger.completeData_3)
## WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 1 144 76 147 89 132 83 134 86 143
## 2 102 99 105 128 117 116 121 124 120
## 3 72 88 84 84 71 81 70 101 86
## 4 126 135 115 123 117 133 121 117 119
## 5 117 117 113 118 111 125 94 114 95
## 6 120 124 103 125 122 113 106 120 92
################### Doing Exploratory Factor Analysis ######################
#loading required packages
library(psych)
library(GPArotation)
## Warning: package 'GPArotation' was built under R version 4.1.3
library(stats4)
library(MBESS)
## Warning: package 'MBESS' was built under R version 4.1.3
##
## Attaching package: 'MBESS'
## The following object is masked from 'package:psych':
##
## cor2cov
library(plyr)
## Warning: package 'plyr' was built under R version 4.1.3
library(nlme)
## Warning: package 'nlme' was built under R version 4.1.3
library(plyr)
library(FactoMineR)
## Warning: package 'FactoMineR' was built under R version 4.1.3
library(factoextra)
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(parameters)
## Warning: package 'parameters' was built under R version 4.1.3
## Registered S3 method overwritten by 'parameters':
## method from
## format.parameters_distribution datawizard
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## v purrr 0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x ggplot2::%+%() masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
## x dplyr::arrange() masks plyr::arrange()
## x dplyr::collapse() masks nlme::collapse()
## x purrr::compact() masks plyr::compact()
## x dplyr::count() masks plyr::count()
## x dplyr::failwith() masks plyr::failwith()
## x dplyr::filter() masks stats::filter()
## x dplyr::id() masks plyr::id()
## x dplyr::lag() masks stats::lag()
## x dplyr::mutate() masks plyr::mutate()
## x dplyr::rename() masks plyr::rename()
## x dplyr::summarise() masks plyr::summarise()
## x dplyr::summarize() masks plyr::summarize()
library(see)
## Warning: package 'see' was built under R version 4.1.3
library(performance)
## Warning: package 'performance' was built under R version 4.1.3
library(semPlot)
## Warning: package 'semPlot' was built under R version 4.1.3
library(psych)
library(PCDimension)
## Loading required package: ClassDiscovery
## Loading required package: cluster
## Loading required package: oompaBase
library(datawizard)
## Warning: package 'datawizard' was built under R version 4.1.3
##
## Attaching package: 'datawizard'
## The following object is masked from 'package:tidyr':
##
## extract
library(tidymodels)
## Warning: package 'tidymodels' was built under R version 4.1.3
## -- Attaching packages -------------------------------------- tidymodels 0.2.0 --
## v broom 0.8.0 v rsample 0.1.1
## v dials 0.1.1 v tune 0.2.0
## v infer 1.0.0 v workflows 0.2.6
## v modeldata 0.1.1 v workflowsets 0.2.1
## v parsnip 0.2.1 v yardstick 0.0.9
## v recipes 0.2.0
## Warning: package 'broom' was built under R version 4.1.3
## Warning: package 'dials' was built under R version 4.1.3
## Warning: package 'scales' was built under R version 4.1.3
## Warning: package 'parsnip' was built under R version 4.1.3
## Warning: package 'tune' was built under R version 4.1.3
## Warning: package 'workflows' was built under R version 4.1.3
## Warning: package 'workflowsets' was built under R version 4.1.3
## -- Conflicts ----------------------------------------- tidymodels_conflicts() --
## x ggplot2::%+%() masks psych::%+%()
## x scales::alpha() masks ggplot2::alpha(), psych::alpha()
## x dplyr::arrange() masks plyr::arrange()
## x dplyr::collapse() masks nlme::collapse()
## x purrr::compact() masks plyr::compact()
## x dplyr::count() masks plyr::count()
## x scales::discard() masks purrr::discard()
## x datawizard::extract() masks tidyr::extract()
## x dplyr::failwith() masks plyr::failwith()
## x dplyr::filter() masks stats::filter()
## x recipes::fixed() masks stringr::fixed()
## x dplyr::id() masks plyr::id()
## x dplyr::lag() masks stats::lag()
## x yardstick::mae() masks performance::mae()
## x dplyr::mutate() masks plyr::mutate()
## x infer::p_value() masks parameters::p_value()
## x tune::parameters() masks dials::parameters(), parameters::parameters()
## x dplyr::rename() masks plyr::rename()
## x yardstick::rmse() masks performance::rmse()
## x dials::smoothness() masks datawizard::smoothness(), parameters::smoothness()
## x yardstick::spec() masks readr::spec()
## x recipes::step() masks stats::step()
## x dplyr::summarise() masks plyr::summarise()
## x dplyr::summarize() masks plyr::summarize()
## x recipes::update() masks stats4::update(), stats::update()
## * Use suppressPackageStartupMessages() to eliminate package startup messages
library(missForest)
library(missMDA)
## Warning: package 'missMDA' was built under R version 4.1.3
library(MASS)
## Warning: package 'MASS' was built under R version 4.1.3
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(softImpute)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loaded softImpute 1.4-1
##
## Attaching package: 'softImpute'
## The following object is masked from 'package:tidyr':
##
## complete
library(dplyr)
library(tidyr)
library(ggplot2)
library(devtools)
## Loading required package: usethis
##
## Attaching package: 'devtools'
## The following object is masked from 'package:recipes':
##
## check
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:recipes':
##
## prepare
## The following object is masked from 'package:missForest':
##
## nrmse
## The following object is masked from 'package:datasets':
##
## sleep
library(corrplot)
## corrplot 0.92 loaded
library(qgraph)
## Warning: package 'qgraph' was built under R version 4.1.3
library(EFA.MRFA)
##
## Attaching package: 'EFA.MRFA'
## The following object is masked from 'package:purrr':
##
## transpose
library(EFAtools)
## Warning: package 'EFAtools' was built under R version 4.1.3
##
## Attaching package: 'EFAtools'
## The following object is masked from 'package:psych':
##
## KMO
library(EFA.dimensions)
##
## Attaching package: 'EFA.dimensions'
## The following objects are masked from 'package:EFAtools':
##
## PARALLEL, SMT
## The following object is masked from 'package:FactoMineR':
##
## PCA
library(RGenData)
library(nFactors)
## Warning: package 'nFactors' was built under R version 4.1.3
## Loading required package: lattice
##
## Attaching package: 'nFactors'
## The following object is masked from 'package:lattice':
##
## parallel
library(paran)
## Warning: package 'paran' was built under R version 4.1.3
# Is the data suitable for Factor Analysis
parameters::check_factorstructure(holzinger.completeData_3)
## # Is the data suitable for Factor Analysis?
##
## - KMO: The Kaiser, Meyer, Olkin (KMO) measure of sampling adequacy suggests that data seems appropriate for factor analysis (KMO = 0.73).
## - Sphericity: Bartlett's test of sphericity suggests that there is sufficient significant correlation in the data for factor analysis (Chisq(36) = 568.93, p < .001).
corrplot::corrplot(cor(holzinger.completeData_3), type = "lower", method = "circle")

correlations <- cor(holzinger.completeData_3)
round(correlations,2)
## WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## WrdMean 1.00 0.68 0.78 0.40 0.54 0.44 0.36 0.21 0.37
## SntComp 0.68 1.00 0.51 0.48 0.38 0.60 0.17 0.30 0.08
## OddWrds 0.78 0.51 1.00 0.34 0.54 0.36 0.36 0.20 0.38
## MxdArit 0.40 0.48 0.34 1.00 0.75 0.83 0.35 0.33 0.27
## Remndrs 0.54 0.38 0.54 0.75 1.00 0.66 0.52 0.26 0.44
## MissNum 0.44 0.60 0.36 0.83 0.66 1.00 0.37 0.35 0.30
## Gloves 0.36 0.17 0.36 0.35 0.52 0.37 1.00 0.41 0.49
## Boots 0.21 0.30 0.20 0.33 0.26 0.35 0.41 1.00 0.58
## Hatchts 0.37 0.08 0.38 0.27 0.44 0.30 0.49 0.58 1.00
round(cor(holzinger[,2:10]),2)
## WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## WrdMean 1.00 0.68 0.78 0.37 0.51 0.41 0.32 0.25 0.36
## SntComp 0.68 1.00 0.58 0.50 0.43 0.58 0.12 0.32 0.12
## OddWrds 0.78 0.58 1.00 0.40 0.53 0.46 0.40 0.25 0.40
## MxdArit 0.37 0.50 0.40 1.00 0.76 0.84 0.35 0.31 0.27
## Remndrs 0.51 0.43 0.53 0.76 1.00 0.68 0.47 0.24 0.41
## MissNum 0.41 0.58 0.46 0.84 0.68 1.00 0.38 0.34 0.32
## Gloves 0.32 0.12 0.40 0.35 0.47 0.38 1.00 0.42 0.53
## Boots 0.25 0.32 0.25 0.31 0.24 0.34 0.42 1.00 0.55
## Hatchts 0.36 0.12 0.40 0.27 0.41 0.32 0.53 0.55 1.00
symnum(correlations)
## W S O MA R MN G B H
## WrdMean 1
## SntComp , 1
## OddWrds , . 1
## MxdArit . . . 1
## Remndrs . . . , 1
## MissNum . . . + , 1
## Gloves . . . . . 1
## Boots . . . . 1
## Hatchts . . . . . . 1
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1
# determining number of correlations above .30
correlations <- cor(holzinger.completeData_3)
#number of coefficients => .30 off-diagonal
BigR <- sum(correlations >= abs(.30) & correlations < abs(1), na.rm = T)/2
BigR
## [1] 30
# total number of off-diagonal elements
totR <- length(holzinger.completeData_3)*(length(holzinger.completeData_3)-1)/2
totR
## [1] 36
# percent of off-diagonal elements > .30
(BigR/totR)*100
## [1] 83.33333
# determinant of a correlation matrix
det(cor(holzinger.completeData_3))
## [1] 0.002695812
psych::KMO(holzinger.completeData_3)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = holzinger.completeData_3)
## Overall MSA = 0.73
## MSA for each item =
## WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 0.71 0.63 0.84 0.74 0.82 0.77 0.86 0.58 0.61
psych::cortest.bartlett(cor(holzinger.completeData_3), n=nrow(holzinger.completeData_3))
## $chisq
## [1] 568.9274
##
## $p.value
## [1] 4.498225e-97
##
## $df
## [1] 36
# CD works with complete data. no missing
EFA.dimensions::DIMTESTS(holzinger.completeData_3, corkind = "pearson",
tests = c('CD','EMPKC','HULL','RAWPAR','NEVALSGT1'), display=2)
##
##
## COMPARISON DATA
##
## The number of factors according to the Comparison Data test (from EFAtools) = 5
##
##
## EMPIRICAL KAISER CRITERION
##
## Kind of correlations analyzed: from user
## Nfactors Eigenvalue Reference Values
## 1 4.482 1.686
## 2 1.362 1.542
## 3 1.101 1.390
## 4 0.797 1.231
## 5 0.501 1.062
## 6 0.280 1.000
## 7 0.215 1.000
## 8 0.152 1.000
## 9 0.110 1.000
##
## The number of factors according to the Empirical Kaiser Criterion = 1
##
##
## HULL METHOD
##
## The number of factors according to the Hull method test (from EFAtools) = 1
##
##
## PARALLEL ANALYSIS
##
## The entered data is a correlation matrix.
##
## Randomization method: generated data
##
## Type of correlations specified for the real data eigenvalues: from user
##
## Type of correlations specified for the random data eigenvalues: pearson
##
## Extraction Method: Principal Components
##
## Variables = 9
##
## Cases = 101
##
## Ndatasets = 100
##
## Percentile = 95
##
## Compare the Real Data eigenvalues below to the corresponding
## random data Mean and Percentile eigenvalues
## Root Real Data Mean Percentile
## 1 4.482 1.486 1.625
## 2 1.362 1.325 1.436
## 3 1.101 1.190 1.270
## 4 0.797 1.080 1.155
## 5 0.501 0.981 1.058
## 6 0.280 0.882 0.947
## 7 0.215 0.786 0.862
## 8 0.152 0.689 0.757
## 9 0.110 0.581 0.660
##
## The number of factors according to the parallel analysis = 1
##
##
##
## NUMBER OF EIGENVALUES > 1
##
## The entered data is a correlation matrix.
##
## Number of cases in the data file = 101
##
## Number of variables in the data file = 9
##
## Specified kind of correlations for this analysis: from user
##
##
## Total Variance Explained (Initial Eigenvalues):
## Eigenvalues Proportion of Variance Cumulative Prop. Variance
## Factor 1 4.48 0.50 0.50
## Factor 2 1.36 0.15 0.65
## Factor 3 1.10 0.12 0.77
## Factor 4 0.80 0.09 0.86
## Factor 5 0.50 0.06 0.92
## Factor 6 0.28 0.03 0.95
## Factor 7 0.21 0.02 0.97
## Factor 8 0.15 0.02 0.99
## Factor 9 0.11 0.01 1.00
##
## The number of eigenvalues greater than one = 3
##
##
## DIMTESTS results:
## # of Factors:
## CD 5
## EMPKC 1
## HULL 1
## RAWPAR 1
## NEVALSGT1 3
#Factorability of a correlation matrix
EFA.dimensions::FACTORABILITY(holzinger.completeData_3, corkind='pearson', Ncases=NULL, verbose=TRUE)
##
##
## Three methods of assessing the factorability of a correlation matrix or raw data set:
##
## Specified kind of correlations for this analysis: Pearson
##
##
## The determinant of the correlation matrix should be > 0.00001 for factorability.
##
## The determinant is 0.0026958 which is > 0.00001, indicating factorability.
##
##
## The Bartlett test of whether a correlation matrix is significantly different
## from an identity matrix (wherein all of the off-diagonal elements are zero):
##
## chisq = 568.927356923569 df= 36 p = 4.49822548177856e-97
##
## A significant difference is required for factorability.
##
##
## The Kaiser-Meyer-Olkin measure of sampling adequacy (MSA):
## Variable MSA
## WrdMean 0.71
## SntComp 0.63
## OddWrds 0.84
## MxdArit 0.74
## Remndrs 0.82
## MissNum 0.77
## Gloves 0.86
## Boots 0.58
## Hatchts 0.61
##
## The overall measure of sampling adequacy (MSA) = 0.73
##
## The Kaiser & Rice (1974) interpretation guidelines for MSA values:
##
## KMO >= .9 is marvelous
## KMO in the .80s is mertitorious
## KMO in the .70s is middling
## KMO in the .60s is medicore
## KMO in the .50s is miserable
## KMO < .5 is unacceptable
##
## Consider excluding items with KMO values < .5 and then re-run the FACTORABILITY analyses.
##
## The overall KMO coefficient indicates the proportion of
## variance in the variables that might be caused by underlying
## factors. If the variables share common factors, then the
## overall KMO coefficient should be close to 1.0. The overall
## KMO indicates the extent to which there is at least one
## latent factor underlying the variables. The overall KMO
## index is considered particularly meaningful when the cases
## to variables ratio is less than 1:5. The KMO coefficient for
## a variable is a kind of summary index of how much a
## variable overlaps with the other variables.
# Local dependence
EFA.dimensions::LOCALDEP(holzinger.completeData_3, corkind = 'pearson', verbose=TRUE)
##
##
## Eigenvalues & residuals correlations after partialling out the first component
##
## Number of cases = 101
##
## Number of variables = 9
##
## Specified kind of correlations for this analysis: Pearson
##
##
## Total Variance Explained (Initial Eigenvalues):
## Eigenvalues Proportion of Variance Cumulative Prop. Variance
## Factor 1 4.48 0.50 0.50
## Factor 2 1.36 0.15 0.65
## Factor 3 1.10 0.12 0.77
## Factor 4 0.80 0.09 0.86
## Factor 5 0.50 0.06 0.92
## Factor 6 0.28 0.03 0.95
## Factor 7 0.21 0.02 0.97
## Factor 8 0.15 0.02 0.99
## Factor 9 0.11 0.01 1.00
##
## Ratio of the 1st to the 2nd eigenvalue = 3.3
##
##
## Original correlation matrix:
## WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## WrdMean 1.00 0.68 0.78 0.40 0.54 0.44 0.36 0.21 0.37
## SntComp 0.68 1.00 0.51 0.48 0.38 0.60 0.17 0.30 0.08
## OddWrds 0.78 0.51 1.00 0.34 0.54 0.36 0.36 0.20 0.38
## MxdArit 0.40 0.48 0.34 1.00 0.75 0.83 0.35 0.33 0.27
## Remndrs 0.54 0.38 0.54 0.75 1.00 0.66 0.52 0.26 0.44
## MissNum 0.44 0.60 0.36 0.83 0.66 1.00 0.37 0.35 0.30
## Gloves 0.36 0.17 0.36 0.35 0.52 0.37 1.00 0.41 0.49
## Boots 0.21 0.30 0.20 0.33 0.26 0.35 0.41 1.00 0.58
## Hatchts 0.37 0.08 0.38 0.27 0.44 0.30 0.49 0.58 1.00
##
## Residual correlations after partialling out the first component:
## WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## WrdMean 1.00 0.34 0.51 -0.49 -0.26 -0.45 -0.23 -0.37 -0.14
## SntComp 0.34 1.00 0.06 -0.10 -0.44 0.12 -0.43 -0.09 -0.53
## OddWrds 0.51 0.06 1.00 -0.49 -0.13 -0.49 -0.15 -0.31 -0.07
## MxdArit -0.49 -0.10 -0.49 1.00 0.30 0.56 -0.25 -0.15 -0.36
## Remndrs -0.26 -0.44 -0.13 0.30 1.00 0.01 0.02 -0.37 -0.10
## MissNum -0.45 0.12 -0.49 0.56 0.01 1.00 -0.26 -0.14 -0.33
## Gloves -0.23 -0.43 -0.15 -0.25 0.02 -0.26 1.00 0.12 0.21
## Boots -0.37 -0.09 -0.31 -0.15 -0.37 -0.14 0.12 1.00 0.39
## Hatchts -0.14 -0.53 -0.07 -0.36 -0.10 -0.33 0.21 0.39 1.00
##
## The # of residual correlations >= .1 is 8 percentage = 0.22
## The # of residual correlations >= .2 is 6 percentage = 0.17
## The # of residual correlations >= .3 is 4 percentage = 0.11
## The # of residual correlations >= .4 is 2 percentage = 0.06
## The # of residual correlations >= .5 is 2 percentage = 0.06
## The # of residual correlations >= .6 is 0 percentage = 0
## The # of residual correlations >= .7 is 0 percentage = 0
## The # of residual correlations >= .8 is 0 percentage = 0
# How many factors to retain
evalues<-eigen(cor(holzinger.completeData_3))$values
evalues
## [1] 4.4821617 1.3617792 1.1013649 0.7971958 0.5011669 0.2799404 0.2145597
## [8] 0.1516015 0.1102300
scree(holzinger.completeData_3,factors=FALSE,pc=TRUE)

# percent of variance explained by each factors
(evalues/sum(evalues))*100
## [1] 49.801797 15.130880 12.237388 8.857731 5.568521 3.110448 2.383997
## [8] 1.684462 1.224777
plot(nFactors::nScree(cor(holzinger.completeData_3),model="factors"))

paran::paran(holzinger.completeData_3, iterations = 500, centile = 95)
##
## Using eigendecomposition of correlation matrix.
## Computing: 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
##
##
## Results of Horn's Parallel Analysis for component retention
## 500 iterations, using the 95 centile estimate
##
## --------------------------------------------------
## Component Adjusted Unadjusted Estimated
## Eigenvalue Eigenvalue Bias
## --------------------------------------------------
## 1 3.843800 4.482161 0.638361
## --------------------------------------------------
##
## Adjusted eigenvalues > 1 indicate dimensions to retain.
## (1 components retained)
EFA.MRFA::hullEFA(holzinger.completeData_3, extr = "ML", index_hull = 'CFI')
## HULL METHOD - CFI INDEX
##
## q f g st
## 0 0.0000 36 0.0000
## 1 0.6947 27 4.1668
## 2 0.8429 19 1.4814
## 3 0.9304 12 0.0000
##
## Number of advised dimensions: 1
##
## -----------------------------------------------

EFA.MRFA::parallelMRFA(holzinger.completeData_3,Ndatsets = 500, display = FALSE)

EGAnet::EGA(holzinger.completeData_3, model='glasso')

## Number of communities: 3
##
## WrdMean SntComp OddWrds MxdArit Remndrs MissNum Gloves Boots Hatchts
## 2 2 2 1 1 1 3 3 3
##
## Methods:
##
## Correlations = auto (from qgraph)
## Model = glasso
## Algorithm = walktrap
## Unidimensional Method = louvain with consensus clustering
qgraph::qgraph(cor(holzinger.completeData_3), cut=.300, details=F, posCol="orangered3",
negCol="green", labels=names(holzinger.completeData_3))

RGenData::EFACompData(holzinger.completeData_3, f.max = 6, graph=T)
## Number of factors to retain: 4

nfactor <- nFactors::nScree(cor(holzinger.completeData_3), model="components")
summary(nfactor)
## Report For a nScree Class
##
## Details: components
##
## Eigenvalues Prop Cumu Par.Analysis Pred.eig OC Acc.factor AF
## 1 4 0 0 1 2 NA (< AF)
## 2 1 0 1 1 1 3
## 3 1 0 1 1 1 (< OC) 0
## 4 1 0 1 1 1 0
## 5 1 0 1 1 0 0
## 6 0 0 1 1 0 0
## 7 0 0 1 1 0 0
## 8 0 0 1 1 NA 0
## 9 0 0 1 1 NA NA
##
##
## Number of factors retained by index
##
## noc naf nparallel nkaiser
## 1 3 1 3 3
n <- parameters::n_factors(holzinger.completeData_3, type = "FA", package = "all")
n
## # Method Agreement Procedure:
##
## The choice of 3 dimensions is supported by 8 (36.36%) methods out of 22 (CNG, t, p, Optimal coordinates, Parallel analysis, Kaiser criterion, Scree (SE), EGA (glasso)).
summary(n)
## n_Factors n_Methods
## 1 1 2
## 2 2 1
## 3 3 8
## 4 4 7
## 5 6 2
## 6 7 2
qgraph::qgraph(cor(holzinger.completeData_3), cut=.300, details=F, posCol="orangered3",
negCol="green", labels=names(holzinger.completeData_3))

# Three factor model
holzinger.long[,1:2] <- NULL
datasets.list <- list(holzinger.long[1:101,],holzinger.long[102:202,],
holzinger.long[203:303,],holzinger.long[304:404,],holzinger.long[405:505,])
holzinger.f3 <- fa.pooled(datasets.list,3, gam=0, fm="ml", residuals =TRUE, SMC=TRUE)
# off-diagonal residuals
resd <- round(holzinger.f3$residual,3)
resd <- lowerMat(resd)
## WrdMn SntCm OddWr MxdAr Rmndr MssNm Glovs Boots Htcht
## WrdMean 0.01
## SntComp 0.00 0.47
## OddWrds 0.00 0.01 0.41
## MxdArit 0.00 -0.01 0.00 0.07
## Remndrs 0.00 -0.08 0.06 0.01 0.31
## MissNum 0.00 0.06 -0.02 0.00 -0.02 0.19
## Gloves 0.00 -0.14 0.05 -0.01 0.09 0.00 0.64
## Boots 0.00 0.25 -0.08 -0.02 -0.11 0.06 0.03 0.66
## Hatchts 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
#largest residual
max(abs(resd),na.rm = TRUE, diag=FALSE)
## [1] 0.247
min(abs(resd),na.rm = TRUE, diag=FALSE)
## [1] 0
# display a histogram of residuals
hist(resd, col = "red", main = "residuals", xlab = "Residuals")

# print structure matrix
print(sort=T, digits=3, cut= 0, holzinger.f3$Structure)
##
## Loadings:
## ML3 ML2 ML1
## MxdArit 0.960 0.404 0.218
## Remndrs 0.772 0.559 0.475
## MissNum 0.896 0.497 0.276
## WrdMean 0.463 0.996 0.365
## SntComp 0.508 0.664 0.073
## OddWrds 0.362 0.762 0.371
## Gloves 0.407 0.396 0.518
## Boots 0.324 0.160 0.536
## Hatchts 0.270 0.374 0.997
##
## ML3 ML2 ML1
## SS loadings 3.267 3.058 2.174
## Proportion Var 0.363 0.340 0.242
## Cumulative Var 0.363 0.703 0.944
# display names
names(holzinger.f3)
## [1] "residual" "dof" "chi" "nh"
## [5] "rms" "EPVAL" "crms" "EBIC"
## [9] "ESABIC" "fit" "fit.off" "sd"
## [13] "factors" "complexity" "n.obs" "objective"
## [17] "criteria" "STATISTIC" "PVAL" "Call"
## [21] "null.model" "null.dof" "null.chisq" "TLI"
## [25] "RMSEA" "BIC" "SABIC" "r.scores"
## [29] "R2" "valid" "score.cor" "rotation"
## [33] "hyperplane" "communality" "communalities" "uniquenesses"
## [37] "values" "e.values" "loadings" "model"
## [41] "fm" "rot.mat" "Phi" "Structure"
## [45] "method" "R2.scores" "r" "np.obs"
## [49] "fn" "Vaccounted" "cis"
print(holzinger.f3$rms)
## [1] 0.05996106
# path diagram
par(mfrow=c(1,2))
fa.diagram(holzinger.f3, sort = T, cut = 0, digits = 3, cex=.75)

5. Reliability analysis
# subscale alpha reliability
CTT::reliability(holzinger.completeData_3[,c("WrdMean","SntComp","OddWrds")], itemal = F)
## You will find additional options and better formatting using itemAnalysis().
##
## Number of Items
## 3
##
## Number of Examinees
## 101
##
## Coefficient Alpha
## 0.852
CTT::reliability(holzinger.completeData_3[,c("MxdArit","Remndrs","MissNum")], itemal = F)
## You will find additional options and better formatting using itemAnalysis().
##
## Number of Items
## 3
##
## Number of Examinees
## 101
##
## Coefficient Alpha
## 0.9
CTT::reliability(holzinger.completeData_3[,c("Gloves","Boots","Hatchts")], itemal = F)
## You will find additional options and better formatting using itemAnalysis().
##
## Number of Items
## 3
##
## Number of Examinees
## 101
##
## Coefficient Alpha
## 0.744
# bootstrapped confidence interval for alpha
library(userfriendlyscience)
##
## Attaching package: 'userfriendlyscience'
## The following object is masked from 'package:lattice':
##
## oneway
## The following object is masked from 'package:nlme':
##
## getData
## The following object is masked from 'package:psych':
##
## reliability
library(MBESS)
MBESS::ci.reliability(holzinger.completeData_3[,c("WrdMean","SntComp","OddWrds")],
interval.type = "31", B=1000, conf.level = .99, type = "omega")
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
## $est
## [1] 0.8723179
##
## $se
## [1] 0.02088919
##
## $ci.lower
## [1] 0.8185109
##
## $ci.upper
## [1] 0.9261249
##
## $conf.level
## [1] 0.99
##
## $type
## [1] "omega"
##
## $interval.type
## [1] "maximum likelihood (wald ci)"
MBESS::ci.reliability(holzinger.completeData_3[,c("MxdArit","Remndrs","MissNum")],
interval.type = "31", B=1000, conf.level = .99, type = "omega")
## $est
## [1] 0.9050276
##
## $se
## [1] 0.01625001
##
## $ci.lower
## [1] 0.8631703
##
## $ci.upper
## [1] 0.9468848
##
## $conf.level
## [1] 0.99
##
## $type
## [1] "omega"
##
## $interval.type
## [1] "maximum likelihood (wald ci)"
MBESS::ci.reliability(holzinger.completeData_3[,c("Gloves","Boots","Hatchts")],
interval.type = "31", B=1000, conf.level = .99, type = "omega")
## $est
## [1] 0.7513082
##
## $se
## [1] 0.04217824
##
## $ci.lower
## [1] 0.6426643
##
## $ci.upper
## [1] 0.8599522
##
## $conf.level
## [1] 0.99
##
## $type
## [1] "omega"
##
## $interval.type
## [1] "maximum likelihood (wald ci)"
scaleReliability(holzinger[, c("WrdMean","SntComp","OddWrds")])
##
## Information about this analysis:
##
## Dataframe: holzinger[, c("WrdMean", "SntComp", "OddWrds")]
## Items: all
## Observations: 101
## Positive correlations: 3 out of 3 (100%)
##
## Estimates assuming interval level:
##
## Omega (total): 0.87
## Omega (hierarchical): 0.86
## Revelle's omega (total): 0.88
## Greatest Lower Bound (GLB): 0.9
## Coefficient H: 0.93
## Cronbach's alpha: 0.86
## Confidence intervals:
## Omega (total): [0.83, 0.91]
## Cronbach's alpha: [0.82, 0.91]
##
## (Estimates assuming ordinal level not computed, as at least one item seems to have more than 8 levels; the highest number of distinct levels is 51 and the highest range is 91. This last number needs to be lower than 9 for the polychoric function to work. If this is unexpected, you may want to check for outliers.)
##
## Note: the normal point estimate and confidence interval for omega are based on the procedure suggested by Dunn, Baguley & Brunsden (2013) using the MBESS function ci.reliability, whereas the psych package point estimate was suggested in Revelle & Zinbarg (2008). See the help ('?scaleStructure') for more information.
scaleReliability(holzinger[, c("MxdArit","Remndrs","MissNum")])
##
## Information about this analysis:
##
## Dataframe: holzinger[, c("MxdArit", "Remndrs", "MissNum")]
## Items: all
## Observations: 101
## Positive correlations: 3 out of 3 (100%)
##
## Estimates assuming interval level:
##
## Omega (total): 0.91
## Omega (hierarchical): 0.01
## Revelle's omega (total): 0.91
## Greatest Lower Bound (GLB): 0.93
## Coefficient H: 0.95
## Cronbach's alpha: 0.91
## Confidence intervals:
## Omega (total): [0.88, 0.94]
## Cronbach's alpha: [0.87, 0.94]
##
## (Estimates assuming ordinal level not computed, as at least one item seems to have more than 8 levels; the highest number of distinct levels is 53 and the highest range is 79. This last number needs to be lower than 9 for the polychoric function to work. If this is unexpected, you may want to check for outliers.)
##
## Note: the normal point estimate and confidence interval for omega are based on the procedure suggested by Dunn, Baguley & Brunsden (2013) using the MBESS function ci.reliability, whereas the psych package point estimate was suggested in Revelle & Zinbarg (2008). See the help ('?scaleStructure') for more information.
scaleReliability(holzinger[, c("Gloves","Boots","Hatchts")])
##
## Information about this analysis:
##
## Dataframe: holzinger[, c("Gloves", "Boots", "Hatchts")]
## Items: all
## Observations: 101
## Positive correlations: 3 out of 3 (100%)
##
## Estimates assuming interval level:
##
## Omega (total): 0.75
## Omega (hierarchical): 0.73
## Revelle's omega (total): 0.76
## Greatest Lower Bound (GLB): 0.78
## Coefficient H: 0.79
## Cronbach's alpha: 0.75
## Confidence intervals:
## Omega (total): [0.67, 0.84]
## Cronbach's alpha: [0.66, 0.83]
##
## (Estimates assuming ordinal level not computed, as at least one item seems to have more than 8 levels; the highest number of distinct levels is 50 and the highest range is 84. This last number needs to be lower than 9 for the polychoric function to work. If this is unexpected, you may want to check for outliers.)
##
## Note: the normal point estimate and confidence interval for omega are based on the procedure suggested by Dunn, Baguley & Brunsden (2013) using the MBESS function ci.reliability, whereas the psych package point estimate was suggested in Revelle & Zinbarg (2008). See the help ('?scaleStructure') for more information.
factorscores <- psych::factor.scores(holzinger.completeData_3,
holzinger.f3, Phi = NULL, method="Thurstone",impute="none")
(View(factor.scores <- factorscores$scores))
## NULL
cor(factorscores$scores)
## ML3 ML2 ML1
## ML3 1.0000000 0.4978971 0.2335568
## ML2 0.4978971 1.0000000 0.3676876
## ML1 0.2335568 0.3676876 1.0000000