Libraries used

library(kableExtra) # for tables
library(psych)
library(ggplot2)
library(MVN) # for mvn()
library(parameters) # efa_to_cfa()
library(lavaan) # for cfa()
library(car) # for qqPlot()
library(semTools) # for AVE, compRelSEM(), discriminantValidity
library(apaTables) # for apa.cor.table()

Data

# Load the survey export; the path is relative to the working directory.
love <- read.csv("love_cloud.csv")

EFA and CFA datasets

Randomly split the original dataset into two, one for the EFA, and one for the CFA.

# Randomly assign half of the respondents (rounded down) to the EFA sample and
# the remainder to the CFA sample. The fixed seed keeps the split reproducible.
set.seed(243)
n <- floor(nrow(love) / 2)
# seq_len() yields an empty index for an empty data set, whereas 1:nrow(love)
# would yield c(1, 0). For non-empty data the draw is identical.
w_efa <- sample(seq_len(nrow(love)), n)
w_cfa <- setdiff(seq_len(nrow(love)), w_efa)
# leave only item variables (block 1)
w1 <- which(names(love) == "Q1")
w2 <- which(names(love) == "Q206")
# Fail early if a boundary column is missing or duplicated; otherwise w1:w2
# errors obscurely or selects the wrong columns.
stopifnot(length(w1) == 1L, length(w2) == 1L)
vars <- names(love)[w1:w2]
aux_efa <- love[w_efa, vars]
aux_cfa <- love[w_cfa, vars]
# Recode the five Likert response labels to scores 1-5.
#
# x: character vector or matrix of response labels (a factor or data frame
#    is also accepted).
# Returns a numeric object with the same dim/dimnames (or names) as `x`.
# Values that are NA or not one of the five expected labels become NA, so a
# typo or unexpected label can never be mistaken for a score (summing
# logical comparisons silently produced 0 for such values).
f <- function(x){
  if (is.data.frame(x)) {
    x <- as.matrix(x)
  }
  labels <- c("Not at all", "A little bit", "Somewhat", "Quite a bit", "Very much")
  # The position of each label in `labels` is its score.
  scores <- as.numeric(match(x, labels))
  if (is.null(dim(x))) {
    names(scores) <- names(x)
  } else {
    # match() returns a flat vector; restore the matrix shape and labels.
    dim(scores) <- dim(x)
    dimnames(scores) <- dimnames(x)
  }
  scores
}
# Recode the item responses of each half with f():
# b1 holds the EFA sample, b2 the CFA sample.
b1 <- as.data.frame(f(as.matrix(aux_efa)))
b2 <- as.data.frame(f(as.matrix(aux_cfa)))

Sample characteristics

Age

# Age distribution and standard deviation: EFA half first, then CFA half.
summary(love$age[w_efa])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   18.00   33.00   41.50   44.04   55.00   85.00

# TRUE is spelled out: T is an ordinary variable that can be reassigned.
sd(love$age[w_efa], na.rm = TRUE)

## [1] 14.39224

summary(love$age[w_cfa])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   19.00   32.00   41.00   43.55   53.25   87.00

sd(love$age[w_cfa], na.rm = TRUE)

## [1] 15.04731

Gender

# Gender: category counts followed by shares, EFA half then CFA half.
# summary() of a factor also counts NAs, so the counts always add up to the
# sample size and prop.table() divides by the same total as length(w_*).
summary(factor(love$gender[w_efa]))

##        Man Non-binary      Woman 
##         81          3        264

prop.table(summary(factor(love$gender[w_efa])))

##        Man Non-binary      Woman 
## 0.23275862 0.00862069 0.75862069

summary(factor(love$gender[w_cfa]))

##        Man Non-binary      Woman 
##         87          3        258

prop.table(summary(factor(love$gender[w_cfa])))

##        Man Non-binary      Woman 
## 0.25000000 0.00862069 0.74137931

Transgender

# Transgender identity: counts followed by shares, EFA half then CFA half.
# summary() of a factor also counts NAs, so the counts add up to the sample
# size and prop.table() divides by the same total as length(w_*).
summary(factor(love$transgender[w_efa]))

##  No Yes 
## 345   3

prop.table(summary(factor(love$transgender[w_efa])))

##         No        Yes 
## 0.99137931 0.00862069

summary(factor(love$transgender[w_cfa]))

##  No 
## 348

prop.table(summary(factor(love$transgender[w_cfa])))

## No 
##  1

Sexual orientation

# Sexual orientation: counts followed by shares, EFA half then CFA half.
# summary() of a factor also counts NAs, so the counts add up to the sample
# size and prop.table() divides by the same total as length(w_*).
summary(factor(love$orientation[w_efa]))

##              Asexual             Bisexual          Gay/Lesbian 
##                    9                   25                    9 
##         Heterosexual            Pansexual Prefer not to answer 
##                  295                    4                    3 
##       Sexually Fluid       Something else 
##                    2                    1

prop.table(summary(factor(love$orientation[w_efa])))

##              Asexual             Bisexual          Gay/Lesbian 
##          0.025862069          0.071839080          0.025862069 
##         Heterosexual            Pansexual Prefer not to answer 
##          0.847701149          0.011494253          0.008620690 
##       Sexually Fluid       Something else 
##          0.005747126          0.002873563

summary(factor(love$orientation[w_cfa]))

##              Asexual             Bisexual          Gay/Lesbian 
##                   13                   35                    6 
##         Heterosexual Prefer not to answer       Sexually Fluid 
##                  292                    1                    1

prop.table(summary(factor(love$orientation[w_cfa])))

##              Asexual             Bisexual          Gay/Lesbian 
##          0.037356322          0.100574713          0.017241379 
##         Heterosexual Prefer not to answer       Sexually Fluid 
##          0.839080460          0.002873563          0.002873563

Race

# Race, EFA half: the four columns race1-race4 are stacked into one vector
# before tabulating (presumably a multi-select question - TODO confirm).
summary(factor(c(love$race1[w_efa], love$race2[w_efa], love$race3[w_efa], love$race4[w_efa])))

##                     African/Black                             Asian 
##                                35                                15 
##                   Hispanic/Latinx                   Native American 
##                                40                                11 
## Native Hawaiian/ Pacific Islander              Prefer not to answer 
##                                 2                                 1 
##                    Something else                   White/ European 
##                                 5                               275 
##                              NA's 
##                              1008

# NOTE(review): the stacked vector has 4 * length(w_efa) entries but is
# divided by length(w_efa). The named values therefore read as mentions per
# participant, and the "NA's" ratio exceeds 1 and is not a proportion.
summary(factor(c(love$race1[w_efa], love$race2[w_efa], love$race3[w_efa], love$race4[w_efa])))/length(w_efa)

##                     African/Black                             Asian 
##                       0.100574713                       0.043103448 
##                   Hispanic/Latinx                   Native American 
##                       0.114942529                       0.031609195 
## Native Hawaiian/ Pacific Islander              Prefer not to answer 
##                       0.005747126                       0.002873563 
##                    Something else                   White/ European 
##                       0.014367816                       0.790229885 
##                              NA's 
##                       2.896551724

# Race, CFA half: the four columns race1-race4 are stacked into one vector
# before tabulating (presumably a multi-select question - TODO confirm).
summary(factor(c(love$race1[w_cfa], love$race2[w_cfa], love$race3[w_cfa], love$race4[w_cfa])))

##                     African/Black                             Asian 
##                                32                                13 
##                   Hispanic/Latinx      Middle Eastern/North African 
##                                38                                 2 
##                   Native American Native Hawaiian/ Pacific Islander 
##                                 8                                 1 
##              Prefer not to answer                    Something else 
##                                 1                                 3 
##                   White/ European                              NA's 
##                               269                              1025

# NOTE(review): the stacked vector has 4 * length(w_cfa) entries but is
# divided by length(w_cfa). The named values therefore read as mentions per
# participant, and the "NA's" ratio exceeds 1 and is not a proportion.
summary(factor(c(love$race1[w_cfa], love$race2[w_cfa], love$race3[w_cfa], love$race4[w_cfa])))/length(w_cfa)

##                     African/Black                             Asian 
##                       0.091954023                       0.037356322 
##                   Hispanic/Latinx      Middle Eastern/North African 
##                       0.109195402                       0.005747126 
##                   Native American Native Hawaiian/ Pacific Islander 
##                       0.022988506                       0.002873563 
##              Prefer not to answer                    Something else 
##                       0.002873563                       0.008620690 
##                   White/ European                              NA's 
##                       0.772988506                       2.945402299

Rel length

# Relationship length: mean, median and SD, EFA half then CFA half.
# TRUE is spelled out: T is an ordinary variable that can be reassigned.
mean(love$rel_length[w_efa], na.rm = TRUE)

## [1] 15.31063

median(love$rel_length[w_efa], na.rm = TRUE)

## [1] 11.3

sd(love$rel_length[w_efa], na.rm = TRUE)

## [1] 12.85766

mean(love$rel_length[w_cfa], na.rm = TRUE)

## [1] 15.00403

median(love$rel_length[w_cfa], na.rm = TRUE)

## [1] 10

sd(love$rel_length[w_cfa], na.rm = TRUE)

## [1] 13.50026

Rel type

# Relationship type: counts followed by shares, EFA half then CFA half.
# summary() of a factor also counts NAs, so the counts add up to the sample
# size and prop.table() divides by the same total as length(w_*).
summary(factor(love$rel_type[w_efa]))

##                                          Dating 
##                                               8 
##                                         Engaged 
##                                              36 
##                                         Married 
##                                             242 
## Partnered/committed, but not engaged or married 
##                                              62

prop.table(summary(factor(love$rel_type[w_efa])))

##                                          Dating 
##                                      0.02298851 
##                                         Engaged 
##                                      0.10344828 
##                                         Married 
##                                      0.69540230 
## Partnered/committed, but not engaged or married 
##                                      0.17816092

summary(factor(love$rel_type[w_cfa]))

##                                          Dating 
##                                               8 
##                                         Engaged 
##                                              28 
##                                         Married 
##                                             245 
## Partnered/committed, but not engaged or married 
##                                              67

prop.table(summary(factor(love$rel_type[w_cfa])))

##                                          Dating 
##                                      0.02298851 
##                                         Engaged 
##                                      0.08045977 
##                                         Married 
##                                      0.70402299 
## Partnered/committed, but not engaged or married 
##                                      0.19252874

Living together

# Living together: counts followed by shares, EFA half then CFA half.
# summary() of a factor also counts NAs, so the counts add up to the sample
# size and prop.table() divides by the same total as length(w_*).
summary(factor(love$live[w_efa]))

##  No Yes 
##  11 337

prop.table(summary(factor(love$live[w_efa])))

##        No       Yes 
## 0.0316092 0.9683908

summary(factor(love$live[w_cfa]))

##  No Yes 
##   8 340

prop.table(summary(factor(love$live[w_cfa])))

##         No        Yes 
## 0.02298851 0.97701149

EFA

Checking multivariate normality on the EFA dataset

Descriptive stats:

# Run the MVN checks on the EFA item scores and keep the per-item
# descriptive statistics as a data frame.
# NOTE(review): `$Descriptives` depends on the installed MVN version's
# result names - confirm if the package is upgraded.
mvn1 <- mvn(b1)
mvn1_df <- data.frame(mvn1$Descriptives)

# Save the descriptives table, then summarise skewness and kurtosis
# across items (median, minimum, maximum).
write.csv(mvn1_df, "mvn1_cloud.csv")
median(mvn1_df$Skew)

## [1] -1.234714

min(mvn1_df$Skew)

## [1] -1.889561

max(mvn1_df$Skew)

## [1] -0.101065

median(mvn1_df$Kurtosis)

## [1] 0.9469358

min(mvn1_df$Kurtosis)

## [1] -1.347901

max(mvn1_df$Kurtosis)

## [1] 3.46337

Number of potential outliers:

# Squared Mahalanobis distance of every EFA respondent from the item means,
# compared with the chi-square quantile at 1 - 0.001 (df = number of items).
cutoff <- qchisq(1 - 0.001, df = ncol(b1))
mahal <- mahalanobis(b1, center = colMeans(b1), cov = cov(b1))
# TRUE = below the cutoff; FALSE = flagged as a potential outlier.
s <- summary(mahal < cutoff)
s

##    Mode   FALSE    TRUE 
## logical      53     295

Henze-Zirkler test for multivariate normality:

# Multivariate normality test table stored in the mvn() result for b1.
mvn1$multivariateNormality

##            Test       HZ p value MVN
## 1 Henze-Zirkler 3.489327       0  NO

The data is not multivariate normal (do principal axis factoring).

Correlations

# Inter-item correlation matrix of the EFA sample and the range of its
# off-diagonal entries.
cor1 <- cor(b1)
# cor() already returns a matrix, so a plain copy is enough; the diagonal
# (always 1) is blanked so it cannot be picked up as the maximum.
cor1_mat <- cor1
diag(cor1_mat) <- NA
# min()/max() over the whole matrix equal the min/max of the column-wise
# results; TRUE is spelled out instead of the reassignable T.
cor_min <- min(cor1_mat, na.rm = TRUE); cor_min

## [1] 0.2115231

cor_max <- max(cor1_mat, na.rm = TRUE); cor_max

## [1] 0.7954706

Pairwise inter-item correlations ranged from 0.2115231 to 0.7954706.

Factorability of the data (need KMO > 0.6)

# X is the working copy of the EFA item scores used by the code that follows.
X <- b1
# Kaiser-Meyer-Olkin (KMO) measure
KMO(r = cor1)

## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = cor1)
## Overall MSA =  0.97
## MSA for each item = 
##   Q1   Q2   Q3   Q4   Q5   Q6   Q7   Q8   Q9  Q10  Q11  Q12  Q13  Q14  Q15  Q16 
## 0.97 0.98 0.98 0.97 0.98 0.98 0.98 0.98 0.98 0.98 0.98 0.98 0.98 0.98 0.97 0.97 
##  Q17  Q18  Q19  Q20 Q100 Q101 Q102 Q103 Q104 Q105 Q106 Q107 Q122 Q145 Q146 Q147 
## 0.97 0.98 0.97 0.97 0.97 0.98 0.97 0.97 0.98 0.97 0.96 0.96 0.98 0.97 0.98 0.96 
## Q148 Q149 Q150 Q151 Q152 Q161 Q162 Q163 Q164 Q165 Q166 Q167 Q168 Q169 Q179 Q180 
## 0.96 0.98 0.97 0.97 0.97 0.98 0.97 0.97 0.97 0.97 0.97 0.96 0.97 0.96 0.96 0.97 
## Q181 Q182 Q183 Q184 Q185 Q186 Q187 Q198 Q199 Q200 Q201 Q202 Q203 Q204 Q205 Q206 
## 0.98 0.97 0.96 0.97 0.98 0.97 0.97 0.97 0.97 0.98 0.98 0.98 0.98 0.97 0.98 0.97

Run Bartlett’s test of sphericity only when the participant-to-item ratio is low (usually between 3:1 and 5:1). Small p-values (< 0.05) indicate that a factor analysis may be appropriate.

# Bartlett's test on the EFA correlation matrix; n is the number of EFA rows.
cortest.bartlett(cor1, n = nrow(X))

## $chisq
## [1] 20004.16
## 
## $p.value
## [1] 0
## 
## $df
## [1] 2016

A positive determinant means the factor analysis will probably run.

# NOTE(review): the value printed below is positive but on the order of
# 1e-27; a determinant this close to zero may signal strong collinearity
# among the items - worth confirming before relying on the solution.
det(cor1)

## [1] 1.798511e-27

Number of factors to extract

#library(ggplot2)
# Fit a 20-factor principal-axis model; in this section only its
# eigenvalues (fafit$e.values) are used, for the scree plot.
fafit <- fa(cor1, nfactors = 20, n.obs = nrow(X), rotate = "oblimin", fm = "pa")

## Loading required namespace: GPArotation

## Warning in GPFoblq(L, Tmat = Tmat, normalize = normalize, eps = eps, maxit =
## maxit, : convergence not obtained in GPFoblq. 1000 iterations used.

# n_factors <- length(fafit$e.values)
# Plot and print only the first 10 eigenvalues.
n_factors <- 10
scree <- data.frame(
  Factor_n = factor(seq_len(n_factors)),
  Eigenvalue = fafit$e.values[seq_len(n_factors)]
)
ggplot(scree, aes(x = Factor_n, y = Eigenvalue, group = 1)) +
  geom_point() +
  geom_line() +
  labs(
    x = "Number of factors",
    y = "Initial eigenvalue",
    title = "Scree Plot",
    subtitle = "(Based on the unreduced correlation matrix)"
  )

scree

##    Factor_n Eigenvalue
## 1         1 32.5230691
## 2         2  2.9010950
## 3         3  2.1184491
## 4         4  1.8335764
## 5         5  1.2721213
## 6         6  1.2508326
## 7         7  1.1160389
## 8         8  1.0281000
## 9         9  0.9734206
## 10       10  0.8653245

Eigenvalues less than 1 reflect potentially unstable factors. Only the first 8 eigenvalues exceed 1, so no more than 8 factors should be extracted.

Parallel analysis

# Parallel analysis on the EFA correlation matrix, requesting factor
# (fa = "fa") results with principal-axis extraction (fm = "pa").
parallel <- fa.parallel(cor1, n.obs = nrow(X), fa = "fa", fm = "pa")

## Parallel analysis suggests that the number of factors =  6  and the number of components =  NA

1 factor

Number of factors set to 1. After an initial elimination of items with loadings less than 0.32, we further eliminated items aiming to keep at least 2 items from each love language. This led to the following solution.

# One-factor principal-axis solution on the items left after screening.
nfactors <- 1
# Items excluded from this solution.
remove <- c(
  "Q1", "Q2", "Q3", "Q4", "Q6", "Q8", "Q9", "Q11", "Q12", "Q15", "Q16",
  "Q17", "Q18", "Q20", "Q100", "Q102", "Q103", "Q105", "Q106", "Q107",
  "Q122", "Q145", "Q146", "Q147", "Q148", "Q150", "Q151", "Q152", "Q161",
  "Q163", "Q166", "Q167", "Q169", "Q179", "Q180", "Q182", "Q183", "Q184",
  "Q185", "Q187", "Q198", "Q199", "Q202", "Q206"
)
#remove <- NA
cols <- setdiff(names(X), remove)
cor2 <- cor(X[, cols])
fa1 <- fa(
  r = cor2,
  nfactors = nfactors,
  n.obs = nrow(X),  # the column subset does not change the number of rows
  rotate = "oblimin",
  fm = "pa",
  max.iter = 100
)
# Loadings below 0.319 are left blank in the printout.
print(fa1$loadings, cutoff = 0.319)

## 
## Loadings:
##      PA1  
## Q5   0.729
## Q7   0.770
## Q10  0.823
## Q13  0.781
## Q14  0.733
## Q19  0.706
## Q101 0.766
## Q104 0.768
## Q149 0.748
## Q162 0.727
## Q164 0.796
## Q165 0.788
## Q168 0.771
## Q181 0.773
## Q186 0.807
## Q200 0.813
## Q201 0.758
## Q203 0.793
## Q204 0.740
## Q205 0.771
## 
##                   PA1
## SS loadings    11.816
## Proportion Var  0.591

fa1$RMSEA

##      RMSEA      lower      upper confidence 
## 0.09037319 0.08326465 0.09794797 0.90000000

fa1$TLI

## [1] 0.9000017

fa1$Vaccounted

##                       PA1
## SS loadings    11.8160665
## Proportion Var  0.5908033

# Factor
# efa_to_cfa() converts the fitted EFA into a model string; after splitting
# on spaces, the item names are the tokens that contain "Q".
factors1 <- efa_to_cfa(fa1)
aux2 <- strsplit(factors1, " ")[[1]]
aux3 <- aux2[grep("Q", aux2)]
# With a single factor every item token belongs to F1. Using the vector as
# is avoids 1:length(aux3), which indexes c(1, 0) when no token matched.
F1 <- aux3

# Alpha
alpha_F1 <- psych::alpha(X[,F1]); alpha_F1$total[1]

##  raw_alpha
##  0.9656432

2 factors

Number of factors set to 2. After an initial elimination of items with loadings less than 0.32 and/or with cross loadings, approximate simple structure was achieved with 41 items in the first factor and 9 items in the second one. To further reduce the number of items, we excluded items with lower factor loadings, lower communality, and higher complexity. This led to 15 items in F1 and 5 in F2. Of the 15 items in F1, 5 were words of affirmation, 4 were acts of service, 3 were quality time, and 3 were gifts. This led to the following 2-factor solution:

# Two-factor principal-axis solution (oblimin rotation) on the items left
# after screening.
nfactors <- 2
# Items excluded from this solution.
remove <- c(
  "Q1", "Q2", "Q4", "Q5", "Q6", "Q7", "Q8", "Q9", "Q11", "Q14", "Q16",
  "Q17", "Q18", "Q19", "Q20", "Q103", "Q104", "Q105", "Q106", "Q122",
  "Q146", "Q147", "Q148", "Q150", "Q151", "Q152", "Q161", "Q162", "Q164",
  "Q166", "Q167", "Q168", "Q181", "Q182", "Q183", "Q185", "Q186", "Q187",
  "Q198", "Q199", "Q202", "Q203", "Q205", "Q206"
)
#remove <- NA
cols <- setdiff(names(X), remove)
cor2 <- cor(X[, cols])
fa1 <- fa(
  r = cor2,
  nfactors = nfactors,
  n.obs = nrow(X),  # the column subset does not change the number of rows
  rotate = "oblimin",
  fm = "pa",
  max.iter = 100
)
# Loadings below 0.319 are left blank in the printout.
print(fa1$loadings, cutoff = 0.319)

## 
## Loadings:
##      PA1    PA2   
## Q3    0.820       
## Q10   0.783       
## Q12   0.770       
## Q13   0.847       
## Q15   0.844       
## Q100  0.785       
## Q101  0.717       
## Q102  0.788       
## Q107         0.618
## Q145         0.836
## Q149  0.778       
## Q163  0.657       
## Q165  0.627       
## Q169         0.756
## Q179         0.792
## Q180         0.926
## Q184  0.690       
## Q200  0.730       
## Q201  0.726       
## Q204  0.694       
## 
##                  PA1   PA2
## SS loadings    8.526 3.231
## Proportion Var 0.426 0.162
## Cumulative Var 0.426 0.588

fa1$RMSEA

##      RMSEA      lower      upper confidence 
## 0.07682237 0.06903262 0.08505370 0.90000000

fa1$TLI

## [1] 0.9215219

fa1$Vaccounted

##                             PA1       PA2
## SS loadings           8.6930667 3.3984906
## Proportion Var        0.4346533 0.1699245
## Cumulative Var        0.4346533 0.6045779
## Proportion Explained  0.7189369 0.2810631
## Cumulative Proportion 0.7189369 1.0000000

# Factors
# Extract the leading item name from a token. Unlike substr(x, 1, 4), this
# does not assume that the item name is exactly four characters long.
item_name <- function(x) regmatches(x, regexpr("^[[:alnum:]_.]+", x))

# efa_to_cfa() converts the fitted EFA into a model string. After splitting
# on spaces, the item tokens are those containing "Q"; the one that also
# contains "PA" is the last item of F1 with the next factor's label attached.
factors2 <- efa_to_cfa(fa1)
aux2 <- strsplit(factors2, " ")[[1]]
aux3 <- aux2[grep("Q", aux2)]
w <- grep("PA", aux3)
F1 <- aux3[1:w]
F2 <- aux3[(w+1):length(aux3)]
F1[length(F1)] <- item_name(F1[length(F1)])

# Alphas
alpha_F1 <- psych::alpha(X[,F1]); alpha_F1$total[1]

##  raw_alpha
##   0.953899

alpha_F2 <- psych::alpha(X[,F2]); alpha_F2$total[1]

##  raw_alpha
##  0.8986201

# Correlation between factors
# Each factor is scored as the respondent's mean over its items; rowMeans()
# is the direct equivalent of apply(..., 1, mean) on a data frame.
cor(rowMeans(X[, F1]), rowMeans(X[, F2]))

## [1] 0.6195951

3 factors

When the number of factors was set to 3, the initial loadings suggested a 2-factor model. Specifically, there were no loadings above 0.32, only two cross-loadings below 0.4. However, working on the 4-factor solution led to three factors after an initial elimination of items with loadings less than 0.32 and/or with cross loadings. We then worked on a 3-factor solution starting with the 49 items from this 4-factor solution. We further removed items that had lower loadings, cross-loadings, or low communality, or that were conceptually inconsistent. Approximate simple structure was achieved with 11 items in the first factor, 11 items in the second one, and 10 items in the third one. To further reduce the number of items, we excluded items with lower factor loadings, lower communality, and higher complexity. This led to 7 items in F1, 6 items in F2, and 7 items in F3. Of the 7 items in F1, 5 were words of affirmation and 2 were quality time. Of the 7 items in F3, 5 were acts of service and 2 were gifts. This led to the following 3-factor solution:

# Three-factor principal-axis solution (oblimin rotation) on the items left
# after screening.
nfactors <- 3
# Items excluded from this solution.
remove <- c(
  "Q1", "Q5", "Q6", "Q107", "Q7", "Q9", "Q10", "Q11", "Q12", "Q13", "Q14",
  "Q15", "Q16", "Q17", "Q19", "Q20", "Q102", "Q103", "Q104", "Q106",
  "Q122", "Q146", "Q147", "Q148", "Q150", "Q151", "Q152", "Q161", "Q162",
  "Q165", "Q166", "Q167", "Q168", "Q181", "Q182", "Q183", "Q185", "Q187",
  "Q201", "Q202", "Q203", "Q204", "Q205", "Q206"
)
# remove <- NA
cols <- setdiff(names(X), remove)
cor2 <- cor(X[, cols])
fa1 <- fa(
  r = cor2,
  nfactors = nfactors,
  n.obs = nrow(X),  # the column subset does not change the number of rows
  rotate = "oblimin",
  fm = "pa",
  max.iter = 100
)
# Loadings below 0.319 are left blank in the printout.
print(fa1$loadings, cutoff = 0.319)

## 
## Loadings:
##      PA1    PA2    PA3   
## Q2                  0.846
## Q3                  0.627
## Q4                  0.787
## Q8           0.724       
## Q18                 0.477
## Q100                0.770
## Q101                0.488
## Q105         0.565       
## Q145         0.852       
## Q149                0.650
## Q163  0.568              
## Q164  0.473              
## Q169         0.755       
## Q179         0.767       
## Q180         0.919       
## Q184  0.623              
## Q186  0.747              
## Q198  0.788              
## Q199  0.798              
## Q200  0.948              
## 
##                  PA1   PA2   PA3
## SS loadings    3.828 3.787 3.340
## Proportion Var 0.191 0.189 0.167
## Cumulative Var 0.191 0.381 0.548

fa1$RMSEA

##      RMSEA      lower      upper confidence 
## 0.07101185 0.06254932 0.07990382 0.90000000

fa1$TLI

## [1] 0.9326798

fa1$Vaccounted

##                             PA1       PA2       PA3
## SS loadings           4.4887954 4.2424761 3.9301884
## Proportion Var        0.2244398 0.2121238 0.1965094
## Cumulative Var        0.2244398 0.4365636 0.6330730
## Proportion Explained  0.3545243 0.3350701 0.3104056
## Cumulative Proportion 0.3545243 0.6895944 1.0000000

# Factors
# Extract the leading item name from a token. Unlike substr(x, 1, 4), this
# does not assume that the item name is exactly four characters long.
item_name <- function(x) regmatches(x, regexpr("^[[:alnum:]_.]+", x))

# efa_to_cfa() converts the fitted EFA into a model string. After splitting
# on spaces, the item tokens are those containing "Q"; the ones that also
# contain "PA" close a factor and carry the next factor's label.
factors3 <- efa_to_cfa(fa1)
aux2 <- strsplit(factors3, " ")[[1]]
aux3 <- aux2[grep("Q", aux2)]
w <- grep("PA", aux3)
F1 <- aux3[1:w[1]]
F2 <- aux3[(w[1]+1):w[2]]
F3 <- aux3[(w[2]+1):length(aux3)]
F1[length(F1)] <- item_name(F1[length(F1)])
F2[length(F2)] <- item_name(F2[length(F2)])

# Alphas
alpha_F1 <- psych::alpha(X[,F1]); alpha_F1$total[1]

##  raw_alpha
##  0.9226617

alpha_F2 <- psych::alpha(X[,F2]); alpha_F2$total[1]

##  raw_alpha
##  0.9147622

alpha_F3 <- psych::alpha(X[,F3]); alpha_F3$total[1]

##  raw_alpha
##   0.897103

# Correlation between factors
# Each factor is scored as the respondent's mean over its items; rowMeans()
# is the direct equivalent of apply(..., 1, mean) on a data frame.
F1m <- rowMeans(X[, F1])
F2m <- rowMeans(X[, F2])
F3m <- rowMeans(X[, F3])
factors <- data.frame(F1m, F2m, F3m)
cor(factors)

##           F1m      F2m       F3m
## F1m 1.0000000 0.672156 0.7543078
## F2m 0.6721560 1.000000 0.5895530
## F3m 0.7543078 0.589553 1.0000000

4 factors

The initial work on a 4-factor solution suggested a 3-factor model. However, working on the 5-factor solution led to four factors after an initial elimination of items with loadings less than 0.32 and/or with cross loadings (the fifth factor had only 3 items and was highly correlated with one of the four factors). We then worked on a 4-factor solution starting with the 33 items from this 5-factor solution. Approximate simple structure was achieved with 10 items in the first factor, 9 items in the second one, 8 items in the third one, and 6 items in the fourth one. To further reduce the number of items, we excluded items with lower factor loadings, lower communality, and higher complexity. This led to 6 items in F1, 4 items in F2, 4 items in F3, and 4 items in F4. Of the 6 items in F1, 4 were words of affirmation and 2 were quality time. The other factors mapped to the remaining love languages. This led to the following 4-factor solution:

# Four-factor principal-axis solution (oblimin rotation) on the items left
# after screening.
nfactors <- 4
# Items excluded from this solution.
remove <- c(
  "Q1", "Q5", "Q6", "Q7", "Q8", "Q9", "Q10", "Q11", "Q12", "Q13", "Q14",
  "Q15", "Q16", "Q101", "Q102", "Q103", "Q104", "Q105", "Q106", "Q122",
  "Q146", "Q147", "Q148", "Q150", "Q151", "Q152", "Q161", "Q162", "Q165",
  "Q166", "Q167", "Q168", "Q181", "Q183", "Q185", "Q187", "Q199", "Q201",
  "Q203", "Q204", "Q205", "Q206", "Q17", "Q3", "Q107", "Q182"
)
#remove <- NA
cols <- setdiff(names(X), remove)
cor2 <- cor(X[, cols])
fa1 <- fa(
  r = cor2,
  nfactors = nfactors,
  n.obs = nrow(X),  # the column subset does not change the number of rows
  rotate = "oblimin",
  fm = "pa",
  max.iter = 100
)
# Loadings below 0.319 are left blank in the printout.
print(fa1$loadings, cutoff = 0.319)

## 
## Loadings:
##      PA1    PA2    PA4    PA3   
## Q2                  0.781       
## Q4                  0.685       
## Q18                        0.555
## Q19                        0.750
## Q20                        0.788
## Q100                0.707       
## Q145         0.772              
## Q149                0.617       
## Q163  0.637                     
## Q164  0.506                     
## Q169         0.758              
## Q179         0.735              
## Q180         0.937              
## Q184  0.580                     
## Q186  0.728                     
## Q198  0.619                     
## Q200  0.920                     
## Q202                       0.593
## 
##                  PA1   PA2   PA4   PA3
## SS loadings    2.904 2.722 2.146 1.934
## Proportion Var 0.161 0.151 0.119 0.107
## Cumulative Var 0.161 0.313 0.432 0.539

fa1$RMSEA

##      RMSEA      lower      upper confidence 
## 0.04837669 0.03628481 0.06044600 0.90000000

fa1$TLI

## [1] 0.971023

fa1$Vaccounted

##                             PA1       PA2       PA4       PA3
## SS loadings           3.5786543 3.1462183 2.7001419 2.5238021
## Proportion Var        0.1988141 0.1747899 0.1500079 0.1402112
## Cumulative Var        0.1988141 0.3736040 0.5236119 0.6638231
## Proportion Explained  0.2994986 0.2633079 0.2259757 0.2112177
## Cumulative Proportion 0.2994986 0.5628066 0.7887823 1.0000000

# Factors
# Extract the leading item name from a token. Unlike substr(x, 1, 4), this
# does not assume that the item name is exactly four characters long.
item_name <- function(x) regmatches(x, regexpr("^[[:alnum:]_.]+", x))

# efa_to_cfa() converts the fitted EFA into a model string. After splitting
# on spaces, the item tokens are those containing "Q"; the ones that also
# contain "PA" close a factor and carry the next factor's label.
factors4_2 <- efa_to_cfa(fa1)
aux2 <- strsplit(factors4_2, " ")[[1]]
aux3 <- aux2[grep("Q", aux2)]
w <- grep("PA", aux3)
F1 <- aux3[1:w[1]]
F2 <- aux3[(w[1]+1):w[2]]
F3 <- aux3[(w[2]+1):w[3]]
F4 <- aux3[(w[3]+1):length(aux3)]
F1[length(F1)] <- item_name(F1[length(F1)])
F2[length(F2)] <- item_name(F2[length(F2)])
F3[length(F3)] <- item_name(F3[length(F3)])

# Alphas
alpha_F1 <- psych::alpha(X[,F1]); alpha_F1$total[1]

##  raw_alpha
##   0.907973

alpha_F2 <- psych::alpha(X[,F2]); alpha_F2$total[1]

##  raw_alpha
##  0.9073911

alpha_F3 <- psych::alpha(X[,F3]); alpha_F3$total[1]

##  raw_alpha
##  0.8737413

alpha_F4 <- psych::alpha(X[,F4]); alpha_F4$total[1]

##  raw_alpha
##  0.8686325

# Correlation between factors
# Each factor is scored as the respondent's mean over its items; rowMeans()
# is the direct equivalent of apply(..., 1, mean) on a data frame.
F1m <- rowMeans(X[, F1])
F2m <- rowMeans(X[, F2])
F3m <- rowMeans(X[, F3])
F4m <- rowMeans(X[, F4])
factors <- data.frame(F1m, F2m, F3m, F4m)
round(cor(factors), 2)

##      F1m  F2m  F3m  F4m
## F1m 1.00 0.64 0.70 0.69
## F2m 0.64 1.00 0.48 0.64
## F3m 0.70 0.48 1.00 0.68
## F4m 0.69 0.64 0.68 1.00

5 factors (25 items)

The initial work on a 5-factor solution suggested a 4-factor model. However, working on the 6-factor solution led to five factors after an initial elimination of items with loadings less than 0.32 and/or with cross loadings. To further reduce the number of items, we excluded items with lower factor loadings, lower communality, and higher complexity. This led to two solutions, one with 25 items (5 in each factor) and one with 20 items (4 in each factor).

# Five-factor principal-axis solution (oblimin rotation), 25-item version.
nfactors <- 5
# Items excluded from this solution (solution 1 from 6 factors).
# "Q182" and "Q185" were listed twice; the duplicates are dropped, which
# leaves the set of removed items, and therefore `cols`, unchanged.
remove <- c(
  "Q5", "Q6", "Q7", "Q8", "Q9", "Q10", "Q11", "Q12", "Q13", "Q101", "Q102",
  "Q103", "Q104", "Q105", "Q122", "Q146", "Q147", "Q148", "Q150", "Q151",
  "Q152", "Q161", "Q162", "Q163", "Q165", "Q166", "Q167", "Q181", "Q183",
  "Q201", "Q203", "Q204", "Q205", "Q206", "Q182", "Q185", "Q184", "Q168",
  "Q3"
)
#remove <- NA
cols <- setdiff(names(X), remove)
cor2 <- cor(X[, cols])
fa1 <- fa(r = cor2, n.obs = nrow(X[, cols]), nfactors = nfactors,
 fm = "pa", max.iter = 100, rotate = "oblimin")
# Loadings below 0.319 are left blank in the printout.
print(fa1$loadings, cutoff = 0.319)

## 
## Loadings:
##      PA1    PA2    PA4    PA3    PA5   
## Q1                                0.407
## Q2                                0.753
## Q4                                0.685
## Q14                 0.780              
## Q15                 0.636              
## Q16                 0.649              
## Q17                        0.554       
## Q18                        0.550       
## Q19                        0.711       
## Q20                        0.692       
## Q100                              0.665
## Q106                0.694              
## Q107         0.586                     
## Q145         0.805                     
## Q149                              0.573
## Q164                0.415              
## Q169         0.778                     
## Q179         0.685                     
## Q180         0.847                     
## Q186  0.752                            
## Q187  0.692                            
## Q198  0.675                            
## Q199  0.676                            
## Q200  0.742                            
## Q202                       0.620       
## 
##                  PA1   PA2   PA4   PA3   PA5
## SS loadings    2.843 2.919 2.251 2.139 2.166
## Proportion Var 0.114 0.117 0.090 0.086 0.087
## Cumulative Var 0.114 0.230 0.321 0.406 0.493

fa1$RMSEA

##      RMSEA      lower      upper confidence 
## 0.05696234 0.04931259 0.06491407 0.90000000

fa1$TLI

## [1] 0.9451039

fa1$Vaccounted

##                             PA1       PA2       PA4       PA3       PA5
## SS loadings           3.7237489 3.5215641 3.1251538 3.0237646 2.9108697
## Proportion Var        0.1489500 0.1408626 0.1250062 0.1209506 0.1164348
## Cumulative Var        0.1489500 0.2898125 0.4148187 0.5357693 0.6522040
## Proportion Explained  0.2283794 0.2159793 0.1916672 0.1854490 0.1785251
## Cumulative Proportion 0.2283794 0.4443587 0.6360259 0.8214749 1.0000000

# Factors
# Extract the leading item name from a token. Unlike substr(x, 1, 4), this
# does not assume that the item name is exactly four characters long.
item_name <- function(x) regmatches(x, regexpr("^[[:alnum:]_.]+", x))

# efa_to_cfa() converts the fitted EFA into a model string. After splitting
# on spaces, the item tokens are those containing "Q"; the ones that also
# contain "PA" close a factor and carry the next factor's label.
factors5_1 <- efa_to_cfa(fa1)
aux2 <- strsplit(factors5_1, " ")[[1]]
aux3 <- aux2[grep("Q", aux2)]
w <- grep("PA", aux3)
F1 <- aux3[1:w[1]]
F2 <- aux3[(w[1]+1):w[2]]
F3 <- aux3[(w[2]+1):w[3]]
F4 <- aux3[(w[3]+1):w[4]]
F5 <- aux3[(w[4]+1):length(aux3)]
F1[length(F1)] <- item_name(F1[length(F1)])
F2[length(F2)] <- item_name(F2[length(F2)])
F3[length(F3)] <- item_name(F3[length(F3)])
F4[length(F4)] <- item_name(F4[length(F4)])

# Alphas
alpha_F1 <- psych::alpha(X[,F1]); alpha_F1$total[1]

##  raw_alpha
##  0.9240188

alpha_F2 <- psych::alpha(X[,F2]); alpha_F2$total[1]

##  raw_alpha
##  0.8986201

alpha_F3 <- psych::alpha(X[,F3]); alpha_F3$total[1]

##  raw_alpha
##  0.8920414

alpha_F4 <- psych::alpha(X[,F4]); alpha_F4$total[1]

##  raw_alpha
##  0.8802723

alpha_F5 <- psych::alpha(X[,F5]); alpha_F5$total[1]

##  raw_alpha
##  0.8718099

# Correlation between factors
# Each factor is scored as the respondent's mean over its items; rowMeans()
# is the direct equivalent of apply(..., 1, mean) on a data frame.
F1m <- rowMeans(X[, F1])
F2m <- rowMeans(X[, F2])
F3m <- rowMeans(X[, F3])
F4m <- rowMeans(X[, F4])
F5m <- rowMeans(X[, F5])
factors <- data.frame(F1m, F2m, F3m, F4m, F5m)
round(cor(factors), 2)

##      F1m  F2m  F3m  F4m  F5m
## F1m 1.00 0.62 0.74 0.68 0.66
## F2m 0.62 1.00 0.64 0.64 0.50
## F3m 0.74 0.64 1.00 0.69 0.69
## F4m 0.68 0.64 0.69 1.00 0.71
## F5m 0.66 0.50 0.69 0.71 1.00

# Five-factor principal-axis solution (oblimin rotation), 20-item version.
nfactors <- 5
# Items excluded from this solution (solution 2 from 6 factors).
remove <- c(
  "Q1", "Q3", "Q5", "Q6", "Q7", "Q8", "Q9", "Q10", "Q11", "Q12", "Q13",
  "Q101", "Q102", "Q103", "Q104", "Q105", "Q107", "Q122", "Q146", "Q148",
  "Q150", "Q151", "Q152", "Q161", "Q162", "Q163", "Q165", "Q166", "Q167",
  "Q181", "Q182", "Q183", "Q185", "Q199", "Q201", "Q203", "Q204", "Q205",
  "Q206", "Q168", "Q147", "Q17", "Q164", "Q187"
)
#remove <- NA
cols <- setdiff(names(X), remove)
cor2 <- cor(X[, cols])
fa1 <- fa(
  r = cor2,
  nfactors = nfactors,
  n.obs = nrow(X),  # the column subset does not change the number of rows
  rotate = "oblimin",
  fm = "pa",
  max.iter = 100
)
# Loadings below 0.319 are left blank in the printout.
print(fa1$loadings, cutoff = 0.319)

## 
## Loadings:
##      PA2    PA5    PA1    PA4    PA3   
## Q2           0.798                     
## Q4           0.708                     
## Q14                        0.721       
## Q15                        0.613       
## Q16                        0.648       
## Q18                               0.471
## Q19                               0.691
## Q20                               0.699
## Q100         0.685                     
## Q106                       0.753       
## Q145  0.787                            
## Q149         0.590                     
## Q169  0.737                            
## Q179  0.739                            
## Q180  0.903                            
## Q184                0.551              
## Q186                0.747              
## Q198                0.578              
## Q200                0.760              
## Q202                              0.552
## 
##                  PA2   PA5   PA1   PA4   PA3
## SS loadings    2.640 2.163 1.953 1.989 1.615
## Proportion Var 0.132 0.108 0.098 0.099 0.081
## Cumulative Var 0.132 0.240 0.338 0.437 0.518

fa1$RMSEA

##      RMSEA      lower      upper confidence 
## 0.04765279 0.03631801 0.05897779 0.90000000

fa1$TLI

## [1] 0.9688624

fa1$Vaccounted

##                             PA2       PA5       PA1       PA4       PA3
## SS loadings           3.1128909 2.8084939 2.6650009 2.6299627 2.3070079
## Proportion Var        0.1556445 0.1404247 0.1332500 0.1314981 0.1153504
## Cumulative Var        0.1556445 0.2960692 0.4293193 0.5608174 0.6761678
## Proportion Explained  0.2301863 0.2076773 0.1970665 0.1944756 0.1705943
## Cumulative Proportion 0.2301863 0.4378635 0.6349301 0.8294057 1.0000000

# Factors
# Extract the leading item name from a token. Unlike substr(x, 1, 4), this
# does not assume that the item name is exactly four characters long.
item_name <- function(x) regmatches(x, regexpr("^[[:alnum:]_.]+", x))

# efa_to_cfa() converts the fitted EFA into a model string. After splitting
# on spaces, the item tokens are those containing "Q"; the ones that also
# contain "PA" close a factor and carry the next factor's label.
factors5_2 <- efa_to_cfa(fa1)
aux2 <- strsplit(factors5_2, " ")[[1]]
aux3 <- aux2[grep("Q", aux2)]
w <- grep("PA", aux3)
F1 <- aux3[1:w[1]]
F2 <- aux3[(w[1]+1):w[2]]
F3 <- aux3[(w[2]+1):w[3]]
F4 <- aux3[(w[3]+1):w[4]]
F5 <- aux3[(w[4]+1):length(aux3)]
F1[length(F1)] <- item_name(F1[length(F1)])
F2[length(F2)] <- item_name(F2[length(F2)])
F3[length(F3)] <- item_name(F3[length(F3)])
F4[length(F4)] <- item_name(F4[length(F4)])

# Alphas
alpha_F1 <- psych::alpha(X[,F1]); alpha_F1$total[1]

##  raw_alpha
##  0.9073911

alpha_F2 <- psych::alpha(X[,F2]); alpha_F2$total[1]

##  raw_alpha
##  0.8737413

alpha_F3 <- psych::alpha(X[,F3]); alpha_F3$total[1]

##  raw_alpha
##  0.8870889

alpha_F4 <- psych::alpha(X[,F4]); alpha_F4$total[1]

##  raw_alpha
##  0.8765027

alpha_F5 <- psych::alpha(X[,F5]); alpha_F5$total[1]

##  raw_alpha
##  0.8686325

# Correlation between factors
# Each factor is scored as the respondent's mean over its items; rowMeans()
# is the direct equivalent of apply(..., 1, mean) on a data frame.
F1m <- rowMeans(X[, F1])
F2m <- rowMeans(X[, F2])
F3m <- rowMeans(X[, F3])
F4m <- rowMeans(X[, F4])
F5m <- rowMeans(X[, F5])
factors <- data.frame(F1m, F2m, F3m, F4m, F5m)
round(cor(factors), 2)

##      F1m  F2m  F3m  F4m  F5m
## F1m 1.00 0.48 0.60 0.60 0.64
## F2m 0.48 1.00 0.68 0.65 0.68
## F3m 0.60 0.68 1.00 0.70 0.67
## F4m 0.60 0.65 0.70 1.00 0.66
## F5m 0.64 0.68 0.67 0.66 1.00

CFA

The six models from the EFA will be tested with a CFA.

Checking multivariate normality on the CFA dataset