Inter-Item Correlation Matrix
# Pearson correlations between every pair of item columns, selected and
# ordered by factor_order; each pair uses the rows where both are observed.
cor_mat <- cor(
  df[, factor_order],
  use = "pairwise.complete.obs"
)

# Reshape the square matrix to long form (Var1, Var2, value) and pin the
# levels of both index columns to factor_order so both axes follow that order.
cor_long <- melt(cor_mat)
cor_long[c("Var1", "Var2")] <- lapply(
  cor_long[c("Var1", "Var2")],
  factor,
  levels = factor_order
)

# Heatmap: one tile per item pair, diverging palette centred on r = 0 and
# fixed to the full [-1, 1] range.
p_cor <- ggplot(cor_long, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile(color = "white", linewidth = 0.2) +
  scale_fill_gradient2(
    name = "r",
    low = "#2166ac", mid = "white", high = "#d6604d",
    midpoint = 0,
    limits = c(-1, 1)
  ) +
  theme_bw(base_size = 9) +
  theme(
    panel.grid = element_blank(),
    axis.title = element_blank(),
    axis.text.x = element_text(angle = 90, hjust = 1, size = 7),
    axis.text.y = element_text(size = 7)
  ) +
  labs(title = "IPIP-50 inter-item correlation matrix\nItems ordered by dimension; R = reverse-scored (already recoded)")

print(p_cor)

Exploratory Factor Analysis (5 Factors)
# Fix the RNG state before fitting.
set.seed(42)

# Five-factor exploratory factor analysis on df: maximum-likelihood
# extraction (fm = "ml"), oblimin rotation, regression-method factor scores.
efa <- fa(
  df,
  nfactors = 5,
  fm = "ml",
  rotate = "oblimin",
  scores = "regression"
)

# Print the loadings sorted, suppressing entries below 0.30 in absolute value.
print(efa[["loadings"]], sort = TRUE, cutoff = 0.30)
##
## Loadings:
## ML4 ML1 ML3 ML2 ML5
## E1 0.690
## E2R 0.697
## E3 0.640
## E4R 0.717
## E5 0.732
## E6R 0.546
## E7 0.749
## E8R 0.582
## E9 0.623
## E10R 0.662
## N1 0.671
## N3 0.592
## N5 0.537
## N6 0.746
## N7 0.734
## N8 0.760
## N9 0.732
## N10 0.576
## A4 0.808
## A5R 0.651
## A6 0.615
## A7R 0.582
## A8 0.571
## A9 0.713
## C1 0.599
## C2R 0.543
## C4R 0.530
## C5 0.633
## C6R 0.586
## C7 0.555
## C9 0.641
## O1 0.583
## O2R 0.536
## O3 0.538
## O5 0.608
## O6R 0.501
## O8 0.552
## O10 0.680
## N2R 0.496
## N4R
## A1R 0.432
## A2 0.496
## A3R 0.404
## A10 0.335
## C3 0.395
## C8R 0.453
## C10 0.466
## O4R 0.461
## O7 0.493
## O9 0.347
##
## ML4 ML1 ML3 ML2 ML5
## SS loadings 4.911 4.411 3.600 3.172 3.164
## Proportion Var 0.098 0.088 0.072 0.063 0.063
## Cumulative Var 0.098 0.186 0.258 0.322 0.385
# Heatmap of the EFA loadings, one row per item and one column per factor,
# with items grouped into facets by their expected dimension.

# Plain data frame of loadings (items in rows, factors in columns).
loadings_mat <- as.data.frame(unclass(efa$loadings))

# Relabel the factor columns F1..Fk in their existing column order. The count
# is taken from the matrix itself so this keeps working if nfactors changes.
colnames(loadings_mat) <- paste0("F", seq_len(ncol(loadings_mat)))

loadings_mat$item <- rownames(loadings_mat)
# item_labels is indexed by item name to get each item's expected dimension.
# NOTE(review): presumably a named vector defined earlier in the script.
loadings_mat$expected <- item_labels[loadings_mat$item]

# Long form: one row per (item, factor) pair.
load_long <- melt(
  loadings_mat,
  id.vars = c("item", "expected"),
  variable.name = "factor",
  value.name = "loading"
)
load_long$item <- factor(load_long$item, levels = factor_order)

p_load <- ggplot(load_long, aes(x = factor, y = item, fill = loading)) +
  geom_tile(color = "white", linewidth = 0.3) +
  # Label only salient loadings. sprintf() keeps two decimals on every label;
  # round() inside ifelse() is coerced to character and drops trailing zeros
  # (e.g. "0.5" next to "0.58").
  geom_text(
    aes(label = ifelse(abs(loading) >= 0.30, sprintf("%.2f", loading), "")),
    size = 2.5
  ) +
  scale_fill_gradient2(
    low = "#2166ac", mid = "white", high = "#d6604d",
    midpoint = 0, limits = c(-1, 1), name = "loading"
  ) +
  # One facet row per expected dimension; free y so each facet lists only its
  # own items and takes height proportional to how many it has.
  facet_grid(expected ~ ., scales = "free_y", space = "free_y") +
  theme_bw(base_size = 9) +
  theme(
    strip.text.y = element_text(angle = 0, size = 8),
    axis.text.y = element_text(size = 7),
    axis.title = element_blank(),
    panel.grid = element_blank()
  ) +
  ggtitle("EFA factor loadings (oblimin rotation, 5 factors)\nValues >= |0.30| shown; R = reverse-scored item")

print(p_load)

Parallel Analysis
# Parallel analysis on the same item data used for the EFA, restricted to
# factor (not component) solutions with maximum-likelihood extraction.
# The title names IPIP-50 to match the data analysed in the rest of the
# script; it previously read "BFI-44".
# NOTE(review): parallel analysis is simulation-based and no seed is set
# directly before this call, so reproducing the reported result relies on the
# RNG state left by earlier code.
fa.parallel(
  df,
  fm = "ml",
  fa = "fa",
  main = "Parallel analysis scree plot (IPIP-50)"
)
## Parallel analysis suggests that the number of factors = 10 and the number of components = NA