knitr::opts_chunk$set(
echo = TRUE,
fig.align = "center",
fig.width = 7,
fig.height = 5
)
#Packages einlesen
library(readxl)
## Warning: Paket 'readxl' wurde unter R Version 4.4.3 erstellt
library(dplyr)
## Warning: Paket 'dplyr' wurde unter R Version 4.4.3 erstellt
##
## Attache Paket: 'dplyr'
## Die folgenden Objekte sind maskiert von 'package:stats':
##
## filter, lag
## Die folgenden Objekte sind maskiert von 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
## Warning: Paket 'tidyr' wurde unter R Version 4.4.3 erstellt
library(psych)
## Warning: Paket 'psych' wurde unter R Version 4.4.3 erstellt
library(ggplot2)
## Warning: Paket 'ggplot2' wurde unter R Version 4.4.3 erstellt
##
## Attache Paket: 'ggplot2'
## Die folgenden Objekte sind maskiert von 'package:psych':
##
## %+%, alpha
library(car)
## Warning: Paket 'car' wurde unter R Version 4.4.3 erstellt
## Lade nötiges Paket: carData
## Warning: Paket 'carData' wurde unter R Version 4.4.3 erstellt
##
## Attache Paket: 'car'
## Das folgende Objekt ist maskiert 'package:psych':
##
## logit
## Das folgende Objekt ist maskiert 'package:dplyr':
##
## recode
library(GPArotation)
## Warning: Paket 'GPArotation' wurde unter R Version 4.4.3 erstellt
##
## Attache Paket: 'GPArotation'
## Die folgenden Objekte sind maskiert von 'package:psych':
##
## equamax, varimin
library(knitr)
## Warning: Paket 'knitr' wurde unter R Version 4.4.3 erstellt
options(scipen = 999) # ohne wissenschaftliche Notation (e^)
bzw. der Dateipfad der Source-Dateien
data_agentnarc <- readxl::read_excel("data_agentnarc.xlsx")
codebook <- readxl::read_excel("codebook_agentnarc.xlsx")
dim(data_agentnarc)
## [1] 375 243
names(data_agentnarc)[1:20]
## [1] "session" "session_id" "study_id"
## [4] "iteration" "created" "modified"
## [7] "ended" "expired" "datenschutz"
## [10] "ident_code" "demo_gender" "demo_age"
## [13] "demo_education_self" "demo_work" "demo_position"
## [16] "ffni_arr_1" "ffni_arr_2" "ffni_arr_4"
## [19] "ffni_as_1" "ffni_as_2"
Kurzer Check eines Beispiel-Items
summary(data_agentnarc$agent_3B_1)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.000 2.000 3.000 2.927 4.000 6.000 4
table(data_agentnarc$agent_3B_1, useNA = "ifany")
##
## 1 2 3 4 5 6 <NA>
## 63 79 96 95 31 7 4
Auswahl der Items der Gruppe 3B
group3B_names <- codebook |>
dplyr::filter(grepl("^agent_3B_", Itemname)) |>
dplyr::pull(Itemname)
group3B_names
## [1] "agent_3B_1" "agent_3B_2" "agent_3B_3" "agent_3B_4" "agent_3B_5"
group3B <- data_agentnarc |>
dplyr::select(all_of(group3B_names))
head(group3B)
## # A tibble: 6 × 5
## agent_3B_1 agent_3B_2 agent_3B_3 agent_3B_4 agent_3B_5
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 3 3 2 2 5
## 2 2 4 2 1 1
## 3 3 2 1 1 5
## 4 3 2 2 3 6
## 5 2 2 1 1 4
## 6 1 4 1 1 4
colSums(is.na(group3B)) # Missings pro Item
## agent_3B_1 agent_3B_2 agent_3B_3 agent_3B_4 agent_3B_5
## 4 4 4 4 5
Deskriptive Statistiken
desc <- psych::describe(group3B)
desc_round <- round(desc, 2)
kable(desc_round, caption = "Deskriptive Statistiken für die Items der Gruppe 3B")
Deskriptive Statistiken für die Items der Gruppe 3B
| agent_3B_1 |
1 |
371 |
2.93 |
1.29 |
3 |
2.89 |
1.48 |
1 |
6 |
5 |
0.10 |
-0.78 |
0.07 |
| agent_3B_2 |
2 |
371 |
2.99 |
1.18 |
3 |
3.00 |
1.48 |
1 |
6 |
5 |
-0.06 |
-0.69 |
0.06 |
| agent_3B_3 |
3 |
371 |
1.58 |
0.87 |
1 |
1.41 |
0.00 |
1 |
5 |
4 |
1.48 |
1.52 |
0.05 |
| agent_3B_4 |
4 |
371 |
2.10 |
1.11 |
2 |
1.98 |
1.48 |
1 |
5 |
4 |
0.68 |
-0.60 |
0.06 |
| agent_3B_5 |
5 |
370 |
4.14 |
1.19 |
4 |
4.20 |
1.48 |
1 |
6 |
5 |
-0.51 |
0.13 |
0.06 |
#Korrelationsplot
cor_mat <- cor(group3B, use = "pairwise.complete.obs")
cor.plot(cor_mat, main = "Korrelationsplot – Gruppe 3B")

#Bartlett-Test und KMO
bart <- cortest.bartlett(group3B, n = nrow(group3B))
## R was not square, finding R from data
bart
## $chisq
## [1] 358.672
##
## $p.value
## [1] 0.00000000000000000000000000000000000000000000000000000000000000000000005748671
##
## $df
## [1] 10
kmo <- KMO(group3B)
kmo
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = group3B)
## Overall MSA = 0.76
## MSA for each item =
## agent_3B_1 agent_3B_2 agent_3B_3 agent_3B_4 agent_3B_5
## 0.76 0.76 0.77 0.74 0.81
Parallelanalyse
fa_parallel1 <- fa.parallel(
group3B, fm = "ml", fa = "pc",
n.iter = 2000, SMC = FALSE, sim = TRUE, quant = 0.95, plot = TRUE
)
## Parallel analysis suggests that the number of factors = NA and the number of components = 1
abline(h = 1)

fa_parallel1
## Call: fa.parallel(x = group3B, fm = "ml", fa = "pc", n.iter = 2000,
## SMC = FALSE, sim = TRUE, quant = 0.95, plot = TRUE)
## Parallel analysis suggests that the number of factors = NA and the number of components = 1
##
## Eigen Values of
##
## eigen values of factors
## [1] 1.79 0.12 0.03 -0.06 -0.09
##
## eigen values of simulated factors
## [1] NA
##
## eigen values of components
## [1] 2.36 0.96 0.71 0.51 0.47
##
## eigen values of simulated components
## [1] 1.15 1.06 1.00 0.94 0.86
fa_parallel2 <- fa.parallel(
group3B, fm = "ml", fa = "fa",
n.iter = 2000, SMC = TRUE, sim = FALSE, quant = 0.95, plot = TRUE
)

## Parallel analysis suggests that the number of factors = 1 and the number of components = NA
fa_parallel2
## Call: fa.parallel(x = group3B, fm = "ml", fa = "fa", n.iter = 2000,
## SMC = TRUE, sim = FALSE, quant = 0.95, plot = TRUE)
## Parallel analysis suggests that the number of factors = 1 and the number of components = NA
##
## Eigen Values of
##
## eigen values of factors
## [1] 1.68 0.02 0.01 -0.18 -0.19
##
## eigen values of simulated factors
## [1] NA
##
## eigen values of components
## [1] 2.36 0.96 0.71 0.51 0.47
##
## eigen values of simulated components
## [1] NA
fa_parallel1$pc.values
## [1] 2.3567459 0.9555301 0.7129496 0.5064141 0.4683604
which(fa_parallel1$pc.values > 1)
## [1] 1
Zusammenfassung Kriterien zur Faktoranazahl
pa1 <- 1 # Parallel-Analyse PC
pa2 <- 2 # Parallel-Analyse FA
ew <- 1 # Eigenwert > 1
scree <- 2 # Screeplot
map <- 1 # MAP-Kriterium (z. B. aus VSS)
kriterien <- data.frame(
Kriterium = c("Parallelanalyse (PC)", "Parallelanalyse (FA)",
"Eigenwert > 1", "Screeplot", "MAP"),
empfohlene_Faktoren = c(pa1, pa2, ew, scree, map)
)
kable(kriterien, caption = "Übersicht: Anzahl Faktoren nach verschiedenen Kriterien")
Übersicht: Anzahl Faktoren nach verschiedenen
Kriterien
| Parallelanalyse (PC) |
1 |
| Parallelanalyse (FA) |
2 |
| Eigenwert > 1 |
1 |
| Screeplot |
2 |
| MAP |
1 |
Hilfsfunktion für Item-Itemname-matching
matchItems <- function(efa.result,
codebook,
shortitem = "Namen",
longitem = "Items",
digits = 3,
cut = NULL) {
fs <- psych::fa.sort(efa.result)
# Loadings als Dataframe
loadings <- as.data.frame(unclass(fs$loadings))
colnames(loadings) <- attr(fs$loadings, "dimnames")[[2]]
# Codebook-Spalten als Vektoren
short_vec <- codebook[[shortitem]]
long_vec <- codebook[[longitem]]
# Zuordnung Itemnamen <-> Loadings-Zeilen
matches <- match(rownames(loadings), short_vec)
if (any(is.na(matches))) {
warning("Einige Loadings-Zeilen konnten im Codebook nicht gefunden werden.")
}
Item <- long_vec[matches]
resultEFA <- data.frame(Item, loadings, check.names = FALSE)
# Runden
is.num <- sapply(resultEFA, is.numeric)
resultEFA[is.num] <- lapply(resultEFA[is.num], round, digits)
# Kleinere Ladungen auf NA setzen
if (!is.null(cut)) {
num_mat <- as.matrix(resultEFA[is.num])
num_mat[abs(num_mat) <= cut] <- NA
resultEFA[is.num] <- as.data.frame(num_mat)
}
return(resultEFA)
}
Faktorananalys 1- Faktor Lösung
efa1.1 <- fa(group3B, nfactors = 1, fm = "pa", rotate = "promax")
efa1.1
## Factor Analysis using method = pa
## Call: fa(r = group3B, nfactors = 1, rotate = "promax", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PA1 h2 u2 com
## agent_3B_1 0.66 0.434 0.57 1
## agent_3B_2 0.66 0.433 0.57 1
## agent_3B_3 0.58 0.339 0.66 1
## agent_3B_4 0.74 0.549 0.45 1
## agent_3B_5 -0.20 0.038 0.96 1
##
## PA1
## SS loadings 1.79
## Proportion Var 0.36
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## df null model = 10 with the objective function = 0.97 with Chi Square = 358.67
## df of the model are 5 and the objective function was 0.04
##
## The root mean square of the residuals (RMSR) is 0.04
## The df corrected root mean square of the residuals is 0.05
##
## The harmonic n.obs is 371 with the empirical chi square 10.11 with prob < 0.072
## The total n.obs was 375 with Likelihood Chi Square = 14.07 with prob < 0.015
##
## Tucker Lewis Index of factoring reliability = 0.948
## RMSEA index = 0.069 and the 90 % confidence intervals are 0.028 0.114
## BIC = -15.57
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## PA1
## Correlation of (regression) scores with factors 0.88
## Multiple R square of scores with factors 0.77
## Minimum correlation of possible factor scores 0.53
Spaltennamen via Codebook anpassen
efa1.1_items <- matchItems(
efa1.1,
codebook,
shortitem = "Itemname",
longitem = "Itemtext",
digits = 3,
cut = NULL
)
2-Faktor Testung
efa1.2 <- fa(group3B, nfactors = 2, fm = "pa", rotate = "promax")
## maximum iteration exceeded
efa1.2
## Factor Analysis using method = pa
## Call: fa(r = group3B, nfactors = 2, rotate = "promax", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PA1 PA2 h2 u2 com
## agent_3B_1 -0.12 0.91 0.678 0.32 1.0
## agent_3B_2 0.28 0.40 0.405 0.59 1.8
## agent_3B_3 0.64 -0.02 0.386 0.61 1.0
## agent_3B_4 0.78 0.02 0.631 0.37 1.0
## agent_3B_5 -0.05 -0.15 0.039 0.96 1.2
##
## PA1 PA2
## SS loadings 1.12 1.02
## Proportion Var 0.22 0.20
## Cumulative Var 0.22 0.43
## Proportion Explained 0.52 0.48
## Cumulative Proportion 0.52 1.00
##
## With factor correlations of
## PA1 PA2
## PA1 1.00 0.75
## PA2 0.75 1.00
##
## Mean item complexity = 1.2
## Test of the hypothesis that 2 factors are sufficient.
##
## df null model = 10 with the objective function = 0.97 with Chi Square = 358.67
## df of the model are 1 and the objective function was 0
##
## The root mean square of the residuals (RMSR) is 0.01
## The df corrected root mean square of the residuals is 0.04
##
## The harmonic n.obs is 371 with the empirical chi square 1.3 with prob < 0.25
## The total n.obs was 375 with Likelihood Chi Square = 1.31 with prob < 0.25
##
## Tucker Lewis Index of factoring reliability = 0.991
## RMSEA index = 0.029 and the 90 % confidence intervals are 0 0.144
## BIC = -4.62
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## PA1 PA2
## Correlation of (regression) scores with factors 0.87 0.88
## Multiple R square of scores with factors 0.76 0.77
## Minimum correlation of possible factor scores 0.51 0.55
efa1.2_items <- matchItems(
efa1.2,
codebook,
shortitem = "Itemname",
longitem = "Itemtext",
digits = 3,
cut = 0.30
)
efa1.2$Phi
## PA1 PA2
## PA1 1.0000000 0.7548069
## PA2 0.7548069 1.0000000
round(efa1.2$Phi, 2)
## PA1 PA2
## PA1 1.00 0.75
## PA2 0.75 1.00
# Tabelle für den Balkenplot
descr_table <- as.data.frame(desc_round)
descr_table$Item <- rownames(descr_table)
names(descr_table)[names(descr_table) == "mean"] <- "Schwierigkeit"
# Items für die Histogramme
my_items <- group3B
# Gruppen-Label
gruppe <- "3B"
# Balkendiagramm der Itemschwierigkeiten
ggplot(descr_table, aes(x = Item, y = Schwierigkeit)) +
geom_col() +
coord_cartesian(ylim = c(1, 6)) + # bei Likert 1–6
labs(
x = "Item",
y = "Mittelwert (Schwierigkeit)",
title = paste0("Itemschwierigkeiten Gruppe ", gruppe)
) +
theme_minimal()

Histogramm
my_items_long <- my_items %>%
pivot_longer(cols = everything(), names_to = "Item", values_to = "Antwort")
ggplot(my_items_long, aes(x = Antwort)) +
geom_histogram(binwidth = 1, boundary = 0.5, closed = "right") +
facet_wrap(~ Item) +
scale_x_continuous(breaks = 1:6) +
labs(
x = "Antwortkategorie",
y = "Häufigkeit",
title = paste0("Antwortverteilungen für Items der Gruppe ", gruppe)
) +
theme_minimal()
## Warning: Removed 21 rows containing non-finite outside the scale range
## (`stat_bin()`).
