knitr::opts_chunk$set(
  echo = TRUE,
  fig.align = "center",
  fig.width = 7,
  fig.height = 5
)
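The package start-up warnings echoed below appear in the rendered document because the chunk options above do not suppress them. A minimal sketch of the additional (standard) knitr options that would hide them:

knitr::opts_chunk$set(
  warning = FALSE,  # suppress package start-up warnings
  message = FALSE   # suppress attach/masking messages
)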

# Load packages

library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.4.3
library(psych)
## Warning: package 'psych' was built under R version 4.4.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.3
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
## The following object is masked from 'package:dplyr':
## 
##     recode
library(GPArotation)
## Warning: package 'GPArotation' was built under R version 4.4.3
## 
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
## 
##     equamax, varimin
library(knitr)
## Warning: package 'knitr' was built under R version 4.4.3
options(scipen = 999) # suppress scientific notation (e notation)

Reading in the data; the file path of the source files may need adjusting

data_agentnarc     <- readxl::read_excel("data_agentnarc.xlsx")
codebook <- readxl::read_excel("codebook_agentnarc.xlsx")
dim(data_agentnarc)
## [1] 375 243
names(data_agentnarc)[1:20]
##  [1] "session"             "session_id"          "study_id"           
##  [4] "iteration"           "created"             "modified"           
##  [7] "ended"               "expired"             "datenschutz"        
## [10] "ident_code"          "demo_gender"         "demo_age"           
## [13] "demo_education_self" "demo_work"           "demo_position"      
## [16] "ffni_arr_1"          "ffni_arr_2"          "ffni_arr_4"         
## [19] "ffni_as_1"           "ffni_as_2"

Quick check of an example item

summary(data_agentnarc$agent_3B_1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.000   2.000   3.000   2.927   4.000   6.000       4
table(data_agentnarc$agent_3B_1, useNA = "ifany")
## 
##    1    2    3    4    5    6 <NA> 
##   63   79   96   95   31    7    4

Selecting the items of group 3B

group3B_names <- codebook |>
  dplyr::filter(grepl("^agent_3B_", Itemname)) |>
  dplyr::pull(Itemname)
group3B_names
## [1] "agent_3B_1" "agent_3B_2" "agent_3B_3" "agent_3B_4" "agent_3B_5"
group3B <- data_agentnarc |>
  dplyr::select(all_of(group3B_names))
head(group3B)
## # A tibble: 6 × 5
##   agent_3B_1 agent_3B_2 agent_3B_3 agent_3B_4 agent_3B_5
##        <dbl>      <dbl>      <dbl>      <dbl>      <dbl>
## 1          3          3          2          2          5
## 2          2          4          2          1          1
## 3          3          2          1          1          5
## 4          3          2          2          3          6
## 5          2          2          1          1          4
## 6          1          4          1          1          4
colSums(is.na(group3B)) # missing values per item
## agent_3B_1 agent_3B_2 agent_3B_3 agent_3B_4 agent_3B_5 
##          4          4          4          4          5
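Since each item has a handful of missing values, it is worth checking how many respondents answered all five items; a quick base-R sketch (the psych functions used below default to pairwise-complete correlations):

sum(complete.cases(group3B)) # respondents with complete data on all five items
nrow(group3B)                # total number of respondents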

Descriptive statistics

desc <- psych::describe(group3B)
desc_round <- round(desc, 2)
kable(desc_round, caption = "Descriptive statistics for the items of group 3B")
Descriptive statistics for the items of group 3B
           vars   n mean   sd median trimmed  mad min max range  skew kurtosis   se
agent_3B_1    1 371 2.93 1.29      3    2.89 1.48   1   6     5  0.10    -0.78 0.07
agent_3B_2    2 371 2.99 1.18      3    3.00 1.48   1   6     5 -0.06    -0.69 0.06
agent_3B_3    3 371 1.58 0.87      1    1.41 0.00   1   5     4  1.48     1.52 0.05
agent_3B_4    4 371 2.10 1.11      2    1.98 1.48   1   5     4  0.68    -0.60 0.06
agent_3B_5    5 370 4.14 1.19      4    4.20 1.48   1   6     5 -0.51     0.13 0.06
# Correlation plot
cor_mat <- cor(group3B, use = "pairwise.complete.obs")
cor.plot(cor_mat, main = "Correlation plot – group 3B")

# Bartlett test and KMO
bart <- cortest.bartlett(group3B, n = nrow(group3B))
## R was not square, finding R from data
bart
## $chisq
## [1] 358.672
## 
## $p.value
## [1] 0.00000000000000000000000000000000000000000000000000000000000000000000005748671
## 
## $df
## [1] 10
kmo <- KMO(group3B)
kmo
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = group3B)
## Overall MSA =  0.76
## MSA for each item = 
## agent_3B_1 agent_3B_2 agent_3B_3 agent_3B_4 agent_3B_5 
##       0.76       0.76       0.77       0.74       0.81
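In addition to Bartlett's test and the KMO, the determinant of the correlation matrix is sometimes inspected as a multicollinearity check; a common textbook rule of thumb asks for values clearly above 0.00001. A quick sketch using the matrix computed above:

det(cor_mat) # should clearly exceed ~0.00001 (rule of thumb against multicollinearity)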

Parallel analysis

fa_parallel1 <- fa.parallel(
  group3B, fm = "ml", fa = "pc",
  n.iter = 2000, SMC = FALSE, sim = TRUE, quant = 0.95, plot = TRUE
)
## Parallel analysis suggests that the number of factors =  NA  and the number of components =  1
abline(h = 1)

fa_parallel1
## Call: fa.parallel(x = group3B, fm = "ml", fa = "pc", n.iter = 2000, 
##     SMC = FALSE, sim = TRUE, quant = 0.95, plot = TRUE)
## Parallel analysis suggests that the number of factors =  NA  and the number of components =  1 
## 
##  Eigen Values of 
## 
##  eigen values of factors
## [1]  1.79  0.12  0.03 -0.06 -0.09
## 
##  eigen values of simulated factors
## [1] NA
## 
##  eigen values of components 
## [1] 2.36 0.96 0.71 0.51 0.47
## 
##  eigen values of simulated components
## [1] 1.15 1.06 1.00 0.94 0.86
fa_parallel2 <- fa.parallel(
  group3B, fm = "ml", fa = "fa",
  n.iter = 2000, SMC = TRUE, sim = FALSE, quant = 0.95, plot = TRUE
)

## Parallel analysis suggests that the number of factors =  1  and the number of components =  NA
fa_parallel2
## Call: fa.parallel(x = group3B, fm = "ml", fa = "fa", n.iter = 2000, 
##     SMC = TRUE, sim = FALSE, quant = 0.95, plot = TRUE)
## Parallel analysis suggests that the number of factors =  1  and the number of components =  NA 
## 
##  Eigen Values of 
## 
##  eigen values of factors
## [1]  1.68  0.02  0.01 -0.18 -0.19
## 
##  eigen values of simulated factors
## [1] NA
## 
##  eigen values of components 
## [1] 2.36 0.96 0.71 0.51 0.47
## 
##  eigen values of simulated components
## [1] NA
fa_parallel1$pc.values
## [1] 2.3567459 0.9555301 0.7129496 0.5064141 0.4683604
which(fa_parallel1$pc.values > 1)
## [1] 1

Summary of the criteria for the number of factors

pa1   <- 1 # parallel analysis (PC)
pa2   <- 2 # parallel analysis (FA)
ew    <- 1 # eigenvalue > 1
scree <- 2 # scree plot
map   <- 1 # MAP criterion (e.g. from VSS)
kriterien <- data.frame(
  Criterion = c("Parallel analysis (PC)", "Parallel analysis (FA)",
                "Eigenvalue > 1", "Scree plot", "MAP"),
  Recommended_factors = c(pa1, pa2, ew, scree, map)
)
kable(kriterien, caption = "Overview: number of factors according to different criteria")
Overview: number of factors according to different criteria

Criterion                Recommended_factors
Parallel analysis (PC)                     1
Parallel analysis (FA)                     2
Eigenvalue > 1                             1
Scree plot                                 2
MAP                                        1
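The MAP value in the table was entered by hand; as a cross-check it could be computed directly, assuming it comes from psych::VSS as the comment above suggests (a minimal sketch restricted to the 1- and 2-factor solutions compared here; output not shown):

vss_res <- psych::VSS(group3B, n = 2, fm = "ml", plot = FALSE) # prints VSS and MAP summary
which.min(vss_res$map)                                         # number of factors at the MAP minimum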

Helper function for matching item names to item texts

matchItems <- function(efa.result,
                       codebook,
                       shortitem = "Namen",
                       longitem  = "Items",
                       digits = 3,
                       cut = NULL) {
  fs <- psych::fa.sort(efa.result)
  
  # loadings as a data frame
  loadings <- as.data.frame(unclass(fs$loadings))
  colnames(loadings) <- attr(fs$loadings, "dimnames")[[2]]
  
  # codebook columns as vectors
  short_vec <- codebook[[shortitem]]
  long_vec  <- codebook[[longitem]]
  
  # match item names to the rows of the loadings matrix
  matches <- match(rownames(loadings), short_vec)
  
  if (any(is.na(matches))) {
    warning("Some loading rows could not be found in the codebook.")
  }
  
  Item <- long_vec[matches]
  resultEFA <- data.frame(Item, loadings, check.names = FALSE)
  
  # round numeric columns
  is.num <- sapply(resultEFA, is.numeric)
  resultEFA[is.num] <- lapply(resultEFA[is.num], round, digits)
  
  # blank out loadings at or below the cut-off
  if (!is.null(cut)) {
    num_mat <- as.matrix(resultEFA[is.num])
    num_mat[abs(num_mat) <= cut] <- NA
    resultEFA[is.num] <- as.data.frame(num_mat)
  }
  
  return(resultEFA)
}

Factor analysis: 1-factor solution

efa1.1 <- fa(group3B, nfactors = 1, fm = "pa", rotate = "promax")
efa1.1
## Factor Analysis using method =  pa
## Call: fa(r = group3B, nfactors = 1, rotate = "promax", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##              PA1    h2   u2 com
## agent_3B_1  0.66 0.434 0.57   1
## agent_3B_2  0.66 0.433 0.57   1
## agent_3B_3  0.58 0.339 0.66   1
## agent_3B_4  0.74 0.549 0.45   1
## agent_3B_5 -0.20 0.038 0.96   1
## 
##                 PA1
## SS loadings    1.79
## Proportion Var 0.36
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  10  with the objective function =  0.97 with Chi Square =  358.67
## df of  the model are 5  and the objective function was  0.04 
## 
## The root mean square of the residuals (RMSR) is  0.04 
## The df corrected root mean square of the residuals is  0.05 
## 
## The harmonic n.obs is  371 with the empirical chi square  10.11  with prob <  0.072 
## The total n.obs was  375  with Likelihood Chi Square =  14.07  with prob <  0.015 
## 
## Tucker Lewis Index of factoring reliability =  0.948
## RMSEA index =  0.069  and the 90 % confidence intervals are  0.028 0.114
## BIC =  -15.57
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                    PA1
## Correlation of (regression) scores with factors   0.88
## Multiple R square of scores with factors          0.77
## Minimum correlation of possible factor scores     0.53

Attaching the item texts from the codebook

efa1.1_items <- matchItems(
  efa1.1,
  codebook,
  shortitem = "Itemname",
  longitem = "Itemtext",
  digits = 3,
  cut = NULL
)
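The matched table efa1.1_items is stored but never printed; it could be rendered like the descriptives, for example (a sketch; the same pattern applies to efa1.2_items below):

kable(efa1.1_items, caption = "1-factor solution (PA) with item texts, group 3B")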

Testing the 2-factor solution

efa1.2 <- fa(group3B, nfactors = 2, fm = "pa", rotate = "promax")
## maximum iteration exceeded
efa1.2
## Factor Analysis using method =  pa
## Call: fa(r = group3B, nfactors = 2, rotate = "promax", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##              PA1   PA2    h2   u2 com
## agent_3B_1 -0.12  0.91 0.678 0.32 1.0
## agent_3B_2  0.28  0.40 0.405 0.59 1.8
## agent_3B_3  0.64 -0.02 0.386 0.61 1.0
## agent_3B_4  0.78  0.02 0.631 0.37 1.0
## agent_3B_5 -0.05 -0.15 0.039 0.96 1.2
## 
##                        PA1  PA2
## SS loadings           1.12 1.02
## Proportion Var        0.22 0.20
## Cumulative Var        0.22 0.43
## Proportion Explained  0.52 0.48
## Cumulative Proportion 0.52 1.00
## 
##  With factor correlations of 
##      PA1  PA2
## PA1 1.00 0.75
## PA2 0.75 1.00
## 
## Mean item complexity =  1.2
## Test of the hypothesis that 2 factors are sufficient.
## 
## df null model =  10  with the objective function =  0.97 with Chi Square =  358.67
## df of  the model are 1  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  0.01 
## The df corrected root mean square of the residuals is  0.04 
## 
## The harmonic n.obs is  371 with the empirical chi square  1.3  with prob <  0.25 
## The total n.obs was  375  with Likelihood Chi Square =  1.31  with prob <  0.25 
## 
## Tucker Lewis Index of factoring reliability =  0.991
## RMSEA index =  0.029  and the 90 % confidence intervals are  0 0.144
## BIC =  -4.62
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    PA1  PA2
## Correlation of (regression) scores with factors   0.87 0.88
## Multiple R square of scores with factors          0.76 0.77
## Minimum correlation of possible factor scores     0.51 0.55
efa1.2_items <- matchItems(
  efa1.2,
  codebook,
  shortitem = "Itemname",
  longitem = "Itemtext",
  digits = 3,
  cut = 0.30
)
efa1.2$Phi
##           PA1       PA2
## PA1 1.0000000 0.7548069
## PA2 0.7548069 1.0000000
round(efa1.2$Phi, 2)
##      PA1  PA2
## PA1 1.00 0.75
## PA2 0.75 1.00
# Table for the bar plot
descr_table <- as.data.frame(desc_round)
descr_table$Item <- rownames(descr_table)
names(descr_table)[names(descr_table) == "mean"] <- "Schwierigkeit" # "Schwierigkeit" = item difficulty (mean response)

# Items for the histograms
my_items <- group3B

# Group label
gruppe <- "3B"

# Bar chart of the item difficulties
ggplot(descr_table, aes(x = Item, y = Schwierigkeit)) +
  geom_col() +
  coord_cartesian(ylim = c(1, 6)) +  # Likert scale 1–6
  labs(
    x = "Item",
    y = "Mean response (item difficulty)",
    title = paste0("Item difficulties, group ", gruppe)
  ) +
  theme_minimal()
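If a difficulty index on the usual 0–1 scale is preferred, the item means can be rescaled to the response range (a sketch; assumes the 1–6 Likert scale used above):

descr_table$P <- (descr_table$Schwierigkeit - 1) / (6 - 1) # rescaled difficulty in [0, 1]
round(descr_table$P, 2)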

Histograms

my_items_long <- my_items %>%
  pivot_longer(cols = everything(), names_to = "Item", values_to = "Antwort")

ggplot(my_items_long, aes(x = Antwort)) +
  geom_histogram(binwidth = 1, boundary = 0.5, closed = "right") +
  facet_wrap(~ Item) +
  scale_x_continuous(breaks = 1:6) +
  labs(
    x = "Response category",
    y = "Frequency",
    title = paste0("Response distributions for the items of group ", gruppe)
  ) +
  theme_minimal()
## Warning: Removed 21 rows containing non-finite outside the scale range
## (`stat_bin()`).