Itemanalyse

# Itemanalyse ----

library(psych)

setwd("~/Documents/R Sachen")

# Daten einlesen
dat <- read.csv("Perfektionismus.csv")

# numerische Umwandlung
dat$age <- as.numeric(dat$age)

# Umkodierung 

dat$P105_08_r <- 6 - dat$P105_08

# Item den Facetten/Subskalen zuordnen

dat_EN <- data.frame(dat[c("P105_01", "P105_02", "P105_03", "P105_04", "P105_05", "P105_06", "P105_07", "P105_08_r","P105_09", "P105_10", "P105_11", "P105_12", "P105_13", "P105_14", "P105_15", "P105_16", "P105_17", "P105_18", "P105_19", "P105_20")])
dat_Anspruch <- data.frame(dat[c("P105_01", "P105_02", "P105_03", "P105_04", "P105_05")])
dat_Eltern <- data.frame(dat[c("P105_06", "P105_07", "P105_08_r", "P105_09", "P105_10")])
dat_Sorge <- data.frame(dat[c("P105_11", "P105_12", "P105_13", "P105_14", "P105_15")])
dat_Zweifel <- data.frame(dat[c("P105_16", "P105_17", "P105_18", "P105_19", "P105_20")])

# Itemkennwerte
# inkl. M, SD, Min, Max, Range, Schiefe, Kurtosis
# damit auch Schwierigkeit, Varianz

describe(dat_EN)
# 5.4. Homogenität der Items (bestimmt über Interkorrelationen der Items)

matrix <- cor(dat_Anspruch, use = "pairwise.complete.obs")
round(matrix, 2)
##         P105_01 P105_02 P105_03 P105_04 P105_05
## P105_01    1.00    0.61    0.23    0.37    0.34
## P105_02    0.61    1.00    0.35    0.49    0.39
## P105_03    0.23    0.35    1.00    0.05    0.07
## P105_04    0.37    0.49    0.05    1.00    0.46
## P105_05    0.34    0.39    0.07    0.46    1.00
matrix <- cor(dat_Anspruch, use = "pairwise.complete.obs")
round(matrix, 2)
##         P105_01 P105_02 P105_03 P105_04 P105_05
## P105_01    1.00    0.61    0.23    0.37    0.34
## P105_02    0.61    1.00    0.35    0.49    0.39
## P105_03    0.23    0.35    1.00    0.05    0.07
## P105_04    0.37    0.49    0.05    1.00    0.46
## P105_05    0.34    0.39    0.07    0.46    1.00
matrix <- cor(dat_Eltern, use = "pairwise.complete.obs")
round(matrix, 2)
##           P105_06 P105_07 P105_08_r P105_09 P105_10
## P105_06      1.00    0.67      0.07    0.58    0.40
## P105_07      0.67    1.00      0.07    0.55    0.60
## P105_08_r    0.07    0.07      1.00    0.14    0.22
## P105_09      0.58    0.55      0.14    1.00    0.55
## P105_10      0.40    0.60      0.22    0.55    1.00
matrix <- cor(dat_Eltern, use = "pairwise.complete.obs")
round(matrix, 2)
##           P105_06 P105_07 P105_08_r P105_09 P105_10
## P105_06      1.00    0.67      0.07    0.58    0.40
## P105_07      0.67    1.00      0.07    0.55    0.60
## P105_08_r    0.07    0.07      1.00    0.14    0.22
## P105_09      0.58    0.55      0.14    1.00    0.55
## P105_10      0.40    0.60      0.22    0.55    1.00
# P105_08 - "Ich werde den Leistungsansprüchen meiner Erziehungsberechtigten gerecht" rausschmeißen, da nicht homogen

matrix <- cor(dat_Sorge, use = "pairwise.complete.obs")
round(matrix, 2)
##         P105_11 P105_12 P105_13 P105_14 P105_15
## P105_11    1.00    0.46    0.57    0.53    0.18
## P105_12    0.46    1.00    0.69    0.36    0.39
## P105_13    0.57    0.69    1.00    0.56    0.46
## P105_14    0.53    0.36    0.56    1.00    0.38
## P105_15    0.18    0.39    0.46    0.38    1.00
matrix <- cor(dat_Sorge, use = "pairwise.complete.obs")
round(matrix, 2)
##         P105_11 P105_12 P105_13 P105_14 P105_15
## P105_11    1.00    0.46    0.57    0.53    0.18
## P105_12    0.46    1.00    0.69    0.36    0.39
## P105_13    0.57    0.69    1.00    0.56    0.46
## P105_14    0.53    0.36    0.56    1.00    0.38
## P105_15    0.18    0.39    0.46    0.38    1.00
matrix <- cor(dat_Zweifel, use = "pairwise.complete.obs")
round(matrix, 2)
##         P105_16 P105_17 P105_18 P105_19 P105_20
## P105_16    1.00    0.53    0.25    0.27    0.39
## P105_17    0.53    1.00    0.33    0.32    0.58
## P105_18    0.25    0.33    1.00    0.26    0.40
## P105_19    0.27    0.32    0.26    1.00    0.35
## P105_20    0.39    0.58    0.40    0.35    1.00
matrix <- cor(dat_Zweifel, use = "pairwise.complete.obs")
round(matrix, 2)
##         P105_16 P105_17 P105_18 P105_19 P105_20
## P105_16    1.00    0.53    0.25    0.27    0.39
## P105_17    0.53    1.00    0.33    0.32    0.58
## P105_18    0.25    0.33    1.00    0.26    0.40
## P105_19    0.27    0.32    0.26    1.00    0.35
## P105_20    0.39    0.58    0.40    0.35    1.00
# Reliabilitätsanalyse der Subskalen inkl. Trennschärfe 

psych::alpha(dat_Anspruch) # P105_03 rausschmeißen, da raw_alpha mit 0.72 und ohne 0.76
## 
## Reliability analysis   
## Call: psych::alpha(x = dat_Anspruch)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.72      0.72    0.71      0.34 2.5 0.051  3.2 0.82     0.36
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.60  0.72  0.81
## Duhachek  0.62  0.72  0.82
## 
##  Reliability if an item is dropped:
##         raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## P105_01      0.63      0.63    0.62      0.30 1.7    0.069 0.0382  0.37
## P105_02      0.57      0.57    0.54      0.25 1.3    0.081 0.0278  0.28
## P105_03      0.76      0.76    0.72      0.44 3.2    0.045 0.0097  0.42
## P105_04      0.67      0.66    0.64      0.33 2.0    0.062 0.0320  0.34
## P105_05      0.68      0.68    0.67      0.35 2.2    0.060 0.0387  0.36
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean  sd
## P105_01 74  0.75  0.74  0.67   0.56  3.0 1.2
## P105_02 74  0.83  0.83  0.82   0.70  3.6 1.2
## P105_03 74  0.49  0.50  0.29   0.23  2.9 1.2
## P105_04 74  0.70  0.69  0.59   0.48  2.7 1.3
## P105_05 74  0.64  0.66  0.52   0.44  3.9 1.1
## 
## Non missing response frequency for each item
##            1    2    3    4    5 miss
## P105_01 0.11 0.30 0.14 0.36 0.09    0
## P105_02 0.08 0.14 0.14 0.43 0.22    0
## P105_03 0.15 0.23 0.23 0.34 0.05    0
## P105_04 0.23 0.27 0.14 0.30 0.07    0
## P105_05 0.07 0.05 0.07 0.55 0.26    0
psych::alpha(dat_Eltern)
## 
## Reliability analysis   
## Call: psych::alpha(x = dat_Eltern)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.75      0.76    0.77      0.38 3.1 0.045  2.2 0.93     0.47
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.65  0.75  0.83
## Duhachek  0.67  0.75  0.84
## 
##  Reliability if an item is dropped:
##           raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## P105_06        0.68      0.69    0.68      0.36 2.2    0.061 0.0575  0.38
## P105_07        0.65      0.66    0.64      0.33 1.9    0.066 0.0464  0.31
## P105_08_r      0.83      0.84    0.81      0.56 5.1    0.031 0.0084  0.57
## P105_09        0.66      0.67    0.69      0.34 2.0    0.065 0.0691  0.31
## P105_10        0.68      0.68    0.68      0.35 2.1    0.060 0.0802  0.35
## 
##  Item statistics 
##            n raw.r std.r r.cor r.drop mean  sd
## P105_06   74  0.77  0.76  0.72   0.60  2.0 1.3
## P105_07   74  0.81  0.81  0.79   0.67  2.1 1.3
## P105_08_r 74  0.42  0.42  0.18   0.15  2.7 1.3
## P105_09   74  0.80  0.79  0.73   0.64  2.4 1.4
## P105_10   74  0.76  0.78  0.71   0.62  1.6 1.1
## 
## Non missing response frequency for each item
##              1    2    3    4    5 miss
## P105_06   0.50 0.24 0.05 0.12 0.08    0
## P105_07   0.45 0.30 0.04 0.15 0.07    0
## P105_08_r 0.19 0.36 0.20 0.08 0.16    0
## P105_09   0.39 0.22 0.11 0.20 0.08    0
## P105_10   0.72 0.08 0.08 0.09 0.03    0
psych::alpha(dat_Sorge) # P105_15 rausschmeißen, da raw-alpha mit 0.8 und ohne 0.82
## 
## Reliability analysis   
## Call: psych::alpha(x = dat_Sorge)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean sd median_r
##        0.8      0.81     0.8      0.46 4.2 0.037  2.9  1     0.46
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.72   0.8  0.87
## Duhachek  0.73   0.8  0.88
## 
##  Reliability if an item is dropped:
##         raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## P105_11      0.78      0.78    0.76      0.47 3.6    0.043 0.016  0.42
## P105_12      0.76      0.76    0.74      0.45 3.2    0.047 0.021  0.49
## P105_13      0.71      0.72    0.69      0.39 2.5    0.056 0.014  0.39
## P105_14      0.77      0.77    0.75      0.46 3.4    0.046 0.029  0.46
## P105_15      0.82      0.82    0.79      0.53 4.5    0.035 0.012  0.54
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean  sd
## P105_11 74  0.73  0.73  0.64   0.55  2.6 1.4
## P105_12 74  0.77  0.77  0.71   0.62  2.9 1.4
## P105_13 74  0.86  0.87  0.86   0.77  2.9 1.3
## P105_14 74  0.75  0.75  0.67   0.60  3.3 1.4
## P105_15 74  0.65  0.64  0.50   0.44  3.0 1.5
## 
## Non missing response frequency for each item
##            1    2    3    4    5 miss
## P105_11 0.32 0.20 0.11 0.26 0.11    0
## P105_12 0.20 0.27 0.11 0.31 0.11    0
## P105_13 0.20 0.20 0.19 0.30 0.11    0
## P105_14 0.11 0.24 0.12 0.30 0.23    0
## P105_15 0.23 0.16 0.19 0.18 0.24    0
psych::alpha(dat_Zweifel)
## 
## Reliability analysis   
## Call: psych::alpha(x = dat_Zweifel)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.75      0.74    0.72      0.37 2.9 0.045  2.8 0.94     0.34
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.64  0.75  0.83
## Duhachek  0.66  0.75  0.84
## 
##  Reliability if an item is dropped:
##         raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## P105_16      0.71      0.70    0.66      0.37 2.4    0.055 0.0123  0.34
## P105_17      0.65      0.65    0.59      0.32 1.9    0.066 0.0046  0.31
## P105_18      0.74      0.73    0.69      0.41 2.7    0.048 0.0154  0.37
## P105_19      0.74      0.74    0.70      0.41 2.8    0.049 0.0154  0.39
## P105_20      0.67      0.66    0.61      0.33 1.9    0.062 0.0114  0.29
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean  sd
## P105_16 74  0.71  0.69  0.59   0.50  3.1 1.4
## P105_17 74  0.81  0.79  0.75   0.64  2.7 1.5
## P105_18 74  0.63  0.64  0.48   0.41  3.1 1.3
## P105_19 74  0.60  0.62  0.46   0.40  2.6 1.2
## P105_20 74  0.77  0.77  0.71   0.61  2.7 1.3
## 
## Non missing response frequency for each item
##            1    2    3    4    5 miss
## P105_16 0.16 0.23 0.09 0.34 0.18    0
## P105_17 0.31 0.20 0.15 0.18 0.16    0
## P105_18 0.12 0.24 0.22 0.24 0.18    0
## P105_19 0.18 0.42 0.12 0.24 0.04    0
## P105_20 0.22 0.28 0.16 0.24 0.09    0

Faktorenanalyse

#### Faktorenanalyse ----


# Working Directory setzen und Daten einlesen
setwd("~/Documents/R Sachen")

dat <- read.csv("Perfektionismus.csv") 

# notwendige Packages laden (und ggf. installieren)
library(psych)
library(GPArotation) #install.packages("GPArotation")


#### 1. Auswahl der relevanten Variablen für die Faktorenanalyse ####
# (nach Itemrevision/-selektion muss die Auswahl ggf. angepasst werden!) 

dat_pca <- data.frame(dat[c("P105_01", "P105_02", "P105_04", "P105_05", "P105_06", "P105_07",
                            "P105_09", "P105_10", "P105_11", "P105_12", "P105_13", "P105_14", "P105_16", "P105_17",
                            "P105_18", "P105_19", "P105_20")])
#ungeeignete Items wurden entfernt


#### 2. Voraussetzungen prüfen ####

# Korrelationen der Variablen in einer Heat Map
cor.plot(cor(dat_pca, use = "pairwise.complete.obs"))

#hier ist es irrelevant ob die Korrelationen pos. oder neg. sind, es geht um ihre Höhe 

# Bartlett Test, sollte in jedem Fall signifikant sein (p < 0.05)
cortest.bartlett(dat_pca, n = nrow(dat_pca))
## R was not square, finding R from data
## $chisq
## [1] 640.3596
## 
## $p.value
## [1] 2.231687e-66
## 
## $df
## [1] 136
# KMO (Kaiser-Meyer-Olkin-Kriterium), sollte min. .50 sein. Je höher, desto besser.
kmo <- KMO(dat_pca)
kmo #overall MSA (Measure of Sampling Adequacy)-Wert entspricht dem KMO
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = dat_pca)
## Overall MSA =  0.82
## MSA for each item = 
## P105_01 P105_02 P105_04 P105_05 P105_06 P105_07 P105_09 P105_10 P105_11 P105_12 
##    0.72    0.55    0.80    0.87    0.77    0.74    0.92    0.73    0.84    0.86 
## P105_13 P105_14 P105_16 P105_17 P105_18 P105_19 P105_20 
##    0.89    0.88    0.85    0.82    0.76    0.87    0.86
# ist irgendeiner der MSAi Werte kleiner als .50?
any(kmo$MSAi < 0.50)
## [1] FALSE
#### 3. Faktorenextraktion ####

# Wir starten mit einem Faktorenmodell, dass so viele Hauptkomponenten wie Items umfasst 
# Rotation spielt an dieser Stelle noch KEINE Rolle
pca_full <- principal(dat_pca, nfactors = 17, rotate = "none") #16, weil wir 16 Items in die Analyse geben
pca_full # inspiziere Kommunalität (h^2) und Eigenwerteverlauf (SS loadings)
## Principal Components Analysis
## Call: principal(r = dat_pca, nfactors = 17, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
##          PC1   PC2   PC3   PC4   PC5   PC6   PC7   PC8   PC9  PC10  PC11  PC12
## P105_01 0.34  0.65  0.26 -0.01  0.28 -0.42  0.07 -0.11  0.04  0.18 -0.12  0.16
## P105_02 0.31  0.79  0.33 -0.05  0.09  0.13 -0.13  0.18  0.00  0.04 -0.06 -0.01
## P105_04 0.55  0.47  0.03  0.25 -0.37  0.00 -0.16 -0.02  0.44 -0.02  0.08 -0.15
## P105_05 0.67  0.29 -0.12  0.34  0.13 -0.02 -0.27  0.07 -0.30 -0.26 -0.01 -0.10
## P105_06 0.51 -0.31  0.58 -0.31  0.00  0.26 -0.05 -0.11  0.12 -0.02 -0.05  0.18
## P105_07 0.60 -0.26  0.58  0.20  0.08  0.20 -0.12 -0.06  0.09 -0.14  0.07  0.03
## P105_09 0.68 -0.39  0.28 -0.18 -0.04 -0.10  0.02  0.06 -0.05  0.14 -0.32 -0.35
## P105_10 0.51 -0.39  0.43  0.30  0.12 -0.38  0.15  0.10 -0.11  0.05  0.24 -0.06
## P105_11 0.72 -0.27 -0.25 -0.16 -0.15  0.04 -0.35  0.23 -0.05  0.06  0.07  0.07
## P105_12 0.74  0.05  0.04 -0.11 -0.32 -0.12  0.36  0.09 -0.06 -0.29  0.07  0.14
## P105_13 0.79  0.08 -0.23 -0.03 -0.30 -0.11  0.06 -0.10 -0.11 -0.14 -0.27  0.11
## P105_14 0.72 -0.06 -0.22  0.02  0.22  0.10 -0.23 -0.46 -0.14  0.05  0.05  0.03
## P105_16 0.52 -0.20 -0.32  0.30  0.47  0.22  0.18  0.30  0.21 -0.03 -0.17  0.11
## P105_17 0.70 -0.22 -0.38  0.12  0.05 -0.09  0.20 -0.26  0.26  0.06  0.03 -0.07
## P105_18 0.51  0.25 -0.17 -0.62  0.36 -0.02  0.09  0.04  0.08 -0.21  0.19 -0.17
## P105_19 0.56  0.36  0.01  0.10 -0.12  0.46  0.40 -0.03 -0.24  0.26  0.10 -0.08
## P105_20 0.78 -0.09 -0.23 -0.15 -0.09 -0.13 -0.15  0.19 -0.01  0.35  0.12  0.13
##          PC13  PC14  PC15  PC16  PC17 h2       u2 com
## P105_01  0.08  0.20  0.02 -0.03 -0.06  1  3.3e-16 4.2
## P105_02 -0.08 -0.17 -0.02 -0.08  0.21  1  5.6e-16 2.4
## P105_04 -0.06  0.06  0.00  0.14 -0.05  1  0.0e+00 5.0
## P105_05  0.24 -0.04 -0.10  0.04 -0.06  1  2.2e-16 4.1
## P105_06  0.21 -0.05 -0.04  0.18  0.02  1 -8.9e-16 4.9
## P105_07 -0.08  0.01  0.07 -0.25 -0.12  1  1.1e-16 3.8
## P105_09 -0.05  0.06 -0.09 -0.01 -0.01  1  7.8e-16 3.7
## P105_10 -0.05 -0.07  0.12  0.13  0.08  1 -6.7e-16 6.3
## P105_11  0.07  0.26  0.13 -0.05  0.12  1  2.2e-15 3.3
## P105_12 -0.10  0.10 -0.21 -0.04  0.04  1  1.1e-16 3.0
## P105_13 -0.05 -0.15  0.25  0.02 -0.04  1  1.1e-16 2.5
## P105_14 -0.26  0.04 -0.08  0.07  0.06  1 -1.1e-16 3.1
## P105_16 -0.10  0.03 -0.01  0.08 -0.01  1  3.3e-16 6.4
## P105_17  0.24 -0.09 -0.03 -0.15  0.12  1  5.6e-16 3.5
## P105_18 -0.02 -0.02  0.09  0.01 -0.07  1  5.6e-16 4.2
## P105_19  0.06  0.08  0.07  0.02 -0.04  1  2.2e-16 5.2
## P105_20 -0.03 -0.20 -0.12 -0.03 -0.14  1  1.1e-15 2.5
## 
##                        PC1  PC2  PC3  PC4  PC5  PC6  PC7  PC8  PC9 PC10 PC11
## SS loadings           6.48 2.20 1.61 1.00 0.90 0.78 0.73 0.56 0.53 0.49 0.36
## Proportion Var        0.38 0.13 0.09 0.06 0.05 0.05 0.04 0.03 0.03 0.03 0.02
## Cumulative Var        0.38 0.51 0.61 0.66 0.72 0.76 0.81 0.84 0.87 0.90 0.92
## Proportion Explained  0.38 0.13 0.09 0.06 0.05 0.05 0.04 0.03 0.03 0.03 0.02
## Cumulative Proportion 0.38 0.51 0.61 0.66 0.72 0.76 0.81 0.84 0.87 0.90 0.92
##                       PC12 PC13 PC14 PC15 PC16 PC17
## SS loadings           0.32 0.29 0.24 0.19 0.18 0.14
## Proportion Var        0.02 0.02 0.01 0.01 0.01 0.01
## Cumulative Var        0.94 0.96 0.97 0.98 0.99 1.00
## Proportion Explained  0.02 0.02 0.01 0.01 0.01 0.01
## Cumulative Proportion 0.94 0.96 0.97 0.98 0.99 1.00
## 
## Mean item complexity =  4
## Test of the hypothesis that 17 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0 
##  with the empirical chi square  0  with prob <  NA 
## 
## Fit based upon off diagonal values = 1
#Wie viele Faktoren müssten wir also laut Kaiser-Kriterium extrahieren?

# Inspiziere Scree-Plot
plot(pca_full$values, type = "b")

# Parallelanalyse
fa1 <- fa.parallel(dat_pca, fa = "pc") #"pc" steht für Hauptkomponentenanalyse

## Parallel analysis suggests that the number of factors =  NA  and the number of components =  2
print(fa1)
## Call: fa.parallel(x = dat_pca, fa = "pc")
## Parallel analysis suggests that the number of factors =  NA  and the number of components =  2 
## 
##  Eigen Values of 
## 
##  eigen values of factors
##  [1]  5.89  1.41  0.93  0.33  0.24  0.13  0.02 -0.01 -0.07 -0.15 -0.19 -0.27
## [13] -0.33 -0.37 -0.47 -0.53 -0.67
## 
##  eigen values of simulated factors
## [1] NA
## 
##  eigen values of components 
##  [1] 6.48 2.20 1.61 1.00 0.90 0.78 0.73 0.56 0.53 0.49 0.36 0.32 0.29 0.24 0.19
## [16] 0.18 0.14
## 
##  eigen values of simulated components
##  [1] 1.94 1.75 1.60 1.42 1.31 1.22 1.10 1.01 0.91 0.85 0.79 0.71 0.63 0.55 0.48
## [16] 0.41 0.34
#Empfehlung aus Bühner, 2021:
fa2 <- fa.parallel(dat_pca, fm = "ml", fa = "pc", n.iter = 2000) #besser wäre n.iter = 10.000

## Parallel analysis suggests that the number of factors =  NA  and the number of components =  2
print(fa2)
## Call: fa.parallel(x = dat_pca, fm = "ml", fa = "pc", n.iter = 2000)
## Parallel analysis suggests that the number of factors =  NA  and the number of components =  2 
## 
##  Eigen Values of 
## 
##  eigen values of factors
##  [1]  5.90  1.40  0.92  0.33  0.25  0.14  0.03 -0.01 -0.06 -0.15 -0.19 -0.27
## [13] -0.33 -0.37 -0.48 -0.56 -0.69
## 
##  eigen values of simulated factors
## [1] NA
## 
##  eigen values of components 
##  [1] 6.48 2.20 1.61 1.00 0.90 0.78 0.73 0.56 0.53 0.49 0.36 0.32 0.29 0.24 0.19
## [16] 0.18 0.14
## 
##  eigen values of simulated components
##  [1] 1.95 1.73 1.57 1.44 1.32 1.21 1.11 1.02 0.93 0.85 0.77 0.70 0.62 0.55 0.48
## [16] 0.41 0.33
# MAP Test - funktioniert nicht!!! 
map <- VSS(dat_pca, n = 17)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.

## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.

## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.

## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully

## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully

map
## 
## Very Simple Structure
## Call: vss(x = x, n = n, rotate = rotate, diagonal = diagonal, fm = fm, 
##     n.obs = n.obs, plot = plot, title = title, use = use, cor = cor)
## VSS complexity 1 achieves a maximimum of 0.78  with  1  factors
## VSS complexity 2 achieves a maximimum of 0.87  with  3  factors
## 
## The Velicer MAP achieves a minimum of 0.03  with  3  factors 
## BIC achieves a minimum of  -257.01  with  3  factors
## Sample Size adjusted BIC achieves a minimum of  -14.78  with  7  factors
## 
## Statistics by number of factors 
##    vss1 vss2   map dof   chisq    prob sqresid  fit RMSEA  BIC SABIC complex
## 1  0.78 0.00 0.041 119 2.8e+02 2.4e-15    12.1 0.78 0.136 -229 145.6     1.0
## 2  0.65 0.86 0.040 103 2.0e+02 3.6e-08     7.5 0.86 0.112 -243  81.1     1.3
## 3  0.60 0.87 0.034  88 1.2e+02 1.0e-02     4.9 0.91 0.071 -257  20.3     1.5
## 4  0.57 0.84 0.040  74 9.2e+01 7.5e-02     4.1 0.92 0.056 -226   6.9     1.7
## 5  0.46 0.76 0.046  61 7.2e+01 1.5e-01     3.4 0.94 0.048 -190   2.0     2.0
## 6  0.44 0.75 0.053  49 5.5e+01 2.5e-01     2.7 0.95 0.039 -156  -1.3     2.0
## 7  0.37 0.65 0.063  38 2.9e+01 8.5e-01     2.3 0.96 0.000 -135 -14.8     2.2
## 8  0.37 0.61 0.074  28 2.0e+01 8.7e-01     1.9 0.97 0.000 -101 -12.5     2.3
## 9  0.34 0.58 0.091  19 9.7e+00 9.6e-01     1.6 0.97 0.000  -72 -12.2     2.5
## 10 0.33 0.57 0.113  11 3.4e+00 9.8e-01     1.4 0.97 0.000  -44  -9.3     2.6
## 11 0.34 0.56 0.136   4 1.1e+00 9.0e-01     1.0 0.98 0.000  -16  -3.6     2.6
## 12 0.35 0.58 0.169  -2 1.5e-03      NA     1.2 0.98    NA   NA    NA     2.5
## 13 0.33 0.56 0.220  -7 6.8e-08      NA     1.1 0.98    NA   NA    NA     2.5
## 14 0.33 0.55 0.302 -11 1.5e-07      NA     1.0 0.98    NA   NA    NA     2.5
## 15 0.33 0.54 0.480 -14 1.2e-08      NA     1.0 0.98    NA   NA    NA     2.5
## 16 0.33 0.54 1.000 -16 0.0e+00      NA     1.0 0.98    NA   NA    NA     2.5
## 17 0.33 0.54    NA -17 0.0e+00      NA     1.0 0.98    NA   NA    NA     2.5
##     eChisq    SRMR eCRMS eBIC
## 1  3.2e+02 1.3e-01 0.136 -189
## 2  1.5e+02 8.6e-02 0.099 -294
## 3  5.7e+01 5.3e-02 0.066 -322
## 4  3.9e+01 4.4e-02 0.060 -279
## 5  2.6e+01 3.6e-02 0.054 -236
## 6  1.6e+01 2.9e-02 0.048 -194
## 7  8.1e+00 2.0e-02 0.038 -155
## 8  4.4e+00 1.5e-02 0.032 -116
## 9  2.0e+00 9.9e-03 0.026  -80
## 10 5.8e-01 5.4e-03 0.019  -47
## 11 1.3e-01 2.5e-03 0.015  -17
## 12 2.2e-04 1.0e-04    NA   NA
## 13 6.4e-09 5.6e-07    NA   NA
## 14 1.6e-08 8.8e-07    NA   NA
## 15 1.4e-09 2.7e-07    NA   NA
## 16 7.9e-17 6.3e-11    NA   NA
## 17 7.9e-17 6.3e-11    NA   NA
#n bezieht sich hier auf die Anzahl der möglichen Faktoren die man extrahieren kann
#dies entspricht hier also der Anzahl der Items, also 16
#den Plot können Sie ignorieren, wichtig ist der Output in der Console


# Nachdem man sich für eine bestimmte Anzahl an zu extrahierenden Faktoren entschieden hat, 
# wird das Modell mit dieser Anzahl an Faktoren berechnet
pca_red <- principal(dat_pca, nfactors = 2, rotate = "none")
pca_red # inspiziere Kommunalität und Varianzaufklärung (proportion und cumulative var)
## Principal Components Analysis
## Call: principal(r = dat_pca, nfactors = 2, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
##          PC1   PC2   h2   u2 com
## P105_01 0.34  0.65 0.53 0.47 1.5
## P105_02 0.31  0.79 0.72 0.28 1.3
## P105_04 0.55  0.47 0.52 0.48 2.0
## P105_05 0.67  0.29 0.53 0.47 1.4
## P105_06 0.51 -0.31 0.36 0.64 1.6
## P105_07 0.60 -0.26 0.43 0.57 1.4
## P105_09 0.68 -0.39 0.62 0.38 1.6
## P105_10 0.51 -0.39 0.42 0.58 1.9
## P105_11 0.72 -0.27 0.59 0.41 1.3
## P105_12 0.74  0.05 0.55 0.45 1.0
## P105_13 0.79  0.08 0.63 0.37 1.0
## P105_14 0.72 -0.06 0.52 0.48 1.0
## P105_16 0.52 -0.20 0.32 0.68 1.3
## P105_17 0.70 -0.22 0.55 0.45 1.2
## P105_18 0.51  0.25 0.32 0.68 1.5
## P105_19 0.56  0.36 0.44 0.56 1.7
## P105_20 0.78 -0.09 0.62 0.38 1.0
## 
##                        PC1  PC2
## SS loadings           6.48 2.20
## Proportion Var        0.38 0.13
## Cumulative Var        0.38 0.51
## Proportion Explained  0.75 0.25
## Cumulative Proportion 0.75 1.00
## 
## Mean item complexity =  1.4
## Test of the hypothesis that 2 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.1 
##  with the empirical chi square  190.05  with prob <  3.9e-07 
## 
## Fit based upon off diagonal values = 0.93
# erinnern Sie sich: Kommunalität einer Variable gibt an, welcher Anteil der Varianz 
# dieser Variable durch alle Faktoren insgesamt abgebildet werden kann


#### 4. Rotation ####

# Wir beginnen mit einer orthogonalen Rotation (Varimax)
pca_red_varimax <- principal(dat_pca, nfactors = 2, rotate = "varimax")
print(pca_red_varimax, cut = .2, sort = TRUE)# Output nach Größe sortieren, ansonsten wird es unübersichtlich
## Principal Components Analysis
## Call: principal(r = dat_pca, nfactors = 2, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##         item   RC1   RC2   h2   u2 com
## P105_09    7  0.79       0.62 0.38 1.0
## P105_11    9  0.75       0.59 0.41 1.1
## P105_17   14  0.71       0.55 0.45 1.2
## P105_20   17  0.70  0.35 0.62 0.38 1.5
## P105_07    6  0.65       0.43 0.57 1.1
## P105_10    8  0.65       0.42 0.58 1.0
## P105_14   12  0.63  0.35 0.52 0.48 1.6
## P105_13   11  0.61  0.50 0.63 0.37 1.9
## P105_06    5  0.60       0.36 0.64 1.0
## P105_12   10  0.59  0.45 0.55 0.45 1.9
## P105_16   13  0.55       0.32 0.68 1.1
## P105_02    2        0.83 0.72 0.28 1.1
## P105_01    1        0.73 0.53 0.47 1.0
## P105_04    3        0.70 0.52 0.48 1.2
## P105_19   16  0.27  0.61 0.44 0.56 1.4
## P105_05    4  0.40  0.61 0.53 0.47 1.7
## P105_18   15  0.29  0.49 0.32 0.68 1.6
## 
##                        RC1  RC2
## SS loadings           5.19 3.48
## Proportion Var        0.31 0.20
## Cumulative Var        0.31 0.51
## Proportion Explained  0.60 0.40
## Cumulative Proportion 0.60 1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 2 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.1 
##  with the empirical chi square  190.05  with prob <  3.9e-07 
## 
## Fit based upon off diagonal values = 0.93
# oblique Rotation (Promax u. GeominQ) # Items 13,05,18 rausgeschmissen
pca_red_promax <- principal(dat_pca, nfactors = 2, rotate = "promax")
print(pca_red_promax, cut = .2, sort = TRUE)
## Principal Components Analysis
## Call: principal(r = dat_pca, nfactors = 2, rotate = "promax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##         item   RC1   RC2   h2   u2 com
## P105_09    7  0.85       0.62 0.38 1.1
## P105_11    9  0.79       0.59 0.41 1.0
## P105_17   14  0.75       0.55 0.45 1.0
## P105_10    8  0.72 -0.26 0.42 0.58 1.3
## P105_20   17  0.71       0.62 0.38 1.1
## P105_07    6  0.69       0.43 0.57 1.0
## P105_06    5  0.65       0.36 0.64 1.1
## P105_14   12  0.64       0.52 0.48 1.1
## P105_13   11  0.59  0.34 0.63 0.37 1.6
## P105_16   13  0.58       0.32 0.68 1.0
## P105_12   10  0.57  0.28 0.55 0.45 1.5
## P105_02    2 -0.33  0.94 0.72 0.28 1.2
## P105_01    1 -0.20  0.80 0.53 0.47 1.1
## P105_04    3        0.68 0.52 0.48 1.0
## P105_19   16        0.56 0.44 0.56 1.2
## P105_05    4  0.34  0.51 0.53 0.47 1.7
## P105_18   15  0.23  0.42 0.32 0.68 1.5
## 
##                        RC1  RC2
## SS loadings           5.58 3.09
## Proportion Var        0.33 0.18
## Cumulative Var        0.33 0.51
## Proportion Explained  0.64 0.36
## Cumulative Proportion 0.64 1.00
## 
##  With component correlations of 
##      RC1  RC2
## RC1 1.00 0.43
## RC2 0.43 1.00
## 
## Mean item complexity =  1.2
## Test of the hypothesis that 2 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.1 
##  with the empirical chi square  190.05  with prob <  3.9e-07 
## 
## Fit based upon off diagonal values = 0.93
pca_red_geomin <- principal(dat_pca, nfactors = 2, rotate = "geominQ")
print(pca_red_geomin, cut = .2, sort = TRUE)
## Principal Components Analysis
## Call: principal(r = dat_pca, nfactors = 2, rotate = "geominQ")
## Standardized loadings (pattern matrix) based upon correlation matrix
##         item   TC1   TC2   h2   u2 com
## P105_09    7  0.80       0.62 0.38 1.1
## P105_11    9  0.77       0.59 0.41 1.0
## P105_17   14  0.74       0.55 0.45 1.0
## P105_20   17  0.73       0.62 0.38 1.1
## P105_07    6  0.67       0.43 0.57 1.0
## P105_14   12  0.66       0.52 0.48 1.2
## P105_10    8  0.66 -0.21 0.42 0.58 1.2
## P105_13   11  0.65  0.34 0.63 0.37 1.5
## P105_12   10  0.62  0.29 0.55 0.45 1.4
## P105_06    5  0.61       0.36 0.64 1.1
## P105_16   13  0.57       0.32 0.68 1.0
## P105_02    2        0.87 0.72 0.28 1.1
## P105_01    1        0.74 0.53 0.47 1.0
## P105_04    3  0.23  0.64 0.52 0.48 1.3
## P105_19   16  0.30  0.54 0.44 0.56 1.6
## P105_05    4  0.44  0.50 0.53 0.47 2.0
## P105_18   15  0.31  0.41 0.32 0.68 1.9
## 
##                        TC1  TC2
## SS loadings           5.75 2.92
## Proportion Var        0.34 0.17
## Cumulative Var        0.34 0.51
## Proportion Explained  0.66 0.34
## Cumulative Proportion 0.66 1.00
## 
##  With component correlations of 
##      TC1  TC2
## TC1 1.00 0.21
## TC2 0.21 1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 2 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.1 
##  with the empirical chi square  190.05  with prob <  3.9e-07 
## 
## Fit based upon off diagonal values = 0.93
#idealerweise sind beide Lösungen sehr ähnlich

# Würden Sie ein oder mehrere Items entfernen? Welche(s) und warum?
# Items 13,05,18 rausgeschmissen, da nicht eindimensional - uneindeutig. Item 12 behalten da inhaltlich wertvoll 

#so sieht eine Faktorenanalyse aus -> im Prinzip gleiches Vorgehen wie bei der PCA
efa_promax <- fa(dat_pca, nfactors = 3, fm = "pa", rotate = "promax")
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
print(efa_promax, cut = .2, sort = TRUE)
## Factor Analysis using method =  pa
## Call: fa(r = dat_pca, nfactors = 3, rotate = "promax", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##         item   PA1   PA2   PA3   h2    u2 com
## P105_17   14  0.92 -0.22       0.64 0.362 1.1
## P105_11    9  0.78             0.59 0.414 1.1
## P105_20   17  0.78             0.63 0.369 1.0
## P105_13   11  0.77             0.66 0.338 1.1
## P105_14   12  0.68             0.51 0.492 1.0
## P105_16   13  0.59             0.29 0.706 1.1
## P105_12   10  0.48             0.50 0.496 1.7
## P105_05    4  0.48  0.35       0.48 0.519 1.8
## P105_18   15  0.39  0.25       0.27 0.733 1.8
## P105_02    2 -0.43  1.12       0.98 0.018 1.3
## P105_01    1        0.67       0.40 0.602 1.1
## P105_04    3  0.25  0.50       0.42 0.583 1.5
## P105_19   16  0.28  0.41       0.36 0.644 1.8
## P105_07    6              0.89 0.76 0.244 1.1
## P105_06    5              0.78 0.56 0.445 1.0
## P105_10    8              0.63 0.45 0.554 1.1
## P105_09    7  0.32        0.60 0.62 0.377 1.6
## 
##                        PA1  PA2  PA3
## SS loadings           4.43 2.37 2.30
## Proportion Var        0.26 0.14 0.14
## Cumulative Var        0.26 0.40 0.54
## Proportion Explained  0.49 0.26 0.25
## Cumulative Proportion 0.49 0.75 1.00
## 
##  With factor correlations of 
##      PA1  PA2  PA3
## PA1 1.00 0.48 0.53
## PA2 0.48 1.00 0.22
## PA3 0.53 0.22 1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 3 factors are sufficient.
## 
## The degrees of freedom for the null model are  136  and the objective function was  9.63 with Chi Square of  640.36
## The degrees of freedom for the model are 88  and the objective function was  1.89 
## 
## The root mean square of the residuals (RMSR) is  0.05 
## The df corrected root mean square of the residuals is  0.07 
## 
## The harmonic number of observations is  74 with the empirical chi square  57.24  with prob <  1 
## The total number of observations was  74  with Likelihood Chi Square =  121.76  with prob <  0.01 
## 
## Tucker Lewis Index of factoring reliability =  0.892
## RMSEA index =  0.071  and the 90 % confidence intervals are  0.037 0.102
## BIC =  -257
## Fit based upon off diagonal values = 0.98

Skalenanalyse

#### Skalenanalyse und Validierung ----


# Working Directory setzen und Daten einlesen
setwd("~/Documents/R Sachen")

dat <- read.csv("Perfektionismus.csv") 

# notwendige Packages laden (und ggf. installieren)
library(psych)
library(GPArotation) #install.packages("GPArotation")


#### 1. Skalenanalyse ####

# 1.1. Erstellen der Skalenwerte (Subskalenwerte, Globalskalawert)

# Auswahl der relevanten Items pro *SUBSKALA*, die in die Item-/Skalenanalyse einfließen sollen
# (ACHTUNG: nach Itemrevision/-selektion muss die Auswahl angepasst werden!)
Leistung <- data.frame(dat[c("P105_06", "P105_07",
                            "P105_09", "P105_10", "P105_11", "P105_12", "P105_14", "P105_16", "P105_17",
                            "P105_20")])

Sorge <- data.frame(dat[c("P105_01", "P105_02", "P105_04",
                            "P105_19")])

# schauen Sie hier noch mal in Ihre Faktorenanalyse -> Welche Items bilden einen Faktor (also eine Subskala)?


# 1.2. Mittel- oder Summenwerte erstellen
dat$Leistung_x <- rowMeans(Leistung, na.rm = TRUE) #auch bei fehlenden Werten berechnet
dat$Leistung_s <- rowSums(Leistung) #na.rm = TRUE hier nicht verwenden!

dat$Sorge_x <- rowMeans(Sorge, na.rm = TRUE)
dat$Sorge_s <- rowSums(Sorge)


# 1.3. Deskriptive Statistik und Häufigkeitsverteilung der Skalenwerte
describe(dat$Leistung_x)
describe(dat$Leistung_s)
describe(dat$Sorge_x)
describe(dat$Sorge_s)
# usw. für alle Subskalen und ggf. die Gesamtskala (sofern inhaltlich sinnvoll)


# 1.4. Reliabilitätsanalyse der Subskalen (inkl. Itemtrennschärfe)
psych::alpha(Leistung)
## 
## Reliability analysis   
## Call: psych::alpha(x = Leistung)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.88      0.88     0.9      0.42 7.2 0.021  2.5 0.93      0.4
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.83  0.88  0.92
## Duhachek  0.84  0.88  0.92
## 
##  Reliability if an item is dropped:
##         raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## P105_06      0.87      0.87    0.89      0.43 6.8    0.022 0.014  0.40
## P105_07      0.87      0.86    0.88      0.42 6.4    0.023 0.017  0.40
## P105_09      0.86      0.86    0.88      0.40 6.0    0.025 0.018  0.38
## P105_10      0.87      0.87    0.89      0.43 6.7    0.023 0.018  0.40
## P105_11      0.86      0.86    0.88      0.41 6.2    0.025 0.017  0.39
## P105_12      0.87      0.87    0.89      0.42 6.5    0.024 0.020  0.40
## P105_14      0.86      0.87    0.89      0.42 6.4    0.024 0.019  0.39
## P105_16      0.88      0.88    0.90      0.44 7.1    0.022 0.016  0.42
## P105_17      0.86      0.86    0.88      0.41 6.3    0.024 0.018  0.39
## P105_20      0.86      0.86    0.88      0.40 6.1    0.025 0.016  0.39
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean  sd
## P105_06 74  0.61  0.62  0.58   0.51  2.0 1.3
## P105_07 74  0.69  0.70  0.68   0.60  2.1 1.3
## P105_09 74  0.77  0.78  0.75   0.70  2.4 1.4
## P105_10 74  0.63  0.64  0.60   0.54  1.6 1.1
## P105_11 74  0.75  0.74  0.72   0.67  2.6 1.4
## P105_12 74  0.68  0.68  0.62   0.59  2.9 1.4
## P105_14 74  0.70  0.69  0.65   0.61  3.3 1.4
## P105_16 74  0.58  0.57  0.50   0.46  3.1 1.4
## P105_17 74  0.73  0.71  0.68   0.64  2.7 1.5
## P105_20 74  0.76  0.76  0.75   0.69  2.7 1.3
## 
## Non missing response frequency for each item
##            1    2    3    4    5 miss
## P105_06 0.50 0.24 0.05 0.12 0.08    0
## P105_07 0.45 0.30 0.04 0.15 0.07    0
## P105_09 0.39 0.22 0.11 0.20 0.08    0
## P105_10 0.72 0.08 0.08 0.09 0.03    0
## P105_11 0.32 0.20 0.11 0.26 0.11    0
## P105_12 0.20 0.27 0.11 0.31 0.11    0
## P105_14 0.11 0.24 0.12 0.30 0.23    0
## P105_16 0.16 0.23 0.09 0.34 0.18    0
## P105_17 0.31 0.20 0.15 0.18 0.16    0
## P105_20 0.22 0.28 0.16 0.24 0.09    0
psych::alpha(Sorge)
## 
## Reliability analysis   
## Call: psych::alpha(x = Sorge)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.75      0.75    0.71      0.43   3 0.048    3 0.92     0.41
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.64  0.75  0.83
## Duhachek  0.66  0.75  0.84
## 
##  Reliability if an item is dropped:
##         raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## P105_01      0.70      0.70    0.61      0.44 2.4    0.060 0.0022  0.43
## P105_02      0.61      0.61    0.52      0.35 1.6    0.077 0.0051  0.37
## P105_04      0.70      0.70    0.64      0.43 2.3    0.061 0.0294  0.43
## P105_19      0.74      0.74    0.68      0.49 2.9    0.053 0.0141  0.49
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean  sd
## P105_01 74  0.74  0.74  0.63   0.52  3.0 1.2
## P105_02 74  0.84  0.84  0.79   0.68  3.6 1.2
## P105_04 74  0.76  0.75  0.61   0.53  2.7 1.3
## P105_19 74  0.68  0.69  0.52   0.45  2.6 1.2
## 
## Non missing response frequency for each item
##            1    2    3    4    5 miss
## P105_01 0.11 0.30 0.14 0.36 0.09    0
## P105_02 0.08 0.14 0.14 0.43 0.22    0
## P105_04 0.23 0.27 0.14 0.30 0.07    0
## P105_19 0.18 0.42 0.12 0.24 0.04    0
# In welchem Fall macht es keinen Sinn Cronbachs Alpha für die Gesamtskala zu berechnen?

#Umkodierung
dat$P104_02_r <- 6 - dat$P104_02
dat$P104_05_r <- 6 - dat$P104_05
dat$P104_06_r <- 6 - dat$P104_06
dat$P104_08_r <- 6 - dat$P104_08
dat$P104_09_r <- 6 - dat$P104_09

dat$P101_02_r <- 6 - dat$P101_02
dat$P101_07_r <- 6 - dat$P101_07
dat$P101_08_r <- 6 - dat$P101_08
dat$P101_16_r <- 6 - dat$P101_16
dat$P101_17_r <- 6 - dat$P101_17



# 2.1. Auswahl der Items einer Validierungsskala (bzw. -subskala)
rses_k1 <- data.frame(dat[c("P102_01", "P102_02", "P102_03", "P102_04", "P102_05", 
                         "P102_06", "P102_07", "P102_08", "P102_09", "P102_10", "P102_11", "P102_12")]) # konvergentes Maß - Neurotizismus

rses_k2 <- data.frame(dat[c("P103_01", "P103_02", "P103_03", "P103_04", "P103_05", 
                         "P103_06", "P103_07", "P103_08", "P103_09", "P103_10")]) # konvergentes Maß - Gewissenhaftigkeit

rses_k3 <- data.frame(dat[c("P101_01", "P101_02_r", "P101_03", "P101_04", "P101_05", 
                         "P101_06", "P101_07_r", "P101_08_r", "P101_09", "P101_10", "P101_11", "P101_12", "P101_13", "P101_14", "P101_15", 
                         "P101_16_r", "P101_17_r", "P101_18", "P101_19", "P101_20", "P101_21", "P101_22", "P101_23", "P101_24", "P101_25", 
                         "P101_26", "P101_27", "P101_28", "P101_29", "P101_30", "P101_31", "P101_32", "P101_33", "P101_34", "P101_35")]) # konvergentes Maß - Perfektionismus

rses_d1 <- data.frame(dat[c("P104_01", "P104_02_r", "P104_03", "P104_04", "P104_05_r", 
                         "P104_06_r", "P104_07", "P104_08_r", "P104_09_r", "P104_10")]) # divvergentes Maß - Selbstwert

# 2.2. Berichte Reliabilität der Validierungsskala
psych::alpha(rses_k1)
## 
## Reliability analysis   
## Call: psych::alpha(x = rses_k1)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.93      0.93    0.95      0.53  13 0.012  3.1 0.97     0.53
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.90  0.93  0.95
## Duhachek  0.91  0.93  0.95
## 
##  Reliability if an item is dropped:
##         raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## P102_01      0.92      0.92    0.94      0.52  12    0.014 0.013  0.51
## P102_02      0.92      0.92    0.94      0.52  12    0.013 0.013  0.51
## P102_03      0.92      0.92    0.94      0.52  12    0.013 0.014  0.53
## P102_04      0.92      0.92    0.94      0.52  12    0.014 0.013  0.51
## P102_05      0.93      0.93    0.94      0.54  13    0.012 0.012  0.56
## P102_06      0.93      0.93    0.94      0.54  13    0.013 0.010  0.55
## P102_07      0.92      0.92    0.94      0.52  12    0.014 0.012  0.51
## P102_08      0.92      0.92    0.93      0.51  12    0.014 0.012  0.51
## P102_09      0.93      0.93    0.94      0.54  13    0.012 0.013  0.56
## P102_10      0.92      0.92    0.93      0.52  12    0.014 0.012  0.51
## P102_11      0.93      0.93    0.94      0.54  13    0.012 0.012  0.55
## P102_12      0.92      0.92    0.94      0.52  12    0.014 0.014  0.53
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean  sd
## P102_01 74  0.80  0.80  0.79   0.75  3.2 1.2
## P102_02 74  0.76  0.77  0.75   0.72  3.7 1.2
## P102_03 74  0.77  0.78  0.76   0.73  2.9 1.2
## P102_04 74  0.79  0.79  0.77   0.75  3.0 1.3
## P102_05 74  0.64  0.64  0.61   0.57  3.0 1.2
## P102_06 74  0.70  0.70  0.68   0.63  2.8 1.3
## P102_07 74  0.80  0.80  0.79   0.75  3.5 1.3
## P102_08 74  0.84  0.84  0.83   0.80  3.2 1.3
## P102_09 74  0.66  0.65  0.61   0.58  3.3 1.4
## P102_10 74  0.82  0.82  0.81   0.78  3.1 1.3
## P102_11 74  0.66  0.66  0.62   0.59  2.7 1.4
## P102_12 74  0.79  0.78  0.76   0.73  2.7 1.4
## 
## Non missing response frequency for each item
##            1    2    3    4    5 miss
## P102_01 0.11 0.20 0.22 0.34 0.14    0
## P102_02 0.07 0.15 0.04 0.47 0.27    0
## P102_03 0.14 0.30 0.22 0.24 0.11    0
## P102_04 0.15 0.30 0.12 0.31 0.12    0
## P102_05 0.11 0.30 0.23 0.23 0.14    0
## P102_06 0.22 0.23 0.22 0.24 0.09    0
## P102_07 0.09 0.15 0.16 0.38 0.22    0
## P102_08 0.15 0.19 0.14 0.36 0.16    0
## P102_09 0.15 0.15 0.16 0.30 0.24    0
## P102_10 0.14 0.24 0.19 0.27 0.16    0
## P102_11 0.24 0.24 0.16 0.24 0.11    0
## P102_12 0.27 0.23 0.16 0.20 0.14    0
psych::alpha(rses_k2)
## 
## Reliability analysis   
## Call: psych::alpha(x = rses_k2)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N  ase mean   sd median_r
##       0.88      0.89     0.9      0.44 7.8 0.02  3.1 0.85     0.45
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.84  0.88  0.92
## Duhachek  0.84  0.88  0.92
## 
##  Reliability if an item is dropped:
##         raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## P103_01      0.88      0.88    0.89      0.45 7.4    0.021 0.017  0.45
## P103_02      0.88      0.88    0.89      0.45 7.3    0.022 0.021  0.45
## P103_03      0.89      0.89    0.90      0.47 8.0    0.019 0.015  0.47
## P103_04      0.87      0.87    0.89      0.43 6.7    0.024 0.019  0.43
## P103_05      0.87      0.87    0.88      0.43 6.7    0.023 0.016  0.41
## P103_06      0.87      0.87    0.89      0.44 7.0    0.022 0.018  0.45
## P103_07      0.87      0.88    0.89      0.44 7.0    0.023 0.017  0.45
## P103_08      0.86      0.87    0.88      0.42 6.5    0.024 0.018  0.39
## P103_09      0.87      0.87    0.89      0.42 6.6    0.024 0.017  0.43
## P103_10      0.88      0.88    0.89      0.45 7.3    0.022 0.018  0.45
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean  sd
## P103_01 74  0.63  0.64  0.59   0.53  3.0 1.2
## P103_02 74  0.65  0.66  0.61   0.56  2.6 1.1
## P103_03 74  0.55  0.53  0.45   0.42  2.8 1.4
## P103_04 74  0.77  0.76  0.73   0.69  2.8 1.3
## P103_05 74  0.77  0.77  0.77   0.70  3.9 1.2
## P103_06 74  0.71  0.72  0.69   0.63  3.9 1.1
## P103_07 74  0.72  0.71  0.68   0.63  2.6 1.3
## P103_08 74  0.81  0.81  0.80   0.75  3.9 1.1
## P103_09 74  0.78  0.78  0.76   0.71  3.6 1.3
## P103_10 74  0.65  0.65  0.61   0.57  2.5 1.1
## 
## Non missing response frequency for each item
##            1    2    3    4    5 miss
## P103_01 0.12 0.28 0.16 0.38 0.05    0
## P103_02 0.18 0.31 0.26 0.22 0.04    0
## P103_03 0.26 0.22 0.09 0.32 0.11    0
## P103_04 0.19 0.31 0.18 0.20 0.12    0
## P103_05 0.07 0.08 0.09 0.39 0.36    0
## P103_06 0.05 0.09 0.11 0.43 0.31    0
## P103_07 0.26 0.28 0.15 0.23 0.08    0
## P103_08 0.07 0.05 0.08 0.53 0.27    0
## P103_09 0.09 0.16 0.14 0.31 0.30    0
## P103_10 0.19 0.38 0.23 0.16 0.04    0
psych::alpha(rses_k3)
## Warning in psych::alpha(rses_k3): Some items were negatively correlated with the total scale and probably 
## should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
## Some items ( P101_17_r P101_31 ) were negatively correlated with the total scale and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
## 
## Reliability analysis   
## Call: psych::alpha(x = rses_k3)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.88      0.88    0.96      0.17   7 0.019  2.5 0.54     0.17
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.83  0.88  0.91
## Duhachek  0.84  0.88  0.92
## 
##  Reliability if an item is dropped:
##           raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## P101_01        0.87      0.87    0.96      0.17 6.8    0.020 0.070  0.17
## P101_02_r      0.88      0.88    0.96      0.18 7.3    0.019 0.066  0.18
## P101_03        0.87      0.87    0.96      0.17 6.7    0.020 0.070  0.17
## P101_04        0.87      0.87    0.96      0.16 6.5    0.020 0.069  0.17
## P101_05        0.87      0.87    0.96      0.16 6.7    0.020 0.070  0.17
## P101_06        0.87      0.87    0.96      0.16 6.6    0.020 0.070  0.17
## P101_07_r      0.88      0.88    0.96      0.18 7.3    0.018 0.066  0.18
## P101_08_r      0.88      0.88    0.96      0.18 7.4    0.019 0.066  0.18
## P101_09        0.87      0.87    0.96      0.16 6.5    0.020 0.069  0.17
## P101_10        0.87      0.87    0.96      0.17 6.7    0.020 0.071  0.17
## P101_11        0.87      0.87    0.96      0.16 6.6    0.020 0.070  0.17
## P101_12        0.87      0.87    0.96      0.16 6.7    0.020 0.069  0.17
## P101_13        0.87      0.87    0.96      0.16 6.6    0.020 0.070  0.17
## P101_14        0.87      0.87    0.96      0.16 6.6    0.020 0.070  0.17
## P101_15        0.87      0.87    0.96      0.16 6.6    0.020 0.070  0.17
## P101_16_r      0.88      0.88    0.96      0.17 7.2    0.019 0.069  0.18
## P101_17_r      0.89      0.89    0.97      0.19 8.0    0.017 0.060  0.18
## P101_18        0.87      0.87    0.96      0.16 6.5    0.020 0.069  0.17
## P101_19        0.87      0.87    0.96      0.16 6.6    0.020 0.069  0.17
## P101_20        0.87      0.87    0.96      0.17 6.7    0.020 0.069  0.17
## P101_21        0.87      0.87    0.96      0.16 6.5    0.020 0.069  0.17
## P101_22        0.87      0.87    0.96      0.16 6.5    0.020 0.069  0.17
## P101_23        0.87      0.87    0.96      0.16 6.6    0.020 0.069  0.17
## P101_24        0.88      0.87    0.96      0.17 6.9    0.019 0.070  0.17
## P101_25        0.87      0.86    0.96      0.16 6.4    0.021 0.068  0.16
## P101_26        0.88      0.87    0.96      0.17 6.9    0.019 0.070  0.17
## P101_27        0.88      0.88    0.96      0.18 7.3    0.019 0.066  0.18
## P101_28        0.87      0.87    0.96      0.16 6.6    0.020 0.069  0.17
## P101_29        0.88      0.88    0.96      0.18 7.3    0.018 0.066  0.18
## P101_30        0.87      0.87    0.96      0.17 6.7    0.020 0.070  0.17
## P101_31        0.89      0.88    0.96      0.18 7.5    0.018 0.064  0.18
## P101_32        0.87      0.87    0.96      0.17 6.8    0.020 0.071  0.17
## P101_33        0.88      0.87    0.96      0.17 6.9    0.019 0.070  0.17
## P101_34        0.87      0.87    0.96      0.16 6.7    0.020 0.069  0.17
## P101_35        0.87      0.87    0.96      0.16 6.6    0.020 0.069  0.17
## 
##  Item statistics 
##            n  raw.r  std.r  r.cor  r.drop mean   sd
## P101_01   74  0.488  0.490  0.484  0.4345  2.5 1.27
## P101_02_r 74  0.054  0.059  0.046 -0.0029  2.4 1.08
## P101_03   74  0.507  0.505  0.494  0.4520  1.9 1.33
## P101_04   74  0.662  0.652  0.653  0.6178  2.3 1.38
## P101_05   74  0.544  0.540  0.535  0.4914  2.2 1.33
## P101_06   74  0.607  0.610  0.603  0.5653  3.2 1.17
## P101_07_r 74  0.106  0.101  0.093  0.0387  2.7 1.29
## P101_08_r 74  0.021  0.026  0.014 -0.0303  2.2 0.98
## P101_09   74  0.663  0.657  0.656  0.6198  2.3 1.35
## P101_10   74  0.511  0.515  0.500  0.4651  3.3 1.11
## P101_11   74  0.574  0.581  0.576  0.5360  1.8 1.04
## P101_12   74  0.530  0.528  0.524  0.4744  3.0 1.38
## P101_13   74  0.581  0.579  0.573  0.5328  2.4 1.29
## P101_14   74  0.574  0.579  0.572  0.5290  2.2 1.20
## P101_15   74  0.635  0.641  0.638  0.5980  1.8 1.11
## P101_16_r 74  0.168  0.163  0.144  0.1012  3.0 1.28
## P101_17_r 74 -0.434 -0.438 -0.465 -0.4871  2.9 1.29
## P101_18   74  0.664  0.664  0.663  0.6238  2.6 1.28
## P101_19   74  0.591  0.590  0.589  0.5439  3.0 1.30
## P101_20   74  0.509  0.515  0.515  0.4612  2.0 1.16
## P101_21   74  0.655  0.655  0.650  0.6160  2.3 1.20
## P101_22   74  0.692  0.690  0.691  0.6510  2.1 1.37
## P101_23   74  0.621  0.625  0.617  0.5855  1.8 1.05
## P101_24   74  0.412  0.407  0.399  0.3510  3.0 1.32
## P101_25   74  0.764  0.762  0.765  0.7337  2.2 1.28
## P101_26   74  0.403  0.405  0.389  0.3461  2.1 1.24
## P101_27   74  0.073  0.080  0.067  0.0220  4.0 0.96
## P101_28   74  0.588  0.587  0.576  0.5412  2.3 1.29
## P101_29   74  0.071  0.075  0.064  0.0122  3.6 1.11
## P101_30   74  0.507  0.509  0.496  0.4587  2.9 1.18
## P101_31   74 -0.056 -0.057 -0.068 -0.1211  3.3 1.25
## P101_32   74  0.476  0.474  0.461  0.4225  2.5 1.25
## P101_33   74  0.399  0.399  0.378  0.3404  2.9 1.27
## P101_34   74  0.559  0.559  0.551  0.5157  2.3 1.12
## P101_35   74  0.571  0.570  0.568  0.5248  2.0 1.24
## 
## Non missing response frequency for each item
##              1    2    3    4    5 miss
## P101_01   0.27 0.32 0.14 0.20 0.07    0
## P101_02_r 0.23 0.35 0.22 0.19 0.01    0
## P101_03   0.57 0.18 0.08 0.09 0.08    0
## P101_04   0.43 0.16 0.16 0.16 0.08    0
## P101_05   0.45 0.22 0.12 0.15 0.07    0
## P101_06   0.08 0.24 0.16 0.41 0.11    0
## P101_07_r 0.20 0.34 0.15 0.22 0.09    0
## P101_08_r 0.26 0.46 0.16 0.11 0.01    0
## P101_09   0.38 0.28 0.11 0.14 0.09    0
## P101_10   0.07 0.19 0.19 0.45 0.11    0
## P101_11   0.54 0.24 0.12 0.08 0.01    0
## P101_12   0.18 0.20 0.22 0.22 0.19    0
## P101_13   0.32 0.28 0.15 0.18 0.07    0
## P101_14   0.36 0.30 0.18 0.11 0.05    0
## P101_15   0.58 0.23 0.07 0.09 0.03    0
## P101_16_r 0.14 0.28 0.20 0.24 0.14    0
## P101_17_r 0.12 0.35 0.12 0.27 0.14    0
## P101_18   0.27 0.20 0.27 0.18 0.08    0
## P101_19   0.18 0.18 0.27 0.24 0.14    0
## P101_20   0.49 0.24 0.12 0.12 0.03    0
## P101_21   0.31 0.35 0.07 0.26 0.01    0
## P101_22   0.51 0.18 0.08 0.16 0.07    0
## P101_23   0.54 0.24 0.09 0.12 0.00    0
## P101_24   0.12 0.31 0.16 0.23 0.18    0
## P101_25   0.41 0.27 0.08 0.20 0.04    0
## P101_26   0.43 0.24 0.14 0.15 0.04    0
## P101_27   0.01 0.08 0.12 0.45 0.34    0
## P101_28   0.35 0.27 0.11 0.23 0.04    0
## P101_29   0.01 0.23 0.12 0.42 0.22    0
## P101_30   0.12 0.31 0.26 0.22 0.09    0
## P101_31   0.07 0.30 0.12 0.34 0.18    0
## P101_32   0.28 0.28 0.18 0.20 0.05    0
## P101_33   0.15 0.27 0.18 0.30 0.11    0
## P101_34   0.27 0.38 0.11 0.24 0.00    0
## P101_35   0.47 0.26 0.08 0.15 0.04    0
psych::alpha(rses_d1)
## 
## Reliability analysis   
## Call: psych::alpha(x = rses_d1)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.87      0.87    0.93      0.39 6.4 0.024  3.6 0.84      0.4
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.82  0.87  0.91
## Duhachek  0.82  0.87  0.91
## 
##  Reliability if an item is dropped:
##           raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## P104_01        0.84      0.84    0.91      0.37 5.3    0.028 0.085  0.37
## P104_02_r      0.85      0.85    0.92      0.39 5.8    0.027 0.074  0.41
## P104_03        0.87      0.87    0.92      0.42 6.5    0.024 0.056  0.41
## P104_04        0.86      0.86    0.92      0.40 6.1    0.025 0.076  0.41
## P104_05_r      0.85      0.85    0.92      0.39 5.8    0.028 0.087  0.40
## P104_06_r      0.85      0.86    0.92      0.40 5.9    0.027 0.073  0.41
## P104_07        0.86      0.85    0.92      0.39 5.8    0.026 0.079  0.41
## P104_08_r      0.86      0.86    0.93      0.41 6.3    0.025 0.073  0.41
## P104_09_r      0.84      0.84    0.91      0.37 5.3    0.029 0.084  0.37
## P104_10        0.84      0.84    0.91      0.36 5.1    0.029 0.086  0.34
## 
##  Item statistics 
##            n raw.r std.r r.cor r.drop mean   sd
## P104_01   74  0.77  0.79  0.78   0.70  3.4 1.24
## P104_02_r 74  0.70  0.66  0.64   0.60  3.2 1.43
## P104_03   74  0.46  0.51  0.50   0.36  4.1 0.98
## P104_04   74  0.57  0.61  0.58   0.47  3.9 1.11
## P104_05_r 74  0.69  0.67  0.63   0.60  3.8 1.19
## P104_06_r 74  0.69  0.65  0.62   0.58  3.2 1.39
## P104_07   74  0.63  0.66  0.64   0.53  4.0 1.20
## P104_08_r 74  0.60  0.55  0.51   0.47  2.9 1.38
## P104_09_r 74  0.80  0.78  0.76   0.73  3.9 1.22
## P104_10   74  0.81  0.82  0.81   0.74  3.5 1.29
## 
## Non missing response frequency for each item
##              1    2    3    4    5 miss
## P104_01   0.14 0.14 0.08 0.54 0.11    0
## P104_02_r 0.14 0.30 0.04 0.30 0.23    0
## P104_03   0.05 0.01 0.05 0.50 0.38    0
## P104_04   0.05 0.07 0.15 0.41 0.32    0
## P104_05_r 0.03 0.18 0.12 0.31 0.36    0
## P104_06_r 0.12 0.27 0.14 0.23 0.24    0
## P104_07   0.07 0.05 0.15 0.28 0.45    0
## P104_08_r 0.16 0.34 0.15 0.16 0.19    0
## P104_09_r 0.05 0.11 0.15 0.27 0.42    0
## P104_10   0.11 0.15 0.15 0.36 0.23    0
# 2.3. Erstelle die Skalenwerte der Validierungsskalen
dat$rses_k1_x <- rowMeans(rses_k1, na.rm = TRUE) 
dat$rses_k1_s <- rowSums(rses_k1)

dat$rses_k2_x <- rowMeans(rses_k2, na.rm = TRUE) 
dat$rses_k2_s <- rowSums(rses_k2)

dat$rses_k3_x <- rowMeans(rses_k3, na.rm = TRUE) 
dat$rses_k3_s <- rowSums(rses_k3)

dat$rses_d1_x <- rowMeans(rses_d1, na.rm = TRUE) 
dat$rses_d1_s <- rowSums(rses_d1)

# 2.4. Berechne Korrelationskoeffizienten, zur Untersuchung der Konstruktvalidität (konvergent, diskriminant)
valid <- data.frame(dat[c("Leistung_x", "Sorge_x", "rses_k1_x", "rses_k2_x", "rses_k3_x", "rses_d1_x")])
corr.test(valid)
## Call:corr.test(x = valid)
## Correlation matrix 
##            Leistung_x Sorge_x rses_k1_x rses_k2_x rses_k3_x rses_d1_x
## Leistung_x       1.00    0.34      0.65     -0.07      0.54     -0.61
## Sorge_x          0.34    1.00      0.54      0.41      0.34      0.04
## rses_k1_x        0.65    0.54      1.00      0.21      0.44     -0.46
## rses_k2_x       -0.07    0.41      0.21      1.00     -0.16      0.35
## rses_k3_x        0.54    0.34      0.44     -0.16      1.00     -0.47
## rses_d1_x       -0.61    0.04     -0.46      0.35     -0.47      1.00
## Sample Size 
## [1] 74
## Probability values (Entries above the diagonal are adjusted for multiple tests.) 
##            Leistung_x Sorge_x rses_k1_x rses_k2_x rses_k3_x rses_d1_x
## Leistung_x       0.00    0.02      0.00      1.00      0.00      0.00
## Sorge_x          0.00    0.00      0.00      0.00      0.02      1.00
## rses_k1_x        0.00    0.00      0.00      0.28      0.00      0.00
## rses_k2_x        0.58    0.00      0.07      0.00      0.51      0.02
## rses_k3_x        0.00    0.00      0.00      0.17      0.00      0.00
## rses_d1_x        0.00    0.74      0.00      0.00      0.00      0.00
## 
##  To see confidence intervals of the correlations, print with the short=FALSE option