# Read the data set: a semicolon-separated file with a header row and
# decimal commas.
podatki <- read.table(
  file = "./Statistika.csv",
  header = TRUE,
  sep = ";",
  dec = ","
)
# Preview the first rows to check that the import worked.
head(podatki)
## ID V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18
## 1 1 3 5 1 4 3 3 4 2 2 2 2 3 3 2 4 4 4 3
## 2 2 2 3 3 2 2 2 2 2 2 3 2 2 2 2 2 3 4 4
## 3 3 2 4 3 2 2 2 2 2 2 4 2 2 2 2 2 2 4 4
## 4 4 3 3 1 4 5 3 5 5 5 3 5 5 5 5 5 4 3 3
## 5 5 2 5 5 2 2 1 2 2 1 5 1 2 2 2 2 1 5 5
## 6 6 3 5 3 4 3 2 3 3 3 2 4 4 4 3 3 4 3 4
# Opis spremenljivk (description of the variables):
library(pastecs)
# Descriptive statistics for every column except the first one (ID),
# rounded to two decimals. basic = FALSE leaves out the block of basic
# statistics, so the table starts at the median.
round(
  stat.desc(podatki[-1], basic = FALSE),
  digits = 2
)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18
## median 2.00 5.00 3.00 3.00 3.00 2.00 3.00 2.00 2.00 3.00 2.00 3.00 3.00 2.00 2.00 4.00 3.00 4.00
## mean 2.37 4.37 2.54 2.81 2.71 2.19 2.93 2.22 2.24 2.82 2.44 2.85 2.88 2.45 2.56 3.76 2.90 3.44
## SE.mean 0.02 0.03 0.03 0.03 0.03 0.03 0.03 0.03 0.03 0.04 0.03 0.03 0.03 0.03 0.03 0.03 0.03 0.03
## CI.mean.0.95 0.05 0.05 0.06 0.05 0.05 0.06 0.06 0.05 0.05 0.07 0.05 0.06 0.05 0.05 0.06 0.06 0.06 0.06
## var 0.71 0.75 1.12 0.91 0.93 1.17 1.23 0.78 0.77 1.53 0.92 1.00 0.85 0.79 1.06 1.19 1.09 1.08
## std.dev 0.84 0.87 1.06 0.95 0.96 1.08 1.11 0.88 0.88 1.24 0.96 1.00 0.92 0.89 1.03 1.09 1.04 1.04
## coef.var 0.35 0.20 0.42 0.34 0.36 0.49 0.38 0.40 0.39 0.44 0.39 0.35 0.32 0.36 0.40 0.29 0.36 0.30
# psych provides corPlot() and cortest.bartlett(); attach it once up front.
library(psych)

# Item columns only: the first column (ID) is dropped.
podatki_FA <- podatki[-1]

# Correlation matrix of the items, used by the diagnostics that follow.
R <- cor(x = podatki_FA)

# Heat-map style plot of the correlation matrix.
corPlot(R)

# Bartlett's test on the correlation matrix; n is the number of
# observations (rows) in the data set.
cortest.bartlett(R, n = nrow(podatki))
## $chisq
## [1] 6665.548
##
## $p.value
## [1] 0
##
## $df
## [1] 153
library(psych)
# Kaiser-Meyer-Olkin factor adequacy (overall MSA and MSA per item),
# computed from the correlation matrix R.
KMO(r = R)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = R)
## Overall MSA = 0.9
## MSA for each item =
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18
## 0.92 0.84 0.93 0.94 0.94 0.86 0.92 0.84 0.88 0.77 0.93 0.93 0.93 0.89 0.93 0.92 0.86 0.76
# Determinant of the correlation matrix R; a value very close to zero would
# point to (near-)linear dependence among the items.
det(R)
## [1] 0.003730843
# Parallel analysis to suggest how many factors to extract.
#   fa = "fa"    -> evaluate factors only (no principal components)
#   sim = FALSE  -> compare against resampled data only, no simulated data
# The comparison data are drawn at random, so the seed is fixed first to make
# the suggested number of factors reproducible from run to run.
set.seed(123)
fa.parallel(podatki_FA,
            sim = FALSE,
            fa = "fa")
## Parallel analysis suggests that the number of factors = 4 and the number of components = NA
library(psych)
# GPArotation is loaded for the oblique "oblimin" rotation requested below.
library(GPArotation)

# Exploratory factor analysis of the items in podatki_FA:
#   covar = FALSE      -> factor the correlation (not covariance) matrix
#   fm = "minres"      -> minimum-residual extraction
#   nfactors = 4       -> number of factors to extract
#   rotate = "oblimin" -> oblique rotation, factors may correlate
# NOTE(review): impute = "mean" is presumably only honoured together with
# missing = TRUE -- confirm against the installed psych version.
faktorska <- fa(podatki_FA,
                covar = FALSE,
                fm = "minres",
                nfactors = 4,
                rotate = "oblimin",
                impute = "mean")

# Print through the print() generic rather than calling the S3 method
# print.psych() directly; dispatch reaches the same method.
#   cut = 0.3   -> loadings below 0.3 in absolute value are left blank
#   sort = TRUE -> items are ordered by the factor they load on
print(faktorska,
      cut = 0.3,
      sort = TRUE)
## Factor Analysis using method = minres
## Call: fa(r = podatki_FA, nfactors = 4, rotate = "oblimin", covar = FALSE,
## impute = "mean", fm = "minres")
## Standardized loadings (pattern matrix) based upon correlation matrix
## item MR1 MR3 MR4 MR2 h2 u2 com
## V6 6 0.82 0.55 0.45 1.1
## V15 15 0.64 0.54 0.46 1.1
## V7 7 0.55 0.48 0.52 1.2
## V11 11 0.55 0.47 0.53 1.2
## V12 12 0.51 0.47 0.53 1.5
## V8 8 0.82 0.63 0.37 1.0
## V9 9 0.75 0.62 0.38 1.1
## V14 14 0.69 0.59 0.41 1.0
## V1 1 0.66 0.46 0.54 1.0
## V13 13 0.56 0.48 0.52 1.2
## V4 4 0.53 0.42 0.58 1.2
## V5 5 0.50 0.35 0.65 1.1
## V10 10 0.59 0.35 0.65 1.1
## V17 17 0.52 0.30 0.70 1.2
## V2 2 -0.52 0.27 0.73 1.1
## V18 18 0.40 0.15 0.85 1.6
## V3 3 -0.35 0.39 0.44 0.56 2.1
## V16 16 -0.37 0.23 0.77 1.3
##
## MR1 MR3 MR4 MR2
## SS loadings 2.33 2.01 1.98 1.50
## Proportion Var 0.13 0.11 0.11 0.08
## Cumulative Var 0.13 0.24 0.35 0.43
## Proportion Explained 0.30 0.26 0.25 0.19
## Cumulative Proportion 0.30 0.56 0.81 1.00
##
## With factor correlations of
## MR1 MR3 MR4 MR2
## MR1 1.00 0.45 0.54 -0.37
## MR3 0.45 1.00 0.53 -0.18
## MR4 0.54 0.53 1.00 -0.32
## MR2 -0.37 -0.18 -0.32 1.00
##
## Mean item complexity = 1.2
## Test of the hypothesis that 4 factors are sufficient.
##
## df null model = 153 with the objective function = 5.59 with Chi Square = 6665.55
## df of the model are 87 and the objective function was 0.19
##
## The root mean square of the residuals (RMSR) is 0.02
## The df corrected root mean square of the residuals is 0.03
##
## The harmonic n.obs is 1200 with the empirical chi square 149.69 with prob < 3.4e-05
## The total n.obs was 1200 with Likelihood Chi Square = 228.08 with prob < 1.4e-14
##
## Tucker Lewis Index of factoring reliability = 0.962
## RMSEA index = 0.037 and the 90 % confidence intervals are 0.031 0.043
## BIC = -388.75
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1 MR3 MR4 MR2
## Correlation of (regression) scores with factors 0.91 0.91 0.88 0.83
## Multiple R square of scores with factors 0.83 0.84 0.78 0.69
## Minimum correlation of possible factor scores 0.65 0.67 0.56 0.37
# Rebuild the item set without ID and without items V3 and V18.
# NOTE(review): presumably V3 and V18 are dropped because of the preceding
# solution (V3 loads on two factors, V18 has the lowest communality) --
# confirm the intended criterion.
podatki_FA <- podatki[ , !colnames(podatki) %in% c("ID", "V3", "V18")]

library(psych)
# GPArotation is loaded for the oblique "oblimin" rotation requested below.
library(GPArotation)

# Re-run the exploratory factor analysis on the reduced item set:
#   covar = FALSE      -> factor the correlation (not covariance) matrix
#   fm = "minres"      -> minimum-residual extraction
#   nfactors = 4       -> number of factors to extract
#   rotate = "oblimin" -> oblique rotation, factors may correlate
# NOTE(review): impute = "mean" is presumably only honoured together with
# missing = TRUE -- confirm against the installed psych version.
faktorska <- fa(podatki_FA,
                covar = FALSE,
                fm = "minres",
                nfactors = 4,
                rotate = "oblimin",
                impute = "mean")

# Print through the print() generic rather than calling the S3 method
# print.psych() directly; dispatch reaches the same method.
#   cut = 0.3   -> loadings below 0.3 in absolute value are left blank
#   sort = TRUE -> items are ordered by the factor they load on
print(faktorska,
      cut = 0.3,
      sort = TRUE)
## Factor Analysis using method = minres
## Call: fa(r = podatki_FA, nfactors = 4, rotate = "oblimin", covar = FALSE,
## impute = "mean", fm = "minres")
## Standardized loadings (pattern matrix) based upon correlation matrix
## item MR1 MR2 MR4 MR3 h2 u2 com
## V6 5 0.81 0.54 0.46 1.1
## V15 14 0.65 0.54 0.46 1.1
## V7 6 0.56 0.48 0.52 1.1
## V11 10 0.54 0.48 0.52 1.2
## V12 11 0.51 0.47 0.53 1.5
## V8 7 0.82 0.63 0.37 1.0
## V9 8 0.76 0.62 0.38 1.1
## V14 13 0.69 0.59 0.41 1.0
## V1 1 0.71 0.49 0.51 1.0
## V13 12 0.59 0.48 0.52 1.1
## V4 3 0.53 0.41 0.59 1.2
## V5 4 0.52 0.35 0.65 1.1
## V10 9 -0.62 0.37 0.63 1.0
## V2 2 0.55 0.30 0.70 1.0
## V17 16 -0.49 0.27 0.73 1.2
## V16 15 0.34 0.21 0.79 1.4
##
## MR1 MR2 MR4 MR3
## SS loadings 2.28 1.95 1.85 1.15
## Proportion Var 0.14 0.12 0.12 0.07
## Cumulative Var 0.14 0.26 0.38 0.45
## Proportion Explained 0.32 0.27 0.26 0.16
## Cumulative Proportion 0.32 0.58 0.84 1.00
##
## With factor correlations of
## MR1 MR2 MR4 MR3
## MR1 1.00 0.45 0.56 0.36
## MR2 0.45 1.00 0.56 0.14
## MR4 0.56 0.56 1.00 0.30
## MR3 0.36 0.14 0.30 1.00
##
## Mean item complexity = 1.1
## Test of the hypothesis that 4 factors are sufficient.
##
## df null model = 120 with the objective function = 5.01 with Chi Square = 5981.3
## df of the model are 62 and the objective function was 0.14
##
## The root mean square of the residuals (RMSR) is 0.02
## The df corrected root mean square of the residuals is 0.03
##
## The harmonic n.obs is 1200 with the empirical chi square 95.9 with prob < 0.0037
## The total n.obs was 1200 with Likelihood Chi Square = 162.91 with prob < 5.3e-11
##
## Tucker Lewis Index of factoring reliability = 0.967
## RMSEA index = 0.037 and the 90 % confidence intervals are 0.03 0.044
## BIC = -276.67
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1 MR2 MR4 MR3
## Correlation of (regression) scores with factors 0.91 0.91 0.88 0.80
## Multiple R square of scores with factors 0.82 0.84 0.78 0.64
## Minimum correlation of possible factor scores 0.65 0.67 0.57 0.28
# Spremenljivke (variables):
# Path diagram of the fitted factor solution.
fa.diagram(faktorska)

# Residual matrix stored in the fitted factor-analysis object.
Matrika_ostankov <- faktorska[["residual"]]

# Keep each off-diagonal residual once (strict upper triangle, diagonal
# excluded) and store the values as a one-column matrix.
Ostanki <- as.matrix(
  Matrika_ostankov[upper.tri(Matrika_ostankov, diag = FALSE)]
)
head(Ostanki)
## [,1]
## [1,] -0.014623739
## [2,] 0.003416266
## [3,] 0.005674986
## [4,] 0.003381767
## [5,] 0.007719287
## [6,] 0.040066020
# Flag residuals whose magnitude exceeds 0.05, in either direction.
VisokiOstanki <- Ostanki > 0.05 | Ostanki < -0.05
head(VisokiOstanki)
## [,1]
## [1,] FALSE
## [2,] FALSE
## [3,] FALSE
## [4,] FALSE
## [5,] FALSE
## [6,] FALSE
# Share of flagged residuals: the mean of a logical matrix is the
# proportion of TRUE entries.
mean(VisokiOstanki)
## [1] 0
# Weights stored in the fitted object: one row per item, one column per
# factor.
print(faktorska[["weights"]])
## MR1 MR2 MR4 MR3
## V1 -0.014222278 0.0344848525 0.302546204 -0.014385964
## V2 -0.005142622 -0.0033854478 0.016806847 0.278394535
## V4 0.029818078 0.0393028722 0.197412591 -0.016766698
## V5 0.025213864 0.0183527214 0.177013735 0.009605647
## V6 0.311832283 -0.0011115267 -0.057197799 -0.014565190
## V7 0.187526663 0.0223139596 0.059763810 0.038949935
## V8 -0.029816225 0.3652326822 0.045737521 -0.095816400
## V9 0.032542308 0.3259514711 0.006587123 0.069994752
## V10 -0.006579935 0.0335090797 -0.003333121 -0.353160841
## V11 0.180972403 0.0491305613 0.025916954 0.064199378
## V12 0.167878183 0.0007192768 0.105041429 0.025106797
## V13 0.024136941 0.0213725209 0.246347152 0.096806340
## V14 0.029482337 0.2794546990 0.066142430 -0.030329701
## V15 0.251068144 0.0007308693 0.060864207 0.047533917
## V16 0.024760852 0.0045834924 0.034606056 0.154538505
## V17 -0.020545164 -0.0191580849 0.014692609 -0.245313477
# First rows of the factor scores: one row per observation, one column per
# factor.
head(faktorska[["scores"]])
## MR1 MR2 MR4 MR3
## [1,] 0.7872002 -0.3105402 0.7352525 0.35417847
## [2,] -0.7503960 -0.5035133 -1.0474842 -1.28023460
## [3,] -0.7877672 -0.4827468 -1.0644233 -1.41267777
## [4,] 2.4996409 3.6360477 2.6026848 -0.04466547
## [5,] -1.4128356 -0.9429129 -1.0447329 -2.00546364
## [6,] 0.7060360 1.0272124 1.2848289 0.81503494
# Append the factor scores to the data set as four new columns. Columns of
# the score matrix are picked by position (1 to 4), so the names below rely
# on the column order of faktorska$scores.
podatki[["F1_Rac"]] <- faktorska[["scores"]][, 1]
podatki[["F2_Matem"]] <- faktorska[["scores"]][, 2]
podatki[["F3_Stat"]] <- faktorska[["scores"]][, 3]
podatki[["F4_R"]] <- faktorska[["scores"]][, 4]

# Show row 455 together with its newly added factor scores.
print(podatki[455, ])
## ID V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 F1_Rac F2_Matem F3_Stat F4_R
## 455 455 4 5 1 4 1 2 5 1 1 1 1 5 5 1 1 5 1 1 0.0496012 -1.456233 1.299551 1.931835
# Data frame holding only the four items V10, V2, V17 and V16, which are
# passed to the reliability analysis.
F4_R <- podatki[c("V10", "V2", "V17", "V16")]
head(F4_R)
## V10 V2 V17 V16
## 1 2 5 4 4
## 2 3 3 4 3
## 3 4 4 4 2
## 4 3 3 3 4
## 5 5 5 5 1
## 6 2 5 3 4
library(psych)
# Reliability analysis (Cronbach's alpha) for the items in F4_R.
# check.keys = TRUE lets alpha() reverse items that correlate negatively with
# the first principal component; the warning printed below reports that this
# happened here (reversed items are marked with a trailing "-").
# NOTE(review): listing the reversed items explicitly through the `keys`
# argument would presumably document the reversal and avoid the warning --
# confirm before changing.
alpha(F4_R,
check.keys = TRUE)
## Warning in alpha(F4_R, check.keys = TRUE): Some items were negatively correlated with the first principal component and were automatically reversed.
## This is indicated by a negative sign for the variable name.
##
## Reliability analysis
## Call: alpha(x = F4_R, check.keys = TRUE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.58 0.59 0.52 0.26 1.4 0.02 2.4 0.71 0.26
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.54 0.58 0.62
## Duhachek 0.54 0.58 0.62
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## V10 0.48 0.49 0.39 0.24 0.96 0.026 0.00159 0.24
## V2- 0.50 0.50 0.40 0.25 1.01 0.025 0.00074 0.24
## V17 0.50 0.51 0.41 0.26 1.03 0.025 0.00478 0.23
## V16- 0.55 0.56 0.46 0.30 1.29 0.022 0.00088 0.28
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## V10 1200 0.73 0.69 0.53 0.40 2.8 1.24
## V2- 1200 0.63 0.68 0.51 0.39 1.6 0.87
## V17 1200 0.67 0.68 0.49 0.38 2.9 1.04
## V16- 1200 0.64 0.63 0.40 0.31 2.2 1.09
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## V10 0.20 0.21 0.23 0.29 0.07 0
## V2 0.01 0.05 0.08 0.30 0.56 0
## V17 0.10 0.25 0.34 0.26 0.05 0
## V16 0.03 0.13 0.21 0.33 0.30 0