library(polycor)
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:polycor':
##
## polyserial
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
library(EFA.dimensions)
## **************************************************************************************************
## EFA.dimensions 0.1.8.1
##
## Please contact Brian O'Connor at brian.oconnor@ubc.ca if you have questions or suggestions.
## **************************************************************************************************
# Load the data_RSE dataset and keep only the rows without missing values.
data("data_RSE")
hw4 <- na.omit(data_RSE)
First, I check whether the data are adequate for factor analysis.
# Kaiser-Meyer-Olkin measure of sampling adequacy (overall and per item).
KMO(r = hw4)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = hw4)
## Overall MSA = 0.91
## MSA for each item =
## Q1 Q2 Q3 Q4 Q5 Q6 Q7 Q8 Q9 Q10
## 0.87 0.88 0.94 0.94 0.96 0.92 0.91 0.90 0.91 0.90
# Bartlett's test on the item correlations; raw data are passed, so the
# correlation matrix is computed from them by the function.
cortest.bartlett(R = hw4)
## R was not square, finding R from data
## $chisq
## [1] 1809.435
##
## $p.value
## [1] 0
##
## $df
## [1] 45
Interpretation: The Kaiser-Meyer-Olkin measure of sampling adequacy is 0.91 overall, and every item has an MSA of at least 0.87, which is quite good because it is higher than 0.8. Bartlett's test (cortest.bartlett) shows that the variables are correlated: the p-value is reported as 0. The data are suitable for factor analysis.
# Give the ten items descriptive labels.
colnames(hw4) <- c(
  "satisfied", "nogood", "goodqualities", "dothingsasmostppl",
  "notmuchproud", "useless", "equalasothers", "moreselfrespect",
  "failure", "positiveatt"
)
# Heterogeneous correlation matrix of the items; only the matrix of
# coefficients is kept and printed.
# NOTE(review): hetcor() computes polychoric correlations only for
# ordered-factor columns; if the items are stored as plain numerics these are
# Pearson correlations -- confirm which is intended.
hw4.cor <- hetcor(hw4)[["correlations"]]
hw4.cor
## satisfied nogood goodqualities dothingsasmostppl
## satisfied 1.0000000 0.6501116 -0.5255283 0.5162663
## nogood 0.6501116 1.0000000 -0.5282311 0.5459068
## goodqualities -0.5255283 -0.5282311 1.0000000 -0.4564678
## dothingsasmostppl 0.5162663 0.5459068 -0.4564678 1.0000000
## notmuchproud -0.4502033 -0.4368299 0.5838548 -0.3696428
## useless 0.5872099 0.5695468 -0.6145452 0.5030354
## equalasothers 0.5606642 0.5486654 -0.6666836 0.4810535
## moreselfrespect -0.4616223 -0.2883896 0.5476708 -0.2776520
## failure -0.3885451 -0.4408003 0.6379461 -0.4131382
## positiveatt -0.3651180 -0.4376588 0.6594017 -0.3377111
## notmuchproud useless equalasothers moreselfrespect
## satisfied -0.4502033 0.5872099 0.5606642 -0.4616223
## nogood -0.4368299 0.5695468 0.5486654 -0.2883896
## goodqualities 0.5838548 -0.6145452 -0.6666836 0.5476708
## dothingsasmostppl -0.3696428 0.5030354 0.4810535 -0.2776520
## notmuchproud 1.0000000 -0.5432471 -0.5116933 0.4605953
## useless -0.5432471 1.0000000 0.7628509 -0.5567646
## equalasothers -0.5116933 0.7628509 1.0000000 -0.5594032
## moreselfrespect 0.4605953 -0.5567646 -0.5594032 1.0000000
## failure 0.5622629 -0.6242323 -0.5732747 0.5770295
## positiveatt 0.5682384 -0.5839417 -0.5762772 0.5823472
## failure positiveatt
## satisfied -0.3885451 -0.3651180
## nogood -0.4408003 -0.4376588
## goodqualities 0.6379461 0.6594017
## dothingsasmostppl -0.4131382 -0.3377111
## notmuchproud 0.5622629 0.5682384
## useless -0.6242323 -0.5839417
## equalasothers -0.5732747 -0.5762772
## moreselfrespect 0.5770295 0.5823472
## failure 1.0000000 0.7312997
## positiveatt 0.7312997 1.0000000
Interpretation: here we see that the variables are correlated, as was already established above, and we can also read off the coefficients. For instance, there is a moderate negative correlation (-0.39) between being satisfied with yourself and feeling like a failure, suggesting that the more satisfied people are with themselves, the less they feel like a failure, and vice versa. There is also a moderate positive correlation (0.56) between feeling satisfied with yourself and feeling like a person of worth (equalasothers).
# Parallel analysis for both factors and components, using 100 iterations.
fa.parallel(x = hw4, fa = "both", n.iter = 100)
## Parallel analysis suggests that the number of factors = 2 and the number of components = 1
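Interpretation: the parallel analysis suggests that the optimal number of factors is 2 (and the number of components is 1), so the plausible choices are 1 or 2 factors. As a one-factor solution is not an option here, I propose to retain 2 factors.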
# no rotation
# Unrotated 2-factor maximum-likelihood solution on polychoric correlations.
fa(r = hw4, nfactors = 2, rotate = "none", fm = "ml", cor = "poly")
## Factor Analysis using method = ml
## Call: fa(r = hw4, nfactors = 2, rotate = "none", fm = "ml", cor = "poly")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 ML2 h2 u2 com
## satisfied -0.73 0.48 0.76 0.24 1.7
## nogood -0.76 0.37 0.71 0.29 1.5
## goodqualities 0.84 0.11 0.72 0.28 1.0
## dothingsasmostppl -0.64 0.33 0.52 0.48 1.5
## notmuchproud 0.71 0.11 0.51 0.49 1.0
## useless -0.84 0.18 0.74 0.26 1.1
## equalasothers -0.82 0.17 0.70 0.30 1.1
## moreselfrespect 0.72 0.16 0.54 0.46 1.1
## failure 0.84 0.28 0.79 0.21 1.2
## positiveatt 0.83 0.39 0.84 0.16 1.4
##
## ML1 ML2
## SS loadings 6.03 0.82
## Proportion Var 0.60 0.08
## Cumulative Var 0.60 0.68
## Proportion Explained 0.88 0.12
## Cumulative Proportion 0.88 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 2 factors are sufficient.
##
## df null model = 45 with the objective function = 8.15 with Chi Square = 2402.81
## df of the model are 26 and the objective function was 0.61
##
## The root mean square of the residuals (RMSR) is 0.03
## The df corrected root mean square of the residuals is 0.05
##
## The harmonic n.obs is 300 with the empirical chi square 32.69 with prob < 0.17
## The total n.obs was 300 with Likelihood Chi Square = 179.43 with prob < 7.1e-25
##
## Tucker Lewis Index of factoring reliability = 0.887
## RMSEA index = 0.14 and the 90 % confidence intervals are 0.121 0.16
## BIC = 31.13
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## ML1 ML2
## Correlation of (regression) scores with factors 0.98 0.88
## Multiple R square of scores with factors 0.96 0.77
## Minimum correlation of possible factor scores 0.92 0.54
# rotation varimax
# Same 2-factor maximum-likelihood model, with an orthogonal varimax rotation.
fa(r = hw4, nfactors = 2, rotate = "varimax", fm = "ml", cor = "poly")
## Factor Analysis using method = ml
## Call: fa(r = hw4, nfactors = 2, rotate = "varimax", fm = "ml", cor = "poly")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 ML2 h2 u2 com
## satisfied -0.26 0.84 0.76 0.24 1.2
## nogood -0.34 0.77 0.71 0.29 1.4
## goodqualities 0.72 -0.46 0.72 0.28 1.7
## dothingsasmostppl -0.27 0.66 0.52 0.48 1.3
## notmuchproud 0.61 -0.37 0.51 0.49 1.6
## useless -0.53 0.68 0.74 0.26 1.9
## equalasothers -0.52 0.66 0.70 0.30 1.9
## moreselfrespect 0.65 -0.34 0.54 0.46 1.5
## failure 0.83 -0.33 0.79 0.21 1.3
## positiveatt 0.89 -0.23 0.84 0.16 1.1
##
## ML1 ML2
## SS loadings 3.60 3.25
## Proportion Var 0.36 0.32
## Cumulative Var 0.36 0.68
## Proportion Explained 0.53 0.47
## Cumulative Proportion 0.53 1.00
##
## Mean item complexity = 1.5
## Test of the hypothesis that 2 factors are sufficient.
##
## df null model = 45 with the objective function = 8.15 with Chi Square = 2402.81
## df of the model are 26 and the objective function was 0.61
##
## The root mean square of the residuals (RMSR) is 0.03
## The df corrected root mean square of the residuals is 0.05
##
## The harmonic n.obs is 300 with the empirical chi square 32.69 with prob < 0.17
## The total n.obs was 300 with Likelihood Chi Square = 179.43 with prob < 7.1e-25
##
## Tucker Lewis Index of factoring reliability = 0.887
## RMSEA index = 0.14 and the 90 % confidence intervals are 0.121 0.16
## BIC = 31.13
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## ML1 ML2
## Correlation of (regression) scores with factors 0.94 0.92
## Multiple R square of scores with factors 0.88 0.85
## Minimum correlation of possible factor scores 0.76 0.70
# Fit the varimax model once and reuse it for both plots, instead of
# re-estimating the same model separately for each plotting call.
fa_varimax <- fa(
  hw4,
  nfactors = 2, rotate = "varimax", fm = "ml", cor = "poly"
)
factor.plot(fa_varimax)
fa.diagram(fa_varimax)
# rotation oblimin
# Use polychoric correlations, as the unrotated and varimax models do, so that
# all three solutions are fitted to the same correlation matrix and their fit
# indices are comparable.
# NOTE: the output recorded below was produced without cor = "poly" and has to
# be regenerated.
fa(hw4, nfactors = 2, rotate = "oblimin", fm = "ml", cor = "poly")
## Factor Analysis using method = ml
## Call: fa(r = hw4, nfactors = 2, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 ML2 h2 u2 com
## satisfied 0.06 0.85 0.65 0.35 1.0
## nogood -0.02 0.76 0.59 0.41 1.0
## goodqualities 0.59 -0.29 0.65 0.35 1.4
## dothingsasmostppl -0.03 0.64 0.43 0.57 1.0
## notmuchproud 0.53 -0.21 0.47 0.53 1.3
## useless -0.42 0.50 0.69 0.31 1.9
## equalasothers -0.41 0.48 0.66 0.34 2.0
## moreselfrespect 0.63 -0.09 0.48 0.52 1.0
## failure 0.85 0.02 0.70 0.30 1.0
## positiveatt 0.92 0.10 0.74 0.26 1.0
##
## ML1 ML2
## SS loadings 3.36 2.72
## Proportion Var 0.34 0.27
## Cumulative Var 0.34 0.61
## Proportion Explained 0.55 0.45
## Cumulative Proportion 0.55 1.00
##
## With factor correlations of
## ML1 ML2
## ML1 1.00 -0.65
## ML2 -0.65 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 2 factors are sufficient.
##
## df null model = 45 with the objective function = 6.14 with Chi Square = 1809.44
## df of the model are 26 and the objective function was 0.33
##
## The root mean square of the residuals (RMSR) is 0.03
## The df corrected root mean square of the residuals is 0.04
##
## The harmonic n.obs is 300 with the empirical chi square 29.01 with prob < 0.31
## The total n.obs was 300 with Likelihood Chi Square = 95.66 with prob < 6.7e-10
##
## Tucker Lewis Index of factoring reliability = 0.931
## RMSEA index = 0.094 and the 90 % confidence intervals are 0.075 0.115
## BIC = -52.64
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## ML1 ML2
## Correlation of (regression) scores with factors 0.95 0.93
## Multiple R square of scores with factors 0.91 0.87
## Minimum correlation of possible factor scores 0.82 0.74
Interpretation: Without rotation: TLI is 0.887 (not good, it should be more than 0.9), RMSEA is 0.14 (not good, it should be smaller than 0.08), and RMSR is 0.03 (quite close to 0). The proportion of variance is not satisfactory: 60% for the 1st factor and 8% for the 2nd; as for the proportion explained, the 1st factor accounts for 88% of the explained variance and is thus a dominant factor. Varimax: the fit indices are the same as without rotation (TLI 0.887, RMSEA 0.14, RMSR 0.03), because rotation does not change the fit of the model; what changes is the distribution of variance: 36% for the 1st factor and 32% for the 2nd (the same 68% in total), and as for the proportion explained, the 1st factor accounts for 53% and the 2nd for 47%, which is a much more even distribution. Oblimin: the grouping of items is similar to varimax, with a factor correlation of -0.65; its fit indices (TLI 0.931, RMSEA 0.094) cannot be compared directly with the other two models, because this model was fitted without cor = 'poly'. So, I chose the model with varimax rotation, as its factors are better balanced and easier to interpret than in the model without rotation.
In the diagram for the varimax rotation we see that the variables are split evenly between the 2 factors. The first factor comprises “positiveatt”, “failure”, “goodqualities”, “moreselfrespect” and “notmuchproud”, while the second one comprises “satisfied”, “nogood”, “dothingsasmostppl”, “useless” and “equalasothers”.
# Items assigned to the first factor, followed by their reliability analysis.
ML1 <- hw4[c(
  "positiveatt", "failure", "goodqualities", "moreselfrespect", "notmuchproud"
)]
ML1 <- as.data.frame(ML1)
alpha(x = ML1)
##
## Reliability analysis
## Call: alpha(x = ML1)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.88 0.88 0.86 0.59 7.2 0.011 2.4 0.84 0.58
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.86 0.88 0.9
## Duhachek 0.86 0.88 0.9
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## positiveatt 0.84 0.84 0.80 0.56 5.1 0.016 0.0034 0.57
## failure 0.84 0.84 0.80 0.57 5.2 0.015 0.0042 0.58
## goodqualities 0.85 0.85 0.81 0.58 5.5 0.014 0.0075 0.57
## moreselfrespect 0.87 0.87 0.84 0.62 6.6 0.012 0.0043 0.61
## notmuchproud 0.87 0.87 0.84 0.62 6.6 0.012 0.0045 0.61
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## positiveatt 300 0.87 0.86 0.83 0.78 2.4 1.09
## failure 300 0.86 0.86 0.82 0.76 2.7 1.03
## goodqualities 300 0.83 0.84 0.78 0.73 2.0 0.95
## moreselfrespect 300 0.77 0.77 0.68 0.64 2.6 1.01
## notmuchproud 300 0.77 0.77 0.68 0.64 2.2 1.01
##
## Non missing response frequency for each item
## 0 1 2 3 4 miss
## positiveatt 0.00 0.26 0.26 0.28 0.20 0
## failure 0.01 0.15 0.25 0.36 0.23 0
## goodqualities 0.00 0.33 0.37 0.21 0.09 0
## moreselfrespect 0.00 0.17 0.29 0.33 0.21 0
## notmuchproud 0.01 0.29 0.36 0.21 0.13 0
# Items assigned to the second factor, followed by their reliability analysis.
ML2 <- hw4[c(
  "satisfied", "nogood", "dothingsasmostppl", "useless", "equalasothers"
)]
ML2 <- as.data.frame(ML2)
alpha(x = ML2)
##
## Reliability analysis
## Call: alpha(x = ML2)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.87 0.87 0.86 0.57 6.7 0.012 3 0.7 0.55
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.84 0.87 0.89
## Duhachek 0.84 0.87 0.89
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r
## satisfied 0.84 0.84 0.82 0.57 5.3 0.015 0.0101
## nogood 0.84 0.84 0.82 0.57 5.3 0.015 0.0106
## dothingsasmostppl 0.86 0.86 0.84 0.61 6.3 0.013 0.0067
## useless 0.82 0.83 0.79 0.55 4.9 0.017 0.0032
## equalasothers 0.83 0.84 0.80 0.56 5.1 0.016 0.0029
## med.r
## satisfied 0.55
## nogood 0.54
## dothingsasmostppl 0.58
## useless 0.55
## equalasothers 0.56
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## satisfied 300 0.81 0.82 0.75 0.70 3.2 0.80
## nogood 300 0.80 0.82 0.75 0.70 3.3 0.73
## dothingsasmostppl 300 0.74 0.75 0.65 0.60 3.0 0.81
## useless 300 0.86 0.84 0.81 0.76 2.7 0.95
## equalasothers 300 0.85 0.83 0.79 0.73 2.6 0.99
##
## Non missing response frequency for each item
## 0 1 2 3 4 miss
## satisfied 0.00 0.03 0.12 0.45 0.39 0
## nogood 0.00 0.02 0.06 0.47 0.44 0
## dothingsasmostppl 0.01 0.03 0.17 0.51 0.28 0
## useless 0.01 0.10 0.28 0.38 0.23 0
## equalasothers 0.01 0.15 0.27 0.38 0.20 0
Interpretation: the 1st factor’s alpha is equal to 0.88 (95% confidence boundaries 0.86–0.90), which is quite good reliability. The 2nd factor’s alpha equals 0.87 (95% confidence boundaries 0.84–0.89), which is also a quite good estimate.
# Fit the 2-factor varimax model on polychoric correlations and compute the
# factor scores for each row of hw4.
fa_hw4 <- fa(hw4, nfactors = 2, rotate = "varimax", fm = "ml", cor = "poly")
fsl_hw4 <- factor.scores(hw4, fa_hw4)
fs_hw4 <- data.frame(fsl_hw4$scores)
names(fs_hw4) <- c("factor 1", "factor 2")
# Attach the scores as two new columns, matched on row names. The item columns
# of hw4 are left intact: assigning the scores into hw4[, 1:2] would overwrite
# the first two items ("satisfied" and "nogood") with factor scores.
hw4_f <- merge(hw4, fs_hw4, by = "row.names")
# Distribution and summary statistics of the two score columns.
hist(hw4_f$`factor 1`)
hist(hw4_f$`factor 2`)
table1(~ hw4_f$`factor 1` + hw4_f$`factor 2`)
Overall (N=300) |
|
---|---|
hw4_f$`factor 1` | |
Mean (SD) | -0.0000000000000000487 (1.00) |
Median [Min, Max] | 0.0245 [-5.28, 2.18] |
hw4_f$`factor 2` | |
Mean (SD) | -0.000000000000000144 (1.00) |
Median [Min, Max] | 0.0718 [-7.07, 1.97] |
Interpretation: The histogram of the 1st factor shows that the scores
are not normally distributed and have some outliers on the left. As
for the 2nd factor, we can see that the scores are visibly left-skewed
and, thus, also not normally distributed.
For the 1st factor the values range from -5.28 to 2.18, with a mean of
approximately 0 (-0.0000000000000000487) and a median of 0.0245. For
the 2nd factor the values range from -7.07 to 1.97, with a mean that
is also close to 0 (-0.000000000000000144) and a median of
0.0718.