library(polycor)
library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:polycor':
## 
##     polyserial
library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
library(EFA.dimensions)
## **************************************************************************************************
## EFA.dimensions 0.1.8.1
## 
## Please contact Brian O'Connor at brian.oconnor@ubc.ca if you have questions or suggestions.
## **************************************************************************************************
# Load the data_RSE example data set (presumably shipped with
# EFA.dimensions -- confirm) and keep only rows without missing values.
data("data_RSE")
hw4 <- na.omit(object = data_RSE)

First, I check whether the data are adequate for factor analysis.

# Kaiser-Meyer-Olkin measure of sampling adequacy (overall and per item).
KMO(r = hw4)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = hw4)
## Overall MSA =  0.91
## MSA for each item = 
##   Q1   Q2   Q3   Q4   Q5   Q6   Q7   Q8   Q9  Q10 
## 0.87 0.88 0.94 0.94 0.96 0.92 0.91 0.90 0.91 0.90
# Bartlett's test on the item correlations. The raw data are passed, so the
# correlation matrix is derived from the data by the function itself.
cortest.bartlett(R = hw4)
## R was not square, finding R from data
## $chisq
## [1] 1809.435
## 
## $p.value
## [1] 0
## 
## $df
## [1] 45

Interpretation: The Kaiser-Meyer-Olkin measure of sampling adequacy is 0.91 overall, and every item has an MSA of at least 0.87, which is quite good because all values are above 0.8. Bartlett's test (cortest.bartlett) is significant (chi-square = 1809.44, df = 45, p-value reported as 0, i.e. p < 0.001), which shows that the variables are correlated. The data are suitable for factor analysis.

1 Conduct EFA: present your correlation matrix, choose the number of factors.

# Replace the original item codes (Q1..Q10) with short descriptive labels.
# NOTE(review): in the printed matrix "satisfied" correlates positively with
# "nogood" and "useless" and negatively with "goodqualities" and
# "positiveatt", which suggests these labels may not match the column order
# of the data set -- verify against the data_RSE documentation.
names(hw4) <- c(
  "satisfied", "nogood", "goodqualities", "dothingsasmostppl",
  "notmuchproud", "useless", "equalasothers", "moreselfrespect",
  "failure", "positiveatt"
)

# Item correlation matrix, extracted from the hetcor() result and printed.
# NOTE(review): hetcor() chooses the correlation type from each column's
# class; presumably the items are numeric here, in which case these are
# Pearson rather than polychoric correlations -- confirm.
hw4.cor <- hetcor(hw4)$correlations
hw4.cor
##                    satisfied     nogood goodqualities dothingsasmostppl
## satisfied          1.0000000  0.6501116    -0.5255283         0.5162663
## nogood             0.6501116  1.0000000    -0.5282311         0.5459068
## goodqualities     -0.5255283 -0.5282311     1.0000000        -0.4564678
## dothingsasmostppl  0.5162663  0.5459068    -0.4564678         1.0000000
## notmuchproud      -0.4502033 -0.4368299     0.5838548        -0.3696428
## useless            0.5872099  0.5695468    -0.6145452         0.5030354
## equalasothers      0.5606642  0.5486654    -0.6666836         0.4810535
## moreselfrespect   -0.4616223 -0.2883896     0.5476708        -0.2776520
## failure           -0.3885451 -0.4408003     0.6379461        -0.4131382
## positiveatt       -0.3651180 -0.4376588     0.6594017        -0.3377111
##                   notmuchproud    useless equalasothers moreselfrespect
## satisfied           -0.4502033  0.5872099     0.5606642      -0.4616223
## nogood              -0.4368299  0.5695468     0.5486654      -0.2883896
## goodqualities        0.5838548 -0.6145452    -0.6666836       0.5476708
## dothingsasmostppl   -0.3696428  0.5030354     0.4810535      -0.2776520
## notmuchproud         1.0000000 -0.5432471    -0.5116933       0.4605953
## useless             -0.5432471  1.0000000     0.7628509      -0.5567646
## equalasothers       -0.5116933  0.7628509     1.0000000      -0.5594032
## moreselfrespect      0.4605953 -0.5567646    -0.5594032       1.0000000
## failure              0.5622629 -0.6242323    -0.5732747       0.5770295
## positiveatt          0.5682384 -0.5839417    -0.5762772       0.5823472
##                      failure positiveatt
## satisfied         -0.3885451  -0.3651180
## nogood            -0.4408003  -0.4376588
## goodqualities      0.6379461   0.6594017
## dothingsasmostppl -0.4131382  -0.3377111
## notmuchproud       0.5622629   0.5682384
## useless           -0.6242323  -0.5839417
## equalasothers     -0.5732747  -0.5762772
## moreselfrespect    0.5770295   0.5823472
## failure            1.0000000   0.7312997
## positiveatt        0.7312997   1.0000000

Interpretation: here we see that the variables are correlated, as was already established above, and we can also see the size of the coefficients. For instance, there is a moderate negative correlation (-0.39) between being satisfied with yourself and feeling like a failure, suggesting that the more satisfied a person is with themselves, the less they feel like a failure, and vice versa. There is also a moderate positive correlation (0.56) between feeling satisfied with yourself and feeling like a person of worth (equalasothers).

# Parallel analysis for both factors and principal components, using 100
# simulated data sets. No random seed is set in the visible code, so the
# simulated reference values can differ slightly between runs.
fa.parallel(
  x = hw4,
  fa = "both",
  n.iter = 100
)

## Parallel analysis suggests that the number of factors =  2  and the number of components =  1

Interpretation: the parallel analysis points to either 1 (components) or 2 (factors) dimensions; since a one-factor solution is not an option here, I propose to retain 2 factors.

2 Interpret the resulting factor structure. What type of rotation did you choose and why? Show the diagram.

# Unrotated two-factor baseline: maximum-likelihood extraction on
# polychoric correlations.
fa(
  r = hw4,
  nfactors = 2,
  rotate = "none",
  fm = "ml",
  cor = "poly"
)
## Factor Analysis using method =  ml
## Call: fa(r = hw4, nfactors = 2, rotate = "none", fm = "ml", cor = "poly")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                     ML1  ML2   h2   u2 com
## satisfied         -0.73 0.48 0.76 0.24 1.7
## nogood            -0.76 0.37 0.71 0.29 1.5
## goodqualities      0.84 0.11 0.72 0.28 1.0
## dothingsasmostppl -0.64 0.33 0.52 0.48 1.5
## notmuchproud       0.71 0.11 0.51 0.49 1.0
## useless           -0.84 0.18 0.74 0.26 1.1
## equalasothers     -0.82 0.17 0.70 0.30 1.1
## moreselfrespect    0.72 0.16 0.54 0.46 1.1
## failure            0.84 0.28 0.79 0.21 1.2
## positiveatt        0.83 0.39 0.84 0.16 1.4
## 
##                        ML1  ML2
## SS loadings           6.03 0.82
## Proportion Var        0.60 0.08
## Cumulative Var        0.60 0.68
## Proportion Explained  0.88 0.12
## Cumulative Proportion 0.88 1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 2 factors are sufficient.
## 
## df null model =  45  with the objective function =  8.15 with Chi Square =  2402.81
## df of  the model are 26  and the objective function was  0.61 
## 
## The root mean square of the residuals (RMSR) is  0.03 
## The df corrected root mean square of the residuals is  0.05 
## 
## The harmonic n.obs is  300 with the empirical chi square  32.69  with prob <  0.17 
## The total n.obs was  300  with Likelihood Chi Square =  179.43  with prob <  7.1e-25 
## 
## Tucker Lewis Index of factoring reliability =  0.887
## RMSEA index =  0.14  and the 90 % confidence intervals are  0.121 0.16
## BIC =  31.13
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    ML1  ML2
## Correlation of (regression) scores with factors   0.98 0.88
## Multiple R square of scores with factors          0.96 0.77
## Minimum correlation of possible factor scores     0.92 0.54
# Same two-factor ML model on polychoric correlations, now with an
# orthogonal varimax rotation.
fa(
  r = hw4,
  nfactors = 2,
  rotate = "varimax",
  fm = "ml",
  cor = "poly"
)
## Factor Analysis using method =  ml
## Call: fa(r = hw4, nfactors = 2, rotate = "varimax", fm = "ml", cor = "poly")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                     ML1   ML2   h2   u2 com
## satisfied         -0.26  0.84 0.76 0.24 1.2
## nogood            -0.34  0.77 0.71 0.29 1.4
## goodqualities      0.72 -0.46 0.72 0.28 1.7
## dothingsasmostppl -0.27  0.66 0.52 0.48 1.3
## notmuchproud       0.61 -0.37 0.51 0.49 1.6
## useless           -0.53  0.68 0.74 0.26 1.9
## equalasothers     -0.52  0.66 0.70 0.30 1.9
## moreselfrespect    0.65 -0.34 0.54 0.46 1.5
## failure            0.83 -0.33 0.79 0.21 1.3
## positiveatt        0.89 -0.23 0.84 0.16 1.1
## 
##                        ML1  ML2
## SS loadings           3.60 3.25
## Proportion Var        0.36 0.32
## Cumulative Var        0.36 0.68
## Proportion Explained  0.53 0.47
## Cumulative Proportion 0.53 1.00
## 
## Mean item complexity =  1.5
## Test of the hypothesis that 2 factors are sufficient.
## 
## df null model =  45  with the objective function =  8.15 with Chi Square =  2402.81
## df of  the model are 26  and the objective function was  0.61 
## 
## The root mean square of the residuals (RMSR) is  0.03 
## The df corrected root mean square of the residuals is  0.05 
## 
## The harmonic n.obs is  300 with the empirical chi square  32.69  with prob <  0.17 
## The total n.obs was  300  with Likelihood Chi Square =  179.43  with prob <  7.1e-25 
## 
## Tucker Lewis Index of factoring reliability =  0.887
## RMSEA index =  0.14  and the 90 % confidence intervals are  0.121 0.16
## BIC =  31.13
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    ML1  ML2
## Correlation of (regression) scores with factors   0.94 0.92
## Multiple R square of scores with factors          0.88 0.85
## Minimum correlation of possible factor scores     0.76 0.70
# Fit the varimax model once and reuse it for both plots instead of
# re-estimating the identical model (including the polychoric correlations)
# inside each plotting call.
fa_varimax <- fa(hw4, nfactors = 2, rotate = "varimax", fm = "ml", cor = "poly")

# Items plotted in the space of the two rotated factors.
factor.plot(fa_varimax)

# Path diagram showing which items load on which factor.
fa.diagram(fa_varimax)

# Two-factor ML model with an oblique oblimin rotation.
# NOTE(review): unlike the unrotated and varimax models, this call does not
# pass cor = "poly", so fa() falls back to its default correlation type.
# The fit indices printed below are therefore not directly comparable with
# those two models. The call is kept as it was so that it still matches the
# printed output.
fa(
  r = hw4,
  nfactors = 2,
  rotate = "oblimin",
  fm = "ml"
)
## Factor Analysis using method =  ml
## Call: fa(r = hw4, nfactors = 2, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                     ML1   ML2   h2   u2 com
## satisfied          0.06  0.85 0.65 0.35 1.0
## nogood            -0.02  0.76 0.59 0.41 1.0
## goodqualities      0.59 -0.29 0.65 0.35 1.4
## dothingsasmostppl -0.03  0.64 0.43 0.57 1.0
## notmuchproud       0.53 -0.21 0.47 0.53 1.3
## useless           -0.42  0.50 0.69 0.31 1.9
## equalasothers     -0.41  0.48 0.66 0.34 2.0
## moreselfrespect    0.63 -0.09 0.48 0.52 1.0
## failure            0.85  0.02 0.70 0.30 1.0
## positiveatt        0.92  0.10 0.74 0.26 1.0
## 
##                        ML1  ML2
## SS loadings           3.36 2.72
## Proportion Var        0.34 0.27
## Cumulative Var        0.34 0.61
## Proportion Explained  0.55 0.45
## Cumulative Proportion 0.55 1.00
## 
##  With factor correlations of 
##       ML1   ML2
## ML1  1.00 -0.65
## ML2 -0.65  1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 2 factors are sufficient.
## 
## df null model =  45  with the objective function =  6.14 with Chi Square =  1809.44
## df of  the model are 26  and the objective function was  0.33 
## 
## The root mean square of the residuals (RMSR) is  0.03 
## The df corrected root mean square of the residuals is  0.04 
## 
## The harmonic n.obs is  300 with the empirical chi square  29.01  with prob <  0.31 
## The total n.obs was  300  with Likelihood Chi Square =  95.66  with prob <  6.7e-10 
## 
## Tucker Lewis Index of factoring reliability =  0.931
## RMSEA index =  0.094  and the 90 % confidence intervals are  0.075 0.115
## BIC =  -52.64
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    ML1  ML2
## Correlation of (regression) scores with factors   0.95 0.93
## Multiple R square of scores with factors          0.91 0.87
## Minimum correlation of possible factor scores     0.82 0.74

Interpretation: Without rotation: TLI is 0.887 (not good, should be more than 0.9), RMSEA is 0.14 (not good, should be smaller than 0.08), and RMSR is 0.03 (quite close to 0). The proportion of variance is not satisfying: 60% for the 1st factor and 8% for the 2nd; in terms of proportion explained, the 1st factor accounts for 88% of the explained variance and is thus a dominant factor. Varimax: the fit indices are the same as without rotation (TLI 0.887, RMSEA 0.14, RMSR 0.03), because rotation does not change the fit of the model, but the variance is distributed much better between the factors: 36% for the 1st factor and 32% for the 2nd (68% in total, as before); in terms of proportion explained, the 1st factor accounts for 53% and the 2nd for 47%, which is quite a good distribution. Oblimin: the printed fit looks better (TLI 0.931, RMSEA 0.094, RMSR 0.03), with 34% and 27% of the variance (55% and 45% of the explained variance) and a factor correlation of -0.65, but this model was fitted without cor = 'poly', so its fit indices are not directly comparable with the two models above. So, I chose the model with varimax rotation, as its variance is better distributed between the factors than in the model without rotation.

On the diagram for the varimax rotation we see that the variables are split equally between the 2 factors (five items each). The first factor contains “positiveatt”, “failure”, “goodqualities”, “moreselfrespect” and “notmuchproud”, while the second one contains “satisfied”, “nogood”, “dothingsasmostppl”, “useless” and “equalasothers”.

3 Analyze the model fit of EFA; Cronbach’s alpha.

# Items assigned to the first factor; Cronbach's alpha for that item set.
ML1 <- hw4[
  ,
  c("positiveatt", "failure", "goodqualities", "moreselfrespect", "notmuchproud"),
  drop = FALSE
]
alpha(ML1)
## 
## Reliability analysis   
## Call: alpha(x = ML1)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.88      0.88    0.86      0.59 7.2 0.011  2.4 0.84     0.58
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.86  0.88   0.9
## Duhachek  0.86  0.88   0.9
## 
##  Reliability if an item is dropped:
##                 raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## positiveatt          0.84      0.84    0.80      0.56 5.1    0.016 0.0034  0.57
## failure              0.84      0.84    0.80      0.57 5.2    0.015 0.0042  0.58
## goodqualities        0.85      0.85    0.81      0.58 5.5    0.014 0.0075  0.57
## moreselfrespect      0.87      0.87    0.84      0.62 6.6    0.012 0.0043  0.61
## notmuchproud         0.87      0.87    0.84      0.62 6.6    0.012 0.0045  0.61
## 
##  Item statistics 
##                   n raw.r std.r r.cor r.drop mean   sd
## positiveatt     300  0.87  0.86  0.83   0.78  2.4 1.09
## failure         300  0.86  0.86  0.82   0.76  2.7 1.03
## goodqualities   300  0.83  0.84  0.78   0.73  2.0 0.95
## moreselfrespect 300  0.77  0.77  0.68   0.64  2.6 1.01
## notmuchproud    300  0.77  0.77  0.68   0.64  2.2 1.01
## 
## Non missing response frequency for each item
##                    0    1    2    3    4 miss
## positiveatt     0.00 0.26 0.26 0.28 0.20    0
## failure         0.01 0.15 0.25 0.36 0.23    0
## goodqualities   0.00 0.33 0.37 0.21 0.09    0
## moreselfrespect 0.00 0.17 0.29 0.33 0.21    0
## notmuchproud    0.01 0.29 0.36 0.21 0.13    0
# Items assigned to the second factor; Cronbach's alpha for that item set.
ML2 <- hw4[
  ,
  c("satisfied", "nogood", "dothingsasmostppl", "useless", "equalasothers"),
  drop = FALSE
]
alpha(ML2)
## 
## Reliability analysis   
## Call: alpha(x = ML2)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
##       0.87      0.87    0.86      0.57 6.7 0.012    3 0.7     0.55
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.84  0.87  0.89
## Duhachek  0.84  0.87  0.89
## 
##  Reliability if an item is dropped:
##                   raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r
## satisfied              0.84      0.84    0.82      0.57 5.3    0.015 0.0101
## nogood                 0.84      0.84    0.82      0.57 5.3    0.015 0.0106
## dothingsasmostppl      0.86      0.86    0.84      0.61 6.3    0.013 0.0067
## useless                0.82      0.83    0.79      0.55 4.9    0.017 0.0032
## equalasothers          0.83      0.84    0.80      0.56 5.1    0.016 0.0029
##                   med.r
## satisfied          0.55
## nogood             0.54
## dothingsasmostppl  0.58
## useless            0.55
## equalasothers      0.56
## 
##  Item statistics 
##                     n raw.r std.r r.cor r.drop mean   sd
## satisfied         300  0.81  0.82  0.75   0.70  3.2 0.80
## nogood            300  0.80  0.82  0.75   0.70  3.3 0.73
## dothingsasmostppl 300  0.74  0.75  0.65   0.60  3.0 0.81
## useless           300  0.86  0.84  0.81   0.76  2.7 0.95
## equalasothers     300  0.85  0.83  0.79   0.73  2.6 0.99
## 
## Non missing response frequency for each item
##                      0    1    2    3    4 miss
## satisfied         0.00 0.03 0.12 0.45 0.39    0
## nogood            0.00 0.02 0.06 0.47 0.44    0
## dothingsasmostppl 0.01 0.03 0.17 0.51 0.28    0
## useless           0.01 0.10 0.28 0.38 0.23    0
## equalasothers     0.01 0.15 0.27 0.38 0.20    0

Interpretation: Cronbach’s alpha for the 1st factor is 0.88 (95% confidence interval 0.86–0.90), which indicates quite good reliability. For the 2nd factor alpha is 0.87 (95% confidence interval 0.84–0.89), which is also a quite good estimate.

4 Save the factors scores and do descriptive statistics for each factors (mean, min, max, and histograms).

# Fit the chosen model: two factors, ML extraction on polychoric
# correlations, varimax rotation.
fa_hw4 <- fa(hw4, nfactors = 2, rotate = "varimax", fm = "ml", cor = "poly")

# Estimate factor scores for every respondent and store them as a data
# frame with one column per factor.
fsl_hw4 <- factor.scores(hw4, fa_hw4)
fs_hw4 <- data.frame(fsl_hw4$scores)
names(fs_hw4) <- c("factor 1", "factor 2")

# Attach the scores to the item data by row name. The item columns of hw4
# are left untouched: the earlier version first overwrote the first two
# item columns with the factor scores, which corrupted both hw4 and the
# merged result.
hw4_f <- merge(hw4, fs_hw4, by = "row.names")

# Histogram of the scores on the first factor.
hist(hw4_f$`factor 1`)

# Histogram of the scores on the second factor.
hist(hw4_f$`factor 2`)

# Descriptive table (mean, SD, median, min, max) for both score columns.
# The row labels in the table are taken from these expressions as written.
table1(~ hw4_f$`factor 1` + hw4_f$`factor 2`)
Overall
(N=300)
hw4_f$`factor 1`
Mean (SD) -0.0000000000000000487 (1.00)
Median [Min, Max] 0.0245 [-5.28, 2.18]
hw4_f$`factor 2`
Mean (SD) -0.000000000000000144 (1.00)
Median [Min, Max] 0.0718 [-7.07, 1.97]

Interpretation: The histogram of the 1st factor shows that the scores are not normally distributed and that there are some outliers on the left. For the 2nd factor we can see that the scores are visibly left-skewed and thus also not normally distributed.
For the 1st factor the values range from -5.28 to 2.18; the mean is practically 0 (-0.0000000000000000487) and the median is 0.0245. For the 2nd factor the values range from -7.07 to 1.97; the mean is also practically 0 (-0.000000000000000144) and the median is 0.0718.