Speed Dating dataset (Kaggle): “What influences love at first sight?” About the experiment: https://www.kaggle.com/annavictoria/speed-dating-experiment
# Read the speed-dating CSV into a data frame and list its column names.
dating <- read.csv(file = "03 Speed Dating Data.csv")
colnames(dating)
## [1] "iid" "id" "gender" "idg" "condtn" "wave"
## [7] "round" "position" "positin1" "order" "partner" "pid"
## [13] "match" "int_corr" "samerace" "age_o" "race_o" "pf_o_att"
## [19] "pf_o_sin" "pf_o_int" "pf_o_fun" "pf_o_amb" "pf_o_sha" "dec_o"
## [25] "attr_o" "sinc_o" "intel_o" "fun_o" "amb_o" "shar_o"
## [31] "like_o" "prob_o" "met_o" "age" "field" "field_cd"
## [37] "undergra" "mn_sat" "tuition" "race" "imprace" "imprelig"
## [43] "from" "zipcode" "income" "goal" "date" "go_out"
## [49] "career" "career_c" "sports" "tvsports" "exercise" "dining"
## [55] "museums" "art" "hiking" "gaming" "clubbing" "reading"
## [61] "tv" "theater" "movies" "concerts" "music" "shopping"
## [67] "yoga" "exphappy" "expnum" "attr1_1" "sinc1_1" "intel1_1"
## [73] "fun1_1" "amb1_1" "shar1_1" "attr4_1" "sinc4_1" "intel4_1"
## [79] "fun4_1" "amb4_1" "shar4_1" "attr2_1" "sinc2_1" "intel2_1"
## [85] "fun2_1" "amb2_1" "shar2_1" "attr3_1" "sinc3_1" "fun3_1"
## [91] "intel3_1" "amb3_1" "attr5_1" "sinc5_1" "intel5_1" "fun5_1"
## [97] "amb5_1" "dec" "attr" "sinc" "intel" "fun"
## [103] "amb" "shar" "like" "prob" "met" "match_es"
## [109] "attr1_s" "sinc1_s" "intel1_s" "fun1_s" "amb1_s" "shar1_s"
## [115] "attr3_s" "sinc3_s" "intel3_s" "fun3_s" "amb3_s" "satis_2"
## [121] "length" "numdat_2" "attr7_2" "sinc7_2" "intel7_2" "fun7_2"
## [127] "amb7_2" "shar7_2" "attr1_2" "sinc1_2" "intel1_2" "fun1_2"
## [133] "amb1_2" "shar1_2" "attr4_2" "sinc4_2" "intel4_2" "fun4_2"
## [139] "amb4_2" "shar4_2" "attr2_2" "sinc2_2" "intel2_2" "fun2_2"
## [145] "amb2_2" "shar2_2" "attr3_2" "sinc3_2" "intel3_2" "fun3_2"
## [151] "amb3_2" "attr5_2" "sinc5_2" "intel5_2" "fun5_2" "amb5_2"
## [157] "you_call" "them_cal" "date_3" "numdat_3" "num_in_3" "attr1_3"
## [163] "sinc1_3" "intel1_3" "fun1_3" "amb1_3" "shar1_3" "attr7_3"
## [169] "sinc7_3" "intel7_3" "fun7_3" "amb7_3" "shar7_3" "attr4_3"
## [175] "sinc4_3" "intel4_3" "fun4_3" "amb4_3" "shar4_3" "attr2_3"
## [181] "sinc2_3" "intel2_3" "fun2_3" "amb2_3" "shar2_3" "attr3_3"
## [187] "sinc3_3" "intel3_3" "fun3_3" "amb3_3" "attr5_3" "sinc5_3"
## [193] "intel5_3" "fun5_3" "amb5_3"
# Names of the 39 columns kept for the factor analysis. The three rating
# series share the same stems and differ only in their suffix.
rating_stems <- c("attr", "sinc", "intel", "fun", "amb", "shar")
selected_vars <- c(
  "imprace", "imprelig", "date", "go_out",
  "sports", "tvsports", "exercise", "dining", "museums", "art", "hiking",
  "gaming", "clubbing", "reading", "tv", "theater", "movies", "concerts",
  "music", "shopping", "yoga",
  "exphappy",
  paste0(rating_stems, "1_1"),
  paste0(rating_stems, "2_1"),
  # the *3_1 series is selected without a "shar" item
  paste0(setdiff(rating_stems, "shar"), "3_1")
)
dating1 <- as.data.frame(dating[selected_vars])
dim(dating1)
## [1] 8378 39
# Summarise every selected column (quartiles, mean and NA count).
summary(dating1)
## imprace imprelig date go_out
## Min. : 0.000 Min. : 1.000 Min. :1.000 Min. :1.000
## 1st Qu.: 1.000 1st Qu.: 1.000 1st Qu.:4.000 1st Qu.:1.000
## Median : 3.000 Median : 3.000 Median :5.000 Median :2.000
## Mean : 3.785 Mean : 3.652 Mean :5.007 Mean :2.158
## 3rd Qu.: 6.000 3rd Qu.: 6.000 3rd Qu.:6.000 3rd Qu.:3.000
## Max. :10.000 Max. :10.000 Max. :7.000 Max. :7.000
## NA's :79 NA's :79 NA's :97 NA's :79
## sports tvsports exercise dining
## Min. : 1.000 Min. : 1.000 Min. : 1.000 Min. : 1.000
## 1st Qu.: 4.000 1st Qu.: 2.000 1st Qu.: 5.000 1st Qu.: 7.000
## Median : 7.000 Median : 4.000 Median : 6.000 Median : 8.000
## Mean : 6.425 Mean : 4.575 Mean : 6.246 Mean : 7.784
## 3rd Qu.: 9.000 3rd Qu.: 7.000 3rd Qu.: 8.000 3rd Qu.: 9.000
## Max. :10.000 Max. :10.000 Max. :10.000 Max. :10.000
## NA's :79 NA's :79 NA's :79 NA's :79
## museums art hiking gaming
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 6.000 1st Qu.: 5.000 1st Qu.: 4.000 1st Qu.: 2.000
## Median : 7.000 Median : 7.000 Median : 6.000 Median : 3.000
## Mean : 6.986 Mean : 6.715 Mean : 5.737 Mean : 3.881
## 3rd Qu.: 9.000 3rd Qu.: 8.000 3rd Qu.: 8.000 3rd Qu.: 6.000
## Max. :10.000 Max. :10.000 Max. :10.000 Max. :14.000
## NA's :79 NA's :79 NA's :79 NA's :79
## clubbing reading tv theater
## Min. : 0.000 Min. : 1.000 Min. : 1.000 Min. : 0.000
## 1st Qu.: 4.000 1st Qu.: 7.000 1st Qu.: 3.000 1st Qu.: 5.000
## Median : 6.000 Median : 8.000 Median : 6.000 Median : 7.000
## Mean : 5.746 Mean : 7.679 Mean : 5.304 Mean : 6.776
## 3rd Qu.: 8.000 3rd Qu.: 9.000 3rd Qu.: 7.000 3rd Qu.: 9.000
## Max. :10.000 Max. :13.000 Max. :10.000 Max. :10.000
## NA's :79 NA's :79 NA's :79 NA's :79
## movies concerts music shopping
## Min. : 0.00 Min. : 0.000 Min. : 1.000 Min. : 1.000
## 1st Qu.: 7.00 1st Qu.: 5.000 1st Qu.: 7.000 1st Qu.: 4.000
## Median : 8.00 Median : 7.000 Median : 8.000 Median : 6.000
## Mean : 7.92 Mean : 6.825 Mean : 7.851 Mean : 5.631
## 3rd Qu.: 9.00 3rd Qu.: 8.000 3rd Qu.: 9.000 3rd Qu.: 8.000
## Max. :10.00 Max. :10.000 Max. :10.000 Max. :10.000
## NA's :79 NA's :79 NA's :79 NA's :79
## yoga exphappy attr1_1 sinc1_1
## Min. : 0.000 Min. : 1.000 Min. : 0.00 Min. : 0.00
## 1st Qu.: 2.000 1st Qu.: 5.000 1st Qu.: 15.00 1st Qu.:15.00
## Median : 4.000 Median : 6.000 Median : 20.00 Median :18.18
## Mean : 4.339 Mean : 5.534 Mean : 22.51 Mean :17.40
## 3rd Qu.: 7.000 3rd Qu.: 7.000 3rd Qu.: 25.00 3rd Qu.:20.00
## Max. :10.000 Max. :10.000 Max. :100.00 Max. :60.00
## NA's :79 NA's :101 NA's :79 NA's :79
## intel1_1 fun1_1 amb1_1 shar1_1
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.:17.39 1st Qu.:15.00 1st Qu.: 5.00 1st Qu.: 9.52
## Median :20.00 Median :18.00 Median :10.00 Median :10.64
## Mean :20.27 Mean :17.46 Mean :10.68 Mean :11.85
## 3rd Qu.:23.81 3rd Qu.:20.00 3rd Qu.:15.00 3rd Qu.:16.00
## Max. :50.00 Max. :50.00 Max. :53.00 Max. :30.00
## NA's :79 NA's :89 NA's :99 NA's :121
## attr2_1 sinc2_1 intel2_1 fun2_1
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 20.00 1st Qu.:10.00 1st Qu.:10.00 1st Qu.:15.00
## Median : 25.00 Median :15.00 Median :15.00 Median :20.00
## Mean : 30.36 Mean :13.27 Mean :14.42 Mean :18.42
## 3rd Qu.: 40.00 3rd Qu.:18.75 3rd Qu.:20.00 3rd Qu.:20.00
## Max. :100.00 Max. :50.00 Max. :40.00 Max. :50.00
## NA's :79 NA's :79 NA's :79 NA's :79
## amb2_1 shar2_1 attr3_1 sinc3_1
## Min. : 0.00 Min. : 0.00 Min. : 2.000 Min. : 2.000
## 1st Qu.: 6.00 1st Qu.:10.00 1st Qu.: 6.000 1st Qu.: 8.000
## Median :10.00 Median :10.00 Median : 7.000 Median : 8.000
## Mean :11.74 Mean :11.85 Mean : 7.085 Mean : 8.295
## 3rd Qu.:15.00 3rd Qu.:15.63 3rd Qu.: 8.000 3rd Qu.: 9.000
## Max. :50.00 Max. :30.00 Max. :10.000 Max. :10.000
## NA's :89 NA's :89 NA's :105 NA's :105
## intel3_1 fun3_1 amb3_1
## Min. : 3.000 Min. : 2.000 Min. : 2.000
## 1st Qu.: 8.000 1st Qu.: 7.000 1st Qu.: 7.000
## Median : 8.000 Median : 8.000 Median : 8.000
## Mean : 8.404 Mean : 7.704 Mean : 7.578
## 3rd Qu.: 9.000 3rd Qu.: 9.000 3rd Qu.: 9.000
## Max. :10.000 Max. :10.000 Max. :10.000
## NA's :105 NA's :105 NA's :105
Are there latent factors which explain correlations of the observed variables?
# fa() does not work with NA, so keep only the complete rows
dating12 <- na.omit(object = dating1)
library(psych)
# Parallel analysis (n.iter = 100 simulations) to suggest how many
# factors and components should be extracted
fa.parallel(x = dating12, fa = "both", n.iter = 100)
## Parallel analysis suggests that the number of factors = 15 and the number of components = 13
# Eigen-decomposition of the correlation matrix of the complete cases
ev <- eigen(x = cor(x = dating12))
ev[["values"]] # eigenvalues; should be above 1
## [1] 4.240805334 3.225700726 3.001311052 2.075174070 2.006394000 1.753167212
## [7] 1.541976071 1.403448949 1.366740253 1.229150383 1.116849931 1.087840739
## [13] 1.070198555 1.023434932 0.953842841 0.910671530 0.860457948 0.802899771
## [19] 0.754484520 0.735743309 0.704943845 0.692297120 0.629971899 0.601912397
## [25] 0.572104868 0.553155599 0.533458896 0.499442939 0.466896390 0.418553029
## [31] 0.407736154 0.392303528 0.363466985 0.307868448 0.281192832 0.266506679
## [37] 0.127837805 0.013417701 0.006640759
# nScree() and plotnScree() belong to the nFactors package, which is not
# attached anywhere in the visible script; calling them through the
# namespace keeps this step from failing with "could not find function"
# and avoids masking anything from psych.
nS <- nFactors::nScree(x = ev$values) # get data for scree plot
nFactors::plotnScree(nS) # most tests say we should extract 3 factors
Let’s try to use the maximum number of factors.
# Unrotated maximum-likelihood solution with 15 factors
fa(r = dating12, nfactors = 15, rotate = "none", fm = "ml")
## Factor Analysis using method = ml
## Call: fa(r = dating12, nfactors = 15, rotate = "none", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 ML11 ML2 ML10 ML12 ML13 ML3 ML4 ML14 ML8 ML15
## imprace -0.06 -0.01 0.00 0.14 0.18 -0.10 0.01 0.08 0.26 0.01 -0.08
## imprelig -0.01 -0.09 0.17 0.04 0.11 -0.02 0.09 0.11 0.19 -0.02 0.02
## date 0.02 -0.09 0.23 -0.03 -0.05 -0.13 0.04 -0.06 -0.07 0.05 -0.20
## go_out 0.05 0.03 0.12 -0.01 -0.07 -0.18 0.05 -0.15 -0.04 -0.08 -0.20
## sports 0.05 -0.04 -0.23 -0.03 0.17 0.72 -0.01 -0.07 0.22 0.08 -0.27
## tvsports 0.08 -0.01 -0.16 0.05 0.36 0.29 0.10 -0.14 0.29 0.13 -0.27
## exercise -0.11 -0.04 -0.06 0.12 0.11 0.37 -0.03 0.04 0.24 0.04 -0.07
## dining -0.08 0.29 0.07 0.28 0.14 0.02 -0.02 0.10 0.11 -0.04 0.24
## museums -0.06 0.83 0.16 0.33 -0.16 0.00 0.04 0.07 0.10 -0.12 -0.04
## art -0.07 0.77 0.12 0.34 -0.12 0.00 0.05 0.07 0.02 -0.06 -0.01
## hiking 0.07 0.21 0.07 0.08 -0.05 0.27 -0.05 -0.04 -0.11 0.07 -0.09
## gaming 0.14 0.01 -0.14 0.06 0.28 0.01 -0.01 -0.03 0.06 0.00 -0.07
## clubbing -0.05 0.08 -0.06 0.19 0.11 0.08 -0.05 -0.02 -0.01 0.05 0.07
## reading 0.00 0.24 0.14 0.04 -0.09 -0.03 0.00 0.07 0.03 -0.13 0.11
## tv 0.04 0.01 0.11 0.15 0.58 -0.42 0.05 -0.03 0.44 0.08 -0.12
## theater -0.06 0.44 0.26 0.32 0.17 -0.20 0.00 0.05 -0.03 -0.05 0.01
## movies 0.01 0.30 0.15 0.17 0.37 -0.22 0.01 -0.02 -0.04 -0.03 -0.02
## concerts -0.03 0.46 0.10 0.20 0.49 0.09 0.09 0.01 -0.52 0.02 -0.05
## music -0.02 0.33 0.05 0.18 0.45 0.11 0.04 0.02 -0.38 0.08 0.10
## shopping -0.16 0.14 0.08 0.34 0.38 -0.20 -0.02 0.02 0.21 0.07 0.10
## yoga -0.02 0.23 0.11 0.17 0.08 0.08 0.05 0.05 -0.07 0.07 0.11
## exphappy 0.13 0.13 -0.16 -0.01 0.15 0.20 -0.07 -0.02 0.01 0.04 0.02
## attr1_1 -0.58 0.03 -0.65 -0.04 0.00 -0.01 -0.03 -0.15 0.00 -0.04 0.00
## sinc1_1 0.36 0.02 0.53 -0.04 0.00 0.00 -0.07 -0.68 0.00 0.01 0.00
## intel1_1 -0.03 0.06 0.12 -0.31 0.03 0.01 -0.07 0.18 0.01 -0.61 0.01
## fun1_1 0.08 0.11 -0.08 -0.24 0.00 0.00 -0.39 0.19 0.00 0.73 0.00
## amb1_1 0.23 -0.30 0.22 0.77 -0.05 0.01 0.02 0.21 -0.03 0.02 -0.02
## shar1_1 0.43 0.02 0.40 -0.04 0.00 0.00 0.56 0.47 0.00 0.01 0.00
## attr2_1 -0.91 0.01 0.22 0.02 0.00 0.00 0.24 -0.11 0.00 0.02 0.00
## sinc2_1 0.69 0.01 -0.10 0.01 0.00 0.00 0.01 -0.33 0.00 -0.05 0.00
## intel2_1 0.56 0.00 -0.45 0.02 0.00 0.00 -0.18 0.05 0.00 0.04 0.00
## fun2_1 0.03 0.01 0.32 0.01 0.00 0.00 -0.83 0.34 0.00 -0.08 0.00
## amb2_1 0.50 0.02 -0.51 0.00 0.00 0.00 0.20 0.05 0.00 -0.05 0.00
## shar2_1 0.43 -0.02 0.22 -0.09 0.01 -0.01 0.20 0.16 0.00 0.11 -0.01
## attr3_1 -0.07 0.05 -0.21 0.20 0.10 0.25 -0.05 0.04 0.20 0.00 0.46
## sinc3_1 0.09 0.10 0.22 0.08 0.15 0.15 0.02 -0.20 0.12 0.02 0.25
## intel3_1 0.06 0.04 -0.13 0.00 0.08 0.23 0.07 0.11 0.23 -0.11 0.42
## fun3_1 -0.06 0.12 -0.17 0.22 0.23 0.24 0.02 0.00 0.16 0.31 0.46
## amb3_1 -0.03 -0.07 -0.18 0.40 0.19 0.20 0.05 0.09 0.12 0.00 0.29
## ML5 ML7 ML9 ML6 h2 u2 com
## imprace 0.03 0.03 0.03 0.02 0.151 0.8487 3.6
## imprelig 0.13 0.05 0.03 0.00 0.128 0.8722 5.7
## date 0.00 0.02 0.01 -0.08 0.145 0.8554 4.1
## go_out 0.01 -0.08 -0.05 -0.04 0.138 0.8621 5.6
## sports -0.11 -0.12 0.01 0.01 0.768 0.2325 2.1
## tvsports -0.10 -0.01 -0.06 0.03 0.466 0.5338 5.8
## exercise -0.03 -0.03 0.03 -0.01 0.248 0.7522 2.8
## dining 0.06 0.03 0.09 0.09 0.297 0.7032 5.1
## museums 0.10 0.11 0.14 0.02 0.933 0.0667 1.7
## art 0.05 0.10 0.14 0.07 0.793 0.2069 1.7
## hiking -0.03 -0.03 0.04 0.00 0.164 0.8357 3.8
## gaming -0.10 -0.05 -0.06 0.06 0.153 0.8467 3.1
## clubbing -0.05 0.06 -0.03 0.04 0.086 0.9141 4.7
## reading 0.14 0.04 0.10 -0.01 0.155 0.8445 5.0
## tv -0.01 0.02 -0.01 0.01 0.778 0.2222 3.2
## theater 0.11 0.06 0.15 0.08 0.482 0.5184 4.3
## movies 0.08 0.07 0.08 0.03 0.350 0.6501 3.9
## concerts 0.06 -0.07 0.05 0.07 0.817 0.1826 3.7
## music 0.05 -0.07 0.08 0.01 0.533 0.4674 3.8
## shopping 0.03 0.09 0.05 0.03 0.416 0.5840 4.6
## yoga 0.05 0.03 0.09 0.05 0.146 0.8536 5.2
## exphappy -0.08 0.00 0.00 0.07 0.142 0.8582 6.0
## attr1_1 -0.32 -0.33 0.02 0.05 0.995 0.0050 3.2
## sinc1_1 0.17 -0.24 -0.02 -0.16 0.995 0.0050 3.1
## intel1_1 0.46 0.46 0.04 0.18 0.979 0.0209 4.0
## fun1_1 -0.02 0.39 -0.08 -0.10 0.979 0.0207 2.9
## amb1_1 0.13 0.26 0.11 -0.10 0.942 0.0581 2.4
## shar1_1 -0.17 -0.26 -0.10 0.09 0.995 0.0050 4.6
## attr2_1 0.12 0.06 -0.17 0.05 0.995 0.0050 1.4
## sinc2_1 -0.52 0.22 -0.10 0.27 0.995 0.0050 3.1
## intel2_1 0.49 -0.17 -0.08 0.40 0.995 0.0050 4.4
## fun2_1 -0.20 -0.14 -0.08 -0.14 0.995 0.0052 2.0
## amb2_1 0.08 0.03 -0.08 -0.66 0.995 0.0053 3.1
## shar2_1 -0.11 -0.12 0.79 0.05 0.972 0.0282 2.2
## attr3_1 -0.09 0.04 0.19 0.05 0.471 0.5291 3.9
## sinc3_1 0.09 -0.11 0.06 -0.04 0.266 0.7342 7.2
## intel3_1 0.03 0.00 0.06 -0.02 0.342 0.6581 3.2
## fun3_1 -0.01 0.10 0.02 0.01 0.545 0.4546 4.7
## amb3_1 0.00 0.09 0.08 -0.01 0.398 0.6021 4.3
##
## ML1 ML11 ML2 ML10 ML12 ML13 ML3 ML4 ML14 ML8 ML15
## SS loadings 2.89 2.34 2.21 1.77 1.64 1.49 1.40 1.25 1.15 1.14 1.13
## Proportion Var 0.07 0.06 0.06 0.05 0.04 0.04 0.04 0.03 0.03 0.03 0.03
## Cumulative Var 0.07 0.13 0.19 0.24 0.28 0.32 0.35 0.38 0.41 0.44 0.47
## Proportion Explained 0.13 0.11 0.10 0.08 0.07 0.07 0.06 0.06 0.05 0.05 0.05
## Cumulative Proportion 0.13 0.24 0.34 0.42 0.49 0.56 0.62 0.68 0.73 0.78 0.83
## ML5 ML7 ML9 ML6
## SS loadings 1.12 0.90 0.88 0.83
## Proportion Var 0.03 0.02 0.02 0.02
## Cumulative Var 0.50 0.52 0.55 0.57
## Proportion Explained 0.05 0.04 0.04 0.04
## Cumulative Proportion 0.88 0.92 0.96 1.00
##
## Mean item complexity = 3.8
## Test of the hypothesis that 15 factors are sufficient.
##
## df null model = 741 with the objective function = 17.61 with Chi Square = 143979.6
## df of the model are 261 and the objective function was 1.4
##
## The root mean square of the residuals (RMSR) is 0.03
## The df corrected root mean square of the residuals is 0.05
##
## The harmonic n.obs is 8191 with the empirical chi square 11699 with prob < 0
## The total n.obs was 8191 with Likelihood Chi Square = 11448.44 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.778
## RMSEA index = 0.072 and the 90 % confidence intervals are 0.071 0.073
## BIC = 9096.62
## Fit based upon off diagonal values = 0.95
## Measures of factor score adequacy
## ML1 ML11 ML2 ML10 ML12 ML13
## Correlation of (regression) scores with factors 1 0.97 1.00 0.98 0.92 0.90
## Multiple R square of scores with factors 1 0.95 1.00 0.96 0.84 0.81
## Minimum correlation of possible factor scores 1 0.90 0.99 0.92 0.68 0.62
## ML3 ML4 ML14 ML8 ML15 ML5
## Correlation of (regression) scores with factors 1.00 1.00 0.89 0.99 0.83 1.00
## Multiple R square of scores with factors 1.00 0.99 0.79 0.98 0.69 0.99
## Minimum correlation of possible factor scores 0.99 0.99 0.58 0.96 0.38 0.99
## ML7 ML9 ML6
## Correlation of (regression) scores with factors 0.99 0.99 1.00
## Multiple R square of scores with factors 0.99 0.97 0.99
## Minimum correlation of possible factor scores 0.98 0.95 0.99
Too many factors. It is extremely difficult to interpret such a result.
Let’s try to reduce the number of factors.
# Unrotated maximum-likelihood solution with 5 factors
fa(r = dating12, nfactors = 5, rotate = "none", fm = "ml")
## Factor Analysis using method = ml
## Call: fa(r = dating12, nfactors = 5, rotate = "none", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML3 ML4 ML2 ML1 ML5 h2 u2 com
## imprace 0.06 0.09 -0.04 -0.03 0.24 0.073 0.9269 1.5
## imprelig 0.01 -0.05 -0.04 -0.05 0.30 0.096 0.9041 1.2
## date -0.06 -0.35 -0.06 -0.01 0.08 0.138 0.8619 1.2
## go_out 0.02 -0.31 0.01 -0.03 -0.03 0.097 0.9033 1.0
## sports -0.14 0.40 0.14 0.04 -0.12 0.214 0.7857 1.7
## tvsports -0.07 0.29 0.19 -0.07 0.11 0.143 0.8566 2.3
## exercise 0.00 0.31 -0.07 -0.03 0.06 0.105 0.8951 1.2
## dining 0.41 0.25 -0.10 -0.02 0.21 0.292 0.7077 2.4
## museums 0.91 -0.04 -0.09 -0.06 -0.13 0.853 0.1467 1.1
## art 0.90 0.01 -0.08 -0.07 -0.15 0.843 0.1574 1.1
## hiking 0.21 0.03 0.01 0.07 -0.09 0.060 0.9401 1.7
## gaming -0.02 0.21 0.14 0.09 0.11 0.085 0.9146 2.8
## clubbing 0.12 0.26 -0.04 -0.01 0.08 0.091 0.9089 1.7
## reading 0.30 -0.13 -0.06 0.01 -0.01 0.108 0.8917 1.5
## tv 0.09 0.06 0.00 -0.02 0.50 0.262 0.7383 1.1
## theater 0.61 -0.09 -0.14 -0.04 0.29 0.482 0.5176 1.6
## movies 0.38 -0.04 -0.04 -0.02 0.32 0.252 0.7481 2.0
## concerts 0.45 0.06 -0.02 -0.08 0.16 0.233 0.7671 1.4
## music 0.32 0.18 -0.02 -0.03 0.19 0.171 0.8292 2.3
## shopping 0.28 0.26 -0.16 -0.09 0.42 0.353 0.6467 3.0
## yoga 0.32 0.08 -0.03 -0.05 0.12 0.127 0.8726 1.5
## exphappy 0.07 0.26 0.13 0.12 -0.05 0.109 0.8912 2.2
## attr1_1 -0.21 0.47 -0.14 -0.31 -0.42 0.555 0.4454 3.4
## sinc1_1 0.02 -0.37 0.09 0.12 0.19 0.196 0.8040 1.9
## intel1_1 0.12 -0.26 -0.08 -0.04 -0.02 0.089 0.9106 1.7
## fun1_1 -0.05 0.15 -0.10 0.28 -0.04 0.114 0.8858 2.0
## amb1_1 0.18 -0.04 0.05 0.16 0.42 0.239 0.7606 1.7
## shar1_1 0.10 -0.31 0.26 0.09 0.24 0.245 0.7552 3.3
## attr2_1 -0.01 0.00 -0.61 -0.79 0.00 0.995 0.0049 1.9
## sinc2_1 -0.02 -0.02 0.57 0.36 -0.01 0.452 0.5481 1.7
## intel2_1 -0.01 0.10 0.52 0.32 -0.07 0.391 0.6086 1.8
## fun2_1 0.00 0.00 -0.63 0.77 0.00 0.995 0.0050 1.9
## amb2_1 -0.07 0.10 0.59 0.21 -0.11 0.419 0.5811 1.4
## shar2_1 0.15 -0.19 0.41 0.26 0.21 0.342 0.6576 3.1
## attr3_1 0.12 0.53 0.04 0.01 0.06 0.302 0.6978 1.2
## sinc3_1 0.14 0.03 0.02 0.00 0.25 0.087 0.9133 1.6
## intel3_1 0.06 0.31 0.12 0.02 0.07 0.117 0.8835 1.5
## fun3_1 0.16 0.58 0.06 -0.09 0.21 0.411 0.5893 1.5
## amb3_1 0.11 0.46 0.08 -0.05 0.25 0.296 0.7040 1.8
##
## ML3 ML4 ML2 ML1 ML5
## SS loadings 3.20 2.48 2.21 1.86 1.68
## Proportion Var 0.08 0.06 0.06 0.05 0.04
## Cumulative Var 0.08 0.15 0.20 0.25 0.29
## Proportion Explained 0.28 0.22 0.19 0.16 0.15
## Cumulative Proportion 0.28 0.50 0.69 0.85 1.00
##
## Mean item complexity = 1.8
## Test of the hypothesis that 5 factors are sufficient.
##
## df null model = 741 with the objective function = 17.61 with Chi Square = 143979.6
## df of the model are 556 and the objective function was 9.91
##
## The root mean square of the residuals (RMSR) is 0.07
## The df corrected root mean square of the residuals is 0.08
##
## The harmonic n.obs is 8191 with the empirical chi square 56272.09 with prob < 0
## The total n.obs was 8191 with Likelihood Chi Square = 80980.15 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.251
## RMSEA index = 0.133 and the 90 % confidence intervals are 0.132 0.134
## BIC = 75970.16
## Fit based upon off diagonal values = 0.77
## Measures of factor score adequacy
## ML3 ML4 ML2 ML1 ML5
## Correlation of (regression) scores with factors 0.96 0.88 1.00 1.00 0.85
## Multiple R square of scores with factors 0.93 0.78 0.99 1.00 0.73
## Minimum correlation of possible factor scores 0.86 0.56 0.99 0.99 0.45
# Fit the unrotated 5-factor ML model once and reuse it for both plots,
# instead of re-estimating the identical model inside every call.
fa_none <- fa(r = dating12, nfactors = 5, rotate = "none", fm = "ml")
factor.plot(fa_none)
fa.diagram(fa_none)
The cumulative variance explained is low (Cumulative Var = 0.29). RMSR = 0.07 (should be close to 0); RMSEA index = 0.133 (< .08 acceptable, < .05 excellent); Tucker Lewis Index = 0.251 (> .90 acceptable, > .95 excellent). A sad result for the factor analysis.
Try to use a rotation. Compare the results.
# 5-factor maximum-likelihood solution with varimax rotation
fa(r = dating12, nfactors = 5, rotate = "varimax", fm = "ml")
## Factor Analysis using method = ml
## Call: fa(r = dating12, nfactors = 5, rotate = "varimax", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML3 ML2 ML4 ML5 ML1 h2 u2 com
## imprace 0.03 -0.05 0.09 0.25 0.00 0.073 0.9269 1.4
## imprelig -0.03 -0.05 -0.04 0.30 -0.04 0.096 0.9041 1.2
## date -0.07 -0.03 -0.35 0.09 -0.02 0.138 0.8619 1.2
## go_out 0.02 0.02 -0.30 -0.01 -0.06 0.097 0.9033 1.1
## sports -0.13 0.09 0.41 -0.16 0.01 0.214 0.7857 1.6
## tvsports -0.10 0.09 0.32 0.08 -0.13 0.143 0.8566 1.8
## exercise 0.00 -0.10 0.30 0.05 0.04 0.105 0.8951 1.3
## dining 0.39 -0.08 0.25 0.26 0.04 0.292 0.7077 2.7
## museums 0.92 -0.04 -0.04 0.00 -0.04 0.853 0.1467 1.0
## art 0.92 -0.04 0.01 -0.03 -0.05 0.843 0.1574 1.0
## hiking 0.22 0.06 0.03 -0.07 0.06 0.060 0.9401 1.5
## gaming -0.05 0.15 0.23 0.09 0.02 0.085 0.9146 2.2
## clubbing 0.12 -0.05 0.26 0.09 0.03 0.091 0.9089 1.8
## reading 0.30 -0.01 -0.14 0.04 0.02 0.108 0.8917 1.5
## tv 0.02 0.01 0.08 0.50 -0.03 0.262 0.7383 1.1
## theater 0.57 -0.08 -0.09 0.37 0.00 0.482 0.5176 1.8
## movies 0.34 0.00 -0.03 0.37 -0.02 0.252 0.7481 2.0
## concerts 0.42 -0.03 0.07 0.21 -0.07 0.233 0.7671 1.6
## music 0.30 -0.02 0.19 0.22 -0.02 0.171 0.8292 2.6
## shopping 0.24 -0.17 0.26 0.45 0.01 0.353 0.6467 2.5
## yoga 0.30 -0.03 0.09 0.16 -0.03 0.127 0.8726 1.8
## exphappy 0.07 0.16 0.27 -0.05 0.06 0.109 0.8912 1.9
## attr1_1 -0.13 -0.35 0.44 -0.45 -0.12 0.555 0.4454 3.2
## sinc1_1 -0.01 0.18 -0.35 0.20 0.01 0.196 0.8040 2.2
## intel1_1 0.13 -0.06 -0.26 0.01 -0.02 0.089 0.9106 1.6
## fun1_1 -0.04 0.06 0.12 -0.05 0.30 0.114 0.8858 1.5
## amb1_1 0.11 0.16 -0.02 0.44 0.08 0.239 0.7606 1.5
## shar1_1 0.04 0.31 -0.27 0.25 -0.11 0.245 0.7552 3.3
## attr2_1 0.05 -0.94 -0.05 0.02 -0.32 0.995 0.0049 1.2
## sinc2_1 -0.07 0.67 0.04 -0.03 -0.01 0.452 0.5481 1.0
## intel2_1 -0.04 0.60 0.15 -0.10 0.00 0.391 0.6086 1.2
## fun2_1 0.04 -0.10 -0.11 0.04 0.99 0.995 0.0050 1.1
## amb2_1 -0.10 0.59 0.16 -0.15 -0.13 0.419 0.5811 1.5
## shar2_1 0.09 0.52 -0.14 0.22 -0.04 0.342 0.6576 1.6
## attr3_1 0.11 0.01 0.53 0.06 0.04 0.302 0.6978 1.1
## sinc3_1 0.11 0.03 0.05 0.27 -0.02 0.087 0.9133 1.4
## intel3_1 0.04 0.09 0.32 0.06 -0.02 0.117 0.8835 1.3
## fun3_1 0.13 -0.03 0.59 0.20 -0.06 0.411 0.5893 1.4
## amb3_1 0.08 0.02 0.48 0.24 -0.05 0.296 0.7040 1.6
##
## ML3 ML2 ML4 ML5 ML1
## SS loadings 3.01 2.73 2.52 1.90 1.27
## Proportion Var 0.08 0.07 0.06 0.05 0.03
## Cumulative Var 0.08 0.15 0.21 0.26 0.29
## Proportion Explained 0.26 0.24 0.22 0.17 0.11
## Cumulative Proportion 0.26 0.50 0.72 0.89 1.00
##
## Mean item complexity = 1.6
## Test of the hypothesis that 5 factors are sufficient.
##
## df null model = 741 with the objective function = 17.61 with Chi Square = 143979.6
## df of the model are 556 and the objective function was 9.91
##
## The root mean square of the residuals (RMSR) is 0.07
## The df corrected root mean square of the residuals is 0.08
##
## The harmonic n.obs is 8191 with the empirical chi square 56272.09 with prob < 0
## The total n.obs was 8191 with Likelihood Chi Square = 80980.15 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.251
## RMSEA index = 0.133 and the 90 % confidence intervals are 0.132 0.134
## BIC = 75970.16
## Fit based upon off diagonal values = 0.77
## Measures of factor score adequacy
## ML3 ML2 ML4 ML5 ML1
## Correlation of (regression) scores with factors 0.96 1.00 0.88 0.85 1.00
## Multiple R square of scores with factors 0.93 0.99 0.78 0.73 0.99
## Minimum correlation of possible factor scores 0.85 0.98 0.57 0.46 0.99
# Fit the varimax-rotated 5-factor ML model once and reuse it for both
# plots, instead of re-estimating the identical model inside every call.
fa_varimax <- fa(r = dating12, nfactors = 5, rotate = "varimax", fm = "ml")
factor.plot(fa_varimax)
fa.diagram(fa_varimax)
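The fit indices are the same: a rotation only redistributes the loadings and does not change how well the model fits. The factors may be related, so try another rotation -
oblique (oblimin), which allows the factors to correlate.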
# 5-factor maximum-likelihood solution with oblimin rotation
fa(r = dating12, nfactors = 5, rotate = "oblimin", fm = "ml")
## Factor Analysis using method = ml
## Call: fa(r = dating12, nfactors = 5, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML3 ML2 ML4 ML5 ML1 h2 u2 com
## imprace -0.05 -0.08 0.21 0.18 0.01 0.073 0.9269 2.4
## imprelig -0.12 -0.12 0.11 0.29 -0.03 0.096 0.9041 2.0
## date -0.07 -0.11 -0.27 0.23 -0.01 0.138 0.8619 2.5
## go_out 0.05 -0.03 -0.28 0.13 -0.06 0.097 0.9033 1.6
## sports -0.10 0.18 0.27 -0.31 -0.02 0.214 0.7857 2.9
## tvsports -0.13 0.11 0.31 -0.04 -0.15 0.143 0.8566 2.2
## exercise -0.04 -0.05 0.30 -0.10 0.06 0.105 0.8951 1.4
## dining 0.29 -0.06 0.37 0.11 0.06 0.292 0.7077 2.2
## museums 0.93 0.01 -0.03 -0.01 0.00 0.853 0.1467 1.0
## art 0.93 0.01 0.00 -0.06 -0.02 0.843 0.1574 1.0
## hiking 0.24 0.09 0.00 -0.08 0.05 0.060 0.9401 1.6
## gaming -0.08 0.17 0.24 0.00 -0.01 0.085 0.9146 2.1
## clubbing 0.07 -0.01 0.28 -0.05 0.04 0.091 0.9089 1.2
## reading 0.29 -0.02 -0.10 0.09 0.03 0.108 0.8917 1.4
## tv -0.14 -0.08 0.32 0.44 -0.03 0.262 0.7383 2.1
## theater 0.46 -0.13 0.12 0.36 0.04 0.482 0.5176 2.2
## movies 0.23 -0.06 0.16 0.35 -0.01 0.252 0.7481 2.3
## concerts 0.36 -0.04 0.17 0.15 -0.05 0.233 0.7671 1.9
## music 0.22 -0.01 0.28 0.11 -0.01 0.171 0.8292 2.3
## shopping 0.07 -0.19 0.47 0.27 0.05 0.353 0.6467 2.1
## yoga 0.25 -0.03 0.16 0.10 -0.01 0.127 0.8726 2.1
## exphappy 0.07 0.22 0.21 -0.15 0.03 0.109 0.8912 3.0
## attr1_1 -0.04 -0.20 0.17 -0.65 -0.07 0.555 0.4454 1.4
## sinc1_1 -0.04 0.08 -0.22 0.36 -0.02 0.196 0.8040 1.8
## intel1_1 0.14 -0.10 -0.23 0.11 -0.01 0.089 0.9106 2.7
## fun1_1 -0.05 0.13 0.10 -0.11 0.29 0.114 0.8858 2.1
## amb1_1 -0.01 0.08 0.20 0.43 0.06 0.239 0.7606 1.5
## shar1_1 0.01 0.19 -0.13 0.40 -0.16 0.245 0.7552 2.0
## attr2_1 0.00 -0.97 -0.01 -0.07 -0.16 0.995 0.0049 1.1
## sinc2_1 -0.02 0.66 -0.01 0.05 -0.13 0.452 0.5481 1.1
## intel2_1 0.01 0.62 0.06 -0.08 -0.11 0.391 0.6086 1.1
## fun2_1 -0.01 0.02 -0.01 -0.01 1.00 0.995 0.0050 1.0
## amb2_1 -0.03 0.60 0.03 -0.12 -0.23 0.419 0.5811 1.4
## shar2_1 0.06 0.43 -0.04 0.34 -0.12 0.342 0.6576 2.1
## attr3_1 0.06 0.10 0.51 -0.19 0.03 0.302 0.6978 1.4
## sinc3_1 0.03 -0.01 0.17 0.23 -0.02 0.087 0.9133 1.9
## intel3_1 0.01 0.14 0.31 -0.07 -0.04 0.117 0.8835 1.5
## fun3_1 0.03 0.03 0.62 -0.08 -0.06 0.411 0.5893 1.1
## amb3_1 -0.03 0.05 0.54 0.02 -0.05 0.296 0.7040 1.0
##
## ML3 ML2 ML4 ML5 ML1
## SS loadings 2.72 2.69 2.59 2.16 1.28
## Proportion Var 0.07 0.07 0.07 0.06 0.03
## Cumulative Var 0.07 0.14 0.21 0.26 0.29
## Proportion Explained 0.24 0.24 0.23 0.19 0.11
## Cumulative Proportion 0.24 0.47 0.70 0.89 1.00
##
## With factor correlations of
## ML3 ML2 ML4 ML5 ML1
## ML3 1.00 -0.13 0.18 0.22 0.02
## ML2 -0.13 1.00 -0.06 0.09 0.04
## ML4 0.18 -0.06 1.00 -0.03 -0.06
## ML5 0.22 0.09 -0.03 1.00 0.09
## ML1 0.02 0.04 -0.06 0.09 1.00
##
## Mean item complexity = 1.8
## Test of the hypothesis that 5 factors are sufficient.
##
## df null model = 741 with the objective function = 17.61 with Chi Square = 143979.6
## df of the model are 556 and the objective function was 9.91
##
## The root mean square of the residuals (RMSR) is 0.07
## The df corrected root mean square of the residuals is 0.08
##
## The harmonic n.obs is 8191 with the empirical chi square 56272.09 with prob < 0
## The total n.obs was 8191 with Likelihood Chi Square = 80980.15 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.251
## RMSEA index = 0.133 and the 90 % confidence intervals are 0.132 0.134
## BIC = 75970.16
## Fit based upon off diagonal values = 0.77
## Measures of factor score adequacy
## ML3 ML2 ML4 ML5 ML1
## Correlation of (regression) scores with factors 0.96 1.00 0.88 0.88 1.00
## Multiple R square of scores with factors 0.93 0.99 0.78 0.77 0.99
## Minimum correlation of possible factor scores 0.85 0.99 0.56 0.54 0.99
# Fit the oblimin-rotated 5-factor ML model once and reuse it for both
# plots, instead of re-estimating the identical model inside every call.
fa_oblimin <- fa(r = dating12, nfactors = 5, rotate = "oblimin", fm = "ml")
factor.plot(fa_oblimin)
fa.diagram(fa_oblimin)
The result is the same, so we need to exclude variables to improve the EFA
Communality is the sum of squared factor loadings of a variable, so this is the parameter we look at to decide which variables to exclude (if a variable does not belong to any of the factors, it should not be kept in the analysis). Factor loadings have to be 0.4 or more to be acceptable. That means that the threshold for the communality of a variable should be 0.4^2 = 0.16.
# Store the oblimin solution in an object so its parts can be extracted
oblfa <- fa(r = dating12, nfactors = 5, rotate = "oblimin", fm = "ml")
oblfa
## Factor Analysis using method = ml
## Call: fa(r = dating12, nfactors = 5, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML3 ML2 ML4 ML5 ML1 h2 u2 com
## imprace -0.05 -0.08 0.21 0.18 0.01 0.073 0.9269 2.4
## imprelig -0.12 -0.12 0.11 0.29 -0.03 0.096 0.9041 2.0
## date -0.07 -0.11 -0.27 0.23 -0.01 0.138 0.8619 2.5
## go_out 0.05 -0.03 -0.28 0.13 -0.06 0.097 0.9033 1.6
## sports -0.10 0.18 0.27 -0.31 -0.02 0.214 0.7857 2.9
## tvsports -0.13 0.11 0.31 -0.04 -0.15 0.143 0.8566 2.2
## exercise -0.04 -0.05 0.30 -0.10 0.06 0.105 0.8951 1.4
## dining 0.29 -0.06 0.37 0.11 0.06 0.292 0.7077 2.2
## museums 0.93 0.01 -0.03 -0.01 0.00 0.853 0.1467 1.0
## art 0.93 0.01 0.00 -0.06 -0.02 0.843 0.1574 1.0
## hiking 0.24 0.09 0.00 -0.08 0.05 0.060 0.9401 1.6
## gaming -0.08 0.17 0.24 0.00 -0.01 0.085 0.9146 2.1
## clubbing 0.07 -0.01 0.28 -0.05 0.04 0.091 0.9089 1.2
## reading 0.29 -0.02 -0.10 0.09 0.03 0.108 0.8917 1.4
## tv -0.14 -0.08 0.32 0.44 -0.03 0.262 0.7383 2.1
## theater 0.46 -0.13 0.12 0.36 0.04 0.482 0.5176 2.2
## movies 0.23 -0.06 0.16 0.35 -0.01 0.252 0.7481 2.3
## concerts 0.36 -0.04 0.17 0.15 -0.05 0.233 0.7671 1.9
## music 0.22 -0.01 0.28 0.11 -0.01 0.171 0.8292 2.3
## shopping 0.07 -0.19 0.47 0.27 0.05 0.353 0.6467 2.1
## yoga 0.25 -0.03 0.16 0.10 -0.01 0.127 0.8726 2.1
## exphappy 0.07 0.22 0.21 -0.15 0.03 0.109 0.8912 3.0
## attr1_1 -0.04 -0.20 0.17 -0.65 -0.07 0.555 0.4454 1.4
## sinc1_1 -0.04 0.08 -0.22 0.36 -0.02 0.196 0.8040 1.8
## intel1_1 0.14 -0.10 -0.23 0.11 -0.01 0.089 0.9106 2.7
## fun1_1 -0.05 0.13 0.10 -0.11 0.29 0.114 0.8858 2.1
## amb1_1 -0.01 0.08 0.20 0.43 0.06 0.239 0.7606 1.5
## shar1_1 0.01 0.19 -0.13 0.40 -0.16 0.245 0.7552 2.0
## attr2_1 0.00 -0.97 -0.01 -0.07 -0.16 0.995 0.0049 1.1
## sinc2_1 -0.02 0.66 -0.01 0.05 -0.13 0.452 0.5481 1.1
## intel2_1 0.01 0.62 0.06 -0.08 -0.11 0.391 0.6086 1.1
## fun2_1 -0.01 0.02 -0.01 -0.01 1.00 0.995 0.0050 1.0
## amb2_1 -0.03 0.60 0.03 -0.12 -0.23 0.419 0.5811 1.4
## shar2_1 0.06 0.43 -0.04 0.34 -0.12 0.342 0.6576 2.1
## attr3_1 0.06 0.10 0.51 -0.19 0.03 0.302 0.6978 1.4
## sinc3_1 0.03 -0.01 0.17 0.23 -0.02 0.087 0.9133 1.9
## intel3_1 0.01 0.14 0.31 -0.07 -0.04 0.117 0.8835 1.5
## fun3_1 0.03 0.03 0.62 -0.08 -0.06 0.411 0.5893 1.1
## amb3_1 -0.03 0.05 0.54 0.02 -0.05 0.296 0.7040 1.0
##
## ML3 ML2 ML4 ML5 ML1
## SS loadings 2.72 2.69 2.59 2.16 1.28
## Proportion Var 0.07 0.07 0.07 0.06 0.03
## Cumulative Var 0.07 0.14 0.21 0.26 0.29
## Proportion Explained 0.24 0.24 0.23 0.19 0.11
## Cumulative Proportion 0.24 0.47 0.70 0.89 1.00
##
## With factor correlations of
## ML3 ML2 ML4 ML5 ML1
## ML3 1.00 -0.13 0.18 0.22 0.02
## ML2 -0.13 1.00 -0.06 0.09 0.04
## ML4 0.18 -0.06 1.00 -0.03 -0.06
## ML5 0.22 0.09 -0.03 1.00 0.09
## ML1 0.02 0.04 -0.06 0.09 1.00
##
## Mean item complexity = 1.8
## Test of the hypothesis that 5 factors are sufficient.
##
## df null model = 741 with the objective function = 17.61 with Chi Square = 143979.6
## df of the model are 556 and the objective function was 9.91
##
## The root mean square of the residuals (RMSR) is 0.07
## The df corrected root mean square of the residuals is 0.08
##
## The harmonic n.obs is 8191 with the empirical chi square 56272.09 with prob < 0
## The total n.obs was 8191 with Likelihood Chi Square = 80980.15 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.251
## RMSEA index = 0.133 and the 90 % confidence intervals are 0.132 0.134
## BIC = 75970.16
## Fit based upon off diagonal values = 0.77
## Measures of factor score adequacy
## ML3 ML2 ML4 ML5 ML1
## Correlation of (regression) scores with factors 0.96 1.00 0.88 0.88 1.00
## Multiple R square of scores with factors 0.93 0.99 0.78 0.77 0.99
## Minimum correlation of possible factor scores 0.85 0.99 0.56 0.54 0.99
# Communalities as a one-column data frame (row names are the variables)
com <- data.frame(oblfa$communality)
library(dplyr)
# List them in ascending order
com %>% arrange(oblfa.communality)
## oblfa.communality
## hiking 0.05986141
## imprace 0.07306658
## gaming 0.08543416
## sinc3_1 0.08671057
## intel1_1 0.08938205
## clubbing 0.09112199
## imprelig 0.09589925
## go_out 0.09666240
## exercise 0.10489908
## reading 0.10834963
## exphappy 0.10880022
## fun1_1 0.11417838
## intel3_1 0.11653218
## yoga 0.12736834
## date 0.13808932
## tvsports 0.14339267
## music 0.17078705
## sinc1_1 0.19599174
## sports 0.21433513
## concerts 0.23294662
## amb1_1 0.23937459
## shar1_1 0.24479176
## movies 0.25190929
## tv 0.26173691
## dining 0.29229984
## amb3_1 0.29599479
## attr3_1 0.30224815
## shar2_1 0.34239198
## shopping 0.35325840
## intel2_1 0.39136364
## fun3_1 0.41071037
## amb2_1 0.41887450
## sinc2_1 0.45187268
## theater 0.48236356
## attr1_1 0.55462689
## art 0.84264249
## museums 0.85330328
## fun2_1 0.99500817
## attr2_1 0.99505267
As can be seen, many of the variables have very low communality. The acceptable level is 0.16, so we filter out all variables whose communality is at or below it.
# Keep only the variables whose communality does not exceed the 0.16 cut-off.
com1 <- filter(com, oblfa.communality <= 0.16)
# Dropping the only column leaves a zero-column data frame, so printing it
# reports just how many variables were flagged (one row per variable).
varlist <- select(com1, -oblfa.communality)
varlist
## таблица данных с 0 колонок и 16 строками
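So, we get 16 variables out of 39 that should be removed. Now we remove the inappropriate variables from the dataset (the call below also drops music and sinc1_1, so 18 variables are removed in total and 21 remain).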
# Drop the variables excluded from the second round of factor analysis.
# NOTE(review): `music` (0.171) and `sinc1_1` (0.196) are dropped here although
# their communalities are above the 0.16 cut-off — confirm this is intended.
dating22 <- dating12 %>%
  select(-c(
    imprace, imprelig, date, go_out, tvsports, exercise,
    hiking, gaming, clubbing, reading, music, yoga,
    exphappy, sinc1_1, intel1_1, fun1_1, sinc3_1, intel3_1
  ))
# Parallel analysis for both factors and components, using 100 iterations.
fa.parallel(dating22, fa = "both", n.iter = 100)
## Parallel analysis suggests that the number of factors = 8 and the number of components = 7
# Eigen-decomposition of the correlation matrix of the reduced variable set;
# the eigenvalues are printed for inspection.
ev <- dating22 %>%
  cor() %>%
  eigen()
ev[["values"]]
## [1] 3.646080534 2.783487735 2.133889152 1.545371111 1.303104265 1.225666848
## [7] 1.058307000 0.893124464 0.795302348 0.760055543 0.727792695 0.666873598
## [13] 0.638928949 0.585083906 0.539249256 0.490091480 0.396517630 0.367757791
## [19] 0.300295559 0.136166369 0.006853765
Now, the maximum number of factors is 7. Another option is 3.
Again, we try to use the maximum number of factors.
# (1) Unrotated maximum-likelihood factor analysis of `dating22`, 7 factors.
fa1 <- fa(
  r = dating22,
  nfactors = 7,
  rotate = "none",
  fm = "ml"
)
fa1
## Factor Analysis using method = ml
## Call: fa(r = dating22, nfactors = 7, rotate = "none", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML6 ML1 ML7 ML5 ML2 ML3 ML4 h2 u2 com
## sports -0.12 0.14 -0.07 -0.05 -0.06 0.00 -0.01 0.045 0.9554 3.2
## dining 0.39 -0.09 0.32 0.09 0.05 0.06 0.08 0.290 0.7095 2.4
## museums 0.91 -0.12 -0.11 0.14 0.02 0.01 0.03 0.880 0.1204 1.1
## art 0.88 -0.11 -0.10 0.13 0.00 -0.01 0.07 0.825 0.1746 1.1
## tv 0.08 -0.01 0.55 0.04 -0.01 -0.07 0.03 0.318 0.6821 1.1
## theater 0.56 -0.14 0.25 0.19 0.07 0.00 0.11 0.450 0.5499 2.0
## movies 0.35 -0.04 0.34 0.09 0.02 -0.02 0.05 0.255 0.7448 2.2
## concerts 0.40 -0.07 0.13 0.10 -0.04 0.02 0.07 0.204 0.7965 1.5
## shopping 0.28 -0.17 0.64 0.02 0.03 0.00 0.06 0.518 0.4821 1.6
## attr1_1 -0.13 -0.26 -0.09 -0.27 -0.18 0.05 0.00 0.200 0.7996 3.5
## amb1_1 0.14 0.10 0.31 0.14 0.09 0.00 -0.10 0.166 0.8340 2.6
## shar1_1 0.02 0.18 -0.02 0.38 -0.06 -0.17 -0.07 0.217 0.7834 2.0
## attr2_1 0.01 -0.95 0.00 -0.16 -0.22 0.01 0.15 0.995 0.0050 1.2
## sinc2_1 0.01 0.73 0.00 -0.14 -0.05 -0.62 0.22 0.995 0.0050 2.3
## intel2_1 0.00 0.70 0.00 -0.06 -0.12 0.61 0.34 0.995 0.0050 2.5
## fun2_1 0.00 0.01 0.00 -0.09 0.99 0.07 -0.05 0.995 0.0050 1.0
## amb2_1 0.02 0.54 0.00 -0.18 -0.28 0.14 -0.76 0.992 0.0079 2.3
## shar2_1 -0.03 0.31 0.01 0.92 0.02 -0.17 -0.07 0.973 0.0269 1.3
## attr3_1 0.10 0.03 0.19 0.06 -0.01 0.01 0.00 0.050 0.9501 1.8
## fun3_1 0.15 0.00 0.33 -0.04 -0.12 0.03 0.01 0.146 0.8544 1.8
## amb3_1 0.10 0.03 0.33 -0.01 -0.10 0.05 -0.05 0.138 0.8617 1.5
##
## ML6 ML1 ML7 ML5 ML2 ML3 ML4
## SS loadings 2.55 2.54 1.40 1.28 1.21 0.85 0.83
## Proportion Var 0.12 0.12 0.07 0.06 0.06 0.04 0.04
## Cumulative Var 0.12 0.24 0.31 0.37 0.43 0.47 0.51
## Proportion Explained 0.24 0.24 0.13 0.12 0.11 0.08 0.08
## Cumulative Proportion 0.24 0.48 0.61 0.73 0.84 0.92 1.00
##
## Mean item complexity = 1.9
## Test of the hypothesis that 7 factors are sufficient.
##
## df null model = 210 with the objective function = 9.72 with Chi Square = 79561.25
## df of the model are 84 and the objective function was 1.51
##
## The root mean square of the residuals (RMSR) is 0.07
## The df corrected root mean square of the residuals is 0.11
##
## The harmonic n.obs is 8191 with the empirical chi square 17475.94 with prob < 0
## The total n.obs was 8191 with Likelihood Chi Square = 12347.83 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.613
## RMSEA index = 0.134 and the 90 % confidence intervals are 0.132 0.136
## BIC = 11590.92
## Fit based upon off diagonal values = 0.87
## Measures of factor score adequacy
## ML6 ML1 ML7 ML5 ML2 ML3
## Correlation of (regression) scores with factors 0.96 1 0.84 0.99 1.00 1.00
## Multiple R square of scores with factors 0.93 1 0.70 0.98 1.00 0.99
## Minimum correlation of possible factor scores 0.86 1 0.40 0.96 0.99 0.99
## ML4
## Correlation of (regression) scores with factors 1.00
## Multiple R square of scores with factors 0.99
## Minimum correlation of possible factor scores 0.98
Higher Cumulative Var = 0.51. RMSR = 0.07 is the same (it should be closer to 0). RMSEA index = 0.134 is almost the same (<.08 acceptable, <.05 excellent). Tucker Lewis Index = 0.613 (>.90 acceptable, >.95 excellent). The result is better but still not good.
Now we will use 3 factors.
# (2) Unrotated maximum-likelihood factor analysis of `dating22`, 3 factors.
fa2 <- fa(
  r = dating22,
  nfactors = 3,
  rotate = "none",
  fm = "ml"
)
fa2
## Factor Analysis using method = ml
## Call: fa(r = dating22, nfactors = 3, rotate = "none", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML3 ML2 ML1 h2 u2 com
## sports -0.13 0.14 0.04 0.0378 0.9622 2.2
## dining 0.40 -0.10 -0.02 0.1710 0.8290 1.1
## museums 0.91 -0.09 -0.06 0.8390 0.1610 1.0
## art 0.90 -0.07 -0.07 0.8230 0.1770 1.0
## tv 0.08 0.00 -0.02 0.0064 0.9936 1.1
## theater 0.59 -0.14 -0.04 0.3713 0.6287 1.1
## movies 0.37 -0.04 -0.02 0.1360 0.8640 1.0
## concerts 0.42 -0.02 -0.08 0.1865 0.8135 1.1
## shopping 0.26 -0.16 -0.09 0.1026 0.8974 1.9
## attr1_1 -0.18 -0.14 -0.31 0.1494 0.8506 2.1
## amb1_1 0.16 0.05 0.15 0.0526 0.9474 2.2
## shar1_1 0.09 0.26 0.09 0.0859 0.9141 1.5
## attr2_1 -0.01 -0.61 -0.79 0.9951 0.0049 1.9
## sinc2_1 -0.02 0.57 0.36 0.4511 0.5489 1.7
## intel2_1 -0.01 0.52 0.32 0.3752 0.6248 1.7
## fun2_1 0.00 -0.63 0.77 0.9950 0.0050 1.9
## amb2_1 -0.07 0.59 0.21 0.3950 0.6050 1.3
## shar2_1 0.14 0.41 0.26 0.2571 0.7429 2.0
## attr3_1 0.11 0.04 0.01 0.0135 0.9865 1.3
## fun3_1 0.14 0.06 -0.09 0.0311 0.9689 2.1
## amb3_1 0.09 0.08 -0.05 0.0174 0.9826 2.5
##
## ML3 ML2 ML1
## SS loadings 2.69 2.08 1.72
## Proportion Var 0.13 0.10 0.08
## Cumulative Var 0.13 0.23 0.31
## Proportion Explained 0.41 0.32 0.27
## Cumulative Proportion 0.41 0.73 1.00
##
## Mean item complexity = 1.6
## Test of the hypothesis that 3 factors are sufficient.
##
## df null model = 210 with the objective function = 9.72 with Chi Square = 79561.25
## df of the model are 150 and the objective function was 5.22
##
## The root mean square of the residuals (RMSR) is 0.11
## The df corrected root mean square of the residuals is 0.13
##
## The harmonic n.obs is 8191 with the empirical chi square 40924.11 with prob < 0
## The total n.obs was 8191 with Likelihood Chi Square = 42701.67 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.249
## RMSEA index = 0.186 and the 90 % confidence intervals are 0.185 0.188
## BIC = 41350.05
## Fit based upon off diagonal values = 0.69
## Measures of factor score adequacy
## ML3 ML2 ML1
## Correlation of (regression) scores with factors 0.96 1.00 1.00
## Multiple R square of scores with factors 0.92 0.99 1.00
## Minimum correlation of possible factor scores 0.84 0.99 0.99
Lower Cumulative Var = 0.31. Worse RMSR = 0.11 (should be closer to 0). Worse RMSEA index = 0.186 (<.08 acceptable, <.05 excellent). Tucker Lewis Index = 0.249 (>.90 acceptable, >.95 excellent). The result is worse, BUT we need to look closer at the factor loadings.
Factor Loadings of (1)
# Show the 7-factor loadings, hiding entries below 0.4 in absolute value.
print(fa1[["loadings"]], cutoff = 0.4)
##
## Loadings:
## ML6 ML1 ML7 ML5 ML2 ML3 ML4
## sports
## dining
## museums 0.913
## art 0.884
## tv 0.552
## theater 0.559
## movies
## concerts 0.403
## shopping 0.638
## attr1_1
## amb1_1
## shar1_1
## attr2_1 -0.948
## sinc2_1 0.731 -0.623
## intel2_1 0.703 0.606
## fun2_1 0.990
## amb2_1 0.538 -0.759
## shar2_1 0.918
## attr3_1
## fun3_1
## amb3_1
##
## ML6 ML1 ML7 ML5 ML2 ML3 ML4
## SS loadings 2.551 2.538 1.397 1.278 1.206 0.851 0.826
## Proportion Var 0.121 0.121 0.067 0.061 0.057 0.041 0.039
## Cumulative Var 0.121 0.242 0.309 0.370 0.427 0.468 0.507
Factor Loadings of (2)
# Show the 3-factor loadings, hiding entries below 0.4 in absolute value.
print(fa2[["loadings"]], cutoff = 0.4)
##
## Loadings:
## ML3 ML2 ML1
## sports
## dining
## museums 0.910
## art 0.901
## tv
## theater 0.592
## movies
## concerts 0.423
## shopping
## attr1_1
## amb1_1
## shar1_1
## attr2_1 -0.613 -0.787
## sinc2_1 0.569
## intel2_1 0.522
## fun2_1 -0.634 0.770
## amb2_1 0.588
## shar2_1 0.413
## attr3_1
## fun3_1
## amb3_1
##
## ML3 ML2 ML1
## SS loadings 2.691 2.080 1.721
## Proportion Var 0.128 0.099 0.082
## Cumulative Var 0.128 0.227 0.309
Firstly, both of them have variables that do not belong to any of the factors. Secondly, in (1) there are factors to which only one variable belongs (ML5, ML2, ML4) and factors which, judging by their SS loadings, should not be here (ML3, ML4); this is a bad result. In case (2) the situation with the factors is better, but the cumulative variance is lower. So it is quite debatable what to change now.
At first, we will continue working on removing the variables.
# Third variable set: `dating22` without the nine columns listed below.
dating32 <- dating22 %>%
  select(-c(
    shopping, dining, movies,
    attr1_1, amb1_1, shar1_1,
    attr3_1, fun3_1, amb3_1
  ))
Now we have a more or less consistent set of variables, but the appropriate number of factors is still unknown. We will try different numbers of factors (from 3 to 7) with different rotations.
# Maximum-likelihood factor analyses of `dating32` for 3 to 7 factors.
# For each factor count three fits are stored: unrotated (no suffix),
# oblimin-rotated (suffix "o") and varimax-rotated (suffix "v").

# 3 factors
fac1  <- fa(r = dating32, nfactors = 3, rotate = "none",    fm = "ml")
fac1o <- fa(r = dating32, nfactors = 3, rotate = "oblimin", fm = "ml")
fac1v <- fa(r = dating32, nfactors = 3, rotate = "varimax", fm = "ml")

# 4 factors
fac2  <- fa(r = dating32, nfactors = 4, rotate = "none",    fm = "ml")
fac2o <- fa(r = dating32, nfactors = 4, rotate = "oblimin", fm = "ml")
fac2v <- fa(r = dating32, nfactors = 4, rotate = "varimax", fm = "ml")

# 5 factors
fac3  <- fa(r = dating32, nfactors = 5, rotate = "none",    fm = "ml")
fac3o <- fa(r = dating32, nfactors = 5, rotate = "oblimin", fm = "ml")
fac3v <- fa(r = dating32, nfactors = 5, rotate = "varimax", fm = "ml")

# 6 factors
fac4  <- fa(r = dating32, nfactors = 6, rotate = "none",    fm = "ml")
fac4o <- fa(r = dating32, nfactors = 6, rotate = "oblimin", fm = "ml")
fac4v <- fa(r = dating32, nfactors = 6, rotate = "varimax", fm = "ml")

# 7 factors
fac5  <- fa(r = dating32, nfactors = 7, rotate = "none",    fm = "ml")
fac5o <- fa(r = dating32, nfactors = 7, rotate = "oblimin", fm = "ml")
fac5v <- fa(r = dating32, nfactors = 7, rotate = "varimax", fm = "ml")
Comparison of main characteristics by number of factors. Of course, we cannot rely only on RMSEA, TLI and Cumulative Variance to tell how good our factor analysis is, but for now we try to meet the criteria as closely as possible: RMSEA (<.08 acceptable, <.05 excellent), TLI (>.90 acceptable, >.95 excellent).
# Compare the unrotated solutions (3 to 7 factors) on cumulative variance,
# TLI and RMSEA. The statistics are pulled from each fit with vapply() rather
# than through per-model temporaries and hard-coded matrix positions.
fits <- list(fac1, fac2, fac3, fac4, fac5)

# RMSEA point estimate: the first element of each fit's RMSEA vector.
RMSEA <- vapply(fits, function(fit) unname(fit$RMSEA[1]), numeric(1))

# Tucker-Lewis index of each fit.
TLI <- vapply(fits, function(fit) fit$TLI, numeric(1))

# Cumulative variance after the last factor: looked up by row name and taken
# from the last column, so it does not depend on the number of factors.
Cumulative_Var <- vapply(
  fits,
  function(fit) {
    vacc <- fit$Vaccounted
    unname(vacc["Cumulative Var", ncol(vacc)])
  },
  numeric(1)
)

# Row labels such as "FA1 (3 factors)", derived from the fits themselves
# (one loadings column per factor).
n_factors <- vapply(fits, function(fit) ncol(fit$loadings), integer(1))
name <- sprintf("FA%d (%d factors)", seq_along(fits), n_factors)

fa_comparison <- data.frame(name, Cumulative_Var, TLI, RMSEA)
fa_comparison
## name Cumulative_Var TLI RMSEA
## 1 FA1 (3 factors) 0.4758031 0.13005722 0.29669032
## 2 FA2 (4 factors) 0.5382459 -0.05394687 0.32655000
## 3 FA3 (5 factors) 0.5103276 -0.20815490 0.34961006
## 4 FA4 (6 factors) 0.6859750 0.80214235 0.14147548
## 5 FA5 (7 factors) 0.7522720 0.92288036 0.08832208
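So, our choice is FA5 with 7 factors: it has the highest cumulative variance and TLI and the lowest RMSEA (0.088, still slightly above the 0.08 threshold).
Choosing the rotation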
# Unrotated 7-factor loadings, hiding entries below 0.4 in absolute value.
print(fac5[["loadings"]], cutoff = 0.4)
##
## Loadings:
## ML1 ML2 ML7 ML3 ML6 ML4 ML5
## sports
## museums 0.425 0.645
## art 0.427 0.795
## tv
## theater 0.746 0.532
## concerts
## attr2_1 -0.957
## sinc2_1 0.686 -0.537
## intel2_1 0.638 0.580 -0.439
## fun2_1 0.969
## amb2_1 0.610 0.591
## shar2_1 0.415 0.666 -0.422
##
## ML1 ML2 ML7 ML3 ML6 ML4 ML5
## SS loadings 2.479 1.713 1.109 1.106 1.058 0.796 0.765
## Proportion Var 0.207 0.143 0.092 0.092 0.088 0.066 0.064
## Cumulative Var 0.207 0.349 0.442 0.534 0.622 0.688 0.752
# Path diagram of the unrotated 7-factor solution.
fa.diagram(fa.results = fac5)
#### Oblimin Rotation
# Oblimin-rotated loadings, hiding entries below 0.4 in absolute value.
print(fac5o[["loadings"]], cutoff = 0.4)
##
## Loadings:
## ML7 ML1 ML2 ML5 ML4 ML3 ML6
## sports
## museums 0.827
## art 1.005
## tv
## theater 0.993
## concerts
## attr2_1
## sinc2_1 1.030
## intel2_1 1.026
## fun2_1 0.981
## amb2_1 1.016
## shar2_1 1.019
##
## ML7 ML1 ML2 ML5 ML4 ML3 ML6
## SS loadings 1.782 1.222 1.208 1.179 1.175 1.146 1.086
## Proportion Var 0.148 0.102 0.101 0.098 0.098 0.095 0.090
## Cumulative Var 0.148 0.250 0.351 0.449 0.547 0.643 0.733
# Path diagram of the oblimin-rotated 7-factor solution.
fa.diagram(fa.results = fac5o)
#### Varimax Rotation
# Varimax-rotated loadings, hiding entries below 0.4 in absolute value.
print(fac5v[["loadings"]], cutoff = 0.4)
##
## Loadings:
## ML7 ML1 ML3 ML4 ML5 ML6 ML2
## sports
## museums 0.858
## art 0.980
## tv
## theater 0.458 0.879
## concerts
## attr2_1 -0.541 -0.473 -0.459 -0.411
## sinc2_1 0.984
## intel2_1 0.978
## fun2_1 0.981
## amb2_1 0.969
## shar2_1 0.988
##
## ML7 ML1 ML3 ML4 ML5 ML6 ML2
## SS loadings 2.068 1.310 1.215 1.184 1.154 1.082 1.014
## Proportion Var 0.172 0.109 0.101 0.099 0.096 0.090 0.084
## Cumulative Var 0.172 0.282 0.383 0.481 0.578 0.668 0.752
# Path diagram of the varimax-rotated 7-factor solution.
fa.diagram(fa.results = fac5v)
With the Oblimin rotation, as well as with the Varimax rotation, every latent factor describes something, but there is a big problem: most of the latent factors describe only one variable, so the factors are not really latent — they just reproduce a single variable, which is not explanatory. The no-rotation option seems to be the best; however, it has the problem that some factors do not describe anything, and some variables do not belong to any factor. We need to fix this.
In the final version it has been decided to stay with only 2 factors and 6 variables. It is not much, but it is the most precise option
# Final model: unrotated two-factor ML solution on `dating32` with columns
# 1, 4, 6, 10, 11 and 12 dropped by position.
# NOTE(review): going by the variable order in the printed loadings, this keeps
# museums, art, theater, attr2_1, sinc2_1 and intel2_1. The positional drop
# breaks silently if the column order of `dating32` ever changes.
fac5o_upgrade <- fa(
  r = dating32[, -c(4, 6, 10, 11, 12, 1)],
  nfactors = 2,
  rotate = "none",
  fm = "ml"
)
fa.diagram(fa.results = fac5o_upgrade)
fac5o_upgrade
## Factor Analysis using method = ml
## Call: fa(r = dating32[, -c(4, 6, 10, 11, 12, 1)], nfactors = 2, rotate = "none",
## fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML2 ML1 h2 u2 com
## museums 0.94 0.10 0.89 0.105 1.0
## art 0.90 0.11 0.81 0.187 1.0
## theater 0.55 0.12 0.32 0.683 1.1
## attr2_1 -0.01 1.00 1.00 0.005 1.0
## sinc2_1 -0.02 -0.63 0.40 0.599 1.0
## intel2_1 -0.01 -0.58 0.33 0.668 1.0
##
## ML2 ML1
## SS loadings 1.99 1.76
## Proportion Var 0.33 0.29
## Cumulative Var 0.33 0.63
## Proportion Explained 0.53 0.47
## Cumulative Proportion 0.53 1.00
##
## Mean item complexity = 1
## Test of the hypothesis that 2 factors are sufficient.
##
## df null model = 15 with the objective function = 2.63 with Chi Square = 21517.49
## df of the model are 4 and the objective function was 0.06
##
## The root mean square of the residuals (RMSR) is 0.04
## The df corrected root mean square of the residuals is 0.07
##
## The harmonic n.obs is 8191 with the empirical chi square 330.42 with prob < 3e-70
## The total n.obs was 8191 with Likelihood Chi Square = 484.64 with prob < 1.4e-103
##
## Tucker Lewis Index of factoring reliability = 0.916
## RMSEA index = 0.121 and the 90 % confidence intervals are 0.112 0.13
## BIC = 448.6
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## ML2 ML1
## Correlation of (regression) scores with factors 0.96 1.00
## Multiple R square of scores with factors 0.93 1.00
## Minimum correlation of possible factor scores 0.86 0.99
Final results: RMSR = 0.04 (closer to 0, improved!) RMSEA = 0.121 (improved, but still not good) TLI = 0.916 (good, improved!) Cumulative Variance = 0.63 (improved!)
Also, we can see almost perfect complexity (min. 1.000118, max. 1.089190).
## Part 3. Description of factors
How interested are you in the following activities, on a scale of 1-10? 1. Art - art 2. Museums - museums 3. Theater - theater (related to ML2). All of the relations are positive.
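What do you think the opposite sex looks for in a date? 1. Attractiveness - attr2_1 2. Sincerity - sinc2_1 3. Intelligence - intel2_1 (related to ML1). The relation is positive for attr2_1 and negative for sinc2_1 and intel2_1.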
People look for different things in partners, and different personality traits can attract a person. However, it is worth taking into account what kind of relationship a person wants: long-term, for building a family and living together, or short-term, to satisfy sexual needs. Since the goals during the search for a partner are different, the focus of attention will be aimed at different qualities of a person.
The authors (Pamela C. Regan, Lauren Levin, Susan Sprecher, F. Scott Christopher & Rodney Cate) of the study “What Characteristics Do Men and Women Desire In Their Short-Term Sexual and Long-Term Romantic Partners?” (2008) report the following result on this topic: “both men and women focused upon sexual desirability (e.g., attractiveness, health, sex drive, athleticism) when evaluating a short-term sexual partner, and placed more importance on similarity and on socially appealing personality characteristics (e.g., intelligence, honesty, warmth) when considering a long-term romantic relationship.”
So, I think the factor (ML1) can be interpreted as a Sexual desirability (or Sexuality) factor.
ML2 describes activities that are strictly related to highbrow culture. In comparison with music or movies, which can be highbrow, lowbrow or middlebrow, the activities that this factor is related to are usually treated as highbrow. So, if a person says that they are fond of theater or art, or like to go to museums, this signals that the person is fond of highbrow culture.
The distinction between highbrow and lowbrow culture is one of the key concepts in Bourdieu’s “Distinction: A Social Critique of the Judgement of Taste” (1979). “Taste has as much to do with expressed distaste for the aesthetic preferences attributed to other social groups as with positive adherence to the preferences of one’s own social group”, and that is why such a distinction between cultural activities exists.
I would like to name the factor “Highbrowness”.