2 Main analysis
Last review on July 12, 2021
Note: This is an R Markdown document that accompanies the manuscript entitled “It is better an approximate answer to the right question than the exact answer to the wrong question : the case of the psychometric analysis of the ASQ:SE”, by Luis Anunciação, Jane Squires, and J. Landeira-Fernandez. The data and all code used here are available at https://osf.io/z6gwv/ . To reproduce the results, load the RData file and run all chunks below.
Thank you.
Feel free to contact me at luisfca@puc-rio.br Last update: July 5, 2021
# Packages ----
# pacman::p_load() attaches each package, installing it first if it is missing.
pacman::p_load(
  tidyverse, # data-wrangling environment
  mirt,      # multidimensional IRT
  psych      # classical test theory
)
# Record the installed psych version so the analysis can be reproduced.
packageVersion("psych")
## [1] '2.1.6'
# Record the installed EGAnet version so the analysis can be reproduced.
# The package is registered as "EGAnet"; a differently-cased name only
# resolves on case-insensitive file systems.
packageVersion("EGAnet")
## [1] '0.9.8'
Always load the main dataset
# NOTE(review): absolute, machine-specific path. Anyone reproducing the
# analysis must point this at their own local copy of the .RData file.
load("C:/Users/luisf/Dropbox/Puc-Rio/Tese e papers/R/base em R (todas as faixas etarias).RData")
For this specific paper, change labels
# For every age band, create a working copy (ds_<months>) of the original
# data and an identical backup copy (backup_<months>) next to it.
backup_60 <- ds_60 <- original_60
backup_48 <- ds_48 <- original_48
backup_36 <- ds_36 <- original_36
backup_30 <- ds_30 <- original_30
backup_24 <- ds_24 <- original_24
backup_18 <- ds_18 <- original_18
backup_12 <- ds_12 <- original_12
Then I’ll remove all the other objects from the workspace
# Remove every workspace object whose name does not start with "ds" or
# "backup", so that only the working and backup data sets remain.
rm(list = grep("^ds|^backup", ls(), value = TRUE, invert = TRUE))
Make sure that the items are the main variables in the dataset. In this project, we are using the 2011 data only.
# 60-month data: drop the sum_emo and sum_soc columns, keep only the rows
# from 2011, and add `score`, the row sum of every column whose name starts
# with "q" (missing answers are ignored in the sum).
# NOTE(review): the item summaries select starts_with("q_"), while the score
# uses starts_with("q"); confirm no non-item column name starts with "q".
ds_60 <- ds_60 %>%
  select(-c(sum_emo, sum_soc)) %>%
  filter(year == "2011") %>%
  mutate(score = rowSums(select(., starts_with("q")), na.rm = TRUE))

# Sanity check: only 2011 should be left.
ds_60 %>%
  count(year)
## year n
## 1 2011 22331
# 48-month data: drop the sum_emo and sum_soc columns, keep only the rows
# from 2011, and add `score`, the row sum of every column whose name starts
# with "q" (missing answers are ignored in the sum).
# NOTE(review): the item summaries select starts_with("q_"), while the score
# uses starts_with("q"); confirm no non-item column name starts with "q".
ds_48 <- ds_48 %>%
  select(-c(sum_emo, sum_soc)) %>%
  filter(year == "2011") %>%
  mutate(score = rowSums(select(., starts_with("q")), na.rm = TRUE))

# Sanity check: only 2011 should be left.
ds_48 %>%
  count(year)
## year n
## 1 2011 12473
# Visual check (48 months): one bar chart per item showing how often each
# response category was used. Items are converted to factors first because
# plot_bar() charts discrete columns.
# across() replaces the superseded scoped verb mutate_all().
ds_48 %>%
  select(starts_with("q_")) %>%
  mutate(across(everything(), factor)) %>%
  DataExplorer::plot_bar()
## Visual check: Original data (60 months)
# Visual check (60 months): one bar chart per item showing how often each
# response category was used. Items are converted to factors first because
# plot_bar() charts discrete columns.
# across() replaces the superseded scoped verb mutate_all().
ds_60 %>%
  select(starts_with("q_")) %>%
  mutate(across(everything(), factor)) %>%
  DataExplorer::plot_bar()
# Frequency table (counts, valid/total and cumulative percentages) for every
# 48-month item.
ds_48 %>%
select(starts_with("q_")) %>% summarytools::freq()
## Registered S3 method overwritten by 'pryr':
## method from
## print.bytes Rcpp
## Frequencies
## ds_48$q_1
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 11073 88.78 88.78 88.78 88.78
## 5 1249 10.01 98.79 10.01 98.79
## 10 151 1.21 100.00 1.21 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_2
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 8386 67.23 67.23 67.23 67.23
## 5 2720 21.81 89.04 21.81 89.04
## 10 1367 10.96 100.00 10.96 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_3
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 10222 81.95 81.95 81.95 81.95
## 5 1745 13.99 95.94 13.99 95.94
## 10 506 4.06 100.00 4.06 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_4
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 10489 84.09 84.09 84.09 84.09
## 5 1476 11.83 95.93 11.83 95.93
## 10 508 4.07 100.00 4.07 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_5
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 10441 83.71 83.71 83.71 83.71
## 5 1685 13.51 97.22 13.51 97.22
## 10 347 2.78 100.00 2.78 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_6
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 9901 79.38 79.38 79.38 79.38
## 5 1653 13.25 92.63 13.25 92.63
## 10 919 7.37 100.00 7.37 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_7
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 9452 75.78 75.78 75.78 75.78
## 5 2380 19.08 94.86 19.08 94.86
## 10 641 5.14 100.00 5.14 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_8
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 9286 74.45 74.45 74.45 74.45
## 5 2180 17.48 91.93 17.48 91.93
## 10 1007 8.07 100.00 8.07 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_9
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 11417 91.53 91.53 91.53 91.53
## 5 903 7.24 98.77 7.24 98.77
## 10 153 1.23 100.00 1.23 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_10
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 11711 93.89 93.89 93.89 93.89
## 5 553 4.43 98.32 4.43 98.32
## 10 209 1.68 100.00 1.68 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_11
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 11355 91.04 91.04 91.04 91.04
## 5 477 3.82 94.86 3.82 94.86
## 10 641 5.14 100.00 5.14 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_12
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 9672 77.54 77.54 77.54 77.54
## 5 2391 19.17 96.71 19.17 96.71
## 10 410 3.29 100.00 3.29 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_13
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 9613 77.07 77.07 77.07 77.07
## 5 2619 21.00 98.07 21.00 98.07
## 10 241 1.93 100.00 1.93 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_14
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 11364 91.11 91.11 91.11 91.11
## 5 984 7.89 99.00 7.89 99.00
## 10 125 1.00 100.00 1.00 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_15
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 11523 92.38 92.38 92.38 92.38
## 5 689 5.52 97.91 5.52 97.91
## 10 261 2.09 100.00 2.09 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_16
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 7917 63.47 63.47 63.47 63.47
## 5 2394 19.19 82.67 19.19 82.67
## 10 2162 17.33 100.00 17.33 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_17
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 11259 90.27 90.27 90.27 90.27
## 5 895 7.18 97.44 7.18 97.44
## 10 319 2.56 100.00 2.56 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_18
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 10366 83.11 83.11 83.11 83.11
## 5 1707 13.69 96.79 13.69 96.79
## 10 400 3.21 100.00 3.21 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_19
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 9780 78.41 78.41 78.41 78.41
## 5 1865 14.95 93.36 14.95 93.36
## 10 828 6.64 100.00 6.64 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_20
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 10480 84.02 84.02 84.02 84.02
## 5 1694 13.58 97.60 13.58 97.60
## 10 299 2.40 100.00 2.40 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_21
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 10162 81.47 81.47 81.47 81.47
## 5 1930 15.47 96.95 15.47 96.95
## 10 381 3.05 100.00 3.05 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_22
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 11785 94.48 94.48 94.48 94.48
## 5 354 2.84 97.32 2.84 97.32
## 10 334 2.68 100.00 2.68 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_23
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 12086 96.90 96.90 96.90 96.90
## 5 214 1.72 98.61 1.72 98.61
## 10 173 1.39 100.00 1.39 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_24
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 8781 70.40 70.40 70.40 70.40
## 5 3110 24.93 95.33 24.93 95.33
## 10 582 4.67 100.00 4.67 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_25
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 10713 85.89 85.89 85.89 85.89
## 5 1282 10.28 96.17 10.28 96.17
## 10 478 3.83 100.00 3.83 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_26
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 9669 77.52 77.52 77.52 77.52
## 5 1732 13.89 91.41 13.89 91.41
## 10 1072 8.59 100.00 8.59 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_27
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 11938 95.71 95.71 95.71 95.71
## 5 345 2.77 98.48 2.77 98.48
## 10 190 1.52 100.00 1.52 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_28
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 7964 63.85 63.85 63.85 63.85
## 5 3447 27.64 91.49 27.64 91.49
## 10 1062 8.51 100.00 8.51 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_29
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 11694 93.75 93.75 93.75 93.75
## 5 690 5.53 99.29 5.53 99.29
## 10 89 0.71 100.00 0.71 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_30
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 11734 94.08 94.08 94.08 94.08
## 5 579 4.64 98.72 4.64 98.72
## 10 160 1.28 100.00 1.28 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_31
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 9838 78.87 78.87 78.87 78.87
## 5 2032 16.29 95.17 16.29 95.17
## 10 603 4.83 100.00 4.83 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
##
## ds_48$q_32
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 10832 86.84 86.84 86.84 86.84
## 5 1164 9.33 96.18 9.33 96.18
## 10 477 3.82 100.00 3.82 100.00
## <NA> 0 0.00 100.00
## Total 12473 100.00 100.00 100.00 100.00
# Frequency table (counts, valid/total and cumulative percentages) for every
# 60-month item.
ds_60 %>%
select(starts_with("q_")) %>% summarytools::freq()
## Frequencies
## ds_60$q_1
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 19080 85.44 85.44 85.44 85.44
## 5 2937 13.15 98.59 13.15 98.59
## 10 314 1.41 100.00 1.41 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_2
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 14230 63.72 63.72 63.72 63.72
## 5 5214 23.35 87.07 23.35 87.07
## 10 2887 12.93 100.00 12.93 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_3
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 16489 73.84 73.84 73.84 73.84
## 5 4835 21.65 95.49 21.65 95.49
## 10 1007 4.51 100.00 4.51 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_4
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 17370 77.78 77.78 77.78 77.78
## 5 3881 17.38 95.16 17.38 95.16
## 10 1080 4.84 100.00 4.84 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_5
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 18083 80.98 80.98 80.98 80.98
## 5 3240 14.51 95.49 14.51 95.49
## 10 1008 4.51 100.00 4.51 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_6
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 16967 75.98 75.98 75.98 75.98
## 5 3500 15.67 91.65 15.67 91.65
## 10 1864 8.35 100.00 8.35 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_7
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 16652 74.57 74.57 74.57 74.57
## 5 4693 21.02 95.58 21.02 95.58
## 10 986 4.42 100.00 4.42 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_8
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 19763 88.50 88.50 88.50 88.50
## 5 2276 10.19 98.69 10.19 98.69
## 10 292 1.31 100.00 1.31 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_9
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 17211 77.07 77.07 77.07 77.07
## 5 3503 15.69 92.76 15.69 92.76
## 10 1617 7.24 100.00 7.24 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_10
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 20337 91.07 91.07 91.07 91.07
## 5 1768 7.92 98.99 7.92 98.99
## 10 226 1.01 100.00 1.01 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_11
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 21546 96.48 96.48 96.48 96.48
## 5 441 1.97 98.46 1.97 98.46
## 10 344 1.54 100.00 1.54 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_12
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 20431 91.49 91.49 91.49 91.49
## 5 822 3.68 95.17 3.68 95.17
## 10 1078 4.83 100.00 4.83 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_13
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 18227 81.62 81.62 81.62 81.62
## 5 3348 14.99 96.61 14.99 96.61
## 10 756 3.39 100.00 3.39 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_14
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 15083 67.54 67.54 67.54 67.54
## 5 6124 27.42 94.97 27.42 94.97
## 10 1124 5.03 100.00 5.03 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_15
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 15975 71.54 71.54 71.54 71.54
## 5 5675 25.41 96.95 25.41 96.95
## 10 681 3.05 100.00 3.05 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_16
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 13456 60.26 60.26 60.26 60.26
## 5 5085 22.77 83.03 22.77 83.03
## 10 3790 16.97 100.00 16.97 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_17
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 19557 87.58 87.58 87.58 87.58
## 5 2231 9.99 97.57 9.99 97.57
## 10 543 2.43 100.00 2.43 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_18
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 20446 91.56 91.56 91.56 91.56
## 5 1540 6.90 98.46 6.90 98.46
## 10 345 1.54 100.00 1.54 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_19
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 17580 78.72 78.72 78.72 78.72
## 5 3555 15.92 94.64 15.92 94.64
## 10 1196 5.36 100.00 5.36 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_20
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 17589 78.76 78.76 78.76 78.76
## 5 4100 18.36 97.13 18.36 97.13
## 10 642 2.87 100.00 2.87 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_21
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 16848 75.45 75.45 75.45 75.45
## 5 4443 19.90 95.34 19.90 95.34
## 10 1040 4.66 100.00 4.66 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_22
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 20668 92.55 92.55 92.55 92.55
## 5 893 4.00 96.55 4.00 96.55
## 10 770 3.45 100.00 3.45 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_23
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 21545 96.48 96.48 96.48 96.48
## 5 458 2.05 98.53 2.05 98.53
## 10 328 1.47 100.00 1.47 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_24
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 14573 65.26 65.26 65.26 65.26
## 5 6580 29.47 94.72 29.47 94.72
## 10 1178 5.28 100.00 5.28 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_25
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 18832 84.33 84.33 84.33 84.33
## 5 2467 11.05 95.38 11.05 95.38
## 10 1032 4.62 100.00 4.62 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_26
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 17647 79.02 79.02 79.02 79.02
## 5 3206 14.36 93.38 14.36 93.38
## 10 1478 6.62 100.00 6.62 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_27
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 14105 63.16 63.16 63.16 63.16
## 5 6691 29.96 93.13 29.96 93.13
## 10 1535 6.87 100.00 6.87 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_28
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 20182 90.38 90.38 90.38 90.38
## 5 1946 8.71 99.09 8.71 99.09
## 10 203 0.91 100.00 0.91 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_29
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 20720 92.79 92.79 92.79 92.79
## 5 1345 6.02 98.81 6.02 98.81
## 10 266 1.19 100.00 1.19 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_30
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 17795 79.69 79.69 79.69 79.69
## 5 3425 15.34 95.02 15.34 95.02
## 10 1111 4.98 100.00 4.98 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_31
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 13290 59.51 59.51 59.51 59.51
## 5 7825 35.04 94.55 35.04 94.55
## 10 1216 5.45 100.00 5.45 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
##
## ds_60$q_32
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 0 18996 85.07 85.07 85.07 85.07
## 5 2262 10.13 95.20 10.13 95.20
## 10 1073 4.80 100.00 4.80 100.00
## <NA> 0 0.00 100.00
## Total 22331 100.00 100.00 100.00 100.00
# Reliability analysis (Cronbach's alpha, alpha-if-item-dropped and item
# statistics) for the 48-month items.
# The call is namespace-qualified because ggplot2, attached through
# tidyverse, also exports a function named alpha(); the bare name resolves to
# the psych version only while psych is attached after tidyverse.
ds_48 %>%
  select(starts_with("q_")) %>%
  psych::alpha()
##
## Reliability analysis
## Call: alpha(x = .)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.83 0.84 0.86 0.14 5.1 0.0022 1 0.96 0.12
##
## lower alpha upper 95% confidence boundaries
## 0.82 0.83 0.83
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## q_1 0.82 0.83 0.85 0.14 4.9 0.0023 0.0104 0.11
## q_2 0.83 0.84 0.86 0.14 5.2 0.0021 0.0102 0.12
## q_3 0.83 0.83 0.85 0.14 5.0 0.0022 0.0101 0.12
## q_4 0.82 0.83 0.85 0.14 4.9 0.0023 0.0103 0.12
## q_5 0.83 0.83 0.85 0.14 5.1 0.0022 0.0105 0.12
## q_6 0.83 0.84 0.86 0.14 5.2 0.0022 0.0104 0.12
## q_7 0.82 0.83 0.85 0.14 4.8 0.0023 0.0099 0.11
## q_8 0.82 0.83 0.85 0.14 4.9 0.0023 0.0103 0.11
## q_9 0.82 0.83 0.85 0.14 5.0 0.0022 0.0105 0.11
## q_10 0.83 0.84 0.86 0.14 5.1 0.0022 0.0107 0.12
## q_11 0.83 0.84 0.86 0.14 5.1 0.0022 0.0108 0.12
## q_12 0.83 0.84 0.86 0.14 5.1 0.0022 0.0107 0.12
## q_13 0.81 0.83 0.84 0.13 4.7 0.0023 0.0094 0.11
## q_14 0.82 0.83 0.85 0.14 4.9 0.0022 0.0104 0.12
## q_15 0.83 0.84 0.86 0.14 5.2 0.0022 0.0106 0.12
## q_16 0.82 0.83 0.85 0.14 5.0 0.0022 0.0103 0.12
## q_17 0.82 0.83 0.85 0.14 4.9 0.0022 0.0100 0.12
## q_18 0.82 0.83 0.85 0.13 4.8 0.0023 0.0101 0.11
## q_19 0.82 0.83 0.85 0.14 4.9 0.0022 0.0099 0.12
## q_20 0.82 0.83 0.85 0.13 4.8 0.0023 0.0100 0.11
## q_21 0.83 0.84 0.85 0.14 5.1 0.0022 0.0106 0.12
## q_22 0.82 0.83 0.85 0.14 5.0 0.0022 0.0108 0.11
## q_23 0.83 0.84 0.86 0.14 5.1 0.0022 0.0106 0.12
## q_24 0.81 0.83 0.84 0.13 4.7 0.0024 0.0093 0.11
## q_25 0.82 0.83 0.85 0.14 4.9 0.0023 0.0102 0.12
## q_26 0.82 0.83 0.85 0.14 5.0 0.0022 0.0108 0.12
## q_27 0.82 0.83 0.85 0.14 5.0 0.0022 0.0103 0.12
## q_28 0.82 0.83 0.85 0.14 4.9 0.0022 0.0104 0.11
## q_29 0.82 0.83 0.85 0.14 4.9 0.0022 0.0103 0.11
## q_30 0.82 0.83 0.85 0.14 5.0 0.0022 0.0104 0.12
## q_31 0.82 0.83 0.85 0.14 4.9 0.0023 0.0100 0.12
## q_32 0.83 0.84 0.86 0.14 5.2 0.0022 0.0102 0.12
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## q_1 12473 0.47 0.50 0.48 0.43 0.62 1.8
## q_2 12473 0.27 0.22 0.17 0.16 2.19 3.4
## q_3 12473 0.32 0.35 0.31 0.25 1.11 2.5
## q_4 12473 0.48 0.46 0.44 0.41 1.00 2.5
## q_5 12473 0.32 0.34 0.29 0.25 0.95 2.3
## q_6 12473 0.29 0.25 0.20 0.20 1.40 3.0
## q_7 12473 0.55 0.52 0.51 0.48 1.47 2.8
## q_8 12473 0.51 0.48 0.46 0.43 1.68 3.1
## q_9 12473 0.38 0.42 0.38 0.33 0.48 1.7
## q_10 12473 0.27 0.30 0.25 0.22 0.39 1.6
## q_11 12473 0.30 0.30 0.25 0.22 0.71 2.4
## q_12 12473 0.30 0.30 0.24 0.22 1.29 2.5
## q_13 12473 0.64 0.63 0.63 0.59 1.24 2.4
## q_14 12473 0.39 0.43 0.40 0.34 0.49 1.7
## q_15 12473 0.23 0.25 0.19 0.18 0.49 1.8
## q_16 12473 0.44 0.38 0.35 0.33 2.69 3.9
## q_17 12473 0.42 0.45 0.44 0.36 0.61 2.0
## q_18 12473 0.55 0.55 0.54 0.49 1.00 2.4
## q_19 12473 0.44 0.46 0.45 0.36 1.41 2.9
## q_20 12473 0.55 0.54 0.53 0.50 0.92 2.2
## q_21 12473 0.30 0.32 0.27 0.23 1.08 2.4
## q_22 12473 0.35 0.36 0.32 0.30 0.41 1.8
## q_23 12473 0.25 0.28 0.22 0.21 0.22 1.3
## q_24 12473 0.64 0.62 0.63 0.58 1.71 2.8
## q_25 12473 0.49 0.48 0.46 0.43 0.90 2.4
## q_26 12473 0.38 0.35 0.30 0.29 1.55 3.1
## q_27 12473 0.36 0.41 0.38 0.32 0.29 1.5
## q_28 12473 0.46 0.45 0.43 0.38 2.23 3.2
## q_29 12473 0.46 0.50 0.49 0.42 0.35 1.4
## q_30 12473 0.36 0.42 0.39 0.32 0.36 1.5
## q_31 12473 0.50 0.48 0.46 0.43 1.30 2.7
## q_32 12473 0.22 0.21 0.15 0.15 0.85 2.3
##
## Non missing response frequency for each item
## 0 5 10 miss
## q_1 0.89 0.10 0.01 0
## q_2 0.67 0.22 0.11 0
## q_3 0.82 0.14 0.04 0
## q_4 0.84 0.12 0.04 0
## q_5 0.84 0.14 0.03 0
## q_6 0.79 0.13 0.07 0
## q_7 0.76 0.19 0.05 0
## q_8 0.74 0.17 0.08 0
## q_9 0.92 0.07 0.01 0
## q_10 0.94 0.04 0.02 0
## q_11 0.91 0.04 0.05 0
## q_12 0.78 0.19 0.03 0
## q_13 0.77 0.21 0.02 0
## q_14 0.91 0.08 0.01 0
## q_15 0.92 0.06 0.02 0
## q_16 0.63 0.19 0.17 0
## q_17 0.90 0.07 0.03 0
## q_18 0.83 0.14 0.03 0
## q_19 0.78 0.15 0.07 0
## q_20 0.84 0.14 0.02 0
## q_21 0.81 0.15 0.03 0
## q_22 0.94 0.03 0.03 0
## q_23 0.97 0.02 0.01 0
## q_24 0.70 0.25 0.05 0
## q_25 0.86 0.10 0.04 0
## q_26 0.78 0.14 0.09 0
## q_27 0.96 0.03 0.02 0
## q_28 0.64 0.28 0.09 0
## q_29 0.94 0.06 0.01 0
## q_30 0.94 0.05 0.01 0
## q_31 0.79 0.16 0.05 0
## q_32 0.87 0.09 0.04 0
# Reliability analysis (Cronbach's alpha, alpha-if-item-dropped and item
# statistics) for the 60-month items.
# The call is namespace-qualified because ggplot2, attached through
# tidyverse, also exports a function named alpha(); the bare name resolves to
# the psych version only while psych is attached after tidyverse.
ds_60 %>%
  select(starts_with("q_")) %>%
  psych::alpha()
##
## Reliability analysis
## Call: alpha(x = .)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.85 0.86 0.87 0.16 5.9 0.0014 1.2 1.1 0.14
##
## lower alpha upper 95% confidence boundaries
## 0.84 0.85 0.85
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## q_1 0.84 0.85 0.87 0.15 5.6 0.0015 0.013 0.13
## q_2 0.85 0.86 0.87 0.16 6.0 0.0014 0.012 0.14
## q_3 0.85 0.85 0.87 0.16 5.8 0.0014 0.012 0.14
## q_4 0.85 0.85 0.87 0.16 5.8 0.0014 0.012 0.14
## q_5 0.84 0.85 0.87 0.15 5.6 0.0015 0.013 0.13
## q_6 0.85 0.86 0.87 0.16 5.9 0.0014 0.012 0.14
## q_7 0.84 0.85 0.87 0.15 5.6 0.0015 0.012 0.14
## q_8 0.84 0.85 0.87 0.16 5.7 0.0015 0.013 0.13
## q_9 0.84 0.85 0.87 0.15 5.6 0.0015 0.013 0.13
## q_10 0.84 0.85 0.87 0.16 5.7 0.0015 0.013 0.14
## q_11 0.85 0.85 0.87 0.16 5.9 0.0014 0.013 0.14
## q_12 0.85 0.85 0.87 0.16 5.9 0.0014 0.013 0.14
## q_13 0.84 0.85 0.87 0.15 5.5 0.0015 0.012 0.13
## q_14 0.85 0.85 0.87 0.16 5.9 0.0014 0.013 0.14
## q_15 0.84 0.85 0.86 0.15 5.5 0.0015 0.011 0.13
## q_16 0.84 0.85 0.87 0.16 5.8 0.0015 0.012 0.14
## q_17 0.85 0.85 0.87 0.16 5.9 0.0014 0.013 0.14
## q_18 0.84 0.85 0.87 0.16 5.7 0.0015 0.012 0.14
## q_19 0.84 0.85 0.87 0.16 5.7 0.0015 0.012 0.14
## q_20 0.84 0.85 0.87 0.15 5.5 0.0015 0.012 0.13
## q_21 0.85 0.85 0.87 0.16 5.9 0.0014 0.013 0.14
## q_22 0.84 0.85 0.87 0.16 5.8 0.0015 0.013 0.13
## q_23 0.85 0.85 0.87 0.16 5.8 0.0015 0.013 0.14
## q_24 0.83 0.84 0.86 0.15 5.4 0.0016 0.011 0.13
## q_25 0.84 0.85 0.87 0.15 5.6 0.0015 0.012 0.13
## q_26 0.84 0.85 0.87 0.16 5.8 0.0015 0.013 0.13
## q_27 0.84 0.85 0.87 0.15 5.7 0.0015 0.012 0.13
## q_28 0.84 0.85 0.87 0.15 5.6 0.0015 0.012 0.13
## q_29 0.84 0.85 0.87 0.16 5.7 0.0015 0.013 0.13
## q_30 0.84 0.85 0.87 0.15 5.6 0.0015 0.012 0.13
## q_31 0.84 0.85 0.87 0.15 5.5 0.0015 0.012 0.13
## q_32 0.85 0.86 0.87 0.16 5.9 0.0014 0.012 0.14
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## q_1 22331 0.48 0.50 0.48 0.43 0.80 2.0
## q_2 22331 0.26 0.22 0.17 0.16 2.46 3.6
## q_3 22331 0.32 0.33 0.29 0.24 1.53 2.8
## q_4 22331 0.31 0.33 0.30 0.24 1.35 2.7
## q_5 22331 0.51 0.50 0.48 0.45 1.18 2.6
## q_6 22331 0.31 0.28 0.23 0.23 1.62 3.1
## q_7 22331 0.56 0.54 0.53 0.50 1.49 2.7
## q_8 22331 0.38 0.42 0.39 0.34 0.64 1.9
## q_9 22331 0.51 0.49 0.48 0.45 1.51 3.0
## q_10 22331 0.38 0.42 0.39 0.34 0.50 1.7
## q_11 22331 0.26 0.30 0.25 0.22 0.25 1.4
## q_12 22331 0.29 0.30 0.25 0.23 0.67 2.3
## q_13 22331 0.57 0.57 0.56 0.52 1.09 2.4
## q_14 22331 0.31 0.30 0.25 0.23 1.87 2.9
## q_15 22331 0.65 0.63 0.64 0.60 1.58 2.6
## q_16 22331 0.45 0.40 0.37 0.36 2.84 3.8
## q_17 22331 0.30 0.32 0.27 0.25 0.74 2.1
## q_18 22331 0.40 0.44 0.43 0.35 0.50 1.7
## q_19 22331 0.41 0.43 0.41 0.34 1.33 2.8
## q_20 22331 0.58 0.57 0.56 0.53 1.21 2.5
## q_21 22331 0.29 0.30 0.25 0.21 1.46 2.7
## q_22 22331 0.38 0.38 0.34 0.32 0.54 2.0
## q_23 22331 0.30 0.32 0.28 0.26 0.25 1.4
## q_24 22331 0.66 0.64 0.64 0.61 2.00 2.9
## q_25 22331 0.55 0.54 0.52 0.49 1.01 2.5
## q_26 22331 0.41 0.39 0.35 0.34 1.38 2.9
## q_27 22331 0.47 0.47 0.44 0.39 2.19 3.1
## q_28 22331 0.52 0.55 0.54 0.48 0.53 1.7
## q_29 22331 0.37 0.42 0.39 0.32 0.42 1.6
## q_30 22331 0.51 0.50 0.48 0.45 1.26 2.7
## q_31 22331 0.61 0.59 0.58 0.55 2.30 3.0
## q_32 22331 0.27 0.26 0.20 0.20 0.99 2.5
##
## Non missing response frequency for each item
## 0 5 10 miss
## q_1 0.85 0.13 0.01 0
## q_2 0.64 0.23 0.13 0
## q_3 0.74 0.22 0.05 0
## q_4 0.78 0.17 0.05 0
## q_5 0.81 0.15 0.05 0
## q_6 0.76 0.16 0.08 0
## q_7 0.75 0.21 0.04 0
## q_8 0.89 0.10 0.01 0
## q_9 0.77 0.16 0.07 0
## q_10 0.91 0.08 0.01 0
## q_11 0.96 0.02 0.02 0
## q_12 0.91 0.04 0.05 0
## q_13 0.82 0.15 0.03 0
## q_14 0.68 0.27 0.05 0
## q_15 0.72 0.25 0.03 0
## q_16 0.60 0.23 0.17 0
## q_17 0.88 0.10 0.02 0
## q_18 0.92 0.07 0.02 0
## q_19 0.79 0.16 0.05 0
## q_20 0.79 0.18 0.03 0
## q_21 0.75 0.20 0.05 0
## q_22 0.93 0.04 0.03 0
## q_23 0.96 0.02 0.01 0
## q_24 0.65 0.29 0.05 0
## q_25 0.84 0.11 0.05 0
## q_26 0.79 0.14 0.07 0
## q_27 0.63 0.30 0.07 0
## q_28 0.90 0.09 0.01 0
## q_29 0.93 0.06 0.01 0
## q_30 0.80 0.15 0.05 0
## q_31 0.60 0.35 0.05 0
## q_32 0.85 0.10 0.05 0
# Descriptive statistics of the total score, computed separately for each
# level of `sex` (48-month data).
ds_48 %>%
select(sex, score) %>%
group_by(sex) %>%
summarytools::descr()
## Descriptive Statistics
## score by sex
## Data Frame: ds_48
## N: 6495
##
## sex = M sex = F
## ----------------- --------- ---------
## Mean 37.38 29.08
## Std.Dev 33.10 27.55
## Min 0.00 0.00
## Q1 15.00 10.00
## Median 30.00 20.00
## Q3 55.00 40.00
## Max 240.00 230.00
## MAD 29.65 22.24
## IQR 40.00 30.00
## CV 0.89 0.95
## Skewness 1.43 1.58
## SE.Skewness 0.03 0.03
## Kurtosis 2.52 3.32
## N.Valid 6495.00 5978.00
## Pct.Valid 100.00 100.00
# Descriptive statistics of the total score by sex for the 60-month data:
# keep only sex and score, group by sex, and let summarytools::descr()
# print one column of statistics per group.
ds_60 %>%
select(sex, score) %>%
group_by(sex) %>%
summarytools::descr()
## Descriptive Statistics
## score by sex
## Data Frame: ds_60
## N: 11291
##
## sex = 1 sex = 2
## ----------------- ---------- ----------
## Mean 44.82 34.02
## Std.Dev 36.66 30.44
## Min 0.00 0.00
## Q1 15.00 10.00
## Median 35.00 25.00
## Q3 65.00 50.00
## Max 275.00 290.00
## MAD 29.65 22.24
## IQR 50.00 40.00
## CV 0.82 0.89
## Skewness 1.24 1.49
## SE.Skewness 0.02 0.02
## Kurtosis 1.92 2.91
## N.Valid 11291.00 11040.00
## Pct.Valid 100.00 100.00
As described, I’ll get a random sample from the main data (items-only)
36 months
# Fix the RNG state, then draw a 500-row random subsample of the 36-month data.
set.seed(123)
ds_36_random <- sample_n(ds_36, size = 500)
48 months
# Fix the RNG state, then draw a 500-row random subsample of the 48-month data.
set.seed(123)
ds_48_random <- sample_n(ds_48, size = 500)
60 months
# Fix the RNG state, then draw a 500-row random subsample of the 60-month data.
# This subsample is the input of every analysis below.
set.seed(15)
ds_60_random <- sample_n(ds_60, size = 500)
library(fairsubset)
# Alternative 500-row subset chosen by fairSubset() among 10 random candidates
# (subset_setting = "ks"). The call was previously unseeded, so the selected
# subset changed on every run; seed it so the check can be reproduced.
# NOTE(review): assumes fairSubset() draws its candidates with R's RNG - confirm.
set.seed(15)
check_ds <- fairSubset(
  ds_60,
  subset_setting = "ks",
  manual_N = 500,
  random_subsets = 10
)
# Keep the selected subset as a plain data frame.
ds_60_random2 <- as.data.frame(check_ds$best_subset)
# Compare the total score of the 500-row random sample with the full 60-month
# data: stack both data sets with a `base` label ("random" / "original"),
# group by that label and describe `score` per group.
# select(score) does not drop the grouping column; dplyr adds `base` back,
# which is the "Adding missing grouping variables" message printed below.
bind_rows(
ds_60_random %>% mutate(base = "random") %>% select(-months),
ds_60 %>% mutate(base = "original") %>% select(-months)) %>%
group_by(base) %>%
select(score) %>%
summarytools::descr()
## Adding missing grouping variables: `base`
## Descriptive Statistics
## score by base
## N: 22331
##
## base = original base = random
## ----------------- ----------------- ---------------
## Mean 39.48 41.00
## Std.Dev 34.16 36.40
## Min 0.00 0.00
## Q1 15.00 15.00
## Median 30.00 30.00
## Q3 55.00 60.00
## Max 290.00 205.00
## MAD 29.65 29.65
## IQR 40.00 45.00
## CV 0.87 0.89
## Skewness 1.38 1.20
## SE.Skewness 0.02 0.11
## Kurtosis 2.44 1.24
## N.Valid 22331.00 500.00
## Pct.Valid 100.00 100.00
# Two-sample t-test (equal variances assumed) of the total score between the
# random sample and the full 60-month data. The stacked data frame is passed
# explicitly as `data`, and TRUE is spelled out because `T` is an ordinary,
# reassignable variable.
bind_rows(
  ds_60_random %>% mutate(base = "random") %>% select(-months),
  ds_60 %>% mutate(base = "original") %>% select(-months)
) %>%
  {
    t.test(score ~ base, data = ., var.equal = TRUE)
  }
##
## Two Sample t-test
##
## data: score by base
## t = -0.98204, df = 22829, p-value = 0.3261
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -4.551496 1.513030
## sample estimates:
## mean in group original mean in group random
## 39.48077 41.00000
# Cross-tabulate sex by data source (random sample vs. full data) with a
# chi-squared test and no plot. TRUE/FALSE are spelled out because `T`/`F`
# are ordinary, reassignable variables.
bind_rows(
  ds_60_random %>% mutate(base = "random") %>% select(-months),
  ds_60 %>% mutate(base = "original") %>% select(-months)
) %>%
  {
    descr::crosstab(.$sex, .$base, chisq = TRUE, plot = FALSE)
  }
## Cell Contents
## |-------------------------|
## | Count |
## |-------------------------|
##
## ==================================
## .$base
## .$sex original random Total
## ----------------------------------
## 1 11291 276 11567
## ----------------------------------
## 2 11040 224 11264
## ----------------------------------
## Total 22331 500 22831
## ==================================
##
## Statistics for All Table Factors
##
## Pearson's Chi-squared test
## ------------------------------------------------------------
## Chi^2 = 4.208733 d.f. = 1 p = 0.0402
##
## Pearson's Chi-squared test with Yates' continuity correction
## ------------------------------------------------------------
## Chi^2 = 4.025226 d.f. = 1 p = 0.0448
## Minimum expected frequency: 246.6821
# Reliability (Cronbach's alpha) of the items in the random sample.
# Item responses 0 / 5 / 10 are recoded to 1 / 2 / 3 first.
ds_60_random %>%
  select(starts_with("q")) %>%
  mutate(across(
    everything(),
    ~ case_when(
      .x == "0" ~ 1,
      .x == "5" ~ 2,
      .x == "10" ~ 3
    )
  )) %>%
  alpha()
##
## Reliability analysis
## Call: alpha(x = .)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.86 0.87 0.9 0.17 6.5 0.0085 1.3 0.23 0.15
##
## lower alpha upper 95% confidence boundaries
## 0.85 0.86 0.88
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## q_1 0.86 0.86 0.89 0.17 6.2 0.0088 0.016 0.15
## q_2 0.86 0.87 0.90 0.17 6.5 0.0085 0.016 0.16
## q_3 0.86 0.87 0.90 0.17 6.5 0.0085 0.016 0.15
## q_4 0.86 0.87 0.90 0.17 6.5 0.0085 0.016 0.16
## q_5 0.86 0.86 0.89 0.17 6.2 0.0089 0.017 0.15
## q_6 0.86 0.87 0.90 0.17 6.6 0.0084 0.016 0.16
## q_7 0.85 0.86 0.89 0.16 6.1 0.0091 0.016 0.15
## q_8 0.86 0.86 0.89 0.17 6.4 0.0086 0.016 0.15
## q_9 0.85 0.86 0.89 0.16 6.1 0.0091 0.016 0.15
## q_10 0.86 0.86 0.89 0.17 6.3 0.0087 0.016 0.15
## q_11 0.86 0.87 0.90 0.17 6.5 0.0086 0.017 0.16
## q_12 0.86 0.87 0.90 0.17 6.5 0.0086 0.017 0.16
## q_13 0.86 0.86 0.89 0.16 6.1 0.0090 0.016 0.15
## q_14 0.86 0.86 0.89 0.17 6.4 0.0086 0.017 0.15
## q_15 0.85 0.86 0.89 0.16 6.0 0.0092 0.015 0.14
## q_16 0.86 0.86 0.89 0.17 6.3 0.0088 0.016 0.15
## q_17 0.86 0.87 0.90 0.17 6.5 0.0086 0.017 0.16
## q_18 0.86 0.86 0.89 0.17 6.3 0.0087 0.016 0.15
## q_19 0.86 0.86 0.89 0.17 6.3 0.0087 0.016 0.15
## q_20 0.85 0.86 0.89 0.16 6.1 0.0091 0.016 0.15
## q_21 0.86 0.86 0.90 0.17 6.4 0.0086 0.017 0.15
## q_22 0.86 0.86 0.89 0.17 6.4 0.0087 0.016 0.16
## q_23 0.86 0.86 0.89 0.17 6.4 0.0087 0.016 0.16
## q_24 0.85 0.86 0.89 0.16 6.0 0.0092 0.015 0.15
## q_25 0.85 0.86 0.89 0.16 6.1 0.0090 0.016 0.15
## q_26 0.86 0.86 0.89 0.17 6.4 0.0087 0.017 0.15
## q_27 0.86 0.86 0.89 0.17 6.2 0.0088 0.017 0.15
## q_28 0.86 0.86 0.89 0.17 6.2 0.0088 0.016 0.15
## q_29 0.86 0.86 0.89 0.17 6.3 0.0087 0.016 0.15
## q_30 0.86 0.86 0.89 0.17 6.2 0.0089 0.016 0.15
## q_31 0.85 0.86 0.89 0.16 6.0 0.0092 0.016 0.15
## q_32 0.86 0.87 0.90 0.17 6.5 0.0085 0.016 0.16
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## q_1 500 0.46 0.50 0.47 0.42 1.2 0.41
## q_2 500 0.36 0.32 0.29 0.27 1.5 0.71
## q_3 500 0.30 0.31 0.28 0.23 1.3 0.54
## q_4 500 0.27 0.29 0.24 0.20 1.3 0.55
## q_5 500 0.52 0.51 0.50 0.47 1.2 0.51
## q_6 500 0.28 0.25 0.20 0.20 1.3 0.65
## q_7 500 0.60 0.58 0.57 0.55 1.3 0.58
## q_8 500 0.35 0.39 0.37 0.30 1.1 0.39
## q_9 500 0.61 0.59 0.59 0.55 1.3 0.63
## q_10 500 0.37 0.42 0.39 0.33 1.1 0.32
## q_11 500 0.24 0.27 0.22 0.20 1.1 0.29
## q_12 500 0.29 0.30 0.26 0.24 1.1 0.44
## q_13 500 0.59 0.59 0.58 0.54 1.2 0.47
## q_14 500 0.38 0.38 0.35 0.31 1.4 0.57
## q_15 500 0.69 0.68 0.68 0.64 1.3 0.53
## q_16 500 0.49 0.44 0.42 0.41 1.6 0.78
## q_17 500 0.31 0.32 0.27 0.25 1.2 0.43
## q_18 500 0.37 0.42 0.40 0.33 1.1 0.37
## q_19 500 0.43 0.46 0.44 0.37 1.3 0.54
## q_20 500 0.62 0.60 0.60 0.57 1.3 0.55
## q_21 500 0.35 0.36 0.32 0.28 1.3 0.58
## q_22 500 0.37 0.37 0.34 0.32 1.1 0.42
## q_23 500 0.33 0.36 0.33 0.30 1.0 0.27
## q_24 500 0.67 0.64 0.65 0.62 1.4 0.60
## q_25 500 0.59 0.58 0.57 0.54 1.2 0.52
## q_26 500 0.40 0.38 0.35 0.33 1.3 0.57
## q_27 500 0.49 0.48 0.45 0.41 1.5 0.64
## q_28 500 0.49 0.53 0.52 0.45 1.1 0.34
## q_29 500 0.36 0.41 0.39 0.32 1.1 0.30
## q_30 500 0.52 0.51 0.49 0.47 1.3 0.55
## q_31 500 0.65 0.62 0.62 0.60 1.5 0.61
## q_32 500 0.28 0.25 0.20 0.21 1.2 0.55
##
## Non missing response frequency for each item
## 1 2 3 miss
## q_1 0.83 0.15 0.01 0
## q_2 0.65 0.22 0.13 0
## q_3 0.71 0.25 0.04 0
## q_4 0.78 0.16 0.05 0
## q_5 0.82 0.13 0.04 0
## q_6 0.77 0.13 0.10 0
## q_7 0.74 0.20 0.06 0
## q_8 0.89 0.08 0.02 0
## q_9 0.76 0.15 0.09 0
## q_10 0.90 0.09 0.00 0
## q_11 0.97 0.02 0.02 0
## q_12 0.92 0.04 0.04 0
## q_13 0.81 0.17 0.03 0
## q_14 0.62 0.34 0.04 0
## q_15 0.71 0.26 0.03 0
## q_16 0.60 0.22 0.18 0
## q_17 0.85 0.13 0.02 0
## q_18 0.91 0.07 0.02 0
## q_19 0.78 0.18 0.05 0
## q_20 0.76 0.19 0.05 0
## q_21 0.76 0.18 0.06 0
## q_22 0.93 0.03 0.04 0
## q_23 0.97 0.02 0.01 0
## q_24 0.65 0.29 0.06 0
## q_25 0.83 0.11 0.05 0
## q_26 0.79 0.15 0.06 0
## q_27 0.61 0.31 0.08 0
## q_28 0.90 0.09 0.01 0
## q_29 0.94 0.05 0.01 0
## q_30 0.79 0.15 0.06 0
## q_31 0.59 0.35 0.06 0
## q_32 0.83 0.11 0.06 0
Last review on July 12, 2021
First result presented
# Kaiser-Meyer-Olkin sampling adequacy for the recoded items
# (responses 0 / 5 / 10 mapped to 1 / 2 / 3).
ds_60_random %>%
  select(starts_with("q")) %>%
  mutate(across(
    everything(),
    ~ case_when(
      .x == "0" ~ 1,
      .x == "5" ~ 2,
      .x == "10" ~ 3
    )
  )) %>%
  KMO()
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = .)
## Overall MSA = 0.86
## MSA for each item =
## q_1 q_2 q_3 q_4 q_5 q_6 q_7 q_8 q_9 q_10 q_11 q_12 q_13 q_14 q_15 q_16
## 0.92 0.78 0.81 0.81 0.89 0.81 0.91 0.79 0.88 0.86 0.63 0.78 0.90 0.87 0.91 0.90
## q_17 q_18 q_19 q_20 q_21 q_22 q_23 q_24 q_25 q_26 q_27 q_28 q_29 q_30 q_31 q_32
## 0.76 0.77 0.80 0.91 0.84 0.80 0.76 0.90 0.90 0.86 0.86 0.85 0.76 0.91 0.91 0.80
# Bartlett's test of sphericity on the recoded items
# (responses 0 / 5 / 10 mapped to 1 / 2 / 3). Raw data are passed, so psych
# computes the correlation matrix itself (see the message below).
ds_60_random %>%
  select(starts_with("q")) %>%
  mutate(across(
    everything(),
    ~ case_when(
      .x == "0" ~ 1,
      .x == "5" ~ 2,
      .x == "10" ~ 3
    )
  )) %>%
  psych::cortest.bartlett()
## R was not square, finding R from data
## $chisq
## [1] 4629.517
##
## $p.value
## [1] 0
##
## $df
## [1] 496
# Use the random-number generators of R 3.6 and seed them before the
# simulation-based steps that follow.
RNGversion("3.6.0")
set.seed(3)
# Polychoric correlations of the recoded items
# (responses 0 / 5 / 10 mapped to 1 / 2 / 3).
rho_60 <- ds_60_random %>%
  select(starts_with("q")) %>%
  mutate(across(
    everything(),
    ~ case_when(
      .x == "0" ~ 1,
      .x == "5" ~ 2,
      .x == "10" ~ 3
    )
  )) %>%
  polychoric()
## Warning in cor.smooth(mat): Matrix was not positive definite, smoothing was done
# Keep only the correlation matrix from the polychoric() result.
rho_60 <- rho_60[["rho"]]
# Parallel analysis on the polychoric matrix.
# NOTE(review): n.obs is not supplied, so fa.parallel() arbitrarily uses 100
# cases (see the warning below); `parallel_full` repeats this with n.obs = 500.
parallel <- rho_60 %>%
  fa.parallel()
## Warning in fa.parallel(.): It seems as if you are using a correlation
## matrix, but have not specified the number of cases. The number of subjects is
## arbitrarily set to be 100
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Parallel analysis suggests that the number of factors = 3 and the number of components = 2
# Same parallel analysis, now stating the size of the random sample (500).
parallel_full <- rho_60 %>%
  fa.parallel(n.obs = 500)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Parallel analysis suggests that the number of factors = 8 and the number of components = 4
Second result presented (page 10)
Parallel Analysis using polychoric correlation as input
PCA: 10.72, 4.46, 1.91, 1.48, 1.33, 1.26, 1.13, 1.02, 0.92; EFA: 10.13, 3.70, 1.22, 0.73
# Print the parallel analysis run on the polychoric matrix (default n.obs).
parallel
## Call: fa.parallel(x = .)
## Parallel analysis suggests that the number of factors = 3 and the number of components = 2
##
## Eigen Values of
##
## eigen values of factors
## [1] 10.13 3.70 1.22 0.73 0.69 0.49 0.44 0.30 0.18 0.15 0.05 0.04
## [13] -0.03 -0.06 -0.09 -0.15 -0.17 -0.21 -0.22 -0.27 -0.37 -0.40 -0.43 -0.49
## [25] -0.51 -0.52 -0.57 -0.62 -0.68 -0.71 -0.73 -0.75
##
## eigen values of simulated factors
## [1] 1.36 1.10 0.99 0.86 0.74 0.67 0.57 0.48 0.41 0.32 0.27 0.21
## [13] 0.14 0.08 0.01 -0.04 -0.09 -0.14 -0.18 -0.24 -0.29 -0.33 -0.38 -0.42
## [25] -0.45 -0.49 -0.53 -0.57 -0.61 -0.66 -0.69 -0.74
##
## eigen values of components
## [1] 10.72 4.46 1.91 1.48 1.33 1.26 1.13 1.02 0.92 0.84 0.82 0.72
## [13] 0.68 0.62 0.57 0.51 0.49 0.43 0.36 0.32 0.28 0.27 0.21 0.20
## [25] 0.18 0.11 0.09 0.06 0.00 0.00 0.00 0.00
##
## eigen values of simulated components
## [1] 2.25 2.07 1.95 1.82 1.71 1.63 1.54 1.44 1.37 1.29 1.23 1.17 1.10 1.04 0.97
## [16] 0.93 0.87 0.82 0.77 0.72 0.67 0.62 0.58 0.54 0.50 0.47 0.43 0.38 0.35 0.30
## [31] 0.27 0.21
# Build the long table used for the scree plot (polychoric input):
# one row per eigenvalue position (`num`) and extraction method (`name`).
obs <- data.frame(
  eigenvalue_fa = parallel$fa.values,
  eigenvalue_pca = parallel$pc.values
)
obs$type <- "Observed Data"
# Position of each eigenvalue, taken from the row count directly instead of
# converting row names to numbers; kept as double, as before.
obs$num <- as.numeric(seq_len(nrow(obs)))
obs <- obs %>%
  pivot_longer(-c(num, type)) %>%
  # "eigenvalue_fa" / "eigenvalue_pca" -> "FA" / "PCA". The prefix removal
  # used to be applied twice; once is enough.
  mutate(name = toupper(str_remove(name, "eigenvalue_")))
Plotting via ggplot2
# Scree plot of the observed FA and PCA eigenvalues (polychoric input),
# with a dashed reference line at eigenvalue = 1.
plot_scree_poly <- ggplot(obs, aes(x = num, y = value, color = name)) +
  geom_point(size = 2) +
  geom_line() +
  scale_y_continuous(name = "Eigenvalue") +
  scale_x_continuous(
    name = "Factor Number",
    breaks = seq(min(obs$num), max(obs$num))
  ) +
  geom_hline(yintercept = 1, linetype = "dashed") +
  labs(color = "Method") +
  ggtitle("Polychoric correlation") +
  theme_classic()
If I use the raw data (instead of a polychoric matrix) as input, the results become fuzzy: 7 factors and 3 components were recommended. However, due to the ordinal nature of the responses, the use of a polychoric matrix is recommended.
# Parallel analysis on the recoded raw responses using Pearson correlations
# (responses 0 / 5 / 10 mapped to 1 / 2 / 3).
parallel_raw <- ds_60_random %>%
  select(starts_with("q")) %>%
  mutate(across(
    everything(),
    ~ case_when(
      .x == "0" ~ 1,
      .x == "5" ~ 2,
      .x == "10" ~ 3
    )
  )) %>%
  fa.parallel(cor = "cor")
## Parallel analysis suggests that the number of factors = 7 and the number of components = 3
Third result presented (page 10)
Pearson eigenvalues. PCA: 6.83, 3.12, 1.60, 1.38, 1.37, 1.21, 1.15, 1.074, 0.987; EFA: 6.12, 2.26, 0.83
# Print the parallel analysis run on the raw (Pearson) data.
parallel_raw
## Call: fa.parallel(x = ., cor = "cor")
## Parallel analysis suggests that the number of factors = 7 and the number of components = 3
##
## Eigen Values of
## Original factors Resampled data Simulated data Original components
## 1 6.12 0.57 0.55 6.83
## 2 2.26 0.45 0.45 3.12
## 3 0.83 0.41 0.41 1.60
## 4 0.56 0.37 0.36 1.38
## 5 0.51 0.33 0.33 1.37
## 6 0.34 0.29 0.29 1.21
## 7 0.31 0.26 0.26 1.15
## Resampled components Simulated components
## 1 1.51 1.50
## 2 1.44 1.44
## 3 1.40 1.39
## 4 1.35 1.35
## 5 1.32 1.31
## 6 1.27 1.27
## 7 1.24 1.24
A better plot using the raw data as input
# Build the long table used for the scree plot (raw data / Pearson input):
# one row per eigenvalue position (`num`) and extraction method (`name`).
obs_raw <- data.frame(
  eigenvalue_fa = parallel_raw$fa.values,
  eigenvalue_pca = parallel_raw$pc.values
)
obs_raw$type <- "Observed Data"
# Position of each eigenvalue, taken from the row count directly instead of
# converting row names to numbers; kept as double, as before.
obs_raw$num <- as.numeric(seq_len(nrow(obs_raw)))
obs_raw <- obs_raw %>%
  pivot_longer(-c(num, type)) %>%
  # "eigenvalue_fa" / "eigenvalue_pca" -> "FA" / "PCA". The prefix removal
  # used to be applied twice; once is enough.
  mutate(name = toupper(str_remove(name, "eigenvalue_")))
Plotting via ggplot2
# Scree plot of the observed FA and PCA eigenvalues (Pearson input),
# with a dashed reference line at eigenvalue = 1.
plot_scree_pearson <- ggplot(obs_raw, aes(x = num, y = value, color = name)) +
  geom_point(size = 2) +
  geom_line() +
  scale_y_continuous(name = "Eigenvalue") +
  scale_x_continuous(
    name = "Factor Number",
    # Breaks come from the plotted table (obs_raw); they were previously
    # taken from `obs`, the polychoric table, by copy-paste.
    breaks = min(obs_raw$num):max(obs_raw$num)
  ) +
  geom_hline(yintercept = 1, linetype = "dashed") +
  labs(color = "Method") +
  ggtitle("Pearson correlation") +
  theme_classic()
# Stack the Pearson and polychoric eigenvalue tables, labelled by the type of
# correlation, ordered by eigenvalue position.
data_plot_scree <- bind_rows(
  obs_raw %>% mutate(correlation = "Pearson"),
  obs %>% mutate(correlation = "Polychoric")
) %>%
  arrange(num)

# Combined scree plot: colour = extraction method (FA / PCA),
# shape and line type = type of correlation.
ggplot(
  data_plot_scree,
  aes(
    x = num,
    y = value,
    shape = factor(correlation),
    linetype = factor(correlation),
    color = factor(name)
  )
) +
  geom_point(size = 3) +
  geom_line(size = 0.5) +
  scale_y_continuous(name = "Eigenvalue", breaks = 0:12) +
  scale_x_continuous(
    name = "Factor Number",
    # Breaks come from the plotted data itself rather than from `obs`.
    breaks = min(data_plot_scree$num):max(data_plot_scree$num)
  ) +
  geom_hline(yintercept = 1, linetype = "dashed") +
  labs(
    color = "EFA or PCA",
    linetype = "Type of correlation",
    shape = "Type of correlation"
  ) +
  annotate(geom = "text", x = 28, y = 1.5, label = "Kaiser criterion") +
  #ggtitle("Pearson correlation") +
  theme_classic()
# Load the NEST() function from a local script.
# NOTE(review): absolute, machine-specific path - the script must exist at
# this location for the chunk to run.
source("C:/Users/luisf/Dropbox/Puc-Rio/Artigo - Theory and models/NEST.R")
# Apply NEST() to the recoded items (responses 0 / 5 / 10 mapped to 1 / 2 / 3).
# NOTE(review): presumably the Next Eigenvalue Sufficiency Test - confirm.
ds_60_random %>%
  select(starts_with("q")) %>%
  mutate(across(
    everything(),
    ~ case_when(
      .x == "0" ~ 1,
      .x == "5" ~ 2,
      .x == "10" ~ 3
    )
  )) %>%
  NEST()
# fatores_itens(): summarise which items belong to which factor.
# Takes a fitted object that has a `$loadings` element (the fa() results in
# this file) and returns one row per factor with:
#   main_factor  - name of the factor
#   itens_factor - comma-separated item numbers whose largest absolute
#                  loading is on that factor
#   numero       - how many items are listed
# plus a totals row added by janitor::adorn_totals().
# Items whose largest absolute loading is below 0.3 are dropped first.
# NOTE(review): the object is received through `...` and read as
# `...$loadings`; every call in this file passes a single positional
# argument, so a named parameter would be clearer.
fatores_itens <- function(...) {
j <- unclass(...$loadings) %>% #drop the class attribute of the loadings
as.data.frame() %>% #transform into dataframe
rownames_to_column("item") %>% #move the row names (item labels) into an `item` column
# flag each item "in"/"out": inside the ~ lambda, `...` is the lambda's own
# arguments (one row of loadings), not the function's `...`
mutate(lambda = pmap_chr(select(., -c(item)), ~ if_else(abs(c(...)) %>%
max >= 0.3,"in","out"))) %>%
filter(lambda == "in") %>% #exclusion criteria (factor loadings)
# name of the column holding the largest absolute loading in each row
mutate(main_factor = pmap_chr(select(., -c(item, lambda)), ~ abs(c(...)) %>%
which.max %>%
names )) %>% #dominant factor of each item
#from here on, work factor by factor
group_by(main_factor) %>%
mutate(itens_factor = paste0(item, collapse = ",")) %>% #collapse the items of each factor into one string
select(main_factor, itens_factor) %>% #select final results
distinct(main_factor, .keep_all = TRUE) %>% #remove duplicates (one row per factor)
mutate(itens_factor = str_remove_all(string = itens_factor, pattern = "q_")) %>% #keep only the item numbers
mutate(numero = str_count(itens_factor, "\\d+")) %>% #count the items listed for each factor
janitor::adorn_totals()
j
}
# Two-factor EFA on the polychoric matrix: WLS estimation, Promax rotation.
sol_elbow <- fa(
  r = rho_60,
  nfactors = 2,
  rotate = "Promax",
  fm = "wls"
)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
# Print the two-factor solution fitted on the polychoric matrix.
sol_elbow
## Factor Analysis using method = wls
## Call: fa(r = rho_60, nfactors = 2, rotate = "Promax", fm = "wls")
## Standardized loadings (pattern matrix) based upon correlation matrix
## WLS1 WLS2 h2 u2 com
## q_1 0.19 0.60 0.48 0.52 1.2
## q_2 0.56 -0.22 0.27 0.73 1.3
## q_3 -0.17 0.66 0.38 0.62 1.1
## q_4 -0.18 0.63 0.35 0.65 1.2
## q_5 0.56 0.19 0.43 0.57 1.2
## q_6 0.60 -0.35 0.33 0.67 1.6
## q_7 0.64 0.16 0.51 0.49 1.1
## q_8 -0.04 0.71 0.49 0.51 1.0
## q_9 0.70 0.09 0.54 0.46 1.0
## q_10 -0.12 0.85 0.66 0.34 1.0
## q_11 0.29 0.20 0.17 0.83 1.8
## q_12 0.55 -0.09 0.28 0.72 1.0
## q_13 0.49 0.36 0.51 0.49 1.8
## q_14 0.00 0.56 0.32 0.68 1.0
## q_15 0.57 0.37 0.63 0.37 1.7
## q_16 0.78 -0.26 0.52 0.48 1.2
## q_17 0.27 0.18 0.14 0.86 1.7
## q_18 0.08 0.64 0.46 0.54 1.0
## q_19 -0.05 0.75 0.53 0.47 1.0
## q_20 0.66 0.17 0.55 0.45 1.1
## q_21 0.01 0.56 0.31 0.69 1.0
## q_22 0.70 -0.16 0.43 0.57 1.1
## q_23 0.77 -0.07 0.56 0.44 1.0
## q_24 0.61 0.26 0.56 0.44 1.4
## q_25 0.80 -0.01 0.64 0.36 1.0
## q_26 0.36 0.20 0.23 0.77 1.6
## q_27 0.24 0.46 0.35 0.65 1.5
## q_28 0.37 0.51 0.53 0.47 1.8
## q_29 0.01 0.74 0.56 0.44 1.0
## q_30 0.72 0.00 0.51 0.49 1.0
## q_31 0.62 0.21 0.53 0.47 1.2
## q_32 0.58 -0.29 0.29 0.71 1.5
##
## WLS1 WLS2
## SS loadings 7.89 6.14
## Proportion Var 0.25 0.19
## Cumulative Var 0.25 0.44
## Proportion Explained 0.56 0.44
## Cumulative Proportion 0.56 1.00
##
## With factor correlations of
## WLS1 WLS2
## WLS1 1.00 0.38
## WLS2 0.38 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 2 factors are sufficient.
##
## The degrees of freedom for the null model are 496 and the objective function was 108.04
## The degrees of freedom for the model are 433 and the objective function was 93.46
##
## The root mean square of the residuals (RMSR) is 0.08
## The df corrected root mean square of the residuals is 0.09
##
## Fit based upon off diagonal values = 0.94
# Items assigned to each factor of the two-factor solution.
fatores_itens(sol_elbow)
## main_factor itens_factor numero
## WLS2 1,3,4,8,10,14,18,19,21,27,28,29 12
## WLS1 2,5,6,7,9,12,13,15,16,20,22,23,24,25,26,30,31,32 18
## Total - 30
# Two-factor EFA (WLS, Promax) on the recoded raw responses
# (responses 0 / 5 / 10 mapped to 1 / 2 / 3) instead of the polychoric matrix.
sol_elbow_raw <- ds_60_random %>%
  select(starts_with("q")) %>%
  mutate(across(
    everything(),
    ~ case_when(
      .x == "0" ~ 1,
      .x == "5" ~ 2,
      .x == "10" ~ 3
    )
  )) %>%
  fa(nfactors = 2, rotate = "Promax", fm = "wls")
sol_elbow_raw
## Factor Analysis using method = wls
## Call: fa(r = ., nfactors = 2, rotate = "Promax", fm = "wls")
## Standardized loadings (pattern matrix) based upon correlation matrix
## WLS1 WLS2 h2 u2 com
## q_1 0.16 0.48 0.301 0.70 1.2
## q_2 0.42 -0.16 0.160 0.84 1.3
## q_3 -0.09 0.50 0.230 0.77 1.1
## q_4 -0.08 0.43 0.167 0.83 1.1
## q_5 0.45 0.12 0.251 0.75 1.1
## q_6 0.38 -0.21 0.137 0.86 1.6
## q_7 0.59 0.11 0.395 0.61 1.1
## q_8 -0.03 0.53 0.276 0.72 1.0
## q_9 0.58 0.06 0.362 0.64 1.0
## q_10 -0.08 0.65 0.399 0.60 1.0
## q_11 0.14 0.11 0.040 0.96 1.9
## q_12 0.31 -0.05 0.088 0.91 1.1
## q_13 0.44 0.30 0.360 0.64 1.8
## q_14 0.06 0.40 0.180 0.82 1.0
## q_15 0.55 0.31 0.502 0.50 1.6
## q_16 0.64 -0.21 0.366 0.63 1.2
## q_17 0.17 0.13 0.059 0.94 1.9
## q_18 0.03 0.50 0.259 0.74 1.0
## q_19 -0.02 0.63 0.394 0.61 1.0
## q_20 0.61 0.11 0.419 0.58 1.1
## q_21 0.03 0.40 0.168 0.83 1.0
## q_22 0.44 -0.11 0.174 0.83 1.1
## q_23 0.41 -0.08 0.158 0.84 1.1
## q_24 0.61 0.20 0.489 0.51 1.2
## q_25 0.68 -0.06 0.439 0.56 1.0
## q_26 0.27 0.13 0.110 0.89 1.5
## q_27 0.24 0.34 0.223 0.78 1.8
## q_28 0.27 0.38 0.289 0.71 1.8
## q_29 0.02 0.52 0.281 0.72 1.0
## q_30 0.60 -0.05 0.347 0.65 1.0
## q_31 0.60 0.15 0.433 0.57 1.1
## q_32 0.37 -0.18 0.126 0.87 1.4
##
## WLS1 WLS2
## SS loadings 5.05 3.53
## Proportion Var 0.16 0.11
## Cumulative Var 0.16 0.27
## Proportion Explained 0.59 0.41
## Cumulative Proportion 0.59 1.00
##
## With factor correlations of
## WLS1 WLS2
## WLS1 1.00 0.31
## WLS2 0.31 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 2 factors are sufficient.
##
## The degrees of freedom for the null model are 496 and the objective function was 9.5 with Chi Square of 4629.52
## The degrees of freedom for the model are 433 and the objective function was 2.85
##
## The root mean square of the residuals (RMSR) is 0.06
## The df corrected root mean square of the residuals is 0.06
##
## The harmonic number of observations is 500 with the empirical chi square 1574.23 with prob < 3.6e-129
## The total number of observations was 500 with Likelihood Chi Square = 1387.46 with prob < 2.1e-100
##
## Tucker Lewis Index of factoring reliability = 0.735
## RMSEA index = 0.066 and the 90 % confidence intervals are 0.063 0.07
## BIC = -1303.47
## Fit based upon off diagonal values = 0.93
## Measures of factor score adequacy
## WLS1 WLS2
## Correlation of (regression) scores with factors 0.94 0.91
## Multiple R square of scores with factors 0.89 0.83
## Minimum correlation of possible factor scores 0.77 0.67
# Loading diagram of the two-factor solution; loadings below 0.3 are hidden.
qgraph::qgraph.loadings(
  sol_elbow[["loadings"]],
  model = "reflective",
  posCol = "blue",
  negCol = "purple",
  layout = "circle",
  width = 20,
  minimum = 0.3,
  title = "Exploratory Factor Analysis (Elbow method)"
)
# Three-factor EFA on the polychoric matrix: WLS estimation, Promax rotation.
sol_parallel <- fa(
  r = rho_60,
  nfactors = 3,
  rotate = "Promax",
  fm = "wls"
)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
# Print the three-factor solution fitted on the polychoric matrix.
sol_parallel
## Factor Analysis using method = wls
## Call: fa(r = rho_60, nfactors = 3, rotate = "Promax", fm = "wls")
## Standardized loadings (pattern matrix) based upon correlation matrix
## WLS1 WLS2 WLS3 h2 u2 com
## q_1 0.21 0.56 0.08 0.48 0.52 1.3
## q_2 0.10 -0.11 0.56 0.37 0.63 1.1
## q_3 0.20 0.53 -0.36 0.44 0.56 2.1
## q_4 -0.06 0.61 -0.08 0.35 0.65 1.1
## q_5 0.32 0.21 0.36 0.45 0.55 2.6
## q_6 0.21 -0.27 0.46 0.37 0.63 2.1
## q_7 0.65 0.08 0.08 0.53 0.47 1.1
## q_8 -0.07 0.72 0.12 0.51 0.49 1.1
## q_9 0.31 0.15 0.53 0.61 0.39 1.8
## q_10 -0.08 0.85 0.05 0.67 0.33 1.0
## q_11 -0.34 0.41 0.77 0.54 0.46 1.9
## q_12 0.19 -0.01 0.46 0.34 0.66 1.3
## q_13 0.60 0.26 -0.02 0.53 0.47 1.4
## q_14 0.02 0.55 0.05 0.32 0.68 1.0
## q_15 0.68 0.26 -0.02 0.66 0.34 1.3
## q_16 0.63 -0.30 0.22 0.52 0.48 1.7
## q_17 0.19 0.18 0.14 0.15 0.85 2.8
## q_18 -0.08 0.69 0.27 0.52 0.48 1.3
## q_19 -0.04 0.75 0.08 0.54 0.46 1.0
## q_20 0.57 0.13 0.20 0.55 0.45 1.4
## q_21 -0.08 0.58 0.16 0.34 0.66 1.2
## q_22 0.25 -0.07 0.56 0.51 0.49 1.4
## q_23 0.65 -0.11 0.21 0.56 0.44 1.3
## q_24 0.81 0.11 -0.13 0.65 0.35 1.1
## q_25 0.69 -0.06 0.21 0.64 0.36 1.2
## q_26 0.38 0.15 0.04 0.23 0.77 1.3
## q_27 0.49 0.33 -0.20 0.40 0.60 2.1
## q_28 0.53 0.40 -0.07 0.56 0.44 1.9
## q_29 0.18 0.67 -0.10 0.56 0.44 1.2
## q_30 0.84 -0.13 -0.06 0.59 0.41 1.1
## q_31 0.68 0.11 0.03 0.56 0.44 1.1
## q_32 0.63 -0.38 -0.03 0.34 0.66 1.7
##
## WLS1 WLS2 WLS3
## SS loadings 6.85 5.62 2.92
## Proportion Var 0.21 0.18 0.09
## Cumulative Var 0.21 0.39 0.48
## Proportion Explained 0.45 0.37 0.19
## Cumulative Proportion 0.45 0.81 1.00
##
## With factor correlations of
## WLS1 WLS2 WLS3
## WLS1 1.00 0.38 0.51
## WLS2 0.38 1.00 0.06
## WLS3 0.51 0.06 1.00
##
## Mean item complexity = 1.5
## Test of the hypothesis that 3 factors are sufficient.
##
## The degrees of freedom for the null model are 496 and the objective function was 108.04
## The degrees of freedom for the model are 403 and the objective function was 92.16
##
## The root mean square of the residuals (RMSR) is 0.07
## The df corrected root mean square of the residuals is 0.08
##
## Fit based upon off diagonal values = 0.96
# Items assigned to each factor of the three-factor solution.
fatores_itens(sol_parallel)
## main_factor itens_factor numero
## WLS2 1,3,4,8,10,14,18,19,21,29 10
## WLS3 2,5,6,9,11,12,22 7
## WLS1 7,13,15,16,20,23,24,25,26,27,28,30,31,32 14
## Total - 31
# Three-factor EFA (WLS, Promax) on the recoded raw responses
# (responses 0 / 5 / 10 mapped to 1 / 2 / 3) instead of the polychoric matrix.
sol_parallel_raw <- ds_60_random %>%
  select(starts_with("q")) %>%
  mutate(across(
    everything(),
    ~ case_when(
      .x == "0" ~ 1,
      .x == "5" ~ 2,
      .x == "10" ~ 3
    )
  )) %>%
  fa(nfactors = 3, rotate = "Promax", fm = "wls")
sol_parallel_raw
## Factor Analysis using method = wls
## Call: fa(r = ., nfactors = 3, rotate = "Promax", fm = "wls")
## Standardized loadings (pattern matrix) based upon correlation matrix
## WLS1 WLS2 WLS3 h2 u2 com
## q_1 0.11 0.46 0.10 0.303 0.70 1.2
## q_2 -0.15 -0.03 0.59 0.262 0.74 1.1
## q_3 0.27 0.38 -0.31 0.267 0.73 2.8
## q_4 0.01 0.41 -0.07 0.166 0.83 1.1
## q_5 0.04 0.19 0.47 0.303 0.70 1.3
## q_6 -0.04 -0.13 0.44 0.177 0.82 1.2
## q_7 0.60 -0.02 0.08 0.421 0.58 1.0
## q_8 -0.15 0.59 0.15 0.316 0.68 1.3
## q_9 -0.13 0.21 0.78 0.561 0.44 1.2
## q_10 -0.09 0.67 0.04 0.412 0.59 1.0
## q_11 -0.18 0.21 0.33 0.092 0.91 2.3
## q_12 0.01 0.00 0.32 0.106 0.89 1.0
## q_13 0.42 0.21 0.10 0.363 0.64 1.6
## q_14 0.10 0.37 -0.01 0.179 0.82 1.2
## q_15 0.73 0.14 -0.06 0.573 0.43 1.1
## q_16 0.38 -0.24 0.32 0.362 0.64 2.7
## q_17 0.05 0.14 0.14 0.062 0.94 2.3
## q_18 -0.13 0.56 0.19 0.297 0.70 1.4
## q_19 0.00 0.63 0.03 0.399 0.60 1.0
## q_20 0.47 0.04 0.22 0.418 0.58 1.4
## q_21 0.04 0.39 0.02 0.169 0.83 1.0
## q_22 -0.04 -0.01 0.51 0.233 0.77 1.0
## q_23 0.07 -0.03 0.38 0.176 0.82 1.1
## q_24 0.94 -0.04 -0.18 0.667 0.33 1.1
## q_25 0.37 -0.07 0.38 0.436 0.56 2.1
## q_26 0.29 0.07 0.03 0.115 0.89 1.1
## q_27 0.42 0.22 -0.11 0.250 0.75 1.7
## q_28 0.24 0.34 0.10 0.288 0.71 2.0
## q_29 -0.03 0.54 0.08 0.292 0.71 1.1
## q_30 0.55 -0.15 0.13 0.366 0.63 1.3
## q_31 0.63 0.02 0.07 0.464 0.54 1.0
## q_32 0.25 -0.21 0.14 0.125 0.87 2.6
##
## WLS1 WLS2 WLS3
## SS loadings 3.79 3.19 2.64
## Proportion Var 0.12 0.10 0.08
## Cumulative Var 0.12 0.22 0.30
## Proportion Explained 0.39 0.33 0.27
## Cumulative Proportion 0.39 0.73 1.00
##
## With factor correlations of
## WLS1 WLS2 WLS3
## WLS1 1.00 0.40 0.63
## WLS2 0.40 1.00 0.11
## WLS3 0.63 0.11 1.00
##
## Mean item complexity = 1.4
## Test of the hypothesis that 3 factors are sufficient.
##
## The degrees of freedom for the null model are 496 and the objective function was 9.5 with Chi Square of 4629.52
## The degrees of freedom for the model are 403 and the objective function was 2.28
##
## The root mean square of the residuals (RMSR) is 0.05
## The df corrected root mean square of the residuals is 0.05
##
## The harmonic number of observations is 500 with the empirical chi square 1149.58 with prob < 6.2e-73
## The total number of observations was 500 with Likelihood Chi Square = 1106.45 with prob < 6.8e-67
##
## Tucker Lewis Index of factoring reliability = 0.79
## RMSEA index = 0.059 and the 90 % confidence intervals are 0.055 0.063
## BIC = -1398.03
## Fit based upon off diagonal values = 0.95
## Measures of factor score adequacy
## WLS1 WLS2 WLS3
## Correlation of (regression) scores with factors 0.94 0.91 0.91
## Multiple R square of scores with factors 0.89 0.83 0.82
## Minimum correlation of possible factor scores 0.79 0.65 0.64
#library(semPlot)
#semPaths(semPlotModel(sol_parallel$loadings),
# layout="circle",
# nCharNodes = 6)
# Loading diagram of the three-factor solution; loadings below 0.3 are hidden.
qgraph::qgraph.loadings(
  sol_parallel[["loadings"]],
  model = "reflective",
  posCol = "blue",
  negCol = "purple",
  layout = "circle",
  width = 20,
  minimum = 0.3,
  title = "Exploratory Factor Analysis (Parallel Analysis)"
)
# Two-component PCA on the polychoric matrix with varimax rotation.
sol_pca <- principal(
  r = rho_60,
  nfactors = 2,
  rotate = "varimax"
)
sol_pca
## Principal Components Analysis
## Call: principal(r = rho_60, nfactors = 2, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 h2 u2 com
## q_1 0.28 0.67 0.52 0.48 1.3
## q_2 0.55 -0.11 0.31 0.69 1.1
## q_3 -0.08 0.65 0.43 0.57 1.0
## q_4 -0.09 0.61 0.38 0.62 1.0
## q_5 0.60 0.32 0.46 0.54 1.5
## q_6 0.55 -0.23 0.36 0.64 1.3
## q_7 0.69 0.31 0.57 0.43 1.4
## q_8 0.06 0.72 0.53 0.47 1.0
## q_9 0.73 0.24 0.59 0.41 1.2
## q_10 0.00 0.84 0.70 0.30 1.0
## q_11 0.32 0.26 0.17 0.83 1.9
## q_12 0.54 0.03 0.30 0.70 1.0
## q_13 0.55 0.49 0.55 0.45 2.0
## q_14 0.08 0.58 0.35 0.65 1.0
## q_15 0.64 0.52 0.68 0.32 1.9
## q_16 0.76 -0.11 0.59 0.41 1.0
## q_17 0.30 0.25 0.15 0.85 1.9
## q_18 0.17 0.68 0.49 0.51 1.1
## q_19 0.05 0.76 0.58 0.42 1.0
## q_20 0.71 0.33 0.61 0.39 1.4
## q_21 0.09 0.57 0.33 0.67 1.0
## q_22 0.67 -0.01 0.45 0.55 1.0
## q_23 0.71 0.12 0.52 0.48 1.1
## q_24 0.67 0.41 0.62 0.38 1.7
## q_25 0.82 0.16 0.71 0.29 1.1
## q_26 0.39 0.29 0.23 0.77 1.8
## q_27 0.31 0.53 0.37 0.63 1.6
## q_28 0.44 0.62 0.58 0.42 1.8
## q_29 0.11 0.78 0.61 0.39 1.0
## q_30 0.73 0.16 0.56 0.44 1.1
## q_31 0.66 0.36 0.57 0.43 1.5
## q_32 0.53 -0.17 0.31 0.69 1.2
##
## RC1 RC2
## SS loadings 8.21 6.98
## Proportion Var 0.26 0.22
## Cumulative Var 0.26 0.47
## Proportion Explained 0.54 0.46
## Cumulative Proportion 0.54 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 2 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.08
##
## Fit based upon off diagonal values = 0.94
# Two-component PCA (varimax) on the recoded raw responses
# (responses 0 / 5 / 10 mapped to 1 / 2 / 3) instead of the polychoric matrix.
sol_pca_raw <- ds_60_random %>%
  select(starts_with("q")) %>%
  mutate(across(
    everything(),
    ~ case_when(
      .x == "0" ~ 1,
      .x == "5" ~ 2,
      .x == "10" ~ 3
    )
  )) %>%
  principal(nfactors = 2, rotate = "varimax")
sol_pca_raw
## Principal Components Analysis
## Call: principal(r = ., nfactors = 2, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 h2 u2 com
## q_1 0.19 0.56 0.346 0.65 1.2
## q_2 0.46 -0.10 0.219 0.78 1.1
## q_3 -0.08 0.55 0.306 0.69 1.0
## q_4 -0.08 0.47 0.228 0.77 1.1
## q_5 0.49 0.23 0.291 0.71 1.4
## q_6 0.41 -0.17 0.197 0.80 1.3
## q_7 0.60 0.25 0.427 0.57 1.3
## q_8 0.00 0.58 0.335 0.67 1.0
## q_9 0.61 0.19 0.413 0.59 1.2
## q_10 -0.04 0.67 0.449 0.55 1.0
## q_11 0.16 0.16 0.050 0.95 2.0
## q_12 0.34 0.01 0.117 0.88 1.0
## q_13 0.47 0.42 0.393 0.61 2.0
## q_14 0.08 0.47 0.231 0.77 1.1
## q_15 0.56 0.45 0.523 0.48 1.9
## q_16 0.65 -0.08 0.424 0.58 1.0
## q_17 0.19 0.19 0.073 0.93 2.0
## q_18 0.06 0.55 0.306 0.69 1.0
## q_19 0.02 0.67 0.444 0.56 1.0
## q_20 0.62 0.25 0.454 0.55 1.3
## q_21 0.05 0.46 0.218 0.78 1.0
## q_22 0.48 -0.03 0.228 0.77 1.0
## q_23 0.45 -0.01 0.204 0.80 1.0
## q_24 0.62 0.36 0.508 0.49 1.6
## q_25 0.69 0.10 0.484 0.52 1.0
## q_26 0.29 0.21 0.132 0.87 1.8
## q_27 0.27 0.44 0.261 0.74 1.7
## q_28 0.31 0.49 0.331 0.67 1.7
## q_29 0.04 0.58 0.337 0.66 1.0
## q_30 0.62 0.09 0.392 0.61 1.0
## q_31 0.61 0.30 0.463 0.54 1.5
## q_32 0.40 -0.13 0.173 0.83 1.2
##
## RC1 RC2
## SS loadings 5.40 4.56
## Proportion Var 0.17 0.14
## Cumulative Var 0.17 0.31
## Proportion Explained 0.54 0.46
## Cumulative Proportion 0.54 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 2 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.06
## with the empirical chi square 2046.33 with prob < 3.6e-207
##
## Fit based upon off diagonal values = 0.91
# Loading diagram of the two-component PCA solution (formative model);
# loadings below 0.3 are hidden.
qgraph::qgraph.loadings(
  sol_pca[["loadings"]],
  model = "formative",
  posCol = "blue",
  negCol = "purple",
  layout = "circle",
  width = 20,
  minimum = 0.3,
  title = "Principal Component Analysis (Parallel Analysis)"
)
library(EGAnet)
## [1;m[4;m
## EGAnet (version 0.9.8)[0m[0m
## For help getting started, type browseVignettes("EGAnet")
## For bugs and errors, submit an issue to <https://github.com/hfgolino/EGAnet/issues> [1;m
##
## NEW[0m: EGAnet will write your Methods section for you. Type ?methods.section for more details
# Exploratory Graph Analysis on the recoded items
# (responses 0 / 5 / 10 mapped to 1 / 2 / 3), with a fixed seed.
set.seed(12) # or 1
ega_60 <- ds_60_random %>%
  select(starts_with("q")) %>%
  mutate(across(
    everything(),
    ~ case_when(
      .x == "0" ~ 1,
      .x == "5" ~ 2,
      .x == "10" ~ 3
    )
  )) %>%
  EGA()
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
## Variables detected as ordinal: q_1; q_2; q_3; q_4; q_5; q_6; q_7; q_8; q_9; q_10; q_11; q_12; q_13; q_14; q_15; q_16; q_17; q_18; q_19; q_20; q_21; q_22; q_23; q_24; q_25; q_26; q_27; q_28; q_29; q_30; q_31; q_32
## Warning in EGA.estimate(data = cor.data, n = n, model = model, model.args = model.args, : Correlation matrix is not positive definite.
## Forcing positive definite matrix using Matrix::nearPD()
## Results may be unreliable
## Network estimated with:
## • gamma = 0.5
## • lambda.min.ratio = 0.1
## Warning in EGA.estimate(cor.data, n = n, model = model, model.args = model.args, : Correlation matrix is not positive definite.
## Forcing positive definite matrix using Matrix::nearPD()
## Results may be unreliable
# Print the EGA result: number of dimensions and the items in each one.
ega_60
## EGA Results:
##
## Number of Dimensions:
## [1] 6
##
## Items per Dimension:
## items dimension
## q_16 q_16 1
## q_22 q_22 1
## q_23 q_23 1
## q_24 q_24 1
## q_25 q_25 1
## q_28 q_28 1
## q_30 q_30 1
## q_31 q_31 1
## q_32 q_32 1
## q_3 q_3 2
## q_4 q_4 2
## q_11 q_11 2
## q_12 q_12 2
## q_14 q_14 2
## q_17 q_17 2
## q_21 q_21 2
## q_26 q_26 2
## q_7 q_7 3
## q_13 q_13 3
## q_15 q_15 3
## q_20 q_20 3
## q_1 q_1 4
## q_10 q_10 4
## q_18 q_18 4
## q_19 q_19 4
## q_2 q_2 5
## q_6 q_6 5
## q_8 q_8 5
## q_27 q_27 5
## q_29 q_29 5
## q_5 q_5 6
## q_9 q_9 6
#methods.section(ega.wmt)
I need to export the dataset as a CSV file and then run the analysis in Factor.
# Export the recoded items (responses 0 / 5 / 10 mapped to 1 / 2 / 3) to a
# CSV file in the working directory, without row names.
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
ds_60_random %>%
  select(starts_with("q")) %>%
  mutate(across(
    everything(),
    ~ case_when(
      .x == "0" ~ 1,
      .x == "5" ~ 2,
      .x == "10" ~ 3
    )
  )) %>%
  write.csv(file = "ds_60_random.csv", row.names = FALSE)
# Refit the two-factor WLS/Promax solution and list every item with its
# loadings and the index of its dominant factor (`fator`), factor 2 first.
fa(
  rho_60,
  fm = "wls",
  nfactors = 2,
  rotate = "Promax"
)$loadings %>%
  unclass() %>%
  data.frame() %>%
  rownames_to_column("item") %>%
  pivot_longer(-item) %>%
  group_by(item) %>%
  # The dominant factor is the one with the largest ABSOLUTE loading, the same
  # rule fatores_itens() applies. The signed maximum used before would
  # misclassify an item whose strongest loading is negative.
  mutate(fator = which.max(abs(value))) %>%
  pivot_wider() %>%
  arrange(desc(fator))
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## # A tibble: 32 x 4
## # Groups: item [32]
## item fator WLS1 WLS2
## <chr> <int> <dbl> <dbl>
## 1 q_1 2 0.193 0.596
## 2 q_3 2 -0.172 0.657
## 3 q_4 2 -0.182 0.634
## 4 q_8 2 -0.0400 0.711
## 5 q_10 2 -0.123 0.851
## 6 q_14 2 0.000734 0.562
## 7 q_18 2 0.0760 0.645
## 8 q_19 2 -0.0512 0.746
## 9 q_21 2 0.00599 0.556
## 10 q_27 2 0.239 0.457
## # ... with 22 more rows
#print(n=100)
#filter(fator == "2") %>%
#pull(item)
Done!
This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. https://creativecommons.org/licenses/by-nc-sa/4.0/