# load the psych package (provides the bfi data, alpha(), fa(), fa.parallel()) and the data
library(psych)
data("bfi")
str(bfi)
## 'data.frame': 2800 obs. of 28 variables:
## $ A1 : int 2 2 5 4 2 6 2 4 4 2 ...
## $ A2 : int 4 4 4 4 3 6 5 3 3 5 ...
## $ A3 : int 3 5 5 6 3 5 5 1 6 6 ...
## $ A4 : int 4 2 4 5 4 6 3 5 3 6 ...
## $ A5 : int 4 5 4 5 5 5 5 1 3 5 ...
## $ C1 : int 2 5 4 4 4 6 5 3 6 6 ...
## $ C2 : int 3 4 5 4 4 6 4 2 6 5 ...
## $ C3 : int 3 4 4 3 5 6 4 4 3 6 ...
## $ C4 : int 4 3 2 5 3 1 2 2 4 2 ...
## $ C5 : int 4 4 5 5 2 3 3 4 5 1 ...
## $ E1 : int 3 1 2 5 2 2 4 3 5 2 ...
## $ E2 : int 3 1 4 3 2 1 3 6 3 2 ...
## $ E3 : int 3 6 4 4 5 6 4 4 NA 4 ...
## $ E4 : int 4 4 4 4 4 5 5 2 4 5 ...
## $ E5 : int 4 3 5 4 5 6 5 1 3 5 ...
## $ N1 : int 3 3 4 2 2 3 1 6 5 5 ...
## $ N2 : int 4 3 5 5 3 5 2 3 5 5 ...
## $ N3 : int 2 3 4 2 4 2 2 2 2 5 ...
## $ N4 : int 2 5 2 4 4 2 1 6 3 2 ...
## $ N5 : int 3 5 3 1 3 3 1 4 3 4 ...
## $ O1 : int 3 4 4 3 3 4 5 3 6 5 ...
## $ O2 : int 6 2 2 3 3 3 2 2 6 1 ...
## $ O3 : int 3 4 5 4 4 5 5 4 6 5 ...
## $ O4 : int 4 3 5 3 3 6 6 5 6 5 ...
## $ O5 : int 3 3 2 5 3 1 1 3 1 2 ...
## $ gender : int 1 2 2 2 1 2 1 1 1 2 ...
## $ education: int NA NA NA NA NA 3 NA 2 1 NA ...
## $ age : int 16 18 17 17 17 21 18 19 19 17 ...
dta <- bfi
# keep only the 25 personality items: drop columns 26-28 (gender, education, age)
dta <- dta[, -c(26:28)]
# reverse-score the negatively keyed items (responses run 1-6, so reversed = 7 - original)
dta[, c("A1", "C4", "C5", "E1", "E2", "O2", "O5")] <- 7 - dta[, c("A1", "C4", "C5", "E1", "E2", "O2", "O5")]
# descriptive statistics for one item: mean, sd, skewness, kurtosis
my_summary <- function(x) {
  require(moments)                      # skewness() and kurtosis()
  funs <- c(mean, sd, skewness, kurtosis)
  sapply(funs, function(f) f(x, na.rm = TRUE))
}
dta_desc <- apply(dta, 2, my_summary)
head(dta_desc)
## A1 A2 A3 A4 A5 C1 C2
## [1,] 4.5865661 4.802380 4.603821 4.699748 4.560345 4.5023390 4.3699568
## [2,] 1.4077372 1.172020 1.301834 1.479633 1.258512 1.2413465 1.3183465
## [3,] -0.8254883 -1.124894 -0.998997 -1.031499 -0.847690 -0.8551631 -0.7422207
## [4,] 2.6942957 4.057765 3.444524 3.042640 3.161176 3.3068088 2.8656243
## C3 C4 C5 E1 E2 E3
## [1,] 4.3039568 4.4466474 3.70330460 4.0255672 3.8581178 4.0007207
## [2,] 1.2885518 1.3751181 1.62854187 1.6315055 1.6052103 1.3527188
## [3,] -0.6918287 -0.5964955 -0.06620282 -0.3736569 -0.2209396 -0.4706335
## [4,] 2.8697332 2.3802970 1.78461246 1.9090390 1.8526925 2.5367154
## E4 E5 N1 N2 N3 N4 N5
## [1,] 4.4224292 4.416337 2.9290857 3.50773660 3.2165651 3.1856006 2.9696860
## [2,] 1.4575174 1.334768 1.5709175 1.52594359 1.6029021 1.5696851 1.6186474
## [3,] -0.8241831 -0.777486 0.3714298 -0.07698521 0.1506797 0.1969966 0.3744599
## [4,] 2.6977079 2.908401 1.9885722 1.95035250 1.8227046 1.9090309 1.9401121
## O1 O2 O3 O4 O5
## [1,] 4.8160547 4.286786 4.4383117 4.892319 4.5104317
## [2,] 1.1295303 1.565152 1.2209011 1.221250 1.3279590
## [3,] -0.8973669 -0.585679 -0.7730516 -1.218247 -0.7384818
## [4,] 3.4277033 2.188889 3.3043641 4.082686 2.7630094
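As an optional cross-check (not part of the original workflow), psych's `describe()` returns comparable descriptives in a single call; note that it reports excess kurtosis, so expect kurtosis values roughly 3 lower than those from the moments package.
# optional cross-check: psych::describe() reports mean, sd, skew and (excess) kurtosis per item
psych::describe(dta)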
# label the rows with the four moments
rownames(dta_desc) <- c("mean", "sd", "skewness", "kurtosis")
rslt1 <- as.data.frame(t(dta_desc))
rslt1 |> knitr::kable()
|     | mean | sd | skewness | kurtosis |
|:----|-----:|---:|---------:|---------:|
| A1 | 4.586566 | 1.407737 | -0.8254883 | 2.694296 |
| A2 | 4.802380 | 1.172020 | -1.1248938 | 4.057765 |
| A3 | 4.603821 | 1.301834 | -0.9989970 | 3.444524 |
| A4 | 4.699748 | 1.479633 | -1.0314991 | 3.042640 |
| A5 | 4.560345 | 1.258512 | -0.8476900 | 3.161176 |
| C1 | 4.502339 | 1.241346 | -0.8551631 | 3.306809 |
| C2 | 4.369957 | 1.318347 | -0.7422207 | 2.865624 |
| C3 | 4.303957 | 1.288552 | -0.6918287 | 2.869733 |
| C4 | 4.446647 | 1.375118 | -0.5964955 | 2.380297 |
| C5 | 3.703305 | 1.628542 | -0.0662028 | 1.784612 |
| E1 | 4.025567 | 1.631506 | -0.3736569 | 1.909039 |
| E2 | 3.858118 | 1.605210 | -0.2209396 | 1.852693 |
| E3 | 4.000721 | 1.352719 | -0.4706335 | 2.536715 |
| E4 | 4.422429 | 1.457517 | -0.8241831 | 2.697708 |
| E5 | 4.416337 | 1.334768 | -0.7774860 | 2.908401 |
| N1 | 2.929086 | 1.570917 | 0.3714298 | 1.988572 |
| N2 | 3.507737 | 1.525944 | -0.0769852 | 1.950352 |
| N3 | 3.216565 | 1.602902 | 0.1506797 | 1.822705 |
| N4 | 3.185601 | 1.569685 | 0.1969966 | 1.909031 |
| N5 | 2.969686 | 1.618647 | 0.3744599 | 1.940112 |
| O1 | 4.816055 | 1.129530 | -0.8973669 | 3.427703 |
| O2 | 4.286786 | 1.565152 | -0.5856790 | 2.188889 |
| O3 | 4.438312 | 1.220901 | -0.7730516 | 3.304364 |
| O4 | 4.892319 | 1.221250 | -1.2182471 | 4.082686 |
| O5 | 4.510432 | 1.327959 | -0.7384818 | 2.763009 |
# reshape the wide summary matrix to long format for plotting
library(reshape2)   # melt()
library(ggplot2)    # used for the plots below
dtal_desc <- melt(dta_desc)
# name the first two columns: the moment (row variable) and the item (column variable)
names(dtal_desc)[1:2] <- c("moments", "items")
# detect extreme means
ggplot(data = subset(dtal_desc, moments == "mean"),
aes(x = reorder(items, value, max), y = value, group = moments)) +
geom_point(size = 3) +
geom_hline(yintercept = mean(t(dta_desc["mean",])) + # mean of the means as a reference
c(-1.5, 0, 1.5) * sd(t(dta_desc["mean", ])), # sd of the mean values
linetype = "dashed") +
coord_flip() +
labs(x = "items", y = "mean") +
theme_bw()
The means of N1, N3, N4, and N5 stand apart from the other items; all four fall below the lower dashed reference line.
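The same cut-offs drawn as dashed lines can be applied in code; a minimal sketch using the objects created above (it flags exactly the four N items just mentioned):
# items whose mean lies outside mean-of-means ± 1.5 SD (the dashed reference lines)
m <- dta_desc["mean", ]
round(m[abs(m - mean(m)) > 1.5 * sd(m)], 2)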
# sd: the larger the spread, the better the item differentiates respondents
ggplot(data = subset(dtal_desc, moments == "sd"),
aes(x = reorder(items, value, max), y = value, group = moments)) +
geom_point(size = 3) +
geom_hline(yintercept = mean(t(dta_desc["sd",])) + # mean of sd as reference
c(-1.5, 0, 1.5) * sd(t(dta_desc["sd", ])), linetype = "dashed") + #sd of the sd
coord_flip() +
labs(x = "items", y = "sd") +
theme_bw()
The three dashed lines mark the average sd across the 25 items and ±1.5 SD around it.
# ideally skewness should be close to 0, as expected for a roughly normal distribution
ggplot(data = subset(dtal_desc, moments == "skewness"),
aes(x = reorder(items, value, max), y = value, group = moments)) +
geom_point(size = 3) +
geom_hline(yintercept = mean(t(dta_desc["skewness",])) +
c(-1.5, 0, 1.5) * sd(t(dta_desc["skewness", ])), linetype = "dashed") +
coord_flip() +
labs(x = "items", y = "skewness") +
theme_bw()
# larger kurtosis means responses pile up on a narrow range of options
ggplot(data = subset(dtal_desc, moments == "kurtosis"),
aes(x = reorder(items, value, max), y = value, group = moments)) +
geom_point(size = 3) +
geom_hline(yintercept = mean(t(dta_desc["kurtosis",])) +
c(-1.5, 0, 1.5) * sd(t(dta_desc["kurtosis", ])), linetype = "dashed") +
coord_flip() +
labs(x = "items", y = "kurtosis") +
theme_bw()
# deal with NA: keep complete cases only (2436 of the 2800 respondents remain)
dta <- na.omit(dta)
# total score per respondent (row sum across the 25 items), stored in a new column (tot)
dta$tot <- apply(dta, 1, sum)
# new column (grp) for the high-score / low-score labels, all NA for now
dta$grp <- NA
# use rank() on the total score to label the bottom 27% as "L" and the top 27% as "H"
dta$grp[rank(dta$tot) < 2436*.27] <- "L" # bottom 27% (2436 = nrow(dta))
dta$grp[rank(dta$tot) > 2436*.73] <- "H" # top 27%
dta$grp <- factor(dta$grp) # convert to factor for the group comparisons below
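As a side note, 2436 is simply nrow(dta) after na.omit(); an equivalent sketch that avoids the hard-coded sample size by cutting the total score at its 27th and 73rd percentiles (grp_q is a throwaway name introduced here, and the two groupings may differ slightly at tied scores):
cuts <- quantile(dta$tot, probs = c(.27, .73))
grp_q <- ifelse(dta$tot <= cuts[1], "L",
         ifelse(dta$tot >= cuts[2], "H", NA))
table(dta$grp, grp_q)  # the rank() and quantile() groupings should largely agree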
head(dta)
## A1 A2 A3 A4 A5 C1 C2 C3 C4 C5 E1 E2 E3 E4 E5 N1 N2 N3 N4 N5 O1 O2 O3 O4
## 61617 5 4 3 4 4 2 3 3 3 3 4 4 3 4 4 3 4 2 2 3 3 1 3 4
## 61618 5 4 5 2 5 5 4 4 4 3 6 6 6 4 3 3 3 3 5 5 4 5 4 3
## 61620 2 4 5 4 4 4 5 4 5 2 5 3 4 4 5 4 5 4 2 3 4 5 5 5
## 61621 3 4 6 5 5 4 4 3 2 2 2 4 4 4 4 2 5 2 4 1 3 4 4 3
## 61622 5 3 3 4 5 4 4 5 4 5 5 5 5 4 5 2 3 4 4 3 3 4 4 3
## 61623 1 6 5 6 5 6 6 6 6 4 5 6 6 5 6 3 5 2 2 3 4 4 5 6
## O5 tot grp
## 61617 4 82 L
## 61618 4 105 <NA>
## 61620 5 102 <NA>
## 61621 2 86 L
## 61622 4 100 <NA>
## 61623 6 119 H
# calculate mean score for each group (H & L)
dtam <- aggregate(dta[, 1:25], by=list(dta$grp), mean)
print(dtam)
## Group.1 A1 A2 A3 A4 A5 C1 C2
## 1 H 5.076923 5.492936 5.372057 5.332810 5.238619 5.138148 5.113030
## 2 L 4.171053 4.076023 3.732456 3.953216 3.802632 3.869883 3.660819
## C3 C4 C5 E1 E2 E3 E4 E5
## 1 4.858713 5.070644 4.323391 4.747253 4.610675 4.897959 5.095761 5.221350
## 2 3.757310 3.755848 2.969298 3.067251 2.899123 3.042398 3.448830 3.431287
## N1 N2 N3 N4 N5 O1 O2 O3
## 1 3.323391 3.943485 3.687598 3.315542 3.293564 5.365777 4.786499 5.182104
## 2 2.676901 3.257310 2.866959 3.274854 2.774854 4.251462 3.880117 3.722222
## O4 O5
## 1 5.324961 5.048666
## 2 4.660819 4.077485
# drop the group label column and transpose: rows become items (col 1 = H mean, col 2 = L mean)
dtam <- t(dtam[,-1])
# independent-samples t-test (H vs. L) for each item, keeping only the t statistic
item_t <- sapply(dta[, 1:25], function(x) t.test(x ~ dta$grp)$statistic)
print(item_t)
## A1.t A2.t A3.t A4.t A5.t C1.t C2.t C3.t
## 12.335573 24.608873 26.442388 18.328454 22.705973 19.671500 22.059063 16.113772
## C4.t C5.t E1.t E2.t E3.t E4.t E5.t N1.t
## 18.485543 15.839220 20.443864 21.252021 29.715885 22.082668 27.705564 7.383004
## N2.t N3.t N4.t N5.t O1.t O2.t O3.t O4.t
## 8.199859 9.423782 0.459648 5.691748 19.007850 10.712654 24.528917 10.374009
## O5.t
## 14.017635
# rslt2: item-analysis table with the group means, the mean difference (H - L), and the t value
rslt2 <- data.frame(Item = rownames(dtam),
                    low.mean.score = dtam[, 2],
                    high.mean.score = dtam[, 1],
                    mean.dif = dtam[, 1] - dtam[, 2],
                    t.value = item_t)
print(rslt2)
## Item low.mean.score high.mean.score mean.dif t.value
## A1 A1 4.171053 5.076923 0.9058704 12.335573
## A2 A2 4.076023 5.492936 1.4169122 24.608873
## A3 A3 3.732456 5.372057 1.6396004 26.442388
## A4 A4 3.953216 5.332810 1.3795937 18.328454
## A5 A5 3.802632 5.238619 1.4359869 22.705973
## C1 C1 3.869883 5.138148 1.2682645 19.671500
## C2 C2 3.660819 5.113030 1.4522111 22.059063
## C3 C3 3.757310 4.858713 1.1014028 16.113772
## C4 C4 3.755848 5.070644 1.3147957 18.485543
## C5 C5 2.969298 4.323391 1.3540926 15.839220
## E1 E1 3.067251 4.747253 1.6800013 20.443864
## E2 E2 2.899123 4.610675 1.7115522 21.252021
## E3 E3 3.042398 4.897959 1.8555615 29.715885
## E4 E4 3.448830 5.095761 1.6469310 22.082668
## E5 E5 3.431287 5.221350 1.7900635 27.705564
## N1 N1 2.676901 3.323391 0.6464903 7.383004
## N2 N2 3.257310 3.943485 0.6861751 8.199859
## N3 N3 2.866959 3.687598 0.8206391 9.423782
## N4 N4 3.274854 3.315542 0.0406878 0.459648
## N5 N5 2.774854 3.293564 0.5187098 5.691748
## O1 O1 4.251462 5.365777 1.1143151 19.007850
## O2 O2 3.880117 4.786499 0.9063823 10.712654
## O3 O3 3.722222 5.182104 1.4598814 24.528917
## O4 O4 4.660819 5.324961 0.6641420 10.374009
## O5 O5 4.077485 5.048666 0.9711802 14.017635
ggplot(data = rslt2, aes(x = reorder(Item, t.value, max), y = t.value)) +
geom_point() +
geom_hline(yintercept = 2, linetype = "dashed") +
coord_flip() +
labs(x = "Items", y = "t-value") +
theme_bw()
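To read the same result off the vector instead of the plot, one can list the items whose t statistic falls below the dashed line at t = 2; a short sketch:
weak <- names(item_t)[item_t < 2]   # items with poor H/L discrimination
sub("\\.t$", "", weak)              # drop the ".t" suffix that $statistic added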
Items written for the same construct should correlate with one another, so the next step examines the item-total and item-subscale correlations.
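A quick way to eyeball those inter-item correlations, sketched here for one subscale only to keep the output small (lowerCor() prints the lower triangle of the correlation matrix):
psych::lowerCor(dta[, 1:5])   # e.g., the Agreeableness items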
# Cronbach's alpha item statistics for the total scale (25 items):
# r.drop is the corrected item-total correlation, i.e. the correlation of the item
# with the total score computed without that item
itotr <- psych::alpha(dta[, 1:25])$item.stats[, "r.drop"]
## Warning in psych::alpha(dta[, 1:25]): Some items were negatively correlated with the total scale and probably
## should be reversed.
## To do this, run the function again with the 'check.keys=TRUE' option
## Some items ( N1 N2 N3 N4 N5 O4 ) were negatively correlated with the total scale and
## probably should be reversed.
## To do this, run the function again with the 'check.keys=TRUE' option
# split the items into the five subscales using a list
ldta <- list(A = dta[, 1:5], C = dta[, 6:10], E = dta[ ,11:15], N = dta[, 16:20], O = dta[, 21:25])
# calculate Cronbach's alpha for each subscale
isubalpha <- lapply(ldta, psych::alpha)
isubr <- c(isubalpha$A$item.stats[, "r.drop"],
isubalpha$C$item.stats[, "r.drop"],
isubalpha$E$item.stats[, "r.drop"],
isubalpha$N$item.stats[, "r.drop"],
isubalpha$O$item.stats[, "r.drop"])
# combine the two sets of correlations (item-total and item-subscale) into one data frame
rslt3 <- as.data.frame(t(rbind(itotr, isubr)))
names(rslt3) <- c("Item-total correlation without item", "Item-subscale correlation without item")
print(rslt3)
## Item-total correlation without item Item-subscale correlation without item
## 1 0.13831645 0.3190962
## 2 0.43987125 0.5759233
## 3 0.45233136 0.6035693
## 4 0.29524959 0.4145254
## 5 0.39538301 0.5004352
## 6 0.33264447 0.4654163
## 7 0.36566162 0.5128535
## 8 0.25827659 0.4769297
## 9 0.27701455 0.5731250
## 10 0.21412960 0.4860793
## 11 0.30332764 0.5153693
## 12 0.32336583 0.6142087
## 13 0.47243682 0.5049821
## 14 0.37560751 0.5827738
## 15 0.47171615 0.4634332
## 16 0.05148788 0.6778437
## 17 0.06718417 0.6548330
## 18 0.09377582 0.6781411
## 19 -0.10570782 0.5485366
## 20 0.01268122 0.4874632
## 21 0.31682532 0.3981233
## 22 0.10561657 0.3509392
## 23 0.41916095 0.4546552
## 24 0.12362945 0.2167170
## 25 0.18664142 0.4197456
Based on these results, item 19 (N4) is a candidate for removal because of its negative item-total correlation.
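Two optional follow-ups, sketched here as illustrations only: re-estimating alpha for the total scale without N4, and letting psych reverse negatively keyed items automatically via the check.keys = TRUE option that the warning itself suggests.
# (a) total-scale alpha with item 19 (N4) removed (the other N items still trigger the warning)
psych::alpha(dta[, (1:25)[-19]])$total$raw_alpha
# (b) total-scale alpha with automatic reversal of negatively keyed items
psych::alpha(dta[, 1:25], check.keys = TRUE)$total$raw_alpha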
# raw alpha of the total scale if each item is dropped in turn
itotalpha <- psych::alpha(dta[, 1:25])$alpha.drop[, "raw_alpha"]
## Warning in psych::alpha(dta[, 1:25]): Some items were negatively correlated with the total scale and probably
## should be reversed.
## To do this, run the function again with the 'check.keys=TRUE' option
## Some items ( N1 N2 N3 N4 N5 O4 ) were negatively correlated with the total scale and
## probably should be reversed.
## To do this, run the function again with the 'check.keys=TRUE' option
# raw alpha of each subscale if the item is dropped
ialphad <- c(isubalpha$A$alpha.drop[, "raw_alpha"],
isubalpha$C$alpha.drop[, "raw_alpha"],
isubalpha$E$alpha.drop[, "raw_alpha"],
isubalpha$N$alpha.drop[, "raw_alpha"],
isubalpha$O$alpha.drop[, "raw_alpha"])
rslt4 <- as.data.frame(t(rbind(itotalpha, ialphad)))
names(rslt4) <- c("Main Reliability(item drop)", "Sub Reliability (item drop)")
print(rslt4)
## Main Reliability(item drop) Sub Reliability (item drop)
## 1 0.6979605 0.7314607
## 2 0.6762664 0.6332003
## 3 0.6734577 0.6150842
## 4 0.6850740 0.6963136
## 5 0.6783576 0.6582421
## 6 0.6833182 0.7044913
## 7 0.6801730 0.6869874
## 8 0.6885173 0.7000899
## 9 0.6868631 0.6630853
## 10 0.6925096 0.7031818
## 11 0.6840630 0.7312733
## 12 0.6821853 0.6924954
## 13 0.6712580 0.7329196
## 14 0.6781666 0.7056423
## 15 0.6714425 0.7457366
## 16 0.7066986 0.7598167
## 17 0.7048568 0.7673732
## 18 0.7032029 0.7594660
## 19 0.7199257 0.7982138
## 20 0.7107374 0.8167652
## 21 0.6852134 0.5392062
## 22 0.7017610 0.5675733
## 23 0.6773863 0.5077720
## 24 0.6979447 0.6212459
## 25 0.6938900 0.5218455
# put all four item-analysis indices side by side
total <- data.frame("Item-total correlation without item" = rslt3[, 1],
                    "Item-subscale correlation without item" = rslt3[, 2],
                    "Main Reliability(item drop)" = rslt4[, 1],
                    "Sub Reliability (item drop)" = rslt4[, 2])
# sum the four indices into a single composite score per item
total2 <- data.frame(Item = rownames(total),
                     score = total[, 1] + total[, 2] + total[, 3] + total[, 4])
print(total2)
## Item score
## 1 1 1.886834
## 2 2 2.325261
## 3 3 2.344443
## 4 4 2.091163
## 5 5 2.232418
## 6 6 2.185870
## 7 7 2.245676
## 8 8 2.123814
## 9 9 2.200088
## 10 10 2.095900
## 11 11 2.234033
## 12 12 2.312255
## 13 13 2.381596
## 14 14 2.342190
## 15 15 2.352329
## 16 16 2.195847
## 17 17 2.194247
## 18 18 2.234586
## 19 19 1.960968
## 20 20 2.027647
## 21 21 1.939368
## 22 22 1.725890
## 23 23 2.058974
## 24 24 1.659537
## 25 25 1.822123
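The Item column above shows row numbers rather than item names because the single-column extractions earlier dropped the row names; a small sketch that restores the names and orders the items by the composite score:
total2$Item <- colnames(dta)[1:25]                 # restore the item names
total2[order(total2$score, decreasing = TRUE), ]   # rank items by the composite score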
# parallel analysis (principal components only) to decide how many dimensions to retain
fa.parallel(dta[, 1:25], fa = "pc", show.legend = FALSE)
## Parallel analysis suggests that the number of factors = NA and the number of components = 5
Parallel analysis suggests retaining five components (the factor count is NA because only principal components were requested via fa = "pc"). The higher an item's factor loading, the better it reflects its sub-construct.
# exploratory factor analysis with psych
print.psych(fa(dta[, 1:25],          # the 25 items
               nfactors = 5,         # number of factors suggested by parallel analysis
               fm = "pa",            # principal axis factoring
               rotate = "Promax"),   # oblique rotation
            cut = .3)                # suppress loadings below .3
## Factor Analysis using method = pa
## Call: fa(r = dta[, 1:25], nfactors = 5, rotate = "Promax", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PA2 PA1 PA3 PA5 PA4 h2 u2 com
## A1 0.44 0.20 0.80 1.8
## A2 0.63 0.46 0.54 1.1
## A3 0.65 0.54 0.46 1.2
## A4 0.43 0.30 0.70 1.9
## A5 0.31 0.51 0.47 0.53 1.8
## C1 0.57 0.35 0.65 1.2
## C2 0.69 0.45 0.55 1.2
## C3 0.59 0.32 0.68 1.1
## C4 0.65 0.48 0.52 1.1
## C5 0.56 0.44 0.56 1.3
## E1 0.62 0.35 0.65 1.2
## E2 0.71 0.55 0.45 1.0
## E3 0.50 0.44 0.56 2.0
## E4 0.66 0.54 0.46 1.3
## E5 0.47 0.41 0.59 2.2
## N1 0.86 0.68 0.32 1.2
## N2 0.81 0.61 0.39 1.1
## N3 0.72 0.54 0.46 1.0
## N4 0.47 -0.36 0.51 0.49 2.2
## N5 0.48 0.35 0.65 1.8
## O1 0.51 0.32 0.68 1.2
## O2 0.48 0.27 0.73 1.6
## O3 0.61 0.47 0.53 1.3
## O4 0.37 0.25 0.75 2.6
## O5 0.54 0.30 0.70 1.2
##
## PA2 PA1 PA3 PA5 PA4
## SS loadings 2.62 2.50 2.03 1.86 1.57
## Proportion Var 0.10 0.10 0.08 0.07 0.06
## Cumulative Var 0.10 0.21 0.29 0.36 0.42
## Proportion Explained 0.25 0.24 0.19 0.18 0.15
## Cumulative Proportion 0.25 0.48 0.68 0.85 1.00
##
## With factor correlations of
## PA2 PA1 PA3 PA5 PA4
## PA2 1.00 -0.33 -0.24 0.02 0.00
## PA1 -0.33 1.00 0.37 0.29 0.15
## PA3 -0.24 0.37 1.00 0.21 0.23
## PA5 0.02 0.29 0.21 1.00 0.19
## PA4 0.00 0.15 0.23 0.19 1.00
##
## Mean item complexity = 1.5
## Test of the hypothesis that 5 factors are sufficient.
##
## The degrees of freedom for the null model are 300 and the objective function was 7.48 with Chi Square of 18146.07
## The degrees of freedom for the model are 185 and the objective function was 0.64
##
## The root mean square of the residuals (RMSR) is 0.03
## The df corrected root mean square of the residuals is 0.04
##
## The harmonic number of observations is 2436 with the empirical chi square 1131.91 with prob < 1.1e-135
## The total number of observations was 2436 with Likelihood Chi Square = 1538.69 with prob < 8e-212
##
## Tucker Lewis Index of factoring reliability = 0.877
## RMSEA index = 0.055 and the 90 % confidence intervals are 0.052 0.057
## BIC = 96.03
## Fit based upon off diagonal values = 0.98
## Measures of factor score adequacy
## PA2 PA1 PA3 PA5 PA4
## Correlation of (regression) scores with factors 0.93 0.91 0.89 0.87 0.84
## Multiple R square of scores with factors 0.86 0.82 0.79 0.76 0.71
## Minimum correlation of possible factor scores 0.72 0.64 0.57 0.52 0.42
h2: the proportion of an item's variance explained by the five factors (its communality).
u2: the proportion due to uniqueness/measurement error; h2 + u2 = 1.
com: factor complexity; a complexity of 1 means the item loads entirely on a single factor.
A good item has high h2 (low u2) and a complexity close to 1.
My selection process:
Step 1: eliminate items that load substantially on more than one factor.
Step 2: within each sub-scale, select the 3 items with the highest h2 (and thus the lowest u2); a short sketch for pulling these values out of the fitted model follows the steps.
Step 3: check that the complexity (com) of the selected items is close to 1.
Step 4: cross-check the selection against the item-total correlation and reliability results.
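To support Steps 1-3 without reading the printed table by eye, the same solution can be stored and its communalities and complexities extracted directly; a sketch (fit_pa is a name introduced here):
fit_pa <- fa(dta[, 1:25], nfactors = 5, fm = "pa", rotate = "Promax")
data.frame(h2  = round(fit_pa$communality, 2),    # variance explained by the five factors
           u2  = round(fit_pa$uniquenesses, 2),   # unique / error variance
           com = round(fit_pa$complexity, 2))     # factor complexity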
Sub-scale A: select A2, A3, A4
Sub-scale C: select C2, C4, C5
Sub-scale E: select E2, E3, E4
Sub-scale N: select N1, N2, N3
Sub-scale O: select O1, O3, O5