data(bfi)
dta <-bfi
dta <- na.omit(dta)

消除DATA中的NA

str(bfi)
## 'data.frame':    2800 obs. of  28 variables:
##  $ A1       : int  2 2 5 4 2 6 2 4 4 2 ...
##  $ A2       : int  4 4 4 4 3 6 5 3 3 5 ...
##  $ A3       : int  3 5 5 6 3 5 5 1 6 6 ...
##  $ A4       : int  4 2 4 5 4 6 3 5 3 6 ...
##  $ A5       : int  4 5 4 5 5 5 5 1 3 5 ...
##  $ C1       : int  2 5 4 4 4 6 5 3 6 6 ...
##  $ C2       : int  3 4 5 4 4 6 4 2 6 5 ...
##  $ C3       : int  3 4 4 3 5 6 4 4 3 6 ...
##  $ C4       : int  4 3 2 5 3 1 2 2 4 2 ...
##  $ C5       : int  4 4 5 5 2 3 3 4 5 1 ...
##  $ E1       : int  3 1 2 5 2 2 4 3 5 2 ...
##  $ E2       : int  3 1 4 3 2 1 3 6 3 2 ...
##  $ E3       : int  3 6 4 4 5 6 4 4 NA 4 ...
##  $ E4       : int  4 4 4 4 4 5 5 2 4 5 ...
##  $ E5       : int  4 3 5 4 5 6 5 1 3 5 ...
##  $ N1       : int  3 3 4 2 2 3 1 6 5 5 ...
##  $ N2       : int  4 3 5 5 3 5 2 3 5 5 ...
##  $ N3       : int  2 3 4 2 4 2 2 2 2 5 ...
##  $ N4       : int  2 5 2 4 4 2 1 6 3 2 ...
##  $ N5       : int  3 5 3 1 3 3 1 4 3 4 ...
##  $ O1       : int  3 4 4 3 3 4 5 3 6 5 ...
##  $ O2       : int  6 2 2 3 3 3 2 2 6 1 ...
##  $ O3       : int  3 4 5 4 4 5 5 4 6 5 ...
##  $ O4       : int  4 3 5 3 3 6 6 5 6 5 ...
##  $ O5       : int  3 3 2 5 3 1 1 3 1 2 ...
##  $ gender   : int  1 2 2 2 1 2 1 1 1 2 ...
##  $ education: int  NA NA NA NA NA 3 NA 2 1 NA ...
##  $ age      : int  16 18 17 17 17 21 18 19 19 17 ...
dta <- dta[, -c(26:28)]

只將特定的資料(題目的分數)納入資料中,以便後續的分析

dta[, c("A1", "C4", "C5", "E1", "E2", "O2", "O5")] <- 7 - dta[,c("A1","C4","C5","E1","E2", "O2", "O5")]

將負向題的分數轉為正向題,以便後續計算與分析

my_summary <- function(x) {
 require(moments)
 funs <- c(mean, sd, skewness, kurtosis)
 sapply(funs, function(f) f(x, na.rm = T))
 }
dta_desc <- apply(dta, 2, my_summary)

將my_summary function的結果加至各個columns中

head(dta_desc)
##              A1        A2        A3        A4         A5         C1         C2
## [1,]  4.6346154  4.834079  4.629249  4.749553  4.5849732  4.5697674  4.4011628
## [2,]  1.3919681  1.156915  1.289373  1.447941  1.2558329  1.2166107  1.3113098
## [3,] -0.8849273 -1.149616 -1.035184 -1.094809 -0.8806471 -0.8947177 -0.7697916
## [4,]  2.8355975  4.144108  3.567472  3.237885  3.2405729  3.4186321  2.9099922
##              C3         C4          C5         E1         E2        E3
## [1,]  4.3228980  4.4991055  3.74463327  4.0304114  3.8788014  4.009839
## [2,]  1.2871535  1.3628169  1.62959014  1.6181205  1.6056605  1.342438
## [3,] -0.6925833 -0.6410005 -0.09191721 -0.3825874 -0.2544784 -0.479646
## [4,]  2.8926454  2.4399914  1.77002240  1.9336243  1.8752405  2.570188
##              E4         E5        N1          N2        N3        N4       N5
## [1,]  4.4306798  4.4186047 2.9083184  3.48568873 3.1985689 3.1753131 2.952147
## [2,]  1.4591002  1.3301168 1.5644549  1.53476376 1.5963935 1.5605995 1.621980
## [3,] -0.8461735 -0.8112306 0.3898181 -0.06325393 0.1653818 0.2170194 0.399517
## [4,]  2.7347239  2.9743508 2.0115978  1.93343058 1.8219399 1.9423127 1.946799
##              O1        O2        O3        O4         O5
## [1,]  4.8215564  4.310823  4.483005  4.948122  4.5447227
## [2,]  1.1200433  1.545865  1.193261  1.175435  1.3295014
## [3,] -0.9081616 -0.606504 -0.792629 -1.260182 -0.7840773
## [4,]  3.4683128  2.234007  3.404031  4.264682  2.8377712
rownames(dta_desc) <- c("mean", "sd", "skewness", "kurtosis")

將每個row進行命名

rslt1 <- as.data.frame(t(dta_desc))

轉置數據(從matrix到data frame)

rslt1 |> knitr::kable()
mean sd skewness kurtosis
A1 4.634615 1.391968 -0.8849273 2.835597
A2 4.834079 1.156915 -1.1496157 4.144108
A3 4.629249 1.289373 -1.0351837 3.567472
A4 4.749553 1.447941 -1.0948090 3.237885
A5 4.584973 1.255833 -0.8806471 3.240573
C1 4.569767 1.216611 -0.8947177 3.418632
C2 4.401163 1.311310 -0.7697916 2.909992
C3 4.322898 1.287154 -0.6925833 2.892645
C4 4.499105 1.362817 -0.6410005 2.439991
C5 3.744633 1.629590 -0.0919172 1.770022
E1 4.030411 1.618121 -0.3825874 1.933624
E2 3.878801 1.605660 -0.2544784 1.875241
E3 4.009839 1.342438 -0.4796460 2.570188
E4 4.430680 1.459100 -0.8461735 2.734724
E5 4.418605 1.330117 -0.8112306 2.974351
N1 2.908318 1.564455 0.3898181 2.011598
N2 3.485689 1.534764 -0.0632539 1.933431
N3 3.198569 1.596394 0.1653818 1.821940
N4 3.175313 1.560599 0.2170194 1.942313
N5 2.952147 1.621980 0.3995170 1.946799
O1 4.821556 1.120043 -0.9081616 3.468313
O2 4.310823 1.545865 -0.6065040 2.234007
O3 4.483005 1.193261 -0.7926290 3.404031
O4 4.948122 1.175435 -1.2601825 4.264682
O5 4.544723 1.329501 -0.7840773 2.837771

展示出每題的平均數、標準差、峰值以及偏態

dtal_desc <- melt(dta_desc)

reshape資料,將wide data轉為long data

names(dtal_desc)[1:2] <- c("moments", "items")

將dtal_desc中的cloumn進行命名

ggplot(data = subset(dtal_desc, moments == "mean"),
 aes(x = reorder(items, value, max), y = value, group = moments)) +
 geom_point(size = 3) +
 geom_hline(yintercept = mean(t(dta_desc["mean",])) +  c(-1.5, 0, 1.5) * sd(t(dta_desc["mean", ])), 
 linetype = "dashed") +
 coord_flip() +
 labs(x = "items",  y = "mean") +
  theme_bw()

code的意義: data = subset(dtal_desc, moments == “mean”):指定data為何
aes(x = reorder(items, value, max), y = value, group = moments)) :對於軸的指定
geom_hline(yintercept = mean(t(dta_desc[“mean”,])+ c(-1.5, 0, 1.5) * sd(t(dta_desc[“mean”, ])):線的使用,而從這個CODE中也可以看到此圖將會以正負1.5個標準差呈現虛線與區間
theme_bw():圖表以黑白呈現

從圖中我們可以看見位於圖的4.0~4.55之間有數據的平均數的存在,而在其左右兩側的虛線,代表著以平均數正負1.5個標準差的值。而我們也可以看見圖中有超出這個區間的點(N3、N4、N5、N1),此時我們可以說這些題目與大部分的題目可能不太一樣(其平均數與其他平均數有差異)。

ggplot(data = subset(dtal_desc, moments == "sd"),
 aes(x = reorder(items, value, max), y = value, group = moments)) +
 geom_point(size = 3) +
 geom_hline(yintercept = mean(t(dta_desc["sd",])) +  # mean of sd as reference
 c(-1.5, 0, 1.5) * sd(t(dta_desc["sd", ])), linetype = "dashed") + #sd of the sd
 coord_flip() +
 labs(x = "items",  y = "sd") +
  theme_bw()

看此筆資料的SD 從圖中我們可以看見其整體的標準差平均位於1.4的附近。 而左右兩條虛線表示出了其標準差平均數正負1.5個標準差的值。此外,在圖中我們也可以看見有超出這兩個虛線的區域的點(O1),這就代表這題跟其他題目的sd相差很遠。

ggplot(data = subset(dtal_desc, moments == "skewness"),
 aes(x = reorder(items, value, max), y = value, group = moments)) +
 geom_point(size = 3) +
 geom_hline(yintercept = mean(t(dta_desc["skewness",])) +
 c(-1.5, 0, 1.5) * sd(t(dta_desc["skewness", ])), linetype = "dashed") +
 coord_flip() +
 labs(x = "items",  y = "skewness") +
  theme_bw()

此圖為對於偏態的分析,從圖中我們可以看見這筆DATA中的偏態平均數位於-0.5的附近,而左右兩條虛線表示出了其偏態平均數正負1.5個標準差的值。但同時我們也可以看見有幾個點位在這個區間的外面(N5、N1、N4、N3),而結果代表他們可能與其他題有所差異(不同)。

ggplot(data = subset(dtal_desc, moments == "kurtosis"),
 aes(x = reorder(items, value, max), y = value, group = moments)) +
 geom_point(size = 3) +
 geom_hline(yintercept = mean(t(dta_desc["kurtosis",])) +
 c(-1.5, 0, 1.5) * sd(t(dta_desc["kurtosis", ])), linetype = "dashed") +
 coord_flip() +
 labs(x = "items",  y = "kurtosis") +
  theme_bw()

從此圖中,我們可以看見峰度的平均數落在2.5~3.0之間,而其正負1.5個標準差為兩側虛線。在圖中我們可以看見有點超出於區間的點(O4、A2),所以也許這兩題與其他題目有些不同。

區辨指數

dta<- na.omit(bfi)
dta <- dta[, -c(26:28)]
dta[, c("A1", "C4", "C5", "E1", "E2", "O2", "O5")] <- 7 - dta[,c("A1","C4","C5","E1","E2", "O2", "O5")]
str(dta)
## 'data.frame':    2236 obs. of  25 variables:
##  $ A1: num  1 3 3 3 6 5 3 6 5 5 ...
##  $ A2: int  6 3 4 5 5 6 5 6 4 5 ...
##  $ A3: int  5 1 5 2 6 5 5 6 4 1 ...
##  $ A4: int  6 5 6 2 5 6 6 1 4 3 ...
##  $ A5: int  5 1 5 1 6 5 5 6 3 5 ...
##  $ C1: int  6 3 4 5 4 3 5 5 6 5 ...
##  $ C2: int  6 2 3 5 3 5 5 2 5 4 ...
##  $ C3: int  6 4 5 5 2 6 4 5 6 5 ...
##  $ C4: num  6 5 4 5 3 4 6 6 6 5 ...
##  $ C5: num  4 3 5 5 2 1 6 6 6 2 ...
##  $ E1: num  5 4 6 4 5 5 4 6 5 6 ...
##  $ E2: num  6 1 4 3 6 5 5 6 3 5 ...
##  $ E3: int  6 4 2 3 2 4 5 6 4 6 ...
##  $ E4: int  5 2 5 6 5 6 5 6 2 5 ...
##  $ E5: int  6 1 4 5 2 6 6 6 6 4 ...
##  $ N1: int  3 6 3 2 2 4 2 2 3 1 ...
##  $ N2: int  5 3 3 4 2 4 3 3 3 4 ...
##  $ N3: int  2 2 4 2 2 4 3 1 5 2 ...
##  $ N4: int  2 6 2 2 2 6 1 2 3 2 ...
##  $ N5: int  3 4 3 3 2 6 1 1 2 5 ...
##  $ O1: int  4 3 5 5 6 6 6 6 5 2 ...
##  $ O2: num  4 5 4 5 6 6 5 3 5 3 ...
##  $ O3: int  5 4 5 5 5 5 5 5 6 5 ...
##  $ O4: int  6 5 6 5 5 6 6 5 6 4 ...
##  $ O5: num  6 4 4 2 5 6 5 4 6 6 ...
dta$tot <- apply(dta, 1, sum)

把每個column進行加總,然後把結果放入一個新的column(tot)中

dta$grp <- NA
dta$grp[rank(dta$tot) < 2236*.27] <- "L"
dta$grp[rank(dta$tot) > 2236*.73] <- "H"
dta$grp <- factor(dta$grp)
head(dta)
##       A1 A2 A3 A4 A5 C1 C2 C3 C4 C5 E1 E2 E3 E4 E5 N1 N2 N3 N4 N5 O1 O2 O3 O4
## 61623  1  6  5  6  5  6  6  6  6  4  5  6  6  5  6  3  5  2  2  3  4  4  5  6
## 61629  3  3  1  5  1  3  2  4  5  3  4  1  4  2  1  6  3  2  6  4  3  5  4  5
## 61634  3  4  5  6  5  4  3  5  4  5  6  4  2  5  4  3  3  4  2  3  5  4  5  6
## 61640  3  5  2  2  1  5  5  5  5  5  4  3  3  6  5  2  4  2  2  3  5  5  5  5
## 61661  6  5  6  5  6  4  3  2  3  2  5  6  2  5  2  2  2  2  2  2  6  6  5  5
## 61664  5  6  5  6  5  3  5  6  4  1  5  5  4  6  6  4  4  4  6  6  6  6  5  6
##       O5 tot  grp
## 61623  6 119    H
## 61629  4  84    L
## 61634  4 104 <NA>
## 61640  2  94    L
## 61661  5  99 <NA>
## 61664  6 125    H

dta\(grp <- NA :在dta這個data中多出一個grp的row,並且都顯示NA 計算用於區辨高低組的分數 在上方的code中我們的定義為低分組為其分數低於27%;高分組為分數高於73%。 dta\)grp <- factor(dta$grp):避免R以類別變項的方式進行分析

dtam <- aggregate(dta[, 1:25], by=list(dta$grp), mean)
print(dtam)
##   Group.1       A1       A2       A3       A4       A5       C1       C2
## 1       H 5.107084 5.505766 5.373970 5.352554 5.257002 5.154860 5.116969
## 2       L 4.227575 4.126246 3.777409 4.041528 3.850498 3.923588 3.704319
##         C3       C4       C5       E1       E2       E3       E4       E5
## 1 4.848435 5.095552 4.342669 4.741351 4.611203 4.904448 5.093904 5.227348
## 2 3.765781 3.813953 3.008306 3.031561 2.892027 3.033223 3.446844 3.430233
##         N1       N2       N3       N4       N5       O1       O2       O3
## 1 3.293245 3.927512 3.660626 3.299835 3.271829 5.359143 4.771005 5.187809
## 2 2.602990 3.202658 2.805648 3.274086 2.774086 4.269103 3.877076 3.762458
##         O4       O5
## 1 5.331137 5.064250
## 2 4.710963 4.074751

dtam <- aggregate(dta[, 1:25], by=list(dta$grp), mean):取高分組、低分組的平均

dtam <- t(dtam[, -1])

刪除第一個column

item_t <- sapply(dta[, 1:25], function(x) t.test(x ~ dta$grp)$statistic)
print(item_t)
##       A1.t       A2.t       A3.t       A4.t       A5.t       C1.t       C2.t 
## 11.6048485 23.0772247 24.8535301 16.8480716 21.4153701 18.3288421 20.3115177 
##       C3.t       C4.t       C5.t       E1.t       E2.t       E3.t       E4.t 
## 14.9526697 17.2490960 14.8831014 20.1982621 20.5972118 28.7359620 21.1694572 
##       E5.t       N1.t       N2.t       N3.t       N4.t       N5.t       O1.t 
## 26.4867355  7.6465266  8.3210388  9.4012398  0.2790589  5.2513414 17.6681816 
##       O2.t       O3.t       O4.t       O5.t 
## 10.1969028 22.6670752  9.3472473 13.5240334

用t-test去看高低分組的平均數是否有所差異。

rslt2 <- data.frame(Item = rownames(dtam), low.mean.score = dtam[, 2], high.mean.score = dtam[, 1], mean.dif = dtam[, 1]-dtam[, 2], t.value = item_t)
print(rslt2)
##    Item low.mean.score high.mean.score   mean.dif    t.value
## A1   A1       4.227575        5.107084 0.87950927 11.6048485
## A2   A2       4.126246        5.505766 1.37952022 23.0772247
## A3   A3       3.777409        5.373970 1.59656171 24.8535301
## A4   A4       4.041528        5.352554 1.31102530 16.8480716
## A5   A5       3.850498        5.257002 1.40650331 21.4153701
## C1   C1       3.923588        5.154860 1.23127193 18.3288421
## C2   C2       3.704319        5.116969 1.41264976 20.3115177
## C3   C3       3.765781        4.848435 1.08265419 14.9526697
## C4   C4       3.813953        5.095552 1.28159841 17.2490960
## C5   C5       3.008306        4.342669 1.33436322 14.8831014
## E1   E1       3.031561        4.741351 1.70978944 20.1982621
## E2   E2       2.892027        4.611203 1.71917606 20.5972118
## E3   E3       3.033223        4.904448 1.87122551 28.7359620
## E4   E4       3.446844        5.093904 1.64706059 21.1694572
## E5   E5       3.430233        5.227348 1.79711505 26.4867355
## N1   N1       2.602990        3.293245 0.69025544  7.6465266
## N2   N2       3.202658        3.927512 0.72485455  8.3210388
## N3   N3       2.805648        3.660626 0.85497819  9.4012398
## N4   N4       3.274086        3.299835 0.02574888  0.2790589
## N5   N5       2.774086        3.271829 0.49774229  5.2513414
## O1   O1       4.269103        5.359143 1.09004034 17.6681816
## O2   O2       3.877076        4.771005 0.89392853 10.1969028
## O3   O3       3.762458        5.187809 1.42535042 22.6670752
## O4   O4       4.710963        5.331137 0.62017328  9.3472473
## O5   O5       4.074751        5.064250 0.98949958 13.5240334

將資料整理成表格 low.mean.score = dtam[, 2] : cloumn名字=dtam的第2cloumn

ggplot(data = rslt2, aes(x = reorder(Item, t.value, max), y = t.value)) +
 geom_point() +
 geom_hline(yintercept = 2, linetype = "dashed") +
 coord_flip() +
 labs(x = "Items", y = "t-value") +
 theme_bw()

若顯著:item有區辨度
N4可能不具有良好的區辨度

Items correlation

itotr <- psych::alpha(dta[, 1:25])$item.stats[, "r.drop"]
## Warning in psych::alpha(dta[, 1:25]): Some items were negatively correlated with the total scale and probably 
## should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
## Some items ( N1 N2 N3 N4 N5 O4 ) were negatively correlated with the total scale and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option

psych::alpha -> 指定用paych中的alpha
用以計算總分的alpha,顯示出的分數為去除特定題目後的

 ldta <- list(A = dta[, 1:5], C = dta[, 6:10], E = dta[ ,11:15], 
              N = dta[, 16:20], O = dta[, 21:25])

製造一個subscale

isubalpha <- lapply(ldta, psych::alpha)
isubr <- c(isubalpha$A$item.stats[, "r.drop"],
             isubalpha$C$item.stats[, "r.drop"],
             isubalpha$E$item.stats[, "r.drop"],
           isubalpha$N$item.stats[, "r.drop"],
           isubalpha$O$item.stats[, "r.drop"])
rslt3 <- as.data.frame(t(rbind(itotr, isubr)))
names(rslt3) <- c("Item-total correlation without item", "Item-subscale correlation without item")

print(rslt3)
##    Item-total correlation without item Item-subscale correlation without item
## 1                           0.13243666                              0.3077162
## 2                           0.42943569                              0.5618331
## 3                           0.44375121                              0.5909941
## 4                           0.28316705                              0.4036667
## 5                           0.38350120                              0.4887087
## 6                           0.32437095                              0.4552472
## 7                           0.35341507                              0.5018052
## 8                           0.25678142                              0.4775709
## 9                           0.26479190                              0.5645098
## 10                          0.20805656                              0.4830116
## 11                          0.31471245                              0.5128046
## 12                          0.32331085                              0.6160002
## 13                          0.48035284                              0.5213901
## 14                          0.37723273                              0.5845455
## 15                          0.47605288                              0.4694015
## 16                          0.06791869                              0.6771958
## 17                          0.08147559                              0.6508838
## 18                          0.10555562                              0.6767828
## 19                         -0.10731086                              0.5428429
## 20                          0.01078638                              0.4886151
## 21                          0.30525432                              0.3982523
## 22                          0.09618200                              0.3593390
## 23                          0.40883437                              0.4578104
## 24                          0.11107410                              0.2143355
## 25                          0.18707793                              0.4281881

可以看見除了去掉第16(N1)題外,基本上其他題目去除後整個量表中的題目之間的相關都比subscale中的來的低。

Items reliability

itotalpha <- psych::alpha(dta[, 1:25])$alpha.drop[, "raw_alpha"]
## Warning in psych::alpha(dta[, 1:25]): Some items were negatively correlated with the total scale and probably 
## should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
## Some items ( N1 N2 N3 N4 N5 O4 ) were negatively correlated with the total scale and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
ialphad <- c(isubalpha$A$alpha.drop[, "raw_alpha"],
             isubalpha$C$alpha.drop[, "raw_alpha"],
             isubalpha$E$alpha.drop[, "raw_alpha"],
             isubalpha$N$alpha.drop[, "raw_alpha"],
             isubalpha$O$alpha.drop[, "raw_alpha"])
rslt4 <- as.data.frame(t(rbind(itotalpha, ialphad)))
names(rslt4) <- c("Main Reliability(item drop)", "Sub Reliability (item drop)")
print(rslt4)
##    Main Reliability(item drop) Sub Reliability (item drop)
## 1                    0.6945414                   0.7220895
## 2                    0.6730193                   0.6217082
## 3                    0.6700579                   0.6023930
## 4                    0.6821019                   0.6846047
## 5                    0.6751560                   0.6463128
## 6                    0.6799121                   0.7000553
## 7                    0.6769802                   0.6826336
## 8                    0.6846139                   0.6916822
## 9                    0.6838192                   0.6576527
## 10                   0.6891334                   0.6961548
## 11                   0.6788212                   0.7375935
## 12                   0.6780166                   0.6976668
## 13                   0.6662841                   0.7329283
## 14                   0.6737941                   0.7105255
## 15                   0.6668361                   0.7488675
## 16                   0.7015049                   0.7578407
## 17                   0.6999881                   0.7662320
## 18                   0.6984647                   0.7575138
## 19                   0.7165875                   0.7977181
## 20                   0.7073799                   0.8144963
## 21                   0.6819352                   0.5450899
## 22                   0.6988113                   0.5687471
## 23                   0.6739993                   0.5128569
## 24                   0.6949209                   0.6269339
## 25                   0.6899707                   0.5233128

信度:對於不同人,每一次/多次測量都能測到一樣的結果。
在相關性的分析中看到的結果重複出現於信度分析結果中: 除了去掉第16題(N1)的分析之外,其他題目去除後其整個量表中的題目之間的信度都比subscale中的來的低。

factor loading

當factor loading越高:越能解釋因素

 print.psych(fa(dta[, 1:25], 
               nfactor = 5,      
               fm = "pa",           
               rotate = "promax"),  
               cut = .3)  
## Factor Analysis using method =  pa
## Call: fa(r = dta[, 1:25], nfactors = 5, rotate = "promax", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##      PA2   PA1   PA3   PA5   PA4   h2   u2 com
## A1                    0.45       0.20 0.80 1.5
## A2                    0.59       0.44 0.56 1.1
## A3                    0.60       0.53 0.47 1.3
## A4                    0.41       0.30 0.70 2.0
## A5        0.32        0.48       0.46 0.54 1.8
## C1              0.56             0.34 0.66 1.2
## C2              0.69             0.44 0.56 1.2
## C3              0.60             0.33 0.67 1.1
## C4              0.64             0.47 0.53 1.2
## C5              0.56             0.43 0.57 1.4
## E1        0.63                   0.34 0.66 1.2
## E2        0.72                   0.55 0.45 1.1
## E3        0.57                   0.46 0.54 1.6
## E4        0.65                   0.54 0.46 1.3
## E5        0.51                   0.42 0.58 1.8
## N1  0.84                         0.68 0.32 1.3
## N2  0.79                         0.61 0.39 1.2
## N3  0.74                         0.55 0.45 1.0
## N4  0.53 -0.32                   0.51 0.49 1.8
## N5  0.53                         0.35 0.65 1.5
## O1                          0.49 0.32 0.68 1.3
## O2                          0.49 0.28 0.72 1.5
## O3        0.31              0.58 0.48 0.52 1.6
## O4                          0.37 0.24 0.76 2.7
## O5                          0.55 0.31 0.69 1.1
## 
##                        PA2  PA1  PA3  PA5  PA4
## SS loadings           2.69 2.61 1.99 1.73 1.52
## Proportion Var        0.11 0.10 0.08 0.07 0.06
## Cumulative Var        0.11 0.21 0.29 0.36 0.42
## Proportion Explained  0.26 0.25 0.19 0.16 0.14
## Cumulative Proportion 0.26 0.50 0.69 0.86 1.00
## 
##  With factor correlations of 
##       PA2   PA1   PA3  PA5  PA4
## PA2  1.00 -0.26 -0.23 0.01 0.04
## PA1 -0.26  1.00  0.41 0.34 0.12
## PA3 -0.23  0.41  1.00 0.21 0.16
## PA5  0.01  0.34  0.21 1.00 0.12
## PA4  0.04  0.12  0.16 0.12 1.00
## 
## Mean item complexity =  1.4
## Test of the hypothesis that 5 factors are sufficient.
## 
## The degrees of freedom for the null model are  300  and the objective function was  7.41 with Chi Square of  16484.78
## The degrees of freedom for the model are 185  and the objective function was  0.63 
## 
## The root mean square of the residuals (RMSR) is  0.03 
## The df corrected root mean square of the residuals is  0.04 
## 
## The harmonic number of observations is  2236 with the empirical chi square  1046.45  with prob <  3.1e-120 
## The total number of observations was  2236  with Likelihood Chi Square =  1400.57  with prob <  1.5e-185 
## 
## Tucker Lewis Index of factoring reliability =  0.878
## RMSEA index =  0.054  and the 90 % confidence intervals are  0.052 0.057
## BIC =  -26.23
## Fit based upon off diagonal values = 0.98
## Measures of factor score adequacy             
##                                                    PA2  PA1  PA3  PA5  PA4
## Correlation of (regression) scores with factors   0.93 0.91 0.88 0.86 0.84
## Multiple R square of scores with factors          0.86 0.83 0.78 0.74 0.70
## Minimum correlation of possible factor scores     0.72 0.67 0.57 0.48 0.40

dta[, 1:25] : 分析資料來源

cut = .3 : 去掉0.3以下的因素負載量的顯示(負載量過低)
nfactor = 5 : 設定factor數(可先由Parallel analysis得出) 在分析結果中,被歸在同一類的item代表其對於特定因素的負載量較高;而倘若同時出現了對於一個因素以上的負載量,代表其可能不是一個良好的測驗題目。(例如:A5、N4與O3)。
而分析結果中的h2是五個因素可以解釋題目的比率;u2是測量誤差變異的比率;com 是因素複雜度(當複雜度為1:題目完全負載在一個因素上)。所以可以同時透過觀察這些值來決定要選那些題目。

在A中:去除A5(因為同時負載於兩個因素:因素一以及因素五)、去除A1(因為誤差變異比率過大,且可解釋題目比率為此subscale中最低)。

在C中:去除C3(因為其因素負載量為最低)、去除C1(因為C1與C5的因素負載量雖然一樣是此subscale中第二低的,但因為在可以解釋題目的比率中,C1低於C5,故去除此題)。

在E中:去除E5(因為以因素負載量與複雜度來看,此題目較不理想。而若回去看信度分析的部分,會發現在此subscale中,若去掉E5可擁有較高的信度)、去除E3(E3為因素負載量第二低者且其複雜度離1較遠;E1的因素負載量為第三低,且可解釋題目的比率為subscale中最低。但會去看信度分析會發現去除此兩題後subscale中以及整個量表中的信度差異不大;但在相關性分析中,卻可以看見去除E3後整個量表的相關性相較於去除E1來的高,故去除E3。)。

在N中:去除N4(因為同時負載於兩個因素)、去除N5(因為去除掉N4後,其因素負載量為此subscale中最低,同時h2為最低、u2為最高、複雜度也大於1且為最高)。

在O中:去除O3(因為同時負載於兩個因素)、去除O4(因為其因素負載量為此subscale中最低,同時h2為最低、u2為最高、複雜度也大於1且為最高)。

因此,A中僅留下A2、A3、A4;C中僅留下C2、C4、C5;E中僅留下E1、E2、E4;N中僅留下N1、N2、N3;O中僅留下O1、O2、O5。

Parallel analysis

fa.parallel(dta[, 1:25], fa = "pc", show.legend = FALSE)

## Parallel analysis suggests that the number of factors =  NA  and the number of components =  5

在此結果中我們可以看見,當factor為5個或是小於5時,其eigen value皆不大於1。故較為良好的factor數為5個。