library(moments)
library(psych)
library(lavaan)
library(reshape2)
library(tidyverse)
data(bfi)

轉換反向題

keys <-
  list(agree=c("-A1","A2","A3","A4","A5"),conscientious=c("C1","C2","C3","-C4","-C5"),
extraversion=c("-E1","-E2","E3","E4","E5"),neuroticism=c("N1","N2","N3","N4","N5"),
openness = c("O1","-O2","O3","O4","-O5"))
head(keys)
## $agree
## [1] "-A1" "A2"  "A3"  "A4"  "A5" 
## 
## $conscientious
## [1] "C1"  "C2"  "C3"  "-C4" "-C5"
## 
## $extraversion
## [1] "-E1" "-E2" "E3"  "E4"  "E5" 
## 
## $neuroticism
## [1] "N1" "N2" "N3" "N4" "N5"
## 
## $openness
## [1] "O1"  "-O2" "O3"  "O4"  "-O5"
head(bfi$A1)
## [1] 2 2 5 4 2 6
bfi[, c(1, 9, 10, 11, 12, 22, 25)] = 7 - bfi[, c(1, 9, 10, 11, 12, 22, 25)]
head(bfi$A1)
## [1] 5 5 2 3 5 1

看看資料結構

str(bfi)
## 'data.frame':    2800 obs. of  28 variables:
##  $ A1       : num  5 5 2 3 5 1 5 3 3 5 ...
##  $ A2       : int  4 4 4 4 3 6 5 3 3 5 ...
##  $ A3       : int  3 5 5 6 3 5 5 1 6 6 ...
##  $ A4       : int  4 2 4 5 4 6 3 5 3 6 ...
##  $ A5       : int  4 5 4 5 5 5 5 1 3 5 ...
##  $ C1       : int  2 5 4 4 4 6 5 3 6 6 ...
##  $ C2       : int  3 4 5 4 4 6 4 2 6 5 ...
##  $ C3       : int  3 4 4 3 5 6 4 4 3 6 ...
##  $ C4       : num  3 4 5 2 4 6 5 5 3 5 ...
##  $ C5       : num  3 3 2 2 5 4 4 3 2 6 ...
##  $ E1       : num  4 6 5 2 5 5 3 4 2 5 ...
##  $ E2       : num  4 6 3 4 5 6 4 1 4 5 ...
##  $ E3       : int  3 6 4 4 5 6 4 4 NA 4 ...
##  $ E4       : int  4 4 4 4 4 5 5 2 4 5 ...
##  $ E5       : int  4 3 5 4 5 6 5 1 3 5 ...
##  $ N1       : int  3 3 4 2 2 3 1 6 5 5 ...
##  $ N2       : int  4 3 5 5 3 5 2 3 5 5 ...
##  $ N3       : int  2 3 4 2 4 2 2 2 2 5 ...
##  $ N4       : int  2 5 2 4 4 2 1 6 3 2 ...
##  $ N5       : int  3 5 3 1 3 3 1 4 3 4 ...
##  $ O1       : int  3 4 4 3 3 4 5 3 6 5 ...
##  $ O2       : num  1 5 5 4 4 4 5 5 1 6 ...
##  $ O3       : int  3 4 5 4 4 5 5 4 6 5 ...
##  $ O4       : int  4 3 5 3 3 6 6 5 6 5 ...
##  $ O5       : num  4 4 5 2 4 6 6 4 6 5 ...
##  $ gender   : int  1 2 2 2 1 2 1 1 1 2 ...
##  $ education: int  NA NA NA NA NA 3 NA 2 1 NA ...
##  $ age      : int  16 18 17 17 17 21 18 19 19 17 ...
head(bfi)
##       A1 A2 A3 A4 A5 C1 C2 C3 C4 C5 E1 E2 E3 E4 E5 N1 N2 N3 N4 N5 O1 O2 O3 O4
## 61617  5  4  3  4  4  2  3  3  3  3  4  4  3  4  4  3  4  2  2  3  3  1  3  4
## 61618  5  4  5  2  5  5  4  4  4  3  6  6  6  4  3  3  3  3  5  5  4  5  4  3
## 61620  2  4  5  4  4  4  5  4  5  2  5  3  4  4  5  4  5  4  2  3  4  5  5  5
## 61621  3  4  6  5  5  4  4  3  2  2  2  4  4  4  4  2  5  2  4  1  3  4  4  3
## 61622  5  3  3  4  5  4  4  5  4  5  5  5  5  4  5  2  3  4  4  3  3  4  4  3
## 61623  1  6  5  6  5  6  6  6  6  4  5  6  6  5  6  3  5  2  2  3  4  4  5  6
##       O5 gender education age
## 61617  4      1        NA  16
## 61618  4      2        NA  18
## 61620  5      2        NA  17
## 61621  2      2        NA  17
## 61622  4      1        NA  17
## 61623  6      2         3  21
summary(bfi)
##        A1              A2              A3              A4            A5      
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.0   Min.   :1.00  
##  1st Qu.:4.000   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:4.0   1st Qu.:4.00  
##  Median :5.000   Median :5.000   Median :5.000   Median :5.0   Median :5.00  
##  Mean   :4.587   Mean   :4.802   Mean   :4.604   Mean   :4.7   Mean   :4.56  
##  3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.0   3rd Qu.:5.00  
##  Max.   :6.000   Max.   :6.000   Max.   :6.000   Max.   :6.0   Max.   :6.00  
##  NA's   :16      NA's   :27      NA's   :26      NA's   :19    NA's   :16    
##        C1              C2             C3              C4              C5       
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:4.00   1st Qu.:4.000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :5.000   Median :5.00   Median :5.000   Median :5.000   Median :4.000  
##  Mean   :4.502   Mean   :4.37   Mean   :4.304   Mean   :4.447   Mean   :3.703  
##  3rd Qu.:5.000   3rd Qu.:5.00   3rd Qu.:5.000   3rd Qu.:6.000   3rd Qu.:5.000  
##  Max.   :6.000   Max.   :6.00   Max.   :6.000   Max.   :6.000   Max.   :6.000  
##  NA's   :21      NA's   :24     NA's   :20      NA's   :26      NA's   :16     
##        E1              E2              E3              E4       
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:3.000   1st Qu.:3.000   1st Qu.:4.000  
##  Median :4.000   Median :4.000   Median :4.000   Median :5.000  
##  Mean   :4.026   Mean   :3.858   Mean   :4.001   Mean   :4.422  
##  3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:6.000  
##  Max.   :6.000   Max.   :6.000   Max.   :6.000   Max.   :6.000  
##  NA's   :23      NA's   :16      NA's   :25      NA's   :9      
##        E5              N1              N2              N3       
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median :5.000   Median :3.000   Median :4.000   Median :3.000  
##  Mean   :4.416   Mean   :2.929   Mean   :3.508   Mean   :3.217  
##  3rd Qu.:5.000   3rd Qu.:4.000   3rd Qu.:5.000   3rd Qu.:4.000  
##  Max.   :6.000   Max.   :6.000   Max.   :6.000   Max.   :6.000  
##  NA's   :21      NA's   :22      NA's   :21      NA's   :11     
##        N4              N5             O1              O2              O3       
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:2.00   1st Qu.:4.000   1st Qu.:3.000   1st Qu.:4.000  
##  Median :3.000   Median :3.00   Median :5.000   Median :5.000   Median :5.000  
##  Mean   :3.186   Mean   :2.97   Mean   :4.816   Mean   :4.287   Mean   :4.438  
##  3rd Qu.:4.000   3rd Qu.:4.00   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:5.000  
##  Max.   :6.000   Max.   :6.00   Max.   :6.000   Max.   :6.000   Max.   :6.000  
##  NA's   :36      NA's   :29     NA's   :22                      NA's   :28     
##        O4              O5           gender        education         age       
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.00   Min.   : 3.00  
##  1st Qu.:4.000   1st Qu.:4.00   1st Qu.:1.000   1st Qu.:3.00   1st Qu.:20.00  
##  Median :5.000   Median :5.00   Median :2.000   Median :3.00   Median :26.00  
##  Mean   :4.892   Mean   :4.51   Mean   :1.672   Mean   :3.19   Mean   :28.78  
##  3rd Qu.:6.000   3rd Qu.:6.00   3rd Qu.:2.000   3rd Qu.:4.00   3rd Qu.:35.00  
##  Max.   :6.000   Max.   :6.00   Max.   :2.000   Max.   :5.00   Max.   :86.00  
##  NA's   :14      NA's   :20                     NA's   :223
bfi <- bfi[, 1:25]
sum(is.na(bfi))
## [1] 508

看看基本統計量

my_summary <- function(x) {
 require(moments)
 funs <- c(mean, sd, skewness, kurtosis)
 sapply(funs, function(f) f(x, na.rm = T))
 }
bfi_desc <- apply(bfi, 2, my_summary)
head(bfi_desc)
##              A1        A2        A3        A4        A5         C1         C2
## [1,]  4.5865661  4.802380  4.603821  4.699748  4.560345  4.5023390  4.3699568
## [2,]  1.4077372  1.172020  1.301834  1.479633  1.258512  1.2413465  1.3183465
## [3,] -0.8254883 -1.124894 -0.998997 -1.031499 -0.847690 -0.8551631 -0.7422207
## [4,]  2.6942957  4.057765  3.444524  3.042640  3.161176  3.3068088  2.8656243
##              C3         C4          C5         E1         E2         E3
## [1,]  4.3039568  4.4466474  3.70330460  4.0255672  3.8581178  4.0007207
## [2,]  1.2885518  1.3751181  1.62854187  1.6315055  1.6052103  1.3527188
## [3,] -0.6918287 -0.5964955 -0.06620282 -0.3736569 -0.2209396 -0.4706335
## [4,]  2.8697332  2.3802970  1.78461246  1.9090390  1.8526925  2.5367154
##              E4        E5        N1          N2        N3        N4        N5
## [1,]  4.4224292  4.416337 2.9290857  3.50773660 3.2165651 3.1856006 2.9696860
## [2,]  1.4575174  1.334768 1.5709175  1.52594359 1.6029021 1.5696851 1.6186474
## [3,] -0.8241831 -0.777486 0.3714298 -0.07698521 0.1506797 0.1969966 0.3744599
## [4,]  2.6977079  2.908401 1.9885722  1.95035250 1.8227046 1.9090309 1.9401121
##              O1        O2         O3        O4         O5
## [1,]  4.8160547  4.286786  4.4383117  4.892319  4.5104317
## [2,]  1.1295303  1.565152  1.2209011  1.221250  1.3279590
## [3,] -0.8973669 -0.585679 -0.7730516 -1.218247 -0.7384818
## [4,]  3.4277033  2.188889  3.3043641  4.082686  2.7630094
rownames(bfi_desc) <- c("mean", "sd", "skewness", "kurtosis")
rslt1 <- as.data.frame(t(bfi_desc))
rslt1 |> knitr::kable()
mean sd skewness kurtosis
A1 4.586566 1.407737 -0.8254883 2.694296
A2 4.802380 1.172020 -1.1248938 4.057765
A3 4.603821 1.301834 -0.9989970 3.444524
A4 4.699748 1.479633 -1.0314991 3.042640
A5 4.560345 1.258512 -0.8476900 3.161176
C1 4.502339 1.241346 -0.8551631 3.306809
C2 4.369957 1.318347 -0.7422207 2.865624
C3 4.303957 1.288552 -0.6918287 2.869733
C4 4.446647 1.375118 -0.5964955 2.380297
C5 3.703305 1.628542 -0.0662028 1.784612
E1 4.025567 1.631506 -0.3736569 1.909039
E2 3.858118 1.605210 -0.2209396 1.852693
E3 4.000721 1.352719 -0.4706335 2.536715
E4 4.422429 1.457517 -0.8241831 2.697708
E5 4.416337 1.334768 -0.7774860 2.908401
N1 2.929086 1.570917 0.3714298 1.988572
N2 3.507737 1.525944 -0.0769852 1.950352
N3 3.216565 1.602902 0.1506797 1.822705
N4 3.185601 1.569685 0.1969966 1.909031
N5 2.969686 1.618647 0.3744599 1.940112
O1 4.816055 1.129530 -0.8973669 3.427703
O2 4.286786 1.565152 -0.5856790 2.188889
O3 4.438312 1.220901 -0.7730516 3.304364
O4 4.892319 1.221250 -1.2182471 4.082686
O5 4.510432 1.327959 -0.7384818 2.763009
bfil_desc <- melt(bfi_desc)
names(bfil_desc)[1:2] <- c("moments", "items")
head(bfil_desc)
##    moments items      value
## 1     mean    A1  4.5865661
## 2       sd    A1  1.4077372
## 3 skewness    A1 -0.8254883
## 4 kurtosis    A1  2.6942957
## 5     mean    A2  4.8023801
## 6       sd    A2  1.1720199
ggplot(data = subset(bfil_desc, moments == "mean"),
 aes(x = reorder(items, value, max), y = value, group = moments)) +
 geom_point(size = 2) +
 geom_hline(yintercept = mean(t(bfi_desc["mean",])) +
 c(-1.5, 0, 1.5) * sd(t(bfi_desc["mean", ])), linetype = "dashed") +
 coord_flip() +
 labs(x = "items",  y = "mean") +
  theme_bw()

ggplot(data = subset(bfil_desc, moments == "skewness"),
 aes(x = reorder(items, value, max), y = value, group = moments)) +
 geom_point(size = 2) +
 geom_hline(yintercept = mean(t(bfi_desc["skewness",])) +
 c(-1.5, 0, 1.5) * sd(t(bfi_desc["skewness", ])), linetype = "dashed") +
 coord_flip() +
 labs(x = "items",  y = "skewness") +
  theme_bw()

ggplot(data = subset(bfil_desc, moments == "kurtosis"),
 aes(x = reorder(items, value, max), y = value, group = moments)) +
 geom_point(size = 2) +
 geom_hline(yintercept = mean(t(bfi_desc["kurtosis",])) +
 c(-1.5, 0, 1.5) * sd(t(bfi_desc["kurtosis", ])), linetype = "dashed") +
 coord_flip() +
 labs(x = "items",  y = "kurtosis") +
  theme_bw()

去除遺漏值

nabfi <- na.omit(bfi)

鑑別度指標

計算區變度,以總分為主,選取低分組與高分組,比較各題在兩組上的差異。

nabfi$tot <- apply(nabfi, 1, sum)
nabfi$grp <- NA
nabfi$grp[rank(nabfi$tot) < 2800*.27] <- "L"
nabfi$grp[rank(nabfi$tot) > 2800*.73] <- "H"
nabfi$grp <- factor(nabfi$grp)
head(nabfi)
##       A1 A2 A3 A4 A5 C1 C2 C3 C4 C5 E1 E2 E3 E4 E5 N1 N2 N3 N4 N5 O1 O2 O3 O4
## 61617  5  4  3  4  4  2  3  3  3  3  4  4  3  4  4  3  4  2  2  3  3  1  3  4
## 61618  5  4  5  2  5  5  4  4  4  3  6  6  6  4  3  3  3  3  5  5  4  5  4  3
## 61620  2  4  5  4  4  4  5  4  5  2  5  3  4  4  5  4  5  4  2  3  4  5  5  5
## 61621  3  4  6  5  5  4  4  3  2  2  2  4  4  4  4  2  5  2  4  1  3  4  4  3
## 61622  5  3  3  4  5  4  4  5  4  5  5  5  5  4  5  2  3  4  4  3  3  4  4  3
## 61623  1  6  5  6  5  6  6  6  6  4  5  6  6  5  6  3  5  2  2  3  4  4  5  6
##       O5 tot  grp
## 61617  4  82    L
## 61618  4 105 <NA>
## 61620  5 102 <NA>
## 61621  2  86    L
## 61622  4 100 <NA>
## 61623  6 119    H

算高低分組平均數

bfim <- aggregate(nabfi[, 1:25], by=list(nabfi$grp), mean)
print(bfim)
##   Group.1       A1       A2       A3       A4       A5       C1       C2
## 1       H 5.146341 5.569106 5.533875 5.455285 5.430894 5.284553 5.235772
## 2       L 4.208556 4.117647 3.796791 4.018717 3.848930 3.911765 3.676471
##        C3       C4       C5       E1       E2       E3       E4       E5
## 1 4.95122 5.268293 4.512195 4.883469 4.859079 5.102981 5.246612 5.371274
## 2 3.76738 3.795455 3.016043 3.106952 2.937166 3.097594 3.518717 3.493316
##         N1       N2       N3       N4       N5       O1       O2       O3
## 1 3.368564 3.981030 3.785908 3.311653 3.279133 5.479675 4.989160 5.346883
## 2 2.680481 3.258021 2.891711 3.258021 2.739305 4.287433 3.902406 3.754011
##         O4       O5
## 1 5.447154 5.211382
## 2 4.660428 4.100267

將第一列刪除

bfim <- t(bfim[, -1])

t-test

item_t <- sapply(nabfi[, 1:25], function(x) t.test(x ~ nabfi$grp)$statistic)
print(item_t)
##       A1.t       A2.t       A3.t       A4.t       A5.t       C1.t       C2.t 
## 11.1945059 24.6592025 27.2286394 18.1181944 23.9859251 20.1784506 22.8399194 
##       C3.t       C4.t       C5.t       E1.t       E2.t       E3.t       E4.t 
## 15.1728244 19.2355084 15.1650148 19.2682311 21.7245113 29.8583238 22.0618203 
##       E5.t       N1.t       N2.t       N3.t       N4.t       N5.t       O1.t 
## 27.9870044  6.5989063  7.3359003  8.8548083  0.5245383  5.0110134 19.8655575 
##       O2.t       O3.t       O4.t       O5.t 
## 11.6591368 25.9396177 11.9439768 15.1045027
rslt2 <- data.frame(Item = rownames(bfim), low.mean.score = bfim[, 2], 
                    high.mean.score = bfim[, 1], mean.dif = bfim[, 1]-bfim[,2], 
                    t.value = item_t)

rslt2 |> knitr::kable()
Item low.mean.score high.mean.score mean.dif t.value
A1 A1 4.208556 5.146342 0.9377853 11.1945059
A2 A2 4.117647 5.569106 1.4514586 24.6592025
A3 A3 3.796791 5.533875 1.7370839 27.2286394
A4 A4 4.018717 5.455285 1.4365680 18.1181944
A5 A5 3.848930 5.430894 1.5819638 23.9859251
C1 C1 3.911765 5.284553 1.3727881 20.1784506
C2 C2 3.676471 5.235772 1.5593018 22.8399194
C3 C3 3.767380 4.951219 1.1838398 15.1728244
C4 C4 3.795454 5.268293 1.4728381 19.2355084
C5 C5 3.016043 4.512195 1.4961523 15.1650148
E1 E1 3.106952 4.883469 1.7765170 19.2682311
E2 E2 2.937166 4.859079 1.9219128 21.7245113
E3 E3 3.097594 5.102981 2.0053874 29.8583238
E4 E4 3.518717 5.246613 1.7278959 22.0618203
E5 E5 3.493316 5.371274 1.8779582 27.9870044
N1 N1 2.680481 3.368564 0.6880824 6.5989063
N2 N2 3.258021 3.981030 0.7230084 7.3359003
N3 N3 2.891711 3.785908 0.8941966 8.8548083
N4 N4 3.258021 3.311653 0.0536317 0.5245383
N5 N5 2.739305 3.279133 0.5398280 5.0110134
O1 O1 4.287433 5.479675 1.1922416 19.8655575
O2 O2 3.902406 4.989160 1.0867535 11.6591368
O3 O3 3.754011 5.346883 1.5928728 25.9396177
O4 O4 4.660428 5.447154 0.7867267 11.9439768
O5 O5 4.100267 5.211382 1.1111147 15.1045027
ggplot(data = rslt2, aes(x = reorder(Item, t.value, max), y = t.value)) +
 geom_point() +
 geom_hline(yintercept = 2, linetype = "dashed") +
 coord_flip() +
 labs(x = "Items", y = "t-value") +
 theme_bw()

項目相關

itotr <- psych::alpha(bfi[, 1:25])$item.stats[, "r.drop"]
## Some items ( N1 N2 N3 N4 N5 O4 ) were negatively correlated with the total scale and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
lbfi <- list(x = nabfi[, 1:5], y  = nabfi[, 6:10], z = nabfi[ , 11:15], a = nabfi[ , 16:20], b = nabfi[ , 21:25])
isubalpha <- lapply(lbfi, psych::alpha)
isubr <- c(isubalpha$x$item.stats[, "r.drop"],
             isubalpha$y$item.stats[, "r.drop"],
             isubalpha$z$item.stats[, "r.drop"], 
           isubalpha$a$item.stats[, "r.drop"], 
           isubalpha$b$item.stats[, "r.drop"])
rslt3 <- as.data.frame(t(rbind(itotr, isubr)))
names(rslt3) <- c("項目總分相關", "分量表項目總分相關")

rslt3 |> knitr::kable()
項目總分相關 分量表項目總分相關
0.1338772 0.3190962
0.4329093 0.5759233
0.4465656 0.6035693
0.2810327 0.4145254
0.3935261 0.5004352
0.3072427 0.4654163
0.3573829 0.5128535
0.2590389 0.4769297
0.2668326 0.5731250
0.2134501 0.4860793
0.3011816 0.5153693
0.3132431 0.6142087
0.4642659 0.5049821
0.3699658 0.5827738
0.4670182 0.4634332
0.0529007 0.6778437
0.0677008 0.6548330
0.0951782 0.6781411
-0.1043410 0.5485366
0.0195477 0.4874632
0.3172428 0.3981233
0.0963448 0.3509392
0.4134673 0.4546552
0.1310377 0.2167170
0.1861993 0.4197456

題目信度

itotalpha <- psych::alpha(nabfi[, 1:25])$alpha.drop[, "raw_alpha"]
## Warning in psych::alpha(nabfi[, 1:25]): Some items were negatively correlated with the total scale and probably 
## should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
## Some items ( N1 N2 N3 N4 N5 O4 ) were negatively correlated with the total scale and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
ialphad <- c(isubalpha$x$alpha.drop[, "raw_alpha"],
             isubalpha$y$alpha.drop[, "raw_alpha"],
             isubalpha$z$alpha.drop[, "raw_alpha"],
             isubalpha$a$item.stats[, "r.drop"], 
           isubalpha$b$item.stats[, "r.drop"])
rslt4 <- as.data.frame(t(rbind(itotalpha, ialphad)))
names(rslt4) <- c("Main Reliability(item drop)", "Sub Reliability (item drop)")
rslt4 |> knitr::kable()
Main Reliability(item drop) Sub Reliability (item drop)
0.6979605 0.7314607
0.6762664 0.6332003
0.6734577 0.6150842
0.6850740 0.6963136
0.6783576 0.6582421
0.6833182 0.7044913
0.6801730 0.6869874
0.6885173 0.7000899
0.6868631 0.6630853
0.6925096 0.7031818
0.6840630 0.7312733
0.6821853 0.6924954
0.6712580 0.7329196
0.6781666 0.7056423
0.6714425 0.7457366
0.7066986 0.6778437
0.7048568 0.6548330
0.7032029 0.6781411
0.7199257 0.5485366
0.7107374 0.4874632
0.6852134 0.3981233
0.7017610 0.3509392
0.6773863 0.4546552
0.6979447 0.2167170
0.6938900 0.4197456