library(psych)
library(moments)
library(lavaan)
## This is lavaan 0.6-11
## lavaan is FREE software! Please report any bugs.
## 
## 載入套件:'lavaan'
## 下列物件被遮斷自 'package:psych':
## 
##     cor2cov
library(reshape2)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.9
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x ggplot2::%+%()   masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
## x dplyr::filter()  masks stats::filter()
## x dplyr::lag()     masks stats::lag()
data(bfi)
str(bfi)
## 'data.frame':    2800 obs. of  28 variables:
##  $ A1       : int  2 2 5 4 2 6 2 4 4 2 ...
##  $ A2       : int  4 4 4 4 3 6 5 3 3 5 ...
##  $ A3       : int  3 5 5 6 3 5 5 1 6 6 ...
##  $ A4       : int  4 2 4 5 4 6 3 5 3 6 ...
##  $ A5       : int  4 5 4 5 5 5 5 1 3 5 ...
##  $ C1       : int  2 5 4 4 4 6 5 3 6 6 ...
##  $ C2       : int  3 4 5 4 4 6 4 2 6 5 ...
##  $ C3       : int  3 4 4 3 5 6 4 4 3 6 ...
##  $ C4       : int  4 3 2 5 3 1 2 2 4 2 ...
##  $ C5       : int  4 4 5 5 2 3 3 4 5 1 ...
##  $ E1       : int  3 1 2 5 2 2 4 3 5 2 ...
##  $ E2       : int  3 1 4 3 2 1 3 6 3 2 ...
##  $ E3       : int  3 6 4 4 5 6 4 4 NA 4 ...
##  $ E4       : int  4 4 4 4 4 5 5 2 4 5 ...
##  $ E5       : int  4 3 5 4 5 6 5 1 3 5 ...
##  $ N1       : int  3 3 4 2 2 3 1 6 5 5 ...
##  $ N2       : int  4 3 5 5 3 5 2 3 5 5 ...
##  $ N3       : int  2 3 4 2 4 2 2 2 2 5 ...
##  $ N4       : int  2 5 2 4 4 2 1 6 3 2 ...
##  $ N5       : int  3 5 3 1 3 3 1 4 3 4 ...
##  $ O1       : int  3 4 4 3 3 4 5 3 6 5 ...
##  $ O2       : int  6 2 2 3 3 3 2 2 6 1 ...
##  $ O3       : int  3 4 5 4 4 5 5 4 6 5 ...
##  $ O4       : int  4 3 5 3 3 6 6 5 6 5 ...
##  $ O5       : int  3 3 2 5 3 1 1 3 1 2 ...
##  $ gender   : int  1 2 2 2 1 2 1 1 1 2 ...
##  $ education: int  NA NA NA NA NA 3 NA 2 1 NA ...
##  $ age      : int  16 18 17 17 17 21 18 19 19 17 ...
bfi[, c(1,9,10,11,12,22)]<- 7-bfi[, c(1,9,10,11,12,22)]
bfi <-  bfi[ , 1:25]
head(bfi)
##       A1 A2 A3 A4 A5 C1 C2 C3 C4 C5 E1 E2 E3 E4 E5 N1 N2 N3 N4 N5 O1 O2 O3 O4
## 61617  5  4  3  4  4  2  3  3  3  3  4  4  3  4  4  3  4  2  2  3  3  1  3  4
## 61618  5  4  5  2  5  5  4  4  4  3  6  6  6  4  3  3  3  3  5  5  4  5  4  3
## 61620  2  4  5  4  4  4  5  4  5  2  5  3  4  4  5  4  5  4  2  3  4  5  5  5
## 61621  3  4  6  5  5  4  4  3  2  2  2  4  4  4  4  2  5  2  4  1  3  4  4  3
## 61622  5  3  3  4  5  4  4  5  4  5  5  5  5  4  5  2  3  4  4  3  3  4  4  3
## 61623  1  6  5  6  5  6  6  6  6  4  5  6  6  5  6  3  5  2  2  3  4  4  5  6
##       O5
## 61617  3
## 61618  3
## 61620  2
## 61621  5
## 61622  3
## 61623  1
my_summary <- function(x) {
 require(moments)
 funs <- c(mean, sd, skewness, kurtosis)
 sapply(funs, function(f) f(x, na.rm = T))
 }
bfi_desc <- apply(bfi, 2, my_summary)
head(bfi_desc)
##              A1        A2        A3        A4        A5         C1         C2
## [1,]  4.5865661  4.802380  4.603821  4.699748  4.560345  4.5023390  4.3699568
## [2,]  1.4077372  1.172020  1.301834  1.479633  1.258512  1.2413465  1.3183465
## [3,] -0.8254883 -1.124894 -0.998997 -1.031499 -0.847690 -0.8551631 -0.7422207
## [4,]  2.6942957  4.057765  3.444524  3.042640  3.161176  3.3068088  2.8656243
##              C3         C4          C5         E1         E2         E3
## [1,]  4.3039568  4.4466474  3.70330460  4.0255672  3.8581178  4.0007207
## [2,]  1.2885518  1.3751181  1.62854187  1.6315055  1.6052103  1.3527188
## [3,] -0.6918287 -0.5964955 -0.06620282 -0.3736569 -0.2209396 -0.4706335
## [4,]  2.8697332  2.3802970  1.78461246  1.9090390  1.8526925  2.5367154
##              E4        E5        N1          N2        N3        N4        N5
## [1,]  4.4224292  4.416337 2.9290857  3.50773660 3.2165651 3.1856006 2.9696860
## [2,]  1.4575174  1.334768 1.5709175  1.52594359 1.6029021 1.5696851 1.6186474
## [3,] -0.8241831 -0.777486 0.3714298 -0.07698521 0.1506797 0.1969966 0.3744599
## [4,]  2.6977079  2.908401 1.9885722  1.95035250 1.8227046 1.9090309 1.9401121
##              O1        O2         O3        O4        O5
## [1,]  4.8160547  4.286786  4.4383117  4.892319 2.4895683
## [2,]  1.1295303  1.565152  1.2209011  1.221250 1.3279590
## [3,] -0.8973669 -0.585679 -0.7730516 -1.218247 0.7384818
## [4,]  3.4277033  2.188889  3.3043641  4.082686 2.7630094
rownames(bfi_desc) <- c("mean", "sd", "skewness", "kurtosis")
rslt1 <- as.data.frame(t(bfi_desc))
rslt1 |> knitr::kable()
mean sd skewness kurtosis
A1 4.586566 1.407737 -0.8254883 2.694296
A2 4.802380 1.172020 -1.1248938 4.057765
A3 4.603821 1.301834 -0.9989970 3.444524
A4 4.699748 1.479633 -1.0314991 3.042640
A5 4.560345 1.258512 -0.8476900 3.161176
C1 4.502339 1.241346 -0.8551631 3.306809
C2 4.369957 1.318347 -0.7422207 2.865624
C3 4.303957 1.288552 -0.6918287 2.869733
C4 4.446647 1.375118 -0.5964955 2.380297
C5 3.703305 1.628542 -0.0662028 1.784612
E1 4.025567 1.631506 -0.3736569 1.909039
E2 3.858118 1.605210 -0.2209396 1.852693
E3 4.000721 1.352719 -0.4706335 2.536715
E4 4.422429 1.457517 -0.8241831 2.697708
E5 4.416337 1.334768 -0.7774860 2.908401
N1 2.929086 1.570917 0.3714298 1.988572
N2 3.507737 1.525944 -0.0769852 1.950352
N3 3.216565 1.602902 0.1506797 1.822705
N4 3.185601 1.569685 0.1969966 1.909031
N5 2.969686 1.618647 0.3744599 1.940112
O1 4.816055 1.129530 -0.8973669 3.427703
O2 4.286786 1.565152 -0.5856790 2.188889
O3 4.438312 1.220901 -0.7730516 3.304364
O4 4.892319 1.221250 -1.2182471 4.082686
O5 2.489568 1.327959 0.7384818 2.763009
bfil_desc <- melt(bfi_desc)
names(bfil_desc)[1:2] <- c("moments", "items")
head(bfil_desc)
##    moments items      value
## 1     mean    A1  4.5865661
## 2       sd    A1  1.4077372
## 3 skewness    A1 -0.8254883
## 4 kurtosis    A1  2.6942957
## 5     mean    A2  4.8023801
## 6       sd    A2  1.1720199
ggplot(data = subset(bfil_desc, moments == "mean"),
 aes(x = reorder(items, value, max), y = value, group = moments)) +
 geom_point(size = 3) +
 geom_hline(yintercept = mean(t(bfi_desc["mean",])) +
 c(-1.5, 0, 1.5) * sd(t(bfi_desc["mean", ])), linetype = "dashed") +
 coord_flip() +
 labs(x = "items",  y = "mean") +
  theme_bw()

ggplot(data = subset(bfil_desc, moments == "sd"),
 aes(x = reorder(items, value, max), y = value, group = moments)) +
 geom_point(size = 3) +
 geom_hline(yintercept = mean(t(bfi_desc["sd",])) +
 c(-1.5, 0, 1.5) * sd(t(bfi_desc["sd", ])), linetype = "dashed") +
 coord_flip() +
 labs(x = "items",  y = "sd") +
  theme_bw()

ggplot(data = subset(bfil_desc, moments == "skewness"),
 aes(x = reorder(items, value, max), y = value, group = moments)) +
 geom_point(size = 3) +
 geom_hline(yintercept = mean(t(bfi_desc["skewness",])) +
 c(-1.5, 0, 1.5) * sd(t(bfi_desc["skewness", ])), linetype = "dashed") +
 coord_flip() +
 labs(x = "items",  y = "skewness") +
  theme_bw()

ggplot(data = subset(bfil_desc, moments == "kurtosis"),
 aes(x = reorder(items, value, max), y = value, group = moments)) +
 geom_point(size = 3) +
 geom_hline(yintercept = mean(t(bfi_desc["kurtosis",])) +
 c(-1.5, 0, 1.5) * sd(t(bfi_desc["kurtosis", ])), linetype = "dashed") +
 coord_flip() +
 labs(x = "items",  y = "kurtosis") +
  theme_bw()

Discrimination index 計算區變度

#先去掉原資料的遺漏值(NA)

bfi = na.omit(bfi)
str(bfi)
## 'data.frame':    2436 obs. of  25 variables:
##  $ A1: num  5 5 2 3 5 1 5 3 5 3 ...
##  $ A2: int  4 4 4 4 3 6 5 3 5 4 ...
##  $ A3: int  3 5 5 6 3 5 5 1 6 5 ...
##  $ A4: int  4 2 4 5 4 6 3 5 6 6 ...
##  $ A5: int  4 5 4 5 5 5 5 1 5 5 ...
##  $ C1: int  2 5 4 4 4 6 5 3 6 4 ...
##  $ C2: int  3 4 5 4 4 6 4 2 5 3 ...
##  $ C3: int  3 4 4 3 5 6 4 4 6 5 ...
##  $ C4: num  3 4 5 2 4 6 5 5 5 4 ...
##  $ C5: num  3 3 2 2 5 4 4 3 6 5 ...
##  $ E1: num  4 6 5 2 5 5 3 4 5 6 ...
##  $ E2: num  4 6 3 4 5 6 4 1 5 4 ...
##  $ E3: int  3 6 4 4 5 6 4 4 4 2 ...
##  $ E4: int  4 4 4 4 4 5 5 2 5 5 ...
##  $ E5: int  4 3 5 4 5 6 5 1 5 4 ...
##  $ N1: int  3 3 4 2 2 3 1 6 5 3 ...
##  $ N2: int  4 3 5 5 3 5 2 3 5 3 ...
##  $ N3: int  2 3 4 2 4 2 2 2 5 4 ...
##  $ N4: int  2 5 2 4 4 2 1 6 2 2 ...
##  $ N5: int  3 5 3 1 3 3 1 4 4 3 ...
##  $ O1: int  3 4 4 3 3 4 5 3 5 5 ...
##  $ O2: num  1 5 5 4 4 4 5 5 6 4 ...
##  $ O3: int  3 4 5 4 4 5 5 4 5 5 ...
##  $ O4: int  4 3 5 3 3 6 6 5 5 6 ...
##  $ O5: int  3 3 2 5 3 1 1 3 2 3 ...
##  - attr(*, "na.action")= 'omit' Named int [1:364] 9 12 35 42 63 66 72 90 101 107 ...
##   ..- attr(*, "names")= chr [1:364] "61630" "61636" "61684" "61693" ...

#去除遺漏值後,資料剩下2436筆

bfi$tot <- apply(bfi, 1, sum)
bfi$grp <- NA
bfi$grp[rank(bfi$tot) < 2436*.3] <- "L"
bfi$grp[rank(bfi$tot) > 2436*.7] <- "H"
bfi$grp <- factor(bfi$grp)
head(bfi)
##       A1 A2 A3 A4 A5 C1 C2 C3 C4 C5 E1 E2 E3 E4 E5 N1 N2 N3 N4 N5 O1 O2 O3 O4
## 61617  5  4  3  4  4  2  3  3  3  3  4  4  3  4  4  3  4  2  2  3  3  1  3  4
## 61618  5  4  5  2  5  5  4  4  4  3  6  6  6  4  3  3  3  3  5  5  4  5  4  3
## 61620  2  4  5  4  4  4  5  4  5  2  5  3  4  4  5  4  5  4  2  3  4  5  5  5
## 61621  3  4  6  5  5  4  4  3  2  2  2  4  4  4  4  2  5  2  4  1  3  4  4  3
## 61622  5  3  3  4  5  4  4  5  4  5  5  5  5  4  5  2  3  4  4  3  3  4  4  3
## 61623  1  6  5  6  5  6  6  6  6  4  5  6  6  5  6  3  5  2  2  3  4  4  5  6
##       O5 tot  grp
## 61617  3  81    L
## 61618  3 104 <NA>
## 61620  2  99 <NA>
## 61621  5  89    L
## 61622  3  99 <NA>
## 61623  1 114    H

算高低分組平均數

bfim <- aggregate(bfi[, 1:25], by=list(bfi$grp), mean)
print(bfim)
##   Group.1       A1       A2       A3       A4       A5       C1       C2
## 1       H 4.977483 5.417219 5.329801 5.307285 5.202649 5.092715 5.078146
## 2       L 4.206989 4.091398 3.750000 3.951613 3.845430 3.944892 3.672043
##         C3       C4       C5       E1       E2       E3       E4      E5
## 1 4.859603 4.984106 4.313907 4.699338 4.574834 4.814570 5.115232 5.18543
## 2 3.727151 3.864247 3.029570 3.115591 2.963710 3.119624 3.475806 3.49328
##         N1       N2       N3       N4       N5       O1       O2       O3
## 1 3.344371 3.927152 3.649007 3.282119 3.319205 5.278146 4.659603 5.062252
## 2 2.610215 3.223118 2.814516 3.197581 2.690860 4.354839 4.016129 3.838710
##         O4      O5
## 1 5.256954 2.33245
## 2 4.684140 2.56586

將第一欄刪除

bfim <- t(bfim[, -1])

t-test

item_t <- sapply(bfi[, 1:25], function(x) t.test(x ~ bfi$grp)$statistic)
print(item_t)
##      A1.t      A2.t      A3.t      A4.t      A5.t      C1.t      C2.t      C3.t 
## 10.815628 23.821714 26.464655 18.801098 22.747181 18.698602 22.526249 17.767133 
##      C4.t      C5.t      E1.t      E2.t      E3.t      E4.t      E5.t      N1.t 
## 16.402713 16.029028 20.396905 21.062974 28.139909 23.411651 27.611165  9.155108 
##      N2.t      N3.t      N4.t      N5.t      O1.t      O2.t      O3.t      O4.t 
##  9.066525 10.300246  1.031340  7.411300 16.415663  7.949024 20.746077  9.439044 
##      O5.t 
## -3.402291
rslt2 <- data.frame(Item = rownames(bfim), low.mean.score = bfim[, 2], 
high.mean.score = bfim[, 1], mean.dif = bfim[, 1]-bfim[,2], 
t.value = item_t)

rslt2 |> knitr::kable()
Item low.mean.score high.mean.score mean.dif t.value
A1 A1 4.206989 4.977483 0.7704942 10.815628
A2 A2 4.091398 5.417218 1.3258207 23.821714
A3 A3 3.750000 5.329801 1.5798013 26.464655
A4 A4 3.951613 5.307285 1.3556719 18.801098
A5 A5 3.845430 5.202649 1.3572189 22.747181
C1 C1 3.944892 5.092715 1.1478228 18.698602
C2 C2 3.672043 5.078146 1.4061027 22.526249
C3 C3 3.727151 4.859603 1.1324521 17.767133
C4 C4 3.864247 4.984106 1.1198586 16.402713
C5 C5 3.029570 4.313907 1.2843374 16.029028
E1 E1 3.115591 4.699338 1.5837464 20.396905
E2 E2 2.963710 4.574834 1.6111248 21.062974
E3 E3 3.119624 4.814570 1.6949459 28.139909
E4 E4 3.475807 5.115232 1.6394253 23.411651
E5 E5 3.493280 5.185430 1.6921509 27.611165
N1 N1 2.610215 3.344371 0.7341558 9.155108
N2 N2 3.223118 3.927152 0.7040340 9.066525
N3 N3 2.814516 3.649007 0.8344905 10.300246
N4 N4 3.197581 3.282119 0.0845386 1.031340
N5 N5 2.690860 3.319205 0.6283451 7.411300
O1 O1 4.354839 5.278146 0.9233070 16.415663
O2 O2 4.016129 4.659603 0.6434736 7.949024
O3 O3 3.838710 5.062252 1.2235420 20.746077
O4 O4 4.684140 5.256954 0.5728139 9.439044
O5 O5 2.565860 2.332450 -0.2334099 -3.402291
ggplot(data = rslt2, aes(x = reorder(Item, t.value, max), y = t.value)) +
 geom_point() +
 geom_hline(yintercept = 2, linetype = "dashed") +
 coord_flip() +
 labs(x = "Items", y = "t-value") +
 theme_bw()

#t value若大於2,表示可以區分高/低分組