library(psych)
library(moments)
library(lavaan)
library(reshape2)
library(tidyverse)
data("bfi")
dta<-(bfi)
bfi[, c(1, 9, 10, 11, 12, 22)]=7-bfi[, c(1, 9, 10, 11, 12, 22)]
dta<-dta[ , 1:25]
#將NA去除(遺漏值)
dtna<-na.omit(dta$grp)
summary(dta)
##        A1              A2              A3              A4            A5      
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.0   Min.   :1.00  
##  1st Qu.:1.000   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:4.0   1st Qu.:4.00  
##  Median :2.000   Median :5.000   Median :5.000   Median :5.0   Median :5.00  
##  Mean   :2.413   Mean   :4.802   Mean   :4.604   Mean   :4.7   Mean   :4.56  
##  3rd Qu.:3.000   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.0   3rd Qu.:5.00  
##  Max.   :6.000   Max.   :6.000   Max.   :6.000   Max.   :6.0   Max.   :6.00  
##  NA's   :16      NA's   :27      NA's   :26      NA's   :19    NA's   :16    
##        C1              C2             C3              C4              C5       
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:4.00   1st Qu.:4.000   1st Qu.:1.000   1st Qu.:2.000  
##  Median :5.000   Median :5.00   Median :5.000   Median :2.000   Median :3.000  
##  Mean   :4.502   Mean   :4.37   Mean   :4.304   Mean   :2.553   Mean   :3.297  
##  3rd Qu.:5.000   3rd Qu.:5.00   3rd Qu.:5.000   3rd Qu.:4.000   3rd Qu.:5.000  
##  Max.   :6.000   Max.   :6.00   Max.   :6.000   Max.   :6.000   Max.   :6.000  
##  NA's   :21      NA's   :24     NA's   :20      NA's   :26      NA's   :16     
##        E1              E2              E3              E4       
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:3.000   1st Qu.:4.000  
##  Median :3.000   Median :3.000   Median :4.000   Median :5.000  
##  Mean   :2.974   Mean   :3.142   Mean   :4.001   Mean   :4.422  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:5.000   3rd Qu.:6.000  
##  Max.   :6.000   Max.   :6.000   Max.   :6.000   Max.   :6.000  
##  NA's   :23      NA's   :16      NA's   :25      NA's   :9      
##        E5              N1              N2              N3       
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median :5.000   Median :3.000   Median :4.000   Median :3.000  
##  Mean   :4.416   Mean   :2.929   Mean   :3.508   Mean   :3.217  
##  3rd Qu.:5.000   3rd Qu.:4.000   3rd Qu.:5.000   3rd Qu.:4.000  
##  Max.   :6.000   Max.   :6.000   Max.   :6.000   Max.   :6.000  
##  NA's   :21      NA's   :22      NA's   :21      NA's   :11     
##        N4              N5             O1              O2              O3       
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:2.00   1st Qu.:4.000   1st Qu.:1.000   1st Qu.:4.000  
##  Median :3.000   Median :3.00   Median :5.000   Median :2.000   Median :5.000  
##  Mean   :3.186   Mean   :2.97   Mean   :4.816   Mean   :2.713   Mean   :4.438  
##  3rd Qu.:4.000   3rd Qu.:4.00   3rd Qu.:6.000   3rd Qu.:4.000   3rd Qu.:5.000  
##  Max.   :6.000   Max.   :6.00   Max.   :6.000   Max.   :6.000   Max.   :6.000  
##  NA's   :36      NA's   :29     NA's   :22                      NA's   :28     
##        O4              O5      
##  Min.   :1.000   Min.   :1.00  
##  1st Qu.:4.000   1st Qu.:1.00  
##  Median :5.000   Median :2.00  
##  Mean   :4.892   Mean   :2.49  
##  3rd Qu.:6.000   3rd Qu.:3.00  
##  Max.   :6.000   Max.   :6.00  
##  NA's   :14      NA's   :20
my_summary <- function(x) {
 require(moments)
 funs <- c(mean, sd, skewness, kurtosis)
 sapply(funs, function(f) f(x, na.rm = T))
 }
dta_desc <- apply(dta, 2,my_summary)
head(dta_desc)
##             A1        A2        A3        A4        A5         C1         C2
## [1,] 2.4134339  4.802380  4.603821  4.699748  4.560345  4.5023390  4.3699568
## [2,] 1.4077372  1.172020  1.301834  1.479633  1.258512  1.2413465  1.3183465
## [3,] 0.8254883 -1.124894 -0.998997 -1.031499 -0.847690 -0.8551631 -0.7422207
## [4,] 2.6942957  4.057765  3.444524  3.042640  3.161176  3.3068088  2.8656243
##              C3        C4         C5        E1        E2         E3         E4
## [1,]  4.3039568 2.5533526 3.29669540 2.9744328 3.1418822  4.0007207  4.4224292
## [2,]  1.2885518 1.3751181 1.62854187 1.6315055 1.6052103  1.3527188  1.4575174
## [3,] -0.6918287 0.5964955 0.06620282 0.3736569 0.2209396 -0.4706335 -0.8241831
## [4,]  2.8697332 2.3802970 1.78461246 1.9090390 1.8526925  2.5367154  2.6977079
##             E5        N1          N2        N3        N4        N5         O1
## [1,]  4.416337 2.9290857  3.50773660 3.2165651 3.1856006 2.9696860  4.8160547
## [2,]  1.334768 1.5709175  1.52594359 1.6029021 1.5696851 1.6186474  1.1295303
## [3,] -0.777486 0.3714298 -0.07698521 0.1506797 0.1969966 0.3744599 -0.8973669
## [4,]  2.908401 1.9885722  1.95035250 1.8227046 1.9090309 1.9401121  3.4277033
##            O2         O3        O4        O5
## [1,] 2.713214  4.4383117  4.892319 2.4895683
## [2,] 1.565152  1.2209011  1.221250 1.3279590
## [3,] 0.585679 -0.7730516 -1.218247 0.7384818
## [4,] 2.188889  3.3043641  4.082686 2.7630094
rownames(dta_desc) <- c("mean", "sd", "skewness", "kurtosis")
rslt1 <- as.data.frame(t(dta_desc))
rslt1 |> knitr::kable()
mean sd skewness kurtosis
A1 2.413434 1.407737 0.8254883 2.694296
A2 4.802380 1.172020 -1.1248938 4.057765
A3 4.603821 1.301834 -0.9989970 3.444524
A4 4.699748 1.479633 -1.0314991 3.042640
A5 4.560345 1.258512 -0.8476900 3.161176
C1 4.502339 1.241346 -0.8551631 3.306809
C2 4.369957 1.318347 -0.7422207 2.865624
C3 4.303957 1.288552 -0.6918287 2.869733
C4 2.553353 1.375118 0.5964955 2.380297
C5 3.296695 1.628542 0.0662028 1.784612
E1 2.974433 1.631506 0.3736569 1.909039
E2 3.141882 1.605210 0.2209396 1.852693
E3 4.000721 1.352719 -0.4706335 2.536715
E4 4.422429 1.457517 -0.8241831 2.697708
E5 4.416337 1.334768 -0.7774860 2.908401
N1 2.929086 1.570917 0.3714298 1.988572
N2 3.507737 1.525944 -0.0769852 1.950352
N3 3.216565 1.602902 0.1506797 1.822705
N4 3.185601 1.569685 0.1969966 1.909031
N5 2.969686 1.618647 0.3744599 1.940112
O1 4.816055 1.129530 -0.8973669 3.427703
O2 2.713214 1.565152 0.5856790 2.188889
O3 4.438312 1.220901 -0.7730516 3.304364
O4 4.892319 1.221250 -1.2182471 4.082686
O5 2.489568 1.327959 0.7384818 2.763009
dtal_desc <- melt(dta_desc)
names(dtal_desc)[1:2] <- c("moments", "items")
head(dtal_desc)
##    moments items     value
## 1     mean    A1 2.4134339
## 2       sd    A1 1.4077372
## 3 skewness    A1 0.8254883
## 4 kurtosis    A1 2.6942957
## 5     mean    A2 4.8023801
## 6       sd    A2 1.1720199
ggplot(data = subset(dtal_desc, moments == "mean"),
 aes(x = reorder(items, value, max), y = value, group = moments)) +
 geom_point(size = 3) +
 geom_hline(yintercept = mean(t(dta_desc["mean",])) +
 c(-1.5, 0, 1.5) * sd(t(dta_desc["mean", ])), linetype = "dashed") +
 coord_flip() +
 labs(x = "items",  y = "mean") +
  theme_bw()

從此圖可看出,O4有較大的平均數,則受試者在此題的得分普遍較高。

ggplot(data = subset(dtal_desc, moments == "sd"),
 aes(x = reorder(items, value, max), y = value, group = moments)) +
 geom_point(size = 3) +
 geom_hline(yintercept = mean(t(dta_desc["sd",])) +
 c(-1.5, 0, 1.5) * sd(t(dta_desc["sd", ])), linetype = "dashed") +
 coord_flip() +
 labs(x = "items",  y = "sd") +
  theme_bw()

從此圖可看出,E1有較大的標準差,則其受試者的分數變異較大。

ggplot(data = subset(dtal_desc, moments == "skewness"),
 aes(x = reorder(items, value, max), y = value, group = moments)) +
 geom_point(size = 3) +
 geom_hline(yintercept = mean(t(dta_desc["skewness",])) +
 c(-1.5, 0, 1.5) * sd(t(dta_desc["skewness", ])), linetype = "dashed") +
 coord_flip() +
 labs(x = "items",  y = "skewness") +
  theme_bw()

從此圖可看出,A1有較大的偏態值,則受試者普遍在A1得到低分。而O4則有較低的偏態值,則受試者普遍在O4得到高分。(O4的平均數也是全部題目中最高的)

ggplot(data = subset(dtal_desc, moments == "kurtosis"),
 aes(x = reorder(items, value, max), y = value, group = moments)) +
 geom_point(size = 3) +
 geom_hline(yintercept = mean(t(dta_desc["kurtosis",])) +
 c(-1.5, 0, 1.5) * sd(t(dta_desc["kurtosis", ])), linetype = "dashed") +
 coord_flip() +
 labs(x = "items",  y = "kurtosis") +
  theme_bw()

從此圖可看出,O4有較大的峰態值,則受試者的分數較為聚集在平均數。 # Discrimination index 計算區變度,以總分為主,選取低分組與高分組,比較各題在兩組上的差異。

dta$tot <- apply(dta, 1, sum)
dta$grp <- NA
dta$grp[rank(dta$tot) < 301*.27] <- "L"
dta$grp[rank(dta$tot) > 301*.73] <- "H"
dta$grp <- factor(dta$grp)
head(dta)
##       A1 A2 A3 A4 A5 C1 C2 C3 C4 C5 E1 E2 E3 E4 E5 N1 N2 N3 N4 N5 O1 O2 O3 O4
## 61617  2  4  3  4  4  2  3  3  4  4  3  3  3  4  4  3  4  2  2  3  3  6  3  4
## 61618  2  4  5  2  5  5  4  4  3  4  1  1  6  4  3  3  3  3  5  5  4  2  4  3
## 61620  5  4  5  4  4  4  5  4  2  5  2  4  4  4  5  4  5  4  2  3  4  2  5  5
## 61621  4  4  6  5  5  4  4  3  5  5  5  3  4  4  4  2  5  2  4  1  3  3  4  3
## 61622  2  3  3  4  5  4  4  5  3  2  2  2  5  4  5  2  3  4  4  3  3  3  4  3
## 61623  6  6  5  6  5  6  6  6  1  3  2  1  6  5  6  3  5  2  2  3  4  3  5  6
##       O5 tot grp
## 61617  3  83   H
## 61618  3  88   H
## 61620  2  97   H
## 61621  5  97   H
## 61622  3  85   H
## 61623  1 104   H

算高低分組平均數

dtam <- aggregate(dta[, 1:25], by=list(dta$grp), mean)
print(dtam)
##   Group.1       A1       A2       A3       A4      A5       C1       C2
## 1       H       NA       NA       NA       NA      NA       NA       NA
## 2       L 2.337662 3.428571 2.805195 3.064935 3.25974 3.415584 3.272727
##         C3       C4       C5       E1       E2       E3 E4      E5       N1
## 1       NA       NA       NA       NA       NA       NA NA      NA       NA
## 2 3.337662 2.441558 2.974026 3.324675 3.467532 2.688312  3 3.12987 1.792208
##         N2       N3       N4       N5       O1       O2       O3       O4
## 1       NA       NA       NA       NA       NA 2.759352       NA       NA
## 2 2.220779 1.766234 2.207792 1.714286 3.935065 2.077922 3.467532 4.337662
##         O5
## 1       NA
## 2 2.064935

將第一欄刪除

dtam <- t(dtam[, -1])

t-test

item_t <- sapply(dta[, 1:25], function(x) t.test(x ~ dta$grp)$statistic)
print(item_t)
##       A1.t       A2.t       A3.t       A4.t       A5.t       C1.t       C2.t 
##  0.4683277  8.3866919  9.9976555  8.7697174  7.6360472  5.9432310  6.4161713 
##       C3.t       C4.t       C5.t       E1.t       E2.t       E3.t       E4.t 
##  5.8664556  0.8344815  1.8032145 -1.7610877 -1.5075345  8.5529826  7.9298711 
##       E5.t       N1.t       N2.t       N3.t       N4.t       N5.t       O1.t 
##  7.6306539  8.9229466  9.3995072 12.1737530  6.2085730 10.8728143  4.9588954 
##       O2.t       O3.t       O4.t       O5.t 
##  4.2974027  5.5307518  3.6653979  3.1349902
# assigning the results to rslt2
rslt2 <- data.frame(Item = rownames(dtam), low.mean.score = dtam[, 2], 
                    high.mean.score = dtam[, 1], mean.dif = dtam[, 1]-dtam[,2], 
                    t.value = item_t)

rslt2 |> knitr::kable()
Item low.mean.score high.mean.score mean.dif t.value
A1 A1 2.337662 NA NA 0.4683277
A2 A2 3.428571 NA NA 8.3866919
A3 A3 2.805195 NA NA 9.9976555
A4 A4 3.064935 NA NA 8.7697174
A5 A5 3.259740 NA NA 7.6360472
C1 C1 3.415584 NA NA 5.9432310
C2 C2 3.272727 NA NA 6.4161713
C3 C3 3.337662 NA NA 5.8664556
C4 C4 2.441558 NA NA 0.8344815
C5 C5 2.974026 NA NA 1.8032145
E1 E1 3.324675 NA NA -1.7610877
E2 E2 3.467532 NA NA -1.5075345
E3 E3 2.688312 NA NA 8.5529826
E4 E4 3.000000 NA NA 7.9298711
E5 E5 3.129870 NA NA 7.6306539
N1 N1 1.792208 NA NA 8.9229466
N2 N2 2.220779 NA NA 9.3995072
N3 N3 1.766234 NA NA 12.1737530
N4 N4 2.207792 NA NA 6.2085730
N5 N5 1.714286 NA NA 10.8728143
O1 O1 3.935065 NA NA 4.9588954
O2 O2 2.077922 2.759352 0.68143 4.2974027
O3 O3 3.467532 NA NA 5.5307518
O4 O4 4.337662 NA NA 3.6653979
O5 O5 2.064935 NA NA 3.1349902
ggplot(data = rslt2, aes(x = reorder(Item, t.value, max), y = t.value)) +
 geom_point() +
 geom_hline(yintercept = 2, linetype = "dashed") +
 coord_flip() +
 labs(x = "Items", y = "t-value") +
 theme_bw()