library(psych)
library(moments)
library(lavaan)
library(reshape2)
library(tidyverse)
data("bfi")
dta<-(bfi)
bfi[, c(1, 9, 10, 11, 12, 22)]=7-bfi[, c(1, 9, 10, 11, 12, 22)]
dta<-dta[ , 1:25]
#將NA去除(遺漏值)
dtna<-na.omit(dta$grp)
summary(dta)
## A1 A2 A3 A4 A5
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.0 Min. :1.00
## 1st Qu.:1.000 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:4.0 1st Qu.:4.00
## Median :2.000 Median :5.000 Median :5.000 Median :5.0 Median :5.00
## Mean :2.413 Mean :4.802 Mean :4.604 Mean :4.7 Mean :4.56
## 3rd Qu.:3.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.0 3rd Qu.:5.00
## Max. :6.000 Max. :6.000 Max. :6.000 Max. :6.0 Max. :6.00
## NA's :16 NA's :27 NA's :26 NA's :19 NA's :16
## C1 C2 C3 C4 C5
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:4.00 1st Qu.:4.000 1st Qu.:1.000 1st Qu.:2.000
## Median :5.000 Median :5.00 Median :5.000 Median :2.000 Median :3.000
## Mean :4.502 Mean :4.37 Mean :4.304 Mean :2.553 Mean :3.297
## 3rd Qu.:5.000 3rd Qu.:5.00 3rd Qu.:5.000 3rd Qu.:4.000 3rd Qu.:5.000
## Max. :6.000 Max. :6.00 Max. :6.000 Max. :6.000 Max. :6.000
## NA's :21 NA's :24 NA's :20 NA's :26 NA's :16
## E1 E2 E3 E4
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:3.000 1st Qu.:4.000
## Median :3.000 Median :3.000 Median :4.000 Median :5.000
## Mean :2.974 Mean :3.142 Mean :4.001 Mean :4.422
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:5.000 3rd Qu.:6.000
## Max. :6.000 Max. :6.000 Max. :6.000 Max. :6.000
## NA's :23 NA's :16 NA's :25 NA's :9
## E5 N1 N2 N3
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :5.000 Median :3.000 Median :4.000 Median :3.000
## Mean :4.416 Mean :2.929 Mean :3.508 Mean :3.217
## 3rd Qu.:5.000 3rd Qu.:4.000 3rd Qu.:5.000 3rd Qu.:4.000
## Max. :6.000 Max. :6.000 Max. :6.000 Max. :6.000
## NA's :21 NA's :22 NA's :21 NA's :11
## N4 N5 O1 O2 O3
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.00 1st Qu.:4.000 1st Qu.:1.000 1st Qu.:4.000
## Median :3.000 Median :3.00 Median :5.000 Median :2.000 Median :5.000
## Mean :3.186 Mean :2.97 Mean :4.816 Mean :2.713 Mean :4.438
## 3rd Qu.:4.000 3rd Qu.:4.00 3rd Qu.:6.000 3rd Qu.:4.000 3rd Qu.:5.000
## Max. :6.000 Max. :6.00 Max. :6.000 Max. :6.000 Max. :6.000
## NA's :36 NA's :29 NA's :22 NA's :28
## O4 O5
## Min. :1.000 Min. :1.00
## 1st Qu.:4.000 1st Qu.:1.00
## Median :5.000 Median :2.00
## Mean :4.892 Mean :2.49
## 3rd Qu.:6.000 3rd Qu.:3.00
## Max. :6.000 Max. :6.00
## NA's :14 NA's :20
my_summary <- function(x) {
require(moments)
funs <- c(mean, sd, skewness, kurtosis)
sapply(funs, function(f) f(x, na.rm = T))
}
dta_desc <- apply(dta, 2,my_summary)
head(dta_desc)
## A1 A2 A3 A4 A5 C1 C2
## [1,] 2.4134339 4.802380 4.603821 4.699748 4.560345 4.5023390 4.3699568
## [2,] 1.4077372 1.172020 1.301834 1.479633 1.258512 1.2413465 1.3183465
## [3,] 0.8254883 -1.124894 -0.998997 -1.031499 -0.847690 -0.8551631 -0.7422207
## [4,] 2.6942957 4.057765 3.444524 3.042640 3.161176 3.3068088 2.8656243
## C3 C4 C5 E1 E2 E3 E4
## [1,] 4.3039568 2.5533526 3.29669540 2.9744328 3.1418822 4.0007207 4.4224292
## [2,] 1.2885518 1.3751181 1.62854187 1.6315055 1.6052103 1.3527188 1.4575174
## [3,] -0.6918287 0.5964955 0.06620282 0.3736569 0.2209396 -0.4706335 -0.8241831
## [4,] 2.8697332 2.3802970 1.78461246 1.9090390 1.8526925 2.5367154 2.6977079
## E5 N1 N2 N3 N4 N5 O1
## [1,] 4.416337 2.9290857 3.50773660 3.2165651 3.1856006 2.9696860 4.8160547
## [2,] 1.334768 1.5709175 1.52594359 1.6029021 1.5696851 1.6186474 1.1295303
## [3,] -0.777486 0.3714298 -0.07698521 0.1506797 0.1969966 0.3744599 -0.8973669
## [4,] 2.908401 1.9885722 1.95035250 1.8227046 1.9090309 1.9401121 3.4277033
## O2 O3 O4 O5
## [1,] 2.713214 4.4383117 4.892319 2.4895683
## [2,] 1.565152 1.2209011 1.221250 1.3279590
## [3,] 0.585679 -0.7730516 -1.218247 0.7384818
## [4,] 2.188889 3.3043641 4.082686 2.7630094
rownames(dta_desc) <- c("mean", "sd", "skewness", "kurtosis")
rslt1 <- as.data.frame(t(dta_desc))
rslt1 |> knitr::kable()
| mean | sd | skewness | kurtosis | |
|---|---|---|---|---|
| A1 | 2.413434 | 1.407737 | 0.8254883 | 2.694296 |
| A2 | 4.802380 | 1.172020 | -1.1248938 | 4.057765 |
| A3 | 4.603821 | 1.301834 | -0.9989970 | 3.444524 |
| A4 | 4.699748 | 1.479633 | -1.0314991 | 3.042640 |
| A5 | 4.560345 | 1.258512 | -0.8476900 | 3.161176 |
| C1 | 4.502339 | 1.241346 | -0.8551631 | 3.306809 |
| C2 | 4.369957 | 1.318347 | -0.7422207 | 2.865624 |
| C3 | 4.303957 | 1.288552 | -0.6918287 | 2.869733 |
| C4 | 2.553353 | 1.375118 | 0.5964955 | 2.380297 |
| C5 | 3.296695 | 1.628542 | 0.0662028 | 1.784612 |
| E1 | 2.974433 | 1.631506 | 0.3736569 | 1.909039 |
| E2 | 3.141882 | 1.605210 | 0.2209396 | 1.852693 |
| E3 | 4.000721 | 1.352719 | -0.4706335 | 2.536715 |
| E4 | 4.422429 | 1.457517 | -0.8241831 | 2.697708 |
| E5 | 4.416337 | 1.334768 | -0.7774860 | 2.908401 |
| N1 | 2.929086 | 1.570917 | 0.3714298 | 1.988572 |
| N2 | 3.507737 | 1.525944 | -0.0769852 | 1.950352 |
| N3 | 3.216565 | 1.602902 | 0.1506797 | 1.822705 |
| N4 | 3.185601 | 1.569685 | 0.1969966 | 1.909031 |
| N5 | 2.969686 | 1.618647 | 0.3744599 | 1.940112 |
| O1 | 4.816055 | 1.129530 | -0.8973669 | 3.427703 |
| O2 | 2.713214 | 1.565152 | 0.5856790 | 2.188889 |
| O3 | 4.438312 | 1.220901 | -0.7730516 | 3.304364 |
| O4 | 4.892319 | 1.221250 | -1.2182471 | 4.082686 |
| O5 | 2.489568 | 1.327959 | 0.7384818 | 2.763009 |
dtal_desc <- melt(dta_desc)
names(dtal_desc)[1:2] <- c("moments", "items")
head(dtal_desc)
## moments items value
## 1 mean A1 2.4134339
## 2 sd A1 1.4077372
## 3 skewness A1 0.8254883
## 4 kurtosis A1 2.6942957
## 5 mean A2 4.8023801
## 6 sd A2 1.1720199
ggplot(data = subset(dtal_desc, moments == "mean"),
aes(x = reorder(items, value, max), y = value, group = moments)) +
geom_point(size = 3) +
geom_hline(yintercept = mean(t(dta_desc["mean",])) +
c(-1.5, 0, 1.5) * sd(t(dta_desc["mean", ])), linetype = "dashed") +
coord_flip() +
labs(x = "items", y = "mean") +
theme_bw()
從此圖可看出,O4有較大的平均數,則受試者在此題的得分普遍較高。
ggplot(data = subset(dtal_desc, moments == "sd"),
aes(x = reorder(items, value, max), y = value, group = moments)) +
geom_point(size = 3) +
geom_hline(yintercept = mean(t(dta_desc["sd",])) +
c(-1.5, 0, 1.5) * sd(t(dta_desc["sd", ])), linetype = "dashed") +
coord_flip() +
labs(x = "items", y = "sd") +
theme_bw()
從此圖可看出,E1有較大的標準差,則其受試者的分數變異較大。
ggplot(data = subset(dtal_desc, moments == "skewness"),
aes(x = reorder(items, value, max), y = value, group = moments)) +
geom_point(size = 3) +
geom_hline(yintercept = mean(t(dta_desc["skewness",])) +
c(-1.5, 0, 1.5) * sd(t(dta_desc["skewness", ])), linetype = "dashed") +
coord_flip() +
labs(x = "items", y = "skewness") +
theme_bw()
從此圖可看出,A1有較大的偏態值,則受試者普遍在A1得到低分。而O4則有較低的偏態值,則受試者普遍在O4得到高分。(O4的平均數也是全部題目中最高的)
ggplot(data = subset(dtal_desc, moments == "kurtosis"),
aes(x = reorder(items, value, max), y = value, group = moments)) +
geom_point(size = 3) +
geom_hline(yintercept = mean(t(dta_desc["kurtosis",])) +
c(-1.5, 0, 1.5) * sd(t(dta_desc["kurtosis", ])), linetype = "dashed") +
coord_flip() +
labs(x = "items", y = "kurtosis") +
theme_bw()
從此圖可看出,O4有較大的峰態值,則受試者的分數較為聚集在平均數。 #
Discrimination index
計算區變度,以總分為主,選取低分組與高分組,比較各題在兩組上的差異。
dta$tot <- apply(dta, 1, sum)
dta$grp <- NA
dta$grp[rank(dta$tot) < 301*.27] <- "L"
dta$grp[rank(dta$tot) > 301*.73] <- "H"
dta$grp <- factor(dta$grp)
head(dta)
## A1 A2 A3 A4 A5 C1 C2 C3 C4 C5 E1 E2 E3 E4 E5 N1 N2 N3 N4 N5 O1 O2 O3 O4
## 61617 2 4 3 4 4 2 3 3 4 4 3 3 3 4 4 3 4 2 2 3 3 6 3 4
## 61618 2 4 5 2 5 5 4 4 3 4 1 1 6 4 3 3 3 3 5 5 4 2 4 3
## 61620 5 4 5 4 4 4 5 4 2 5 2 4 4 4 5 4 5 4 2 3 4 2 5 5
## 61621 4 4 6 5 5 4 4 3 5 5 5 3 4 4 4 2 5 2 4 1 3 3 4 3
## 61622 2 3 3 4 5 4 4 5 3 2 2 2 5 4 5 2 3 4 4 3 3 3 4 3
## 61623 6 6 5 6 5 6 6 6 1 3 2 1 6 5 6 3 5 2 2 3 4 3 5 6
## O5 tot grp
## 61617 3 83 H
## 61618 3 88 H
## 61620 2 97 H
## 61621 5 97 H
## 61622 3 85 H
## 61623 1 104 H
算高低分組平均數
dtam <- aggregate(dta[, 1:25], by=list(dta$grp), mean)
print(dtam)
## Group.1 A1 A2 A3 A4 A5 C1 C2
## 1 H NA NA NA NA NA NA NA
## 2 L 2.337662 3.428571 2.805195 3.064935 3.25974 3.415584 3.272727
## C3 C4 C5 E1 E2 E3 E4 E5 N1
## 1 NA NA NA NA NA NA NA NA NA
## 2 3.337662 2.441558 2.974026 3.324675 3.467532 2.688312 3 3.12987 1.792208
## N2 N3 N4 N5 O1 O2 O3 O4
## 1 NA NA NA NA NA 2.759352 NA NA
## 2 2.220779 1.766234 2.207792 1.714286 3.935065 2.077922 3.467532 4.337662
## O5
## 1 NA
## 2 2.064935
將第一欄刪除
dtam <- t(dtam[, -1])
item_t <- sapply(dta[, 1:25], function(x) t.test(x ~ dta$grp)$statistic)
print(item_t)
## A1.t A2.t A3.t A4.t A5.t C1.t C2.t
## 0.4683277 8.3866919 9.9976555 8.7697174 7.6360472 5.9432310 6.4161713
## C3.t C4.t C5.t E1.t E2.t E3.t E4.t
## 5.8664556 0.8344815 1.8032145 -1.7610877 -1.5075345 8.5529826 7.9298711
## E5.t N1.t N2.t N3.t N4.t N5.t O1.t
## 7.6306539 8.9229466 9.3995072 12.1737530 6.2085730 10.8728143 4.9588954
## O2.t O3.t O4.t O5.t
## 4.2974027 5.5307518 3.6653979 3.1349902
# assigning the results to rslt2
rslt2 <- data.frame(Item = rownames(dtam), low.mean.score = dtam[, 2],
high.mean.score = dtam[, 1], mean.dif = dtam[, 1]-dtam[,2],
t.value = item_t)
rslt2 |> knitr::kable()
| Item | low.mean.score | high.mean.score | mean.dif | t.value | |
|---|---|---|---|---|---|
| A1 | A1 | 2.337662 | NA | NA | 0.4683277 |
| A2 | A2 | 3.428571 | NA | NA | 8.3866919 |
| A3 | A3 | 2.805195 | NA | NA | 9.9976555 |
| A4 | A4 | 3.064935 | NA | NA | 8.7697174 |
| A5 | A5 | 3.259740 | NA | NA | 7.6360472 |
| C1 | C1 | 3.415584 | NA | NA | 5.9432310 |
| C2 | C2 | 3.272727 | NA | NA | 6.4161713 |
| C3 | C3 | 3.337662 | NA | NA | 5.8664556 |
| C4 | C4 | 2.441558 | NA | NA | 0.8344815 |
| C5 | C5 | 2.974026 | NA | NA | 1.8032145 |
| E1 | E1 | 3.324675 | NA | NA | -1.7610877 |
| E2 | E2 | 3.467532 | NA | NA | -1.5075345 |
| E3 | E3 | 2.688312 | NA | NA | 8.5529826 |
| E4 | E4 | 3.000000 | NA | NA | 7.9298711 |
| E5 | E5 | 3.129870 | NA | NA | 7.6306539 |
| N1 | N1 | 1.792208 | NA | NA | 8.9229466 |
| N2 | N2 | 2.220779 | NA | NA | 9.3995072 |
| N3 | N3 | 1.766234 | NA | NA | 12.1737530 |
| N4 | N4 | 2.207792 | NA | NA | 6.2085730 |
| N5 | N5 | 1.714286 | NA | NA | 10.8728143 |
| O1 | O1 | 3.935065 | NA | NA | 4.9588954 |
| O2 | O2 | 2.077922 | 2.759352 | 0.68143 | 4.2974027 |
| O3 | O3 | 3.467532 | NA | NA | 5.5307518 |
| O4 | O4 | 4.337662 | NA | NA | 3.6653979 |
| O5 | O5 | 2.064935 | NA | NA | 3.1349902 |
ggplot(data = rslt2, aes(x = reorder(Item, t.value, max), y = t.value)) +
geom_point() +
geom_hline(yintercept = 2, linetype = "dashed") +
coord_flip() +
labs(x = "Items", y = "t-value") +
theme_bw()