library(psych)
library(moments)
library(lavaan)
## This is lavaan 0.6-11
## lavaan is FREE software! Please report any bugs.
##
## 載入套件:'lavaan'
## 下列物件被遮斷自 'package:psych':
##
## cor2cov
library(reshape2)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.9
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x ggplot2::%+%() masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
data(bfi)
str(bfi)
## 'data.frame': 2800 obs. of 28 variables:
## $ A1 : int 2 2 5 4 2 6 2 4 4 2 ...
## $ A2 : int 4 4 4 4 3 6 5 3 3 5 ...
## $ A3 : int 3 5 5 6 3 5 5 1 6 6 ...
## $ A4 : int 4 2 4 5 4 6 3 5 3 6 ...
## $ A5 : int 4 5 4 5 5 5 5 1 3 5 ...
## $ C1 : int 2 5 4 4 4 6 5 3 6 6 ...
## $ C2 : int 3 4 5 4 4 6 4 2 6 5 ...
## $ C3 : int 3 4 4 3 5 6 4 4 3 6 ...
## $ C4 : int 4 3 2 5 3 1 2 2 4 2 ...
## $ C5 : int 4 4 5 5 2 3 3 4 5 1 ...
## $ E1 : int 3 1 2 5 2 2 4 3 5 2 ...
## $ E2 : int 3 1 4 3 2 1 3 6 3 2 ...
## $ E3 : int 3 6 4 4 5 6 4 4 NA 4 ...
## $ E4 : int 4 4 4 4 4 5 5 2 4 5 ...
## $ E5 : int 4 3 5 4 5 6 5 1 3 5 ...
## $ N1 : int 3 3 4 2 2 3 1 6 5 5 ...
## $ N2 : int 4 3 5 5 3 5 2 3 5 5 ...
## $ N3 : int 2 3 4 2 4 2 2 2 2 5 ...
## $ N4 : int 2 5 2 4 4 2 1 6 3 2 ...
## $ N5 : int 3 5 3 1 3 3 1 4 3 4 ...
## $ O1 : int 3 4 4 3 3 4 5 3 6 5 ...
## $ O2 : int 6 2 2 3 3 3 2 2 6 1 ...
## $ O3 : int 3 4 5 4 4 5 5 4 6 5 ...
## $ O4 : int 4 3 5 3 3 6 6 5 6 5 ...
## $ O5 : int 3 3 2 5 3 1 1 3 1 2 ...
## $ gender : int 1 2 2 2 1 2 1 1 1 2 ...
## $ education: int NA NA NA NA NA 3 NA 2 1 NA ...
## $ age : int 16 18 17 17 17 21 18 19 19 17 ...
bfi[, c(1,9,10,11,12,22)]<- 7-bfi[, c(1,9,10,11,12,22)]
bfi <- bfi[ , 1:25]
head(bfi)
## A1 A2 A3 A4 A5 C1 C2 C3 C4 C5 E1 E2 E3 E4 E5 N1 N2 N3 N4 N5 O1 O2 O3 O4
## 61617 5 4 3 4 4 2 3 3 3 3 4 4 3 4 4 3 4 2 2 3 3 1 3 4
## 61618 5 4 5 2 5 5 4 4 4 3 6 6 6 4 3 3 3 3 5 5 4 5 4 3
## 61620 2 4 5 4 4 4 5 4 5 2 5 3 4 4 5 4 5 4 2 3 4 5 5 5
## 61621 3 4 6 5 5 4 4 3 2 2 2 4 4 4 4 2 5 2 4 1 3 4 4 3
## 61622 5 3 3 4 5 4 4 5 4 5 5 5 5 4 5 2 3 4 4 3 3 4 4 3
## 61623 1 6 5 6 5 6 6 6 6 4 5 6 6 5 6 3 5 2 2 3 4 4 5 6
## O5
## 61617 3
## 61618 3
## 61620 2
## 61621 5
## 61622 3
## 61623 1
my_summary <- function(x) {
require(moments)
funs <- c(mean, sd, skewness, kurtosis)
sapply(funs, function(f) f(x, na.rm = T))
}
bfi_desc <- apply(bfi, 2, my_summary)
head(bfi_desc)
## A1 A2 A3 A4 A5 C1 C2
## [1,] 4.5865661 4.802380 4.603821 4.699748 4.560345 4.5023390 4.3699568
## [2,] 1.4077372 1.172020 1.301834 1.479633 1.258512 1.2413465 1.3183465
## [3,] -0.8254883 -1.124894 -0.998997 -1.031499 -0.847690 -0.8551631 -0.7422207
## [4,] 2.6942957 4.057765 3.444524 3.042640 3.161176 3.3068088 2.8656243
## C3 C4 C5 E1 E2 E3
## [1,] 4.3039568 4.4466474 3.70330460 4.0255672 3.8581178 4.0007207
## [2,] 1.2885518 1.3751181 1.62854187 1.6315055 1.6052103 1.3527188
## [3,] -0.6918287 -0.5964955 -0.06620282 -0.3736569 -0.2209396 -0.4706335
## [4,] 2.8697332 2.3802970 1.78461246 1.9090390 1.8526925 2.5367154
## E4 E5 N1 N2 N3 N4 N5
## [1,] 4.4224292 4.416337 2.9290857 3.50773660 3.2165651 3.1856006 2.9696860
## [2,] 1.4575174 1.334768 1.5709175 1.52594359 1.6029021 1.5696851 1.6186474
## [3,] -0.8241831 -0.777486 0.3714298 -0.07698521 0.1506797 0.1969966 0.3744599
## [4,] 2.6977079 2.908401 1.9885722 1.95035250 1.8227046 1.9090309 1.9401121
## O1 O2 O3 O4 O5
## [1,] 4.8160547 4.286786 4.4383117 4.892319 2.4895683
## [2,] 1.1295303 1.565152 1.2209011 1.221250 1.3279590
## [3,] -0.8973669 -0.585679 -0.7730516 -1.218247 0.7384818
## [4,] 3.4277033 2.188889 3.3043641 4.082686 2.7630094
rownames(bfi_desc) <- c("mean", "sd", "skewness", "kurtosis")
rslt1 <- as.data.frame(t(bfi_desc))
rslt1 |> knitr::kable()
| mean | sd | skewness | kurtosis | |
|---|---|---|---|---|
| A1 | 4.586566 | 1.407737 | -0.8254883 | 2.694296 |
| A2 | 4.802380 | 1.172020 | -1.1248938 | 4.057765 |
| A3 | 4.603821 | 1.301834 | -0.9989970 | 3.444524 |
| A4 | 4.699748 | 1.479633 | -1.0314991 | 3.042640 |
| A5 | 4.560345 | 1.258512 | -0.8476900 | 3.161176 |
| C1 | 4.502339 | 1.241346 | -0.8551631 | 3.306809 |
| C2 | 4.369957 | 1.318347 | -0.7422207 | 2.865624 |
| C3 | 4.303957 | 1.288552 | -0.6918287 | 2.869733 |
| C4 | 4.446647 | 1.375118 | -0.5964955 | 2.380297 |
| C5 | 3.703305 | 1.628542 | -0.0662028 | 1.784612 |
| E1 | 4.025567 | 1.631506 | -0.3736569 | 1.909039 |
| E2 | 3.858118 | 1.605210 | -0.2209396 | 1.852693 |
| E3 | 4.000721 | 1.352719 | -0.4706335 | 2.536715 |
| E4 | 4.422429 | 1.457517 | -0.8241831 | 2.697708 |
| E5 | 4.416337 | 1.334768 | -0.7774860 | 2.908401 |
| N1 | 2.929086 | 1.570917 | 0.3714298 | 1.988572 |
| N2 | 3.507737 | 1.525944 | -0.0769852 | 1.950352 |
| N3 | 3.216565 | 1.602902 | 0.1506797 | 1.822705 |
| N4 | 3.185601 | 1.569685 | 0.1969966 | 1.909031 |
| N5 | 2.969686 | 1.618647 | 0.3744599 | 1.940112 |
| O1 | 4.816055 | 1.129530 | -0.8973669 | 3.427703 |
| O2 | 4.286786 | 1.565152 | -0.5856790 | 2.188889 |
| O3 | 4.438312 | 1.220901 | -0.7730516 | 3.304364 |
| O4 | 4.892319 | 1.221250 | -1.2182471 | 4.082686 |
| O5 | 2.489568 | 1.327959 | 0.7384818 | 2.763009 |
bfil_desc <- melt(bfi_desc)
names(bfil_desc)[1:2] <- c("moments", "items")
head(bfil_desc)
## moments items value
## 1 mean A1 4.5865661
## 2 sd A1 1.4077372
## 3 skewness A1 -0.8254883
## 4 kurtosis A1 2.6942957
## 5 mean A2 4.8023801
## 6 sd A2 1.1720199
ggplot(data = subset(bfil_desc, moments == "mean"),
aes(x = reorder(items, value, max), y = value, group = moments)) +
geom_point(size = 3) +
geom_hline(yintercept = mean(t(bfi_desc["mean",])) +
c(-1.5, 0, 1.5) * sd(t(bfi_desc["mean", ])), linetype = "dashed") +
coord_flip() +
labs(x = "items", y = "mean") +
theme_bw()
ggplot(data = subset(bfil_desc, moments == "sd"),
aes(x = reorder(items, value, max), y = value, group = moments)) +
geom_point(size = 3) +
geom_hline(yintercept = mean(t(bfi_desc["sd",])) +
c(-1.5, 0, 1.5) * sd(t(bfi_desc["sd", ])), linetype = "dashed") +
coord_flip() +
labs(x = "items", y = "sd") +
theme_bw()
ggplot(data = subset(bfil_desc, moments == "skewness"),
aes(x = reorder(items, value, max), y = value, group = moments)) +
geom_point(size = 3) +
geom_hline(yintercept = mean(t(bfi_desc["skewness",])) +
c(-1.5, 0, 1.5) * sd(t(bfi_desc["skewness", ])), linetype = "dashed") +
coord_flip() +
labs(x = "items", y = "skewness") +
theme_bw()
ggplot(data = subset(bfil_desc, moments == "kurtosis"),
aes(x = reorder(items, value, max), y = value, group = moments)) +
geom_point(size = 3) +
geom_hline(yintercept = mean(t(bfi_desc["kurtosis",])) +
c(-1.5, 0, 1.5) * sd(t(bfi_desc["kurtosis", ])), linetype = "dashed") +
coord_flip() +
labs(x = "items", y = "kurtosis") +
theme_bw()
#先去掉原資料的遺漏值(NA)
bfi = na.omit(bfi)
str(bfi)
## 'data.frame': 2436 obs. of 25 variables:
## $ A1: num 5 5 2 3 5 1 5 3 5 3 ...
## $ A2: int 4 4 4 4 3 6 5 3 5 4 ...
## $ A3: int 3 5 5 6 3 5 5 1 6 5 ...
## $ A4: int 4 2 4 5 4 6 3 5 6 6 ...
## $ A5: int 4 5 4 5 5 5 5 1 5 5 ...
## $ C1: int 2 5 4 4 4 6 5 3 6 4 ...
## $ C2: int 3 4 5 4 4 6 4 2 5 3 ...
## $ C3: int 3 4 4 3 5 6 4 4 6 5 ...
## $ C4: num 3 4 5 2 4 6 5 5 5 4 ...
## $ C5: num 3 3 2 2 5 4 4 3 6 5 ...
## $ E1: num 4 6 5 2 5 5 3 4 5 6 ...
## $ E2: num 4 6 3 4 5 6 4 1 5 4 ...
## $ E3: int 3 6 4 4 5 6 4 4 4 2 ...
## $ E4: int 4 4 4 4 4 5 5 2 5 5 ...
## $ E5: int 4 3 5 4 5 6 5 1 5 4 ...
## $ N1: int 3 3 4 2 2 3 1 6 5 3 ...
## $ N2: int 4 3 5 5 3 5 2 3 5 3 ...
## $ N3: int 2 3 4 2 4 2 2 2 5 4 ...
## $ N4: int 2 5 2 4 4 2 1 6 2 2 ...
## $ N5: int 3 5 3 1 3 3 1 4 4 3 ...
## $ O1: int 3 4 4 3 3 4 5 3 5 5 ...
## $ O2: num 1 5 5 4 4 4 5 5 6 4 ...
## $ O3: int 3 4 5 4 4 5 5 4 5 5 ...
## $ O4: int 4 3 5 3 3 6 6 5 5 6 ...
## $ O5: int 3 3 2 5 3 1 1 3 2 3 ...
## - attr(*, "na.action")= 'omit' Named int [1:364] 9 12 35 42 63 66 72 90 101 107 ...
## ..- attr(*, "names")= chr [1:364] "61630" "61636" "61684" "61693" ...
#去除遺漏值後,資料剩下2436筆
bfi$tot <- apply(bfi, 1, sum)
bfi$grp <- NA
bfi$grp[rank(bfi$tot) < 2436*.3] <- "L"
bfi$grp[rank(bfi$tot) > 2436*.7] <- "H"
bfi$grp <- factor(bfi$grp)
head(bfi)
## A1 A2 A3 A4 A5 C1 C2 C3 C4 C5 E1 E2 E3 E4 E5 N1 N2 N3 N4 N5 O1 O2 O3 O4
## 61617 5 4 3 4 4 2 3 3 3 3 4 4 3 4 4 3 4 2 2 3 3 1 3 4
## 61618 5 4 5 2 5 5 4 4 4 3 6 6 6 4 3 3 3 3 5 5 4 5 4 3
## 61620 2 4 5 4 4 4 5 4 5 2 5 3 4 4 5 4 5 4 2 3 4 5 5 5
## 61621 3 4 6 5 5 4 4 3 2 2 2 4 4 4 4 2 5 2 4 1 3 4 4 3
## 61622 5 3 3 4 5 4 4 5 4 5 5 5 5 4 5 2 3 4 4 3 3 4 4 3
## 61623 1 6 5 6 5 6 6 6 6 4 5 6 6 5 6 3 5 2 2 3 4 4 5 6
## O5 tot grp
## 61617 3 81 L
## 61618 3 104 <NA>
## 61620 2 99 <NA>
## 61621 5 89 L
## 61622 3 99 <NA>
## 61623 1 114 H
bfim <- aggregate(bfi[, 1:25], by=list(bfi$grp), mean)
print(bfim)
## Group.1 A1 A2 A3 A4 A5 C1 C2
## 1 H 4.977483 5.417219 5.329801 5.307285 5.202649 5.092715 5.078146
## 2 L 4.206989 4.091398 3.750000 3.951613 3.845430 3.944892 3.672043
## C3 C4 C5 E1 E2 E3 E4 E5
## 1 4.859603 4.984106 4.313907 4.699338 4.574834 4.814570 5.115232 5.18543
## 2 3.727151 3.864247 3.029570 3.115591 2.963710 3.119624 3.475806 3.49328
## N1 N2 N3 N4 N5 O1 O2 O3
## 1 3.344371 3.927152 3.649007 3.282119 3.319205 5.278146 4.659603 5.062252
## 2 2.610215 3.223118 2.814516 3.197581 2.690860 4.354839 4.016129 3.838710
## O4 O5
## 1 5.256954 2.33245
## 2 4.684140 2.56586
bfim <- t(bfim[, -1])
item_t <- sapply(bfi[, 1:25], function(x) t.test(x ~ bfi$grp)$statistic)
print(item_t)
## A1.t A2.t A3.t A4.t A5.t C1.t C2.t C3.t
## 10.815628 23.821714 26.464655 18.801098 22.747181 18.698602 22.526249 17.767133
## C4.t C5.t E1.t E2.t E3.t E4.t E5.t N1.t
## 16.402713 16.029028 20.396905 21.062974 28.139909 23.411651 27.611165 9.155108
## N2.t N3.t N4.t N5.t O1.t O2.t O3.t O4.t
## 9.066525 10.300246 1.031340 7.411300 16.415663 7.949024 20.746077 9.439044
## O5.t
## -3.402291
rslt2 <- data.frame(Item = rownames(bfim), low.mean.score = bfim[, 2],
high.mean.score = bfim[, 1], mean.dif = bfim[, 1]-bfim[,2],
t.value = item_t)
rslt2 |> knitr::kable()
| Item | low.mean.score | high.mean.score | mean.dif | t.value | |
|---|---|---|---|---|---|
| A1 | A1 | 4.206989 | 4.977483 | 0.7704942 | 10.815628 |
| A2 | A2 | 4.091398 | 5.417218 | 1.3258207 | 23.821714 |
| A3 | A3 | 3.750000 | 5.329801 | 1.5798013 | 26.464655 |
| A4 | A4 | 3.951613 | 5.307285 | 1.3556719 | 18.801098 |
| A5 | A5 | 3.845430 | 5.202649 | 1.3572189 | 22.747181 |
| C1 | C1 | 3.944892 | 5.092715 | 1.1478228 | 18.698602 |
| C2 | C2 | 3.672043 | 5.078146 | 1.4061027 | 22.526249 |
| C3 | C3 | 3.727151 | 4.859603 | 1.1324521 | 17.767133 |
| C4 | C4 | 3.864247 | 4.984106 | 1.1198586 | 16.402713 |
| C5 | C5 | 3.029570 | 4.313907 | 1.2843374 | 16.029028 |
| E1 | E1 | 3.115591 | 4.699338 | 1.5837464 | 20.396905 |
| E2 | E2 | 2.963710 | 4.574834 | 1.6111248 | 21.062974 |
| E3 | E3 | 3.119624 | 4.814570 | 1.6949459 | 28.139909 |
| E4 | E4 | 3.475807 | 5.115232 | 1.6394253 | 23.411651 |
| E5 | E5 | 3.493280 | 5.185430 | 1.6921509 | 27.611165 |
| N1 | N1 | 2.610215 | 3.344371 | 0.7341558 | 9.155108 |
| N2 | N2 | 3.223118 | 3.927152 | 0.7040340 | 9.066525 |
| N3 | N3 | 2.814516 | 3.649007 | 0.8344905 | 10.300246 |
| N4 | N4 | 3.197581 | 3.282119 | 0.0845386 | 1.031340 |
| N5 | N5 | 2.690860 | 3.319205 | 0.6283451 | 7.411300 |
| O1 | O1 | 4.354839 | 5.278146 | 0.9233070 | 16.415663 |
| O2 | O2 | 4.016129 | 4.659603 | 0.6434736 | 7.949024 |
| O3 | O3 | 3.838710 | 5.062252 | 1.2235420 | 20.746077 |
| O4 | O4 | 4.684140 | 5.256954 | 0.5728139 | 9.439044 |
| O5 | O5 | 2.565860 | 2.332450 | -0.2334099 | -3.402291 |
ggplot(data = rslt2, aes(x = reorder(Item, t.value, max), y = t.value)) +
geom_point() +
geom_hline(yintercept = 2, linetype = "dashed") +
coord_flip() +
labs(x = "Items", y = "t-value") +
theme_bw()
#t value若大於2,表示可以區分高/低分組