data("bfi")
#將bfi中的負向題分數轉為正向題計分
bfi[,c(1,9, 10, 11, 12, 22)] = 7 - bfi[,c(1,9, 10, 11, 12, 22)]
dta <- bfi
#查看資料結構
str(dta)
## 'data.frame': 2800 obs. of 28 variables:
## $ A1 : num 5 5 2 3 5 1 5 3 3 5 ...
## $ A2 : int 4 4 4 4 3 6 5 3 3 5 ...
## $ A3 : int 3 5 5 6 3 5 5 1 6 6 ...
## $ A4 : int 4 2 4 5 4 6 3 5 3 6 ...
## $ A5 : int 4 5 4 5 5 5 5 1 3 5 ...
## $ C1 : int 2 5 4 4 4 6 5 3 6 6 ...
## $ C2 : int 3 4 5 4 4 6 4 2 6 5 ...
## $ C3 : int 3 4 4 3 5 6 4 4 3 6 ...
## $ C4 : num 3 4 5 2 4 6 5 5 3 5 ...
## $ C5 : num 3 3 2 2 5 4 4 3 2 6 ...
## $ E1 : num 4 6 5 2 5 5 3 4 2 5 ...
## $ E2 : num 4 6 3 4 5 6 4 1 4 5 ...
## $ E3 : int 3 6 4 4 5 6 4 4 NA 4 ...
## $ E4 : int 4 4 4 4 4 5 5 2 4 5 ...
## $ E5 : int 4 3 5 4 5 6 5 1 3 5 ...
## $ N1 : int 3 3 4 2 2 3 1 6 5 5 ...
## $ N2 : int 4 3 5 5 3 5 2 3 5 5 ...
## $ N3 : int 2 3 4 2 4 2 2 2 2 5 ...
## $ N4 : int 2 5 2 4 4 2 1 6 3 2 ...
## $ N5 : int 3 5 3 1 3 3 1 4 3 4 ...
## $ O1 : int 3 4 4 3 3 4 5 3 6 5 ...
## $ O2 : num 1 5 5 4 4 4 5 5 1 6 ...
## $ O3 : int 3 4 5 4 4 5 5 4 6 5 ...
## $ O4 : int 4 3 5 3 3 6 6 5 6 5 ...
## $ O5 : int 3 3 2 5 3 1 1 3 1 2 ...
## $ gender : int 1 2 2 2 1 2 1 1 1 2 ...
## $ education: int NA NA NA NA NA 3 NA 2 1 NA ...
## $ age : int 16 18 17 17 17 21 18 19 19 17 ...
28個變項,總共2800筆資料點
#摘要bfi
summary(dta)
## A1 A2 A3 A4 A5
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.0 Min. :1.00
## 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:4.0 1st Qu.:4.00
## Median :5.000 Median :5.000 Median :5.000 Median :5.0 Median :5.00
## Mean :4.587 Mean :4.802 Mean :4.604 Mean :4.7 Mean :4.56
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.0 3rd Qu.:5.00
## Max. :6.000 Max. :6.000 Max. :6.000 Max. :6.0 Max. :6.00
## NA's :16 NA's :27 NA's :26 NA's :19 NA's :16
## C1 C2 C3 C4 C5
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:4.00 1st Qu.:4.000 1st Qu.:3.000 1st Qu.:2.000
## Median :5.000 Median :5.00 Median :5.000 Median :5.000 Median :4.000
## Mean :4.502 Mean :4.37 Mean :4.304 Mean :4.447 Mean :3.703
## 3rd Qu.:5.000 3rd Qu.:5.00 3rd Qu.:5.000 3rd Qu.:6.000 3rd Qu.:5.000
## Max. :6.000 Max. :6.00 Max. :6.000 Max. :6.000 Max. :6.000
## NA's :21 NA's :24 NA's :20 NA's :26 NA's :16
## E1 E2 E3 E4
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:4.000
## Median :4.000 Median :4.000 Median :4.000 Median :5.000
## Mean :4.026 Mean :3.858 Mean :4.001 Mean :4.422
## 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:6.000
## Max. :6.000 Max. :6.000 Max. :6.000 Max. :6.000
## NA's :23 NA's :16 NA's :25 NA's :9
## E5 N1 N2 N3
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :5.000 Median :3.000 Median :4.000 Median :3.000
## Mean :4.416 Mean :2.929 Mean :3.508 Mean :3.217
## 3rd Qu.:5.000 3rd Qu.:4.000 3rd Qu.:5.000 3rd Qu.:4.000
## Max. :6.000 Max. :6.000 Max. :6.000 Max. :6.000
## NA's :21 NA's :22 NA's :21 NA's :11
## N4 N5 O1 O2 O3
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.00 1st Qu.:4.000 1st Qu.:3.000 1st Qu.:4.000
## Median :3.000 Median :3.00 Median :5.000 Median :5.000 Median :5.000
## Mean :3.186 Mean :2.97 Mean :4.816 Mean :4.287 Mean :4.438
## 3rd Qu.:4.000 3rd Qu.:4.00 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:5.000
## Max. :6.000 Max. :6.00 Max. :6.000 Max. :6.000 Max. :6.000
## NA's :36 NA's :29 NA's :22 NA's :28
## O4 O5 gender education age
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.00 Min. : 3.00
## 1st Qu.:4.000 1st Qu.:1.00 1st Qu.:1.000 1st Qu.:3.00 1st Qu.:20.00
## Median :5.000 Median :2.00 Median :2.000 Median :3.00 Median :26.00
## Mean :4.892 Mean :2.49 Mean :1.672 Mean :3.19 Mean :28.78
## 3rd Qu.:6.000 3rd Qu.:3.00 3rd Qu.:2.000 3rd Qu.:4.00 3rd Qu.:35.00
## Max. :6.000 Max. :6.00 Max. :2.000 Max. :5.00 Max. :86.00
## NA's :14 NA's :20 NA's :223
#查看前6筆資料
head(dta)
## A1 A2 A3 A4 A5 C1 C2 C3 C4 C5 E1 E2 E3 E4 E5 N1 N2 N3 N4 N5 O1 O2 O3 O4
## 61617 5 4 3 4 4 2 3 3 3 3 4 4 3 4 4 3 4 2 2 3 3 1 3 4
## 61618 5 4 5 2 5 5 4 4 4 3 6 6 6 4 3 3 3 3 5 5 4 5 4 3
## 61620 2 4 5 4 4 4 5 4 5 2 5 3 4 4 5 4 5 4 2 3 4 5 5 5
## 61621 3 4 6 5 5 4 4 3 2 2 2 4 4 4 4 2 5 2 4 1 3 4 4 3
## 61622 5 3 3 4 5 4 4 5 4 5 5 5 5 4 5 2 3 4 4 3 3 4 4 3
## 61623 1 6 5 6 5 6 6 6 6 4 5 6 6 5 6 3 5 2 2 3 4 4 5 6
## O5 gender education age
## 61617 3 1 NA 16
## 61618 3 2 NA 18
## 61620 2 2 NA 17
## 61621 5 2 NA 17
## 61622 3 1 NA 17
## 61623 1 2 3 21
#將原始資料中的1到25行資料取出,也就是量表中的25題,重新定義其為dta
dta <- dta[, 1:25]
my_summary <- function(x) {
require(moments)
funs <- c(mean, sd, skewness, kurtosis)
sapply(funs, function(f) f(x, na.rm = T))
}
#計算dta中每一行的加總,並將結果儲存在dta_desc中
dta_desc <- apply(dta, 2, my_summary)
#查看每筆資料的mean, sd, skewness, kurtosis
head(dta_desc)
## A1 A2 A3 A4 A5 C1 C2
## [1,] 4.5865661 4.802380 4.603821 4.699748 4.560345 4.5023390 4.3699568
## [2,] 1.4077372 1.172020 1.301834 1.479633 1.258512 1.2413465 1.3183465
## [3,] -0.8254883 -1.124894 -0.998997 -1.031499 -0.847690 -0.8551631 -0.7422207
## [4,] 2.6942957 4.057765 3.444524 3.042640 3.161176 3.3068088 2.8656243
## C3 C4 C5 E1 E2 E3
## [1,] 4.3039568 4.4466474 3.70330460 4.0255672 3.8581178 4.0007207
## [2,] 1.2885518 1.3751181 1.62854187 1.6315055 1.6052103 1.3527188
## [3,] -0.6918287 -0.5964955 -0.06620282 -0.3736569 -0.2209396 -0.4706335
## [4,] 2.8697332 2.3802970 1.78461246 1.9090390 1.8526925 2.5367154
## E4 E5 N1 N2 N3 N4 N5
## [1,] 4.4224292 4.416337 2.9290857 3.50773660 3.2165651 3.1856006 2.9696860
## [2,] 1.4575174 1.334768 1.5709175 1.52594359 1.6029021 1.5696851 1.6186474
## [3,] -0.8241831 -0.777486 0.3714298 -0.07698521 0.1506797 0.1969966 0.3744599
## [4,] 2.6977079 2.908401 1.9885722 1.95035250 1.8227046 1.9090309 1.9401121
## O1 O2 O3 O4 O5
## [1,] 4.8160547 4.286786 4.4383117 4.892319 2.4895683
## [2,] 1.1295303 1.565152 1.2209011 1.221250 1.3279590
## [3,] -0.8973669 -0.585679 -0.7730516 -1.218247 0.7384818
## [4,] 3.4277033 2.188889 3.3043641 4.082686 2.7630094
#幫每個列取名字
rownames(dta_desc) <- c("mean", "sd", "skewness", "kurtosis")
#矩陣不是很方便看,將其轉成 data.frame呈現
rslt1 <- as.data.frame(t(dta_desc))
rslt1 |> knitr::kable()
| mean | sd | skewness | kurtosis | |
|---|---|---|---|---|
| A1 | 4.586566 | 1.407737 | -0.8254883 | 2.694296 |
| A2 | 4.802380 | 1.172020 | -1.1248938 | 4.057765 |
| A3 | 4.603821 | 1.301834 | -0.9989970 | 3.444524 |
| A4 | 4.699748 | 1.479633 | -1.0314991 | 3.042640 |
| A5 | 4.560345 | 1.258512 | -0.8476900 | 3.161176 |
| C1 | 4.502339 | 1.241346 | -0.8551631 | 3.306809 |
| C2 | 4.369957 | 1.318347 | -0.7422207 | 2.865624 |
| C3 | 4.303957 | 1.288552 | -0.6918287 | 2.869733 |
| C4 | 4.446647 | 1.375118 | -0.5964955 | 2.380297 |
| C5 | 3.703305 | 1.628542 | -0.0662028 | 1.784612 |
| E1 | 4.025567 | 1.631506 | -0.3736569 | 1.909039 |
| E2 | 3.858118 | 1.605210 | -0.2209396 | 1.852693 |
| E3 | 4.000721 | 1.352719 | -0.4706335 | 2.536715 |
| E4 | 4.422429 | 1.457517 | -0.8241831 | 2.697708 |
| E5 | 4.416337 | 1.334768 | -0.7774860 | 2.908401 |
| N1 | 2.929086 | 1.570917 | 0.3714298 | 1.988572 |
| N2 | 3.507737 | 1.525944 | -0.0769852 | 1.950352 |
| N3 | 3.216565 | 1.602902 | 0.1506797 | 1.822705 |
| N4 | 3.185601 | 1.569685 | 0.1969966 | 1.909031 |
| N5 | 2.969686 | 1.618647 | 0.3744599 | 1.940112 |
| O1 | 4.816055 | 1.129530 | -0.8973669 | 3.427703 |
| O2 | 4.286786 | 1.565152 | -0.5856790 | 2.188889 |
| O3 | 4.438312 | 1.220901 | -0.7730516 | 3.304364 |
| O4 | 4.892319 | 1.221250 | -1.2182471 | 4.082686 |
| O5 | 2.489568 | 1.327959 | 0.7384818 | 2.763009 |
#轉換成 longdata
dtal_desc <- melt(dta_desc)
names(dtal_desc)[1:2] <- c("moments", "items")
#查看前6筆資料
head(dtal_desc)
## moments items value
## 1 mean A1 4.5865661
## 2 sd A1 1.4077372
## 3 skewness A1 -0.8254883
## 4 kurtosis A1 2.6942957
## 5 mean A2 4.8023801
## 6 sd A2 1.1720199
#只截出 dtal_desc 這筆資料中,moments 這個項目的 mean
#reorder意思是,按照每個 item 的 value 由大到小照順序排列
ggplot(data = subset(dtal_desc, moments == "mean"),
aes(x = reorder(items, value, max), y = value, group = moments)) +
geom_point(size = 1) +
geom_hline(yintercept = mean(t(dta_desc["mean",])) +
c(-1.5, 0, 1.5) * sd(t(dta_desc["mean", ])), linetype = "dashed") +
coord_flip() +
labs(x = "items", y = "mean") +
theme_bw()
在mean當中,O5、N1、N5的值極端小於mean的平均。沒有任何值極端大於平均。
ggplot(data = subset(dtal_desc, moments == "sd"),
aes(x = reorder(items, value, max), y = value, group = moments)) +
geom_point(size = 1) +
geom_hline(yintercept = mean(t(dta_desc["sd",])) +
c(-1.5, 0, 1.5) * sd(t(dta_desc["sd", ])), linetype = "dashed") +
coord_flip() +
labs(x = "items", y = "sd") +
theme_bw()
在sd當中,O1的值極端小於sd的平均。沒有任何值極端大於sd的平均。
ggplot(data = subset(dtal_desc, moments == "skewness"),
aes(x = reorder(items, value, max), y = value, group = moments)) +
geom_point(size = 1) +
geom_hline(yintercept = mean(t(dta_desc["skewness",])) +
c(-1.5, 0, 1.5) * sd(t(dta_desc["skewness", ])), linetype = "dashed") +
coord_flip() +
labs(x = "items", y = "skewness") +
theme_bw()
在skewness當中,沒有任何值極端小於skewness的平均。O5、N5、N1極端大於skewness的平均。
ggplot(data = subset(dtal_desc, moments == "kurtosis"),
aes(x = reorder(items, value, max), y = value, group = moments)) +
geom_point(size = 1) +
geom_hline(yintercept = mean(t(dta_desc["kurtosis",])) +
c(-1.5, 0, 1.5) * sd(t(dta_desc["kurtosis", ])), linetype = "dashed") +
coord_flip() +
labs(x = "items", y = "kurtosis") +
theme_bw()
在kurtosis當中,沒有任何值極端小於kurtosis的平均。O4、A2極端大於kurtosis的平均。
#刪除所有含有缺失資料NA的row
dta1 = na.omit(dta)
dta1$tot <- apply(dta1, 1, sum)
#在 dta 中設一個欄位叫 grp,將所有數值設為 NA
dta1$grp <- NA
#只要分數小於2436*.27,就在 grp 中顯示L代表低;大於2436*.73 則顯示H代表高;其餘仍以NA顯示
dta1$grp[rank(dta1$tot) < 2436*.27] <- "L"
dta1$grp[rank(dta1$tot) > 2436*.73] <- "H"
dta1$grp <- factor(dta1$grp)
head(dta1)
## A1 A2 A3 A4 A5 C1 C2 C3 C4 C5 E1 E2 E3 E4 E5 N1 N2 N3 N4 N5 O1 O2 O3 O4
## 61617 5 4 3 4 4 2 3 3 3 3 4 4 3 4 4 3 4 2 2 3 3 1 3 4
## 61618 5 4 5 2 5 5 4 4 4 3 6 6 6 4 3 3 3 3 5 5 4 5 4 3
## 61620 2 4 5 4 4 4 5 4 5 2 5 3 4 4 5 4 5 4 2 3 4 5 5 5
## 61621 3 4 6 5 5 4 4 3 2 2 2 4 4 4 4 2 5 2 4 1 3 4 4 3
## 61622 5 3 3 4 5 4 4 5 4 5 5 5 5 4 5 2 3 4 4 3 3 4 4 3
## 61623 1 6 5 6 5 6 6 6 6 4 5 6 6 5 6 3 5 2 2 3 4 4 5 6
## O5 tot grp
## 61617 3 81 L
## 61618 3 104 <NA>
## 61620 2 99 <NA>
## 61621 5 89 L
## 61622 3 99 <NA>
## 61623 1 114 H
#算高低分組平均數
dtam <- aggregate(dta1[, 1:25], by=list(dta1$grp), mean)
print(dtam)
## Group.1 A1 A2 A3 A4 A5 C1 C2
## 1 H 5.007407 5.438519 5.364444 5.325926 5.237037 5.136296 5.102222
## 2 L 4.202080 4.017831 3.693908 3.918276 3.796434 3.897474 3.632987
## C3 C4 C5 E1 E2 E3 E4 E5
## 1 4.885926 5.040000 4.358519 4.746667 4.617778 4.856296 5.145185 5.229630
## 2 3.742942 3.832095 3.005944 3.077266 2.921248 3.062407 3.401189 3.423477
## N1 N2 N3 N4 N5 O1 O2 O3
## 1 3.364444 3.955556 3.680000 3.269630 3.330370 5.299259 4.691852 5.091852
## 2 2.585438 3.200594 2.783061 3.194651 2.649331 4.326895 4.031204 3.790490
## O4 O5
## 1 5.272593 2.334815
## 2 4.679049 2.563150
#將第一欄刪除
dtam <- t(dtam[, -1])
t-test
item_t <- sapply(dta1[, 1:25], function(x) t.test(x ~ dta1$grp)$statistic)
print(item_t)
## A1.t A2.t A3.t A4.t A5.t C1.t C2.t
## 10.8435846 24.1029485 26.7156154 18.7839003 23.0699742 19.1247670 22.3832065
## C3.t C4.t C5.t E1.t E2.t E3.t E4.t
## 17.1453481 16.9457542 16.0464821 20.4585020 21.2538284 28.5111542 23.8114957
## E5.t N1.t N2.t N3.t N4.t N5.t O1.t
## 28.1321438 9.2082776 9.2587359 10.4863674 0.8665417 7.6373698 16.2839271
## O2.t O3.t O4.t O5.t
## 7.7166981 20.8345645 9.2886876 -3.1290125
#將結果儲存進rslt2中
rslt2 <- data.frame(Item = rownames(dtam), low.mean.score = dtam[, 2],
high.mean.score = dtam[, 1], mean.dif = dtam[, 1]-dtam[,2],
t.value = item_t)
rslt2 |> knitr::kable()
| Item | low.mean.score | high.mean.score | mean.dif | t.value | |
|---|---|---|---|---|---|
| A1 | A1 | 4.202080 | 5.007407 | 0.8053272 | 10.8435846 |
| A2 | A2 | 4.017831 | 5.438518 | 1.4206879 | 24.1029485 |
| A3 | A3 | 3.693908 | 5.364444 | 1.6705366 | 26.7156154 |
| A4 | A4 | 3.918276 | 5.325926 | 1.4076496 | 18.7839003 |
| A5 | A5 | 3.796434 | 5.237037 | 1.4406032 | 23.0699742 |
| C1 | C1 | 3.897474 | 5.136296 | 1.2388223 | 19.1247670 |
| C2 | C2 | 3.632987 | 5.102222 | 1.4692356 | 22.3832065 |
| C3 | C3 | 3.742942 | 4.885926 | 1.1429839 | 17.1453481 |
| C4 | C4 | 3.832095 | 5.040000 | 1.2079049 | 16.9457542 |
| C5 | C5 | 3.005943 | 4.358518 | 1.3525750 | 16.0464821 |
| E1 | E1 | 3.077266 | 4.746667 | 1.6694007 | 20.4585020 |
| E2 | E2 | 2.921248 | 4.617778 | 1.6965296 | 21.2538284 |
| E3 | E3 | 3.062407 | 4.856296 | 1.7938892 | 28.5111542 |
| E4 | E4 | 3.401189 | 5.145185 | 1.7439965 | 23.8114957 |
| E5 | E5 | 3.423477 | 5.229630 | 1.8061527 | 28.1321438 |
| N1 | N1 | 2.585438 | 3.364444 | 0.7790061 | 9.2082776 |
| N2 | N2 | 3.200594 | 3.955556 | 0.7549612 | 9.2587359 |
| N3 | N3 | 2.783061 | 3.680000 | 0.8969391 | 10.4863674 |
| N4 | N4 | 3.194651 | 3.269630 | 0.0749788 | 0.8665417 |
| N5 | N5 | 2.649331 | 3.330370 | 0.6810390 | 7.6373698 |
| O1 | O1 | 4.326894 | 5.299259 | 0.9723648 | 16.2839271 |
| O2 | O2 | 4.031204 | 4.691852 | 0.6606483 | 7.7166981 |
| O3 | O3 | 3.790490 | 5.091852 | 1.3013615 | 20.8345645 |
| O4 | O4 | 4.679049 | 5.272593 | 0.5935436 | 9.2886876 |
| O5 | O5 | 2.563150 | 2.334815 | -0.2283353 | -3.1290125 |
ggplot(data = rslt2, aes(x = reorder(Item, t.value, max), y = t.value)) +
geom_point() +
geom_hline(yintercept = 2, linetype = "dashed") +
coord_flip() +
labs(x = "Items", y = "t-value") +
theme_bw()