data("bfi")
#將bfi中的負向題分數轉為正向題計分
bfi[,c(1,9, 10, 11, 12, 22)] = 7 - bfi[,c(1,9, 10, 11, 12, 22)]
dta <- bfi
#查看資料結構
str(dta)
## 'data.frame':    2800 obs. of  28 variables:
##  $ A1       : num  5 5 2 3 5 1 5 3 3 5 ...
##  $ A2       : int  4 4 4 4 3 6 5 3 3 5 ...
##  $ A3       : int  3 5 5 6 3 5 5 1 6 6 ...
##  $ A4       : int  4 2 4 5 4 6 3 5 3 6 ...
##  $ A5       : int  4 5 4 5 5 5 5 1 3 5 ...
##  $ C1       : int  2 5 4 4 4 6 5 3 6 6 ...
##  $ C2       : int  3 4 5 4 4 6 4 2 6 5 ...
##  $ C3       : int  3 4 4 3 5 6 4 4 3 6 ...
##  $ C4       : num  3 4 5 2 4 6 5 5 3 5 ...
##  $ C5       : num  3 3 2 2 5 4 4 3 2 6 ...
##  $ E1       : num  4 6 5 2 5 5 3 4 2 5 ...
##  $ E2       : num  4 6 3 4 5 6 4 1 4 5 ...
##  $ E3       : int  3 6 4 4 5 6 4 4 NA 4 ...
##  $ E4       : int  4 4 4 4 4 5 5 2 4 5 ...
##  $ E5       : int  4 3 5 4 5 6 5 1 3 5 ...
##  $ N1       : int  3 3 4 2 2 3 1 6 5 5 ...
##  $ N2       : int  4 3 5 5 3 5 2 3 5 5 ...
##  $ N3       : int  2 3 4 2 4 2 2 2 2 5 ...
##  $ N4       : int  2 5 2 4 4 2 1 6 3 2 ...
##  $ N5       : int  3 5 3 1 3 3 1 4 3 4 ...
##  $ O1       : int  3 4 4 3 3 4 5 3 6 5 ...
##  $ O2       : num  1 5 5 4 4 4 5 5 1 6 ...
##  $ O3       : int  3 4 5 4 4 5 5 4 6 5 ...
##  $ O4       : int  4 3 5 3 3 6 6 5 6 5 ...
##  $ O5       : int  3 3 2 5 3 1 1 3 1 2 ...
##  $ gender   : int  1 2 2 2 1 2 1 1 1 2 ...
##  $ education: int  NA NA NA NA NA 3 NA 2 1 NA ...
##  $ age      : int  16 18 17 17 17 21 18 19 19 17 ...

28個變項,總共2800筆資料點

#摘要bfi
summary(dta)
##        A1              A2              A3              A4            A5      
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.0   Min.   :1.00  
##  1st Qu.:4.000   1st Qu.:4.000   1st Qu.:4.000   1st Qu.:4.0   1st Qu.:4.00  
##  Median :5.000   Median :5.000   Median :5.000   Median :5.0   Median :5.00  
##  Mean   :4.587   Mean   :4.802   Mean   :4.604   Mean   :4.7   Mean   :4.56  
##  3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.0   3rd Qu.:5.00  
##  Max.   :6.000   Max.   :6.000   Max.   :6.000   Max.   :6.0   Max.   :6.00  
##  NA's   :16      NA's   :27      NA's   :26      NA's   :19    NA's   :16    
##        C1              C2             C3              C4              C5       
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:4.00   1st Qu.:4.000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :5.000   Median :5.00   Median :5.000   Median :5.000   Median :4.000  
##  Mean   :4.502   Mean   :4.37   Mean   :4.304   Mean   :4.447   Mean   :3.703  
##  3rd Qu.:5.000   3rd Qu.:5.00   3rd Qu.:5.000   3rd Qu.:6.000   3rd Qu.:5.000  
##  Max.   :6.000   Max.   :6.00   Max.   :6.000   Max.   :6.000   Max.   :6.000  
##  NA's   :21      NA's   :24     NA's   :20      NA's   :26      NA's   :16     
##        E1              E2              E3              E4       
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:3.000   1st Qu.:3.000   1st Qu.:4.000  
##  Median :4.000   Median :4.000   Median :4.000   Median :5.000  
##  Mean   :4.026   Mean   :3.858   Mean   :4.001   Mean   :4.422  
##  3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:6.000  
##  Max.   :6.000   Max.   :6.000   Max.   :6.000   Max.   :6.000  
##  NA's   :23      NA's   :16      NA's   :25      NA's   :9      
##        E5              N1              N2              N3       
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median :5.000   Median :3.000   Median :4.000   Median :3.000  
##  Mean   :4.416   Mean   :2.929   Mean   :3.508   Mean   :3.217  
##  3rd Qu.:5.000   3rd Qu.:4.000   3rd Qu.:5.000   3rd Qu.:4.000  
##  Max.   :6.000   Max.   :6.000   Max.   :6.000   Max.   :6.000  
##  NA's   :21      NA's   :22      NA's   :21      NA's   :11     
##        N4              N5             O1              O2              O3       
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:2.00   1st Qu.:4.000   1st Qu.:3.000   1st Qu.:4.000  
##  Median :3.000   Median :3.00   Median :5.000   Median :5.000   Median :5.000  
##  Mean   :3.186   Mean   :2.97   Mean   :4.816   Mean   :4.287   Mean   :4.438  
##  3rd Qu.:4.000   3rd Qu.:4.00   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:5.000  
##  Max.   :6.000   Max.   :6.00   Max.   :6.000   Max.   :6.000   Max.   :6.000  
##  NA's   :36      NA's   :29     NA's   :22                      NA's   :28     
##        O4              O5           gender        education         age       
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.00   Min.   : 3.00  
##  1st Qu.:4.000   1st Qu.:1.00   1st Qu.:1.000   1st Qu.:3.00   1st Qu.:20.00  
##  Median :5.000   Median :2.00   Median :2.000   Median :3.00   Median :26.00  
##  Mean   :4.892   Mean   :2.49   Mean   :1.672   Mean   :3.19   Mean   :28.78  
##  3rd Qu.:6.000   3rd Qu.:3.00   3rd Qu.:2.000   3rd Qu.:4.00   3rd Qu.:35.00  
##  Max.   :6.000   Max.   :6.00   Max.   :2.000   Max.   :5.00   Max.   :86.00  
##  NA's   :14      NA's   :20                     NA's   :223
#查看前6筆資料
head(dta)
##       A1 A2 A3 A4 A5 C1 C2 C3 C4 C5 E1 E2 E3 E4 E5 N1 N2 N3 N4 N5 O1 O2 O3 O4
## 61617  5  4  3  4  4  2  3  3  3  3  4  4  3  4  4  3  4  2  2  3  3  1  3  4
## 61618  5  4  5  2  5  5  4  4  4  3  6  6  6  4  3  3  3  3  5  5  4  5  4  3
## 61620  2  4  5  4  4  4  5  4  5  2  5  3  4  4  5  4  5  4  2  3  4  5  5  5
## 61621  3  4  6  5  5  4  4  3  2  2  2  4  4  4  4  2  5  2  4  1  3  4  4  3
## 61622  5  3  3  4  5  4  4  5  4  5  5  5  5  4  5  2  3  4  4  3  3  4  4  3
## 61623  1  6  5  6  5  6  6  6  6  4  5  6  6  5  6  3  5  2  2  3  4  4  5  6
##       O5 gender education age
## 61617  3      1        NA  16
## 61618  3      2        NA  18
## 61620  2      2        NA  17
## 61621  5      2        NA  17
## 61622  3      1        NA  17
## 61623  1      2         3  21
#將原始資料中的1到25行資料取出,也就是量表中的25題,重新定義其為dta
dta <- dta[, 1:25]
my_summary <- function(x) {
 require(moments)
 funs <- c(mean, sd, skewness, kurtosis)
 sapply(funs, function(f) f(x, na.rm = T))
 }
#計算dta中每一行的加總,並將結果儲存在dta_desc中
dta_desc <- apply(dta, 2, my_summary)
#查看每筆資料的mean, sd, skewness, kurtosis
head(dta_desc)
##              A1        A2        A3        A4        A5         C1         C2
## [1,]  4.5865661  4.802380  4.603821  4.699748  4.560345  4.5023390  4.3699568
## [2,]  1.4077372  1.172020  1.301834  1.479633  1.258512  1.2413465  1.3183465
## [3,] -0.8254883 -1.124894 -0.998997 -1.031499 -0.847690 -0.8551631 -0.7422207
## [4,]  2.6942957  4.057765  3.444524  3.042640  3.161176  3.3068088  2.8656243
##              C3         C4          C5         E1         E2         E3
## [1,]  4.3039568  4.4466474  3.70330460  4.0255672  3.8581178  4.0007207
## [2,]  1.2885518  1.3751181  1.62854187  1.6315055  1.6052103  1.3527188
## [3,] -0.6918287 -0.5964955 -0.06620282 -0.3736569 -0.2209396 -0.4706335
## [4,]  2.8697332  2.3802970  1.78461246  1.9090390  1.8526925  2.5367154
##              E4        E5        N1          N2        N3        N4        N5
## [1,]  4.4224292  4.416337 2.9290857  3.50773660 3.2165651 3.1856006 2.9696860
## [2,]  1.4575174  1.334768 1.5709175  1.52594359 1.6029021 1.5696851 1.6186474
## [3,] -0.8241831 -0.777486 0.3714298 -0.07698521 0.1506797 0.1969966 0.3744599
## [4,]  2.6977079  2.908401 1.9885722  1.95035250 1.8227046 1.9090309 1.9401121
##              O1        O2         O3        O4        O5
## [1,]  4.8160547  4.286786  4.4383117  4.892319 2.4895683
## [2,]  1.1295303  1.565152  1.2209011  1.221250 1.3279590
## [3,] -0.8973669 -0.585679 -0.7730516 -1.218247 0.7384818
## [4,]  3.4277033  2.188889  3.3043641  4.082686 2.7630094
#幫每個列取名字
rownames(dta_desc) <- c("mean", "sd", "skewness", "kurtosis")
#矩陣不是很方便看,將其轉成 data.frame呈現
rslt1 <- as.data.frame(t(dta_desc))
rslt1 |> knitr::kable()
mean sd skewness kurtosis
A1 4.586566 1.407737 -0.8254883 2.694296
A2 4.802380 1.172020 -1.1248938 4.057765
A3 4.603821 1.301834 -0.9989970 3.444524
A4 4.699748 1.479633 -1.0314991 3.042640
A5 4.560345 1.258512 -0.8476900 3.161176
C1 4.502339 1.241346 -0.8551631 3.306809
C2 4.369957 1.318347 -0.7422207 2.865624
C3 4.303957 1.288552 -0.6918287 2.869733
C4 4.446647 1.375118 -0.5964955 2.380297
C5 3.703305 1.628542 -0.0662028 1.784612
E1 4.025567 1.631506 -0.3736569 1.909039
E2 3.858118 1.605210 -0.2209396 1.852693
E3 4.000721 1.352719 -0.4706335 2.536715
E4 4.422429 1.457517 -0.8241831 2.697708
E5 4.416337 1.334768 -0.7774860 2.908401
N1 2.929086 1.570917 0.3714298 1.988572
N2 3.507737 1.525944 -0.0769852 1.950352
N3 3.216565 1.602902 0.1506797 1.822705
N4 3.185601 1.569685 0.1969966 1.909031
N5 2.969686 1.618647 0.3744599 1.940112
O1 4.816055 1.129530 -0.8973669 3.427703
O2 4.286786 1.565152 -0.5856790 2.188889
O3 4.438312 1.220901 -0.7730516 3.304364
O4 4.892319 1.221250 -1.2182471 4.082686
O5 2.489568 1.327959 0.7384818 2.763009
#轉換成 longdata
dtal_desc <- melt(dta_desc)
names(dtal_desc)[1:2] <- c("moments", "items")
#查看前6筆資料
head(dtal_desc)
##    moments items      value
## 1     mean    A1  4.5865661
## 2       sd    A1  1.4077372
## 3 skewness    A1 -0.8254883
## 4 kurtosis    A1  2.6942957
## 5     mean    A2  4.8023801
## 6       sd    A2  1.1720199
#只截出 dtal_desc 這筆資料中,moments 這個項目的 mean
#reorder意思是,按照每個 item 的 value 由大到小照順序排列

ggplot(data = subset(dtal_desc, moments == "mean"),
 aes(x = reorder(items, value, max), y = value, group = moments)) +
 geom_point(size = 1) +
 geom_hline(yintercept = mean(t(dta_desc["mean",])) +
 c(-1.5, 0, 1.5) * sd(t(dta_desc["mean", ])), linetype = "dashed") +
 coord_flip() +
 labs(x = "items",  y = "mean") +
  theme_bw()

在mean當中,O5、N1、N5的值極端小於mean的平均。沒有任何值極端大於平均。

ggplot(data = subset(dtal_desc, moments == "sd"),
 aes(x = reorder(items, value, max), y = value, group = moments)) +
 geom_point(size = 1) +
 geom_hline(yintercept = mean(t(dta_desc["sd",])) +
 c(-1.5, 0, 1.5) * sd(t(dta_desc["sd", ])), linetype = "dashed") +
 coord_flip() +
 labs(x = "items",  y = "sd") +
  theme_bw()

在sd當中,O1的值極端小於sd的平均。沒有任何值極端大於sd的平均。

ggplot(data = subset(dtal_desc, moments == "skewness"),
 aes(x = reorder(items, value, max), y = value, group = moments)) +
 geom_point(size = 1) +
 geom_hline(yintercept = mean(t(dta_desc["skewness",])) +
 c(-1.5, 0, 1.5) * sd(t(dta_desc["skewness", ])), linetype = "dashed") +
 coord_flip() +
 labs(x = "items",  y = "skewness") +
  theme_bw()

在skewness當中,沒有任何值極端小於skewness的平均。O5、N5、N1極端大於skewness的平均。

ggplot(data = subset(dtal_desc, moments == "kurtosis"),
 aes(x = reorder(items, value, max), y = value, group = moments)) +
 geom_point(size = 1) +
 geom_hline(yintercept = mean(t(dta_desc["kurtosis",])) +
 c(-1.5, 0, 1.5) * sd(t(dta_desc["kurtosis", ])), linetype = "dashed") +
 coord_flip() +
 labs(x = "items",  y = "kurtosis") +
  theme_bw()

在kurtosis當中,沒有任何值極端小於kurtosis的平均。O4、A2極端大於kurtosis的平均。

#刪除所有含有缺失資料NA的row
dta1 = na.omit(dta)
dta1$tot <- apply(dta1, 1, sum)
#在 dta 中設一個欄位叫 grp,將所有數值設為 NA
dta1$grp <- NA
#只要分數小於2436*.27,就在 grp 中顯示L代表低;大於2436*.73 則顯示H代表高;其餘仍以NA顯示

dta1$grp[rank(dta1$tot) < 2436*.27] <- "L"
dta1$grp[rank(dta1$tot) > 2436*.73] <- "H"
dta1$grp <- factor(dta1$grp)
head(dta1)
##       A1 A2 A3 A4 A5 C1 C2 C3 C4 C5 E1 E2 E3 E4 E5 N1 N2 N3 N4 N5 O1 O2 O3 O4
## 61617  5  4  3  4  4  2  3  3  3  3  4  4  3  4  4  3  4  2  2  3  3  1  3  4
## 61618  5  4  5  2  5  5  4  4  4  3  6  6  6  4  3  3  3  3  5  5  4  5  4  3
## 61620  2  4  5  4  4  4  5  4  5  2  5  3  4  4  5  4  5  4  2  3  4  5  5  5
## 61621  3  4  6  5  5  4  4  3  2  2  2  4  4  4  4  2  5  2  4  1  3  4  4  3
## 61622  5  3  3  4  5  4  4  5  4  5  5  5  5  4  5  2  3  4  4  3  3  4  4  3
## 61623  1  6  5  6  5  6  6  6  6  4  5  6  6  5  6  3  5  2  2  3  4  4  5  6
##       O5 tot  grp
## 61617  3  81    L
## 61618  3 104 <NA>
## 61620  2  99 <NA>
## 61621  5  89    L
## 61622  3  99 <NA>
## 61623  1 114    H
#算高低分組平均數
dtam <- aggregate(dta1[, 1:25], by=list(dta1$grp), mean)
print(dtam)
##   Group.1       A1       A2       A3       A4       A5       C1       C2
## 1       H 5.007407 5.438519 5.364444 5.325926 5.237037 5.136296 5.102222
## 2       L 4.202080 4.017831 3.693908 3.918276 3.796434 3.897474 3.632987
##         C3       C4       C5       E1       E2       E3       E4       E5
## 1 4.885926 5.040000 4.358519 4.746667 4.617778 4.856296 5.145185 5.229630
## 2 3.742942 3.832095 3.005944 3.077266 2.921248 3.062407 3.401189 3.423477
##         N1       N2       N3       N4       N5       O1       O2       O3
## 1 3.364444 3.955556 3.680000 3.269630 3.330370 5.299259 4.691852 5.091852
## 2 2.585438 3.200594 2.783061 3.194651 2.649331 4.326895 4.031204 3.790490
##         O4       O5
## 1 5.272593 2.334815
## 2 4.679049 2.563150
#將第一欄刪除
dtam <- t(dtam[, -1])

t-test

item_t <- sapply(dta1[, 1:25], function(x) t.test(x ~ dta1$grp)$statistic)
print(item_t)
##       A1.t       A2.t       A3.t       A4.t       A5.t       C1.t       C2.t 
## 10.8435846 24.1029485 26.7156154 18.7839003 23.0699742 19.1247670 22.3832065 
##       C3.t       C4.t       C5.t       E1.t       E2.t       E3.t       E4.t 
## 17.1453481 16.9457542 16.0464821 20.4585020 21.2538284 28.5111542 23.8114957 
##       E5.t       N1.t       N2.t       N3.t       N4.t       N5.t       O1.t 
## 28.1321438  9.2082776  9.2587359 10.4863674  0.8665417  7.6373698 16.2839271 
##       O2.t       O3.t       O4.t       O5.t 
##  7.7166981 20.8345645  9.2886876 -3.1290125
#將結果儲存進rslt2中
rslt2 <- data.frame(Item = rownames(dtam), low.mean.score = dtam[, 2], 
                    high.mean.score = dtam[, 1], mean.dif = dtam[, 1]-dtam[,2], 
                    t.value = item_t)

rslt2 |> knitr::kable()
Item low.mean.score high.mean.score mean.dif t.value
A1 A1 4.202080 5.007407 0.8053272 10.8435846
A2 A2 4.017831 5.438518 1.4206879 24.1029485
A3 A3 3.693908 5.364444 1.6705366 26.7156154
A4 A4 3.918276 5.325926 1.4076496 18.7839003
A5 A5 3.796434 5.237037 1.4406032 23.0699742
C1 C1 3.897474 5.136296 1.2388223 19.1247670
C2 C2 3.632987 5.102222 1.4692356 22.3832065
C3 C3 3.742942 4.885926 1.1429839 17.1453481
C4 C4 3.832095 5.040000 1.2079049 16.9457542
C5 C5 3.005943 4.358518 1.3525750 16.0464821
E1 E1 3.077266 4.746667 1.6694007 20.4585020
E2 E2 2.921248 4.617778 1.6965296 21.2538284
E3 E3 3.062407 4.856296 1.7938892 28.5111542
E4 E4 3.401189 5.145185 1.7439965 23.8114957
E5 E5 3.423477 5.229630 1.8061527 28.1321438
N1 N1 2.585438 3.364444 0.7790061 9.2082776
N2 N2 3.200594 3.955556 0.7549612 9.2587359
N3 N3 2.783061 3.680000 0.8969391 10.4863674
N4 N4 3.194651 3.269630 0.0749788 0.8665417
N5 N5 2.649331 3.330370 0.6810390 7.6373698
O1 O1 4.326894 5.299259 0.9723648 16.2839271
O2 O2 4.031204 4.691852 0.6606483 7.7166981
O3 O3 3.790490 5.091852 1.3013615 20.8345645
O4 O4 4.679049 5.272593 0.5935436 9.2886876
O5 O5 2.563150 2.334815 -0.2283353 -3.1290125
ggplot(data = rslt2, aes(x = reorder(Item, t.value, max), y = t.value)) +
 geom_point() +
 geom_hline(yintercept = 2, linetype = "dashed") +
 coord_flip() +
 labs(x = "Items", y = "t-value") +
 theme_bw()