#load data
data("bfi")
str(bfi)
## 'data.frame': 2800 obs. of 28 variables:
## $ A1 : int 2 2 5 4 2 6 2 4 4 2 ...
## $ A2 : int 4 4 4 4 3 6 5 3 3 5 ...
## $ A3 : int 3 5 5 6 3 5 5 1 6 6 ...
## $ A4 : int 4 2 4 5 4 6 3 5 3 6 ...
## $ A5 : int 4 5 4 5 5 5 5 1 3 5 ...
## $ C1 : int 2 5 4 4 4 6 5 3 6 6 ...
## $ C2 : int 3 4 5 4 4 6 4 2 6 5 ...
## $ C3 : int 3 4 4 3 5 6 4 4 3 6 ...
## $ C4 : int 4 3 2 5 3 1 2 2 4 2 ...
## $ C5 : int 4 4 5 5 2 3 3 4 5 1 ...
## $ E1 : int 3 1 2 5 2 2 4 3 5 2 ...
## $ E2 : int 3 1 4 3 2 1 3 6 3 2 ...
## $ E3 : int 3 6 4 4 5 6 4 4 NA 4 ...
## $ E4 : int 4 4 4 4 4 5 5 2 4 5 ...
## $ E5 : int 4 3 5 4 5 6 5 1 3 5 ...
## $ N1 : int 3 3 4 2 2 3 1 6 5 5 ...
## $ N2 : int 4 3 5 5 3 5 2 3 5 5 ...
## $ N3 : int 2 3 4 2 4 2 2 2 2 5 ...
## $ N4 : int 2 5 2 4 4 2 1 6 3 2 ...
## $ N5 : int 3 5 3 1 3 3 1 4 3 4 ...
## $ O1 : int 3 4 4 3 3 4 5 3 6 5 ...
## $ O2 : int 6 2 2 3 3 3 2 2 6 1 ...
## $ O3 : int 3 4 5 4 4 5 5 4 6 5 ...
## $ O4 : int 4 3 5 3 3 6 6 5 6 5 ...
## $ O5 : int 3 3 2 5 3 1 1 3 1 2 ...
## $ gender : int 1 2 2 2 1 2 1 1 1 2 ...
## $ education: int NA NA NA NA NA 3 NA 2 1 NA ...
## $ age : int 16 18 17 17 17 21 18 19 19 17 ...
keys <-
list(agree=c("-A1","A2","A3","A4","A5"),conscientious=c("C1","C2","C3","-C4","-C5"),
extraversion=c("-E1","-E2","E3","E4","E5"),neuroticism=c("N1","N2","N3","N4","N5"),
openness = c("O1","-O2","O3","O4","-O5"))
head(keys)
## $agree
## [1] "-A1" "A2" "A3" "A4" "A5"
##
## $conscientious
## [1] "C1" "C2" "C3" "-C4" "-C5"
##
## $extraversion
## [1] "-E1" "-E2" "E3" "E4" "E5"
##
## $neuroticism
## [1] "N1" "N2" "N3" "N4" "N5"
##
## $openness
## [1] "O1" "-O2" "O3" "O4" "-O5"
#反向題
bfid <- bfi
bfid[, c(1,9,10,11,12,22)] = 7-bfid[, c(1,9,10,11,12,22)]
head(bfid)
## A1 A2 A3 A4 A5 C1 C2 C3 C4 C5 E1 E2 E3 E4 E5 N1 N2 N3 N4 N5 O1 O2 O3 O4
## 61617 5 4 3 4 4 2 3 3 3 3 4 4 3 4 4 3 4 2 2 3 3 1 3 4
## 61618 5 4 5 2 5 5 4 4 4 3 6 6 6 4 3 3 3 3 5 5 4 5 4 3
## 61620 2 4 5 4 4 4 5 4 5 2 5 3 4 4 5 4 5 4 2 3 4 5 5 5
## 61621 3 4 6 5 5 4 4 3 2 2 2 4 4 4 4 2 5 2 4 1 3 4 4 3
## 61622 5 3 3 4 5 4 4 5 4 5 5 5 5 4 5 2 3 4 4 3 3 4 4 3
## 61623 1 6 5 6 5 6 6 6 6 4 5 6 6 5 6 3 5 2 2 3 4 4 5 6
## O5 gender education age
## 61617 3 1 NA 16
## 61618 3 2 NA 18
## 61620 2 2 NA 17
## 61621 5 2 NA 17
## 61622 3 1 NA 17
## 61623 1 2 3 21
summary(bfid)
## A1 A2 A3 A4 A5
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.0 Min. :1.00
## 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:4.0 1st Qu.:4.00
## Median :5.000 Median :5.000 Median :5.000 Median :5.0 Median :5.00
## Mean :4.587 Mean :4.802 Mean :4.604 Mean :4.7 Mean :4.56
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.0 3rd Qu.:5.00
## Max. :6.000 Max. :6.000 Max. :6.000 Max. :6.0 Max. :6.00
## NA's :16 NA's :27 NA's :26 NA's :19 NA's :16
## C1 C2 C3 C4 C5
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:4.00 1st Qu.:4.000 1st Qu.:3.000 1st Qu.:2.000
## Median :5.000 Median :5.00 Median :5.000 Median :5.000 Median :4.000
## Mean :4.502 Mean :4.37 Mean :4.304 Mean :4.447 Mean :3.703
## 3rd Qu.:5.000 3rd Qu.:5.00 3rd Qu.:5.000 3rd Qu.:6.000 3rd Qu.:5.000
## Max. :6.000 Max. :6.00 Max. :6.000 Max. :6.000 Max. :6.000
## NA's :21 NA's :24 NA's :20 NA's :26 NA's :16
## E1 E2 E3 E4
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:4.000
## Median :4.000 Median :4.000 Median :4.000 Median :5.000
## Mean :4.026 Mean :3.858 Mean :4.001 Mean :4.422
## 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:6.000
## Max. :6.000 Max. :6.000 Max. :6.000 Max. :6.000
## NA's :23 NA's :16 NA's :25 NA's :9
## E5 N1 N2 N3
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :5.000 Median :3.000 Median :4.000 Median :3.000
## Mean :4.416 Mean :2.929 Mean :3.508 Mean :3.217
## 3rd Qu.:5.000 3rd Qu.:4.000 3rd Qu.:5.000 3rd Qu.:4.000
## Max. :6.000 Max. :6.000 Max. :6.000 Max. :6.000
## NA's :21 NA's :22 NA's :21 NA's :11
## N4 N5 O1 O2 O3
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.00 1st Qu.:4.000 1st Qu.:3.000 1st Qu.:4.000
## Median :3.000 Median :3.00 Median :5.000 Median :5.000 Median :5.000
## Mean :3.186 Mean :2.97 Mean :4.816 Mean :4.287 Mean :4.438
## 3rd Qu.:4.000 3rd Qu.:4.00 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:5.000
## Max. :6.000 Max. :6.00 Max. :6.000 Max. :6.000 Max. :6.000
## NA's :36 NA's :29 NA's :22 NA's :28
## O4 O5 gender education age
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.00 Min. : 3.00
## 1st Qu.:4.000 1st Qu.:1.00 1st Qu.:1.000 1st Qu.:3.00 1st Qu.:20.00
## Median :5.000 Median :2.00 Median :2.000 Median :3.00 Median :26.00
## Mean :4.892 Mean :2.49 Mean :1.672 Mean :3.19 Mean :28.78
## 3rd Qu.:6.000 3rd Qu.:3.00 3rd Qu.:2.000 3rd Qu.:4.00 3rd Qu.:35.00
## Max. :6.000 Max. :6.00 Max. :2.000 Max. :5.00 Max. :86.00
## NA's :14 NA's :20 NA's :223
#simply check whether there is any outlier
#take out only the construct items
bfidta <- bfid[, 1:25]
xr_summary <- function(x){
require(moments)
funs <- c(mean, sd, skewness, kurtosis)
sapply(funs, function(f) f(x, na.rm = T))
}
bfidta_des <- apply(bfidta, 2, xr_summary)
rownames(bfidta_des) <- c("mean", "sd", "skewness", "kurtosis")
bfidta_des
## A1 A2 A3 A4 A5 C1
## mean 4.5865661 4.802380 4.603821 4.699748 4.560345 4.5023390
## sd 1.4077372 1.172020 1.301834 1.479633 1.258512 1.2413465
## skewness -0.8254883 -1.124894 -0.998997 -1.031499 -0.847690 -0.8551631
## kurtosis 2.6942957 4.057765 3.444524 3.042640 3.161176 3.3068088
## C2 C3 C4 C5 E1 E2
## mean 4.3699568 4.3039568 4.4466474 3.70330460 4.0255672 3.8581178
## sd 1.3183465 1.2885518 1.3751181 1.62854187 1.6315055 1.6052103
## skewness -0.7422207 -0.6918287 -0.5964955 -0.06620282 -0.3736569 -0.2209396
## kurtosis 2.8656243 2.8697332 2.3802970 1.78461246 1.9090390 1.8526925
## E3 E4 E5 N1 N2 N3
## mean 4.0007207 4.4224292 4.416337 2.9290857 3.50773660 3.2165651
## sd 1.3527188 1.4575174 1.334768 1.5709175 1.52594359 1.6029021
## skewness -0.4706335 -0.8241831 -0.777486 0.3714298 -0.07698521 0.1506797
## kurtosis 2.5367154 2.6977079 2.908401 1.9885722 1.95035250 1.8227046
## N4 N5 O1 O2 O3 O4
## mean 3.1856006 2.9696860 4.8160547 4.286786 4.4383117 4.892319
## sd 1.5696851 1.6186474 1.1295303 1.565152 1.2209011 1.221250
## skewness 0.1969966 0.3744599 -0.8973669 -0.585679 -0.7730516 -1.218247
## kurtosis 1.9090309 1.9401121 3.4277033 2.188889 3.3043641 4.082686
## O5
## mean 2.4895683
## sd 1.3279590
## skewness 0.7384818
## kurtosis 2.7630094
rslt1 <- as.data.frame(t(bfidta_des))
rslt1 |> knitr::kable()
| mean | sd | skewness | kurtosis | |
|---|---|---|---|---|
| A1 | 4.586566 | 1.407737 | -0.8254883 | 2.694296 |
| A2 | 4.802380 | 1.172020 | -1.1248938 | 4.057765 |
| A3 | 4.603821 | 1.301834 | -0.9989970 | 3.444524 |
| A4 | 4.699748 | 1.479633 | -1.0314991 | 3.042640 |
| A5 | 4.560345 | 1.258512 | -0.8476900 | 3.161176 |
| C1 | 4.502339 | 1.241346 | -0.8551631 | 3.306809 |
| C2 | 4.369957 | 1.318347 | -0.7422207 | 2.865624 |
| C3 | 4.303957 | 1.288552 | -0.6918287 | 2.869733 |
| C4 | 4.446647 | 1.375118 | -0.5964955 | 2.380297 |
| C5 | 3.703305 | 1.628542 | -0.0662028 | 1.784612 |
| E1 | 4.025567 | 1.631506 | -0.3736569 | 1.909039 |
| E2 | 3.858118 | 1.605210 | -0.2209396 | 1.852693 |
| E3 | 4.000721 | 1.352719 | -0.4706335 | 2.536715 |
| E4 | 4.422429 | 1.457517 | -0.8241831 | 2.697708 |
| E5 | 4.416337 | 1.334768 | -0.7774860 | 2.908401 |
| N1 | 2.929086 | 1.570917 | 0.3714298 | 1.988572 |
| N2 | 3.507737 | 1.525944 | -0.0769852 | 1.950352 |
| N3 | 3.216565 | 1.602902 | 0.1506797 | 1.822705 |
| N4 | 3.185601 | 1.569685 | 0.1969966 | 1.909031 |
| N5 | 2.969686 | 1.618647 | 0.3744599 | 1.940112 |
| O1 | 4.816055 | 1.129530 | -0.8973669 | 3.427703 |
| O2 | 4.286786 | 1.565152 | -0.5856790 | 2.188889 |
| O3 | 4.438312 | 1.220901 | -0.7730516 | 3.304364 |
| O4 | 4.892319 | 1.221250 | -1.2182471 | 4.082686 |
| O5 | 2.489568 | 1.327959 | 0.7384818 | 2.763009 |
#melt from reshape package makes data frame a longer data
bfidtal_des <- melt(bfidta_des)
names(bfidtal_des)[1:2] <-c("moments", "items")
head(bfidtal_des)
## moments items value
## 1 mean A1 4.5865661
## 2 sd A1 1.4077372
## 3 skewness A1 -0.8254883
## 4 kurtosis A1 2.6942957
## 5 mean A2 4.8023801
## 6 sd A2 1.1720199
#only choose mean inside moments of the data
ggplot(data = subset(bfidtal_des, moments == "mean"),
aes(x = reorder(items, value, max), y = value, group = moments)) +
geom_point(size = 3) +
geom_hline(yintercept = mean(t(bfidta_des["mean",])) +
c(-1.5, 0, 1.5) * sd(t(bfidta_des["mean", ])), linetype = "dashed") +
coord_flip() +
labs(x = "items", y = "mean") +
theme_bw()
ggplot(data = subset(bfidtal_des, moments == "sd"),
aes(x = reorder(items, value, max), y = value, group = moments)) +
geom_point(size = 2) +
geom_hline(yintercept = mean(t(bfidta_des["sd",])) +
c(-1.5, 0, 1.5) * sd(t(bfidta_des["sd", ])), linetype = "dashed") +
coord_flip() +
labs(x = "items", y = "sd") +
theme_bw()
ggplot(data = subset(bfidtal_des, moments == "sd"),
aes(x = reorder(items, value, max), y = value, group = moments)) +
geom_point(size = 3) +
geom_hline(yintercept = mean(t(bfidta_des["sd",])) +
c(-1.5, 0, 1.5) * sd(t(bfidta_des["sd", ])), linetype = "dashed") +
coord_flip() +
labs(x = "items", y = "sd") +
theme_bw()
ggplot(data = subset(bfidtal_des, moments == "kurtosis"),
aes(x = reorder(items, value, max), y = value, group = moments)) +
geom_point(size = 3) +
geom_hline(yintercept = mean(t(bfidta_des["kurtosis",])) +
c(-1.5, 0, 1.5) * sd(t(bfidta_des["kurtosis", ])), linetype = "dashed") +
coord_flip() +
labs(x = "items", y = "kurtosis") +
theme_bw()
Kurtosis of A2 and O4 is more centralized
rm.bfidta$tot <- apply(rm.bfidta, 1 , sum)
rm.bfidta$grp <- NA
rm.bfidta$grp[rank(rm.bfidta$tot) < 301*.27] <- "L"
rm.bfidta$grp[rank(rm.bfidta$tot) > 301*.73] <- "H"
rm.bfidta$grp <- factor(rm.bfidta$grp)
#count the mean of High and Low groups
rm.bfidtam <- aggregate(rm.bfidta, by=list(rm.bfidta$grp), mean)
## Warning in mean.default(X[[i]], ...): 引數不是數值也不是邏輯值:回覆 NA
## Warning in mean.default(X[[i]], ...): 引數不是數值也不是邏輯值:回覆 NA
print(rm.bfidtam)
## Group.1 A1 A2 A3 A4 A5 C1 C2
## 1 H 4.660642 4.928152 4.76638 4.838681 4.678717 4.625847 4.487573
## 2 L 3.850000 2.787500 2.36250 2.662500 2.750000 3.187500 2.862500
## C3 C4 C5 E1 E2 E3 E4 E5
## 1 4.388613 4.542702 3.799819 4.161771 3.988703 4.121554 4.587438 4.549028
## 2 3.162500 3.387500 2.537500 2.325000 1.987500 2.000000 2.125000 2.262500
## N1 N2 N3 N4 N5 O1 O2 O3
## 1 3.002259 3.562585 3.281518 3.186624 3.028468 4.881609 4.351107 4.55897
## 2 2.275000 2.912500 2.275000 3.337500 2.175000 3.800000 4.100000 2.95000
## O4 O5 tot grp
## 1 4.945775 2.456846 104.3814 NA
## 2 4.850000 2.462500 71.3875 NA
#delete columm 1
rm.bfidtame <- t(rm.bfidtam[, -1])
rm.bfidtame
## [,1] [,2]
## A1 4.660642 3.8500
## A2 4.928152 2.7875
## A3 4.766380 2.3625
## A4 4.838681 2.6625
## A5 4.678717 2.7500
## C1 4.625847 3.1875
## C2 4.487573 2.8625
## C3 4.388613 3.1625
## C4 4.542702 3.3875
## C5 3.799819 2.5375
## E1 4.161771 2.3250
## E2 3.988703 1.9875
## E3 4.121554 2.0000
## E4 4.587438 2.1250
## E5 4.549028 2.2625
## N1 3.002259 2.2750
## N2 3.562585 2.9125
## N3 3.281518 2.2750
## N4 3.186624 3.3375
## N5 3.028468 2.1750
## O1 4.881609 3.8000
## O2 4.351107 4.1000
## O3 4.558970 2.9500
## O4 4.945775 4.8500
## O5 2.456846 2.4625
## tot 104.381383 71.3875
## grp NA NA
item_t <- sapply(rm.bfidta[,1:25], function(x) t.test(x ~ rm.bfidta$grp)$statistic)
print(item_t)
## A1.t A2.t A3.t A4.t A5.t C1.t
## 4.47219109 13.77314182 15.37932397 12.98970378 11.93670271 8.47491628
## C2.t C3.t C4.t C5.t E1.t E2.t
## 8.75193200 6.87082229 6.88104138 7.25616161 9.53340031 12.82266313
## E3.t E4.t E5.t N1.t N2.t N3.t
## 15.74856522 17.30307699 16.20374213 4.39490628 3.97403001 6.55019502
## N4.t N5.t O1.t O2.t O3.t O4.t
## -0.77605870 5.27961321 5.93331126 1.23353410 8.88295022 0.64780939
## O5.t
## -0.03500555