# Independent samples ----
# Grades for 12 girls followed by 12 boys, one row per student.
dat <- data.frame(
  Sex = factor(rep(c("Girl", "Boy"), each = 12)),
  Grade = c(
    19, 18, 9, 17, 8, 7, 16, 19, 20, 9, 11, 18, # girls
    16, 5, 15, 2, 14, 15, 4, 7, 15, 6, 7, 14 # boys
  )
)
# Show the full data set.
dat
## Sex Grade
## 1 Girl 19
## 2 Girl 18
## 3 Girl 9
## 4 Girl 17
## 5 Girl 8
## 6 Girl 7
## 7 Girl 16
## 8 Girl 19
## 9 Girl 20
## 10 Girl 9
## 11 Girl 11
## 12 Girl 18
## 13 Boy 16
## 14 Boy 5
## 15 Boy 15
## 16 Boy 2
## 17 Boy 14
## 18 Boy 15
## 19 Boy 4
## 20 Boy 7
## 21 Boy 15
## 22 Boy 6
## 23 Boy 7
## 24 Boy 14
# Group sizes: the design is balanced, with 12 students per sex.
table(dat[["Sex"]])
##
## Boy Girl
## 12 12
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
# Side-by-side boxplots of the grade distribution for each sex.
ggplot(dat, aes(x = Sex, y = Grade)) +
  geom_boxplot(fill = "#0c4c8a") +
  theme_minimal()

# Histogram of the girls' grades.
hist(
  dat$Grade[dat$Sex == "Girl"],
  xlab = "Grades",
  main = "Grades for girls"
)

# Histogram of the boys' grades.
hist(
  dat$Grade[dat$Sex == "Boy"],
  xlab = "Grades",
  main = "Grades for boys"
)

# Shapiro-Wilk normality check on the grades of each sex separately
# (null hypothesis: the sample comes from a normal distribution).
# Girls:
shapiro.test(subset(dat, Sex == "Girl")$Grade)
##
## Shapiro-Wilk normality test
##
## data: subset(dat, Sex == "Girl")$Grade
## W = 0.84548, p-value = 0.0323
# Boys:
shapiro.test(subset(dat, Sex == "Boy")$Grade)
##
## Shapiro-Wilk normality test
##
## data: subset(dat, Sex == "Boy")$Grade
## W = 0.84313, p-value = 0.03023
# Both p-values are below 0.05, so normality is rejected for girls and boys
# alike; the group comparison that follows therefore uses a rank-based
# (Wilcoxon) test.
# Wilcoxon rank-sum (Mann-Whitney) test comparing the grades of the two sexes.
# `exact = FALSE` requests the normal approximation explicitly: the grades
# contain ties, so an exact p-value cannot be computed and R would otherwise
# fall back to the same approximation while emitting a warning. W and the
# p-value are unchanged.
test <- wilcox.test(Grade ~ Sex, data = dat, exact = FALSE)
test
##
## Wilcoxon rank sum test with continuity correction
##
## data: Grade by Sex
## W = 31.5, p-value = 0.02056
## alternative hypothesis: true location shift is not equal to 0

# One-sided version. The levels of Sex are ordered alphabetically
# ("Boy", "Girl"), so `alternative = "less"` tests whether boys' grades are
# shifted below girls' grades.
test <- wilcox.test(Grade ~ Sex,
  data = dat,
  alternative = "less",
  exact = FALSE
)
test
##
## Wilcoxon rank sum test with continuity correction
##
## data: Grade by Sex
## W = 31.5, p-value = 0.01028
## alternative hypothesis: true location shift is less than 0
# Conclusion: at the 5% level, boys' grades are significantly lower than
# girls' grades (one-sided p-value = 0.01028).
# Non-independent (paired) samples ----
# Wide layout: one row per pair, with the grade at the beginning and at the
# end in separate columns.
dat <- data.frame(
  Beginning = c(16, 5, 15, 2, 14, 15, 4, 7, 15, 6, 7, 14),
  End = c(19, 18, 9, 17, 8, 7, 16, 19, 20, 9, 11, 18)
)
# Reshape to long format: all "Before" grades first, then all "After"
# grades, keeping the original row order inside each group.
dat2 <- data.frame(
  Time = rep(c("Before", "After"), each = 12),
  Grade = c(dat[["Beginning"]], dat[["End"]])
)
# Show the long-format data.
dat2
## Time Grade
## 1 Before 16
## 2 Before 5
## 3 Before 15
## 4 Before 2
## 5 Before 14
## 6 Before 15
## 7 Before 4
## 8 Before 7
## 9 Before 15
## 10 Before 6
## 11 Before 7
## 12 Before 14
## 13 After 19
## 14 After 18
## 15 After 9
## 16 After 17
## 17 After 8
## 18 After 7
## 19 After 16
## 20 After 19
## 21 After 20
## 22 After 9
## 23 After 11
## 24 After 18
# Fix the level order of Time so that "Before" comes ahead of "After"
# rather than following the default alphabetical order.
dat2[["Time"]] <- factor(dat2[["Time"]], levels = c("Before", "After"))
# Boxplots of the grades at each time point.
ggplot(dat2, aes(x = Time, y = Grade)) +
  geom_boxplot(fill = "#0c4c8a") +
  theme_minimal()

# Wilcoxon signed-rank test for the paired Beginning/End grades.
# The two paired vectors are passed directly instead of through a formula:
# the formula interface only pairs observations implicitly, by their order
# within each group, and recent R releases (4.4.0 onwards) reject
# `paired = TRUE` in the formula method with an error.
# `exact = FALSE` requests the normal approximation explicitly because the
# paired differences contain ties, so no exact p-value can be computed.
test <- wilcox.test(dat$Beginning, dat$End,
  paired = TRUE,
  exact = FALSE
)
test
##
## Wilcoxon signed rank test with continuity correction
##
## data: dat$Beginning and dat$End
## V = 21, p-value = 0.1692
## alternative hypothesis: true location shift is not equal to 0
# Conclusion: the p-value (0.1692) is above 0.05, so there is no significant
# difference between the grades at the beginning and at the end.
#ref https://www.jianshu.com/p/610dec2cb55e