#############independent samples
# Independent-samples data: 24 grades, the first 12 labelled "Girl" and the
# last 12 labelled "Boy". Sex is stored as a factor (levels sort to Boy, Girl).
dat <- data.frame(
  Sex = factor(rep(c("Girl", "Boy"), each = 12)),
  Grade = c(
    19, 18, 9, 17, 8, 7, 16, 19, 20, 9, 11, 18, # girls
    16, 5, 15, 2, 14, 15, 4, 7, 15, 6, 7, 14    # boys
  )
)
dat
##     Sex Grade
## 1  Girl    19
## 2  Girl    18
## 3  Girl     9
## 4  Girl    17
## 5  Girl     8
## 6  Girl     7
## 7  Girl    16
## 8  Girl    19
## 9  Girl    20
## 10 Girl     9
## 11 Girl    11
## 12 Girl    18
## 13  Boy    16
## 14  Boy     5
## 15  Boy    15
## 16  Boy     2
## 17  Boy    14
## 18  Boy    15
## 19  Boy     4
## 20  Boy     7
## 21  Boy    15
## 22  Boy     6
## 23  Boy     7
## 24  Boy    14
# Group sizes per sex.
table(dat$Sex)
## 
##  Boy Girl 
##   12   12
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
# Boxplot of Grade for each level of Sex.
ggplot(dat, aes(x = Sex, y = Grade)) +
  geom_boxplot(fill = "#0c4c8a") +
  theme_minimal()

# Histogram of the girls' grades.
hist(dat$Grade[dat$Sex == "Girl"], main = "Grades for girls", xlab = "Grades")

# Histogram of the boys' grades.
hist(dat$Grade[dat$Sex == "Boy"], main = "Grades for boys", xlab = "Grades")

# Shapiro-Wilk normality test on the grades of the "Girl" group only.
shapiro.test(subset(dat, Sex == "Girl")$Grade)
## 
##  Shapiro-Wilk normality test
## 
## data:  subset(dat, Sex == "Girl")$Grade
## W = 0.84548, p-value = 0.0323
# Same test on the grades of the "Boy" group.
shapiro.test(subset(dat, Sex == "Boy")$Grade)
## 
##  Shapiro-Wilk normality test
## 
## data:  subset(dat, Sex == "Boy")$Grade
## W = 0.84313, p-value = 0.03023
### Both p-values are below 0.05, so the Shapiro-Wilk test rejects normality
### for each group at the 5% level; the groups are therefore compared with a
### rank-based (Wilcoxon) test below.
# Two-sided Wilcoxon rank-sum (Mann-Whitney) test of Grade between the sexes.
# `exact = FALSE` requests the normal approximation explicitly: the grades
# contain ties, and leaving `exact` unset only adds a "cannot compute exact
# p-value with ties" warning while reporting the same approximate p-value.
test <- wilcox.test(Grade ~ Sex, data = dat, exact = FALSE)
test
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Grade by Sex
## W = 31.5, p-value = 0.02056
## alternative hypothesis: true location shift is not equal to 0

# One-sided test. With a formula, the first factor level is compared against
# the second, so the direction of "less" depends on the level order of Sex.
# Guard that order: with levels c("Boy", "Girl"), "less" tests whether boys'
# grades are shifted below girls' grades.
stopifnot(identical(levels(dat$Sex), c("Boy", "Girl")))
test <- wilcox.test(Grade ~ Sex,
                    data = dat,
                    alternative = "less",
                    exact = FALSE
)
test
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Grade by Sex
## W = 31.5, p-value = 0.01028
## alternative hypothesis: true location shift is less than 0
# Conclusion: both p-values are below 0.05. The grades of girls and boys
# differ significantly, and the one-sided test indicates that boys' grades
# are significantly lower than girls' grades.

########non-independent samples
# Paired (non-independent) data in wide format: each row pairs a Beginning
# grade with an End grade.
dat <- data.frame(
  Beginning = c(16, 5, 15, 2, 14, 15, 4, 7, 15, 6, 7, 14),
  End = c(19, 18, 9, 17, 8, 7, 16, 19, 20, 9, 11, 18)
)
# Reshape to long format: one row per grade, labelled "Before" (Beginning
# column) or "After" (End column), keeping the original row order within
# each label.
dat2 <- data.frame(
  Time = rep(c("Before", "After"), each = nrow(dat)),
  Grade = unlist(dat, use.names = FALSE)
)
dat2
##      Time Grade
## 1  Before    16
## 2  Before     5
## 3  Before    15
## 4  Before     2
## 5  Before    14
## 6  Before    15
## 7  Before     4
## 8  Before     7
## 9  Before    15
## 10 Before     6
## 11 Before     7
## 12 Before    14
## 13  After    19
## 14  After    18
## 15  After     9
## 16  After    17
## 17  After     8
## 18  After     7
## 19  After    16
## 20  After    19
## 21  After    20
## 22  After     9
## 23  After    11
## 24  After    18
# Make Time a factor with "Before" as the first level, so it is placed ahead
# of "After" instead of following alphabetical order.
dat2[["Time"]] <- factor(dat2[["Time"]], levels = c("Before", "After"))
# Boxplot of Grade at each time point.
ggplot(dat2, aes(x = Time, y = Grade)) +
  geom_boxplot(fill = "#0c4c8a") +
  theme_minimal()

# Wilcoxon signed-rank test on the paired grades.
# The two measurements are passed as separate vectors from the wide data
# frame `dat`, so the pairing is explicit: element i of Beginning is paired
# with element i of End. The formula interface (`Grade ~ Time` on the long
# data) is not used here because it would pair observations only by row
# order and does not accept `paired = TRUE` in R >= 4.4.0.
# `exact = FALSE` requests the normal approximation explicitly, since the
# paired differences contain ties and no exact p-value is available.
test <- wilcox.test(dat$Beginning, dat$End,
                    paired = TRUE,
                    exact = FALSE
)
test
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  dat$Beginning and dat$End
## V = 21, p-value = 0.1692
## alternative hypothesis: true location shift is not equal to 0
# Conclusion: the p-value is above 0.05, so the null hypothesis of no
# location shift between the Beginning and End grades is not rejected.
#ref https://www.jianshu.com/p/610dec2cb55e