This may take for a few minutes.
if(!require(rstatix)){install.packages("rstatix")}
if(!require(dplyr)){install.packages("dplyr")}
if(!require(lsr)){install.packages("lsr")}
if(!require(ggplot2)){install.packages("ggplot2")}
if(!require(coin)){install.packages("coin")}
library(readr) # load csv file
library(dplyr) # data_frame, %>%, filter, summarise, group_by
library(effectsize)
library(tidyverse) # for data manipulation and visualization
library(rstatix) # provides pipe-friendly R functions for easy statistical analyses.
library(ggplot2) # ggplot, stat_…, geom_…, etc
We can use google spreadsheet to preprocess raw data first, and then load the csv file directly from our computer.
NOTE: Data preprocessing includes checking the reversed items in the survey, calculating averaged score for a specific concept/dimension, removing invalid data.
Let’s use the example data I created here first while making sense of the data analysis process. You can find how to load csv file in code cell below.
#== create a dummy dataset as an example
mydata <- data.frame(score = c(45 ,85, 66, 78, 92, 94, 91, 85, 62, 60,
40, 56, 70, 80, 90, 88, 95, 90, 45, 55,
84, 88, 88, 90, 92, 93, 91, 85, 80, 73,
97, 100, 93, 91, 90, 87, 94, 83, 92, 93,
45 ,85, 66, 78, 92, 94, 91, 85, 62, 60,
97, 100, 93, 91, 90, 87, 94, 83, 92, 93),
condition = c(rep('condition1', 20), rep('condition2', 20), rep('condition3', 20)))
##== view our data
#mydata
### Load csv file ###
#== If you'd like to load data from your PC, use the following code.
#== put the csv in the same folder with your Rmd file, otherwise, the file path needs to be changed accordingly.
#mydata <- read_csv("myFileName.csv")
#View(mydata)
### Convert numeric variable to factor variable (skip this when the independent variable has already converted) ###
mydata$condition <- as.factor(mydata$condition)
### show descriptive stat ###
#summary(mydata)
#==find sample size, mean, and standard deviation for each group
mydata %>%
group_by(condition) %>%
summarise(
count = n(),
mean = mean(score),
sd = sd(score)
)
## # A tibble: 3 × 4
## condition count mean sd
## <fct> <int> <dbl> <dbl>
## 1 condition1 20 73.4 18.3
## 2 condition2 20 89.2 6.09
## 3 condition3 20 83.9 14.5
3.1 Normality check
3.2 1-way ANOVA or Kruskal–Wallis H test
3.3 Post-hoc comparison and effect size
#### 3.1 perform shapiro-wilk test
normality_check <- shapiro.test(mydata$score)
if (normality_check$p.value > 0.05){
print("The data comes from a population that is normally distributed. Please check the result of one-way ANOVA test.")
} else {
print("The data is not normally distribued. Please check the result of Kruskal–Wallis H test.")
}
## [1] "The data is not normally distribued. Please check the result of Kruskal–Wallis H test."
### 3.2 1-way ANOVA ###
anova_result <- aov(score ~ condition, data = mydata)
summary(anova_result)
## Df Sum Sq Mean Sq F value Pr(>F)
## condition 2 2604 1302.0 6.688 0.00246 **
## Residuals 57 11098 194.7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### 3.3 ANOVA result with effect size ###
eta_squared(anova_result)
## For one-way between subjects designs, partial eta squared is equivalent to eta squared.
## Returning eta squared.
## # Effect Size for ANOVA
##
## Parameter | Eta2 | 95% CI
## -------------------------------
## condition | 0.19 | [0.05, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
### 3.3 Post-hoc comparison ###
#perform the Bonferroni post-hoc method
pairwise.t.test(mydata$score, mydata$condition, p.adj='bonferroni')
##
## Pairwise comparisons using t tests with pooled SD
##
## data: mydata$score and mydata$condition
##
## condition1 condition2
## condition2 0.0021 -
## condition3 0.0604 0.7040
##
## P value adjustment method: bonferroni
### 3.2 Kruskal-Wallis Test ###
kruskal.test(score ~ condition, data = mydata)
##
## Kruskal-Wallis rank sum test
##
## data: score by condition
## Kruskal-Wallis chi-squared = 8.3215, df = 2, p-value = 0.0156
### 3.3 Kruskal-Wallis Test with effect size ###
kruskal_effsize(score ~ condition, data = mydata)
## # A tibble: 1 × 5
## .y. n effsize method magnitude
## * <chr> <int> <dbl> <chr> <ord>
## 1 score 60 0.111 eta2[H] moderate
### 3.3 Post-hoc comparison for Kruskal-Wallis Test ###
pairwise.wilcox.test(mydata$score, mydata$condition, p.adjust.method = "BH") # change different p-value corrections: p.adjust.method = "bonferroni"
## Warning in wilcox.test.default(xi, xj, paired = paired, ...): cannot compute
## exact p-value with ties
## Warning in wilcox.test.default(xi, xj, paired = paired, ...): cannot compute
## exact p-value with ties
## Warning in wilcox.test.default(xi, xj, paired = paired, ...): cannot compute
## exact p-value with ties
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: mydata$score and mydata$condition
##
## condition1 condition2
## condition2 0.015 -
## condition3 0.065 0.578
##
## P value adjustment method: BH
### Boxplot ###
ggplot(mydata, aes(x = condition, y = score)) +
geom_boxplot(width=0.3) +
stat_summary(fun = mean, geom = "point", col = "black") + # Add points to plot
stat_summary(fun = mean, geom = "text", col = "black", size = 2.7, # Add text to plot
vjust = 3, aes(label = paste("Mean:", round(..y.., digits = 2)))) +
xlab("Conditions")+
ylab("Score")+
#theme(legend.position="top")+ # position of the legend
#scale_fill_brewer(palette="Dark2") # set color codes
ylim(0, 100)
#ggsave("yourFileName.png", dpi=300)
### Bar chart
barchart_fig <- ggplot(mydata, aes(x = condition, y = score)) +
geom_bar(position = "dodge",
stat = "summary",
fun = "mean",
width=0.3,
color="black",
fill="white")+
stat_summary(fun = mean, geom = "text", col = "black", size = 3, # Add text to plot
vjust = -0.5, aes(label = paste("Mean:", round(..y.., digits = 2)))) +
xlab("Conditions")+
ylab("Score")
barchart_fig