library(dplyr)
library(ggplot2)
library(ggpubr)
library(purrr)
library(car)
library(reshape)
library(ggforce)
library(knitr)
library(PairedData)
knitr::opts_knit$set(root.dir = ".")
a = Selecting a suitable topic area for study b = Developing a logical rationale for a particular research idea c = Articulating a clear purpose for the research
scholar_evulations <- read.delim("scholar_eval_2022.txt")
print(scholar_evulations)
## ID group a b c
## 1 1 before 0 0 0
## 2 2 before 3 2 3
## 3 3 before 2 2 5
## 4 4 before 1 2 2
## 5 5 before 3 4 4
## 6 6 before 4 3 3
## 7 1 after 4 4 4
## 8 2 after 5 6 5
## 9 3 after 5 5 6
## 10 4 after 5 5 5
## 11 5 after 6 5 6
## 12 6 after 6 5 6
# get data summary information for the topic
group_by(scholar_evulations, group) %>%
summarise(
count = n(),
median = median(a, na.rm = TRUE),
IQR = IQR(a, na.rm = TRUE)
)
## # A tibble: 2 × 4
## group count median IQR
## <chr> <int> <dbl> <dbl>
## 1 after 6 5 0.75
## 2 before 6 2.5 1.75
# first subset the data to examine the before and after values separately
before <- subset(scholar_evulations, group == "before")
after <- subset(scholar_evulations, group == "after")
#QQplot to visually investigate normality
par(mfrow=c(1,2))
# before
qqnorm(before$a)
qqline(before$a, col=c("red"))
# after
qqnorm(after$a)
qqline(after$a, col=c("red"))
# formally testing normality via shapiro-wilks test
d <- with(scholar_evulations, a[group == "before"] - a[group == "after"])
shapiro.test(d)
##
## Shapiro-Wilk normality test
##
## data: d
## W = 0.85319, p-value = 0.167
# formally testing for equal variance
# first calculate variance and sd
before_var <- var(before$a) # variance
sqrt(before_var) # standard deviation
## [1] 1.47196
after_var <- var(after$a)
sqrt(after_var)
## [1] 0.7527727
# Levene Test - homogeneity of variance across groups.
# homogeneity assumption: the population variances of the
# dependent variable must equal for all groups.
# Typically can ignore this assumption if roughly equal sample sizes for each group.
levTest <- leveneTest(scholar_evulations$a ~ scholar_evulations$group)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
levTest
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 2.7586 0.1277
## 10
# the assumptions of t-test have been met.
# Perform t-test
t.test(scholar_evulations$a ~ scholar_evulations$group,
paired = TRUE, alternative = "greater")
##
## Paired t-test
##
## data: scholar_evulations$a by scholar_evulations$group
## t = 8.2158, df = 5, p-value = 0.0002175
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 2.264208 Inf
## sample estimates:
## mean of the differences
## 3
# repeat for non-parametric test
wilcox.test(scholar_evulations$a ~ scholar_evulations$group,
paired = TRUE, alternative = "greater")
## Warning in wilcox.test.default(x = c(4L, 5L, 5L, 5L, 6L, 6L), y = c(0L, : cannot
## compute exact p-value with ties
##
## Wilcoxon signed rank test with continuity correction
##
## data: scholar_evulations$a by scholar_evulations$group
## V = 21, p-value = 0.01725
## alternative hypothesis: true location shift is greater than 0
# plot results
ggboxplot(scholar_evulations, x = "group", y = "a",
color = "group", palette = "jco",
order = c("before", "after"),
ylab = "confidence value",
xlab = "Selecting a suitable topic area for study") +
geom_point(colour="gray", size=2.5, alpha=0.8) +
geom_line(aes(x=as.numeric(group), y=as.numeric(a), group=ID),
colour="gray", linetype="11", size = 1) +
stat_compare_means(method = "t.test", paired = TRUE,
method.args = list(alternative = "greater"), label= "p.format")
# first subset the data to examine the before and after values separately
before <- subset(scholar_evulations, group == "before")
after <- subset(scholar_evulations, group == "after")
#QQplot to visually investigate normality
par(mfrow=c(1,2))
# before
qqnorm(before$b)
qqline(before$b, col=c("red"))
# after
qqnorm(after$b)
qqline(after$b, col=c("red"))
# formally testing normality via shapiro-wilks test
d <- with(scholar_evulations, b[group == "before"] - b[group == "after"])
shapiro.test(d)
##
## Shapiro-Wilk normality test
##
## data: d
## W = 0.90758, p-value = 0.4207
# formally testing for equal variance
# first calculate variance and sd
before_var <- var(before$b) # variance
sqrt(before_var) # standard deviation
## [1] 1.32916
after_var <- var(after$b)
sqrt(after_var)
## [1] 0.6324555
# Levene Test - homogeneity of variance across groups.
# homogeneity assumption: the population variances of the
# dependent variable must equal for all groups.
# Typically can ignore this assumption if roughly equal sample sizes for each group.
levTest <- leveneTest(scholar_evulations$b ~ scholar_evulations$group)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
levTest
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 1.2162 0.2959
## 10
# the assumptions of t-test have been met.
# Perform t-test
t.test(scholar_evulations$b ~ scholar_evulations$group,
paired = TRUE, alternative = "greater")
##
## Paired t-test
##
## data: scholar_evulations$b by scholar_evulations$group
## t = 5.9367, df = 5, p-value = 0.0009679
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 1.87163 Inf
## sample estimates:
## mean of the differences
## 2.833333
# repeat for non-parametric test
wilcox.test(scholar_evulations$b ~ scholar_evulations$group,
paired = TRUE,
alternative = "greater")
## Warning in wilcox.test.default(x = c(4L, 6L, 5L, 5L, 5L, 5L), y = c(0L, : cannot
## compute exact p-value with ties
##
## Wilcoxon signed rank test with continuity correction
##
## data: scholar_evulations$b by scholar_evulations$group
## V = 21, p-value = 0.01751
## alternative hypothesis: true location shift is greater than 0
# plot results
ggboxplot(scholar_evulations, x = "group", y = "b",
color = "group", palette = "jco",
order = c("before", "after"),
ylab = "confidence value",
xlab = "Developing a logical rationale for a particular research idea") +
geom_point(colour="gray", size=2.5, alpha=0.8) +
geom_line(aes(x=as.numeric(group), y=as.numeric(b),
group=ID), colour="gray", linetype="11", size = 1) +
stat_compare_means(method = "t.test", paired = TRUE,
method.args = list(alternative = "greater"), label= "p.format")
# get data summary information for the topic
group_by(scholar_evulations, group) %>%
summarise(
count = n(),
median = median(c, na.rm = TRUE),
IQR = IQR(c, na.rm = TRUE)
)
## # A tibble: 2 × 4
## group count median IQR
## <chr> <int> <dbl> <dbl>
## 1 after 6 5.5 1
## 2 before 6 3 1.5
# first subset the data to examine the before and after values separately
before <- subset(scholar_evulations, group == "before")
after <- subset(scholar_evulations, group == "after")
#QQplot to visually investigate normality
par(mfrow=c(1,2))
# before
qqnorm(before$c)
qqline(before$c, col=c("red"))
# after
qqnorm(after$c)
qqline(after$c, col=c("red"))
# formally testing normality via shapiro-wilks test
d <- with(scholar_evulations, c[group == "before"] - c[group == "after"])
shapiro.test(d)
##
## Shapiro-Wilk normality test
##
## data: d
## W = 0.96004, p-value = 0.8201
# formally testing for equal variance
# first calculate variance and sd
before_var <- var(before$c) # variance
sqrt(before_var) # standard deviation
## [1] 1.722401
after_var <- var(after$c)
sqrt(after_var)
## [1] 0.8164966
# Levene Test - homogeneity of variance across groups.
# homogeneity assumption: the population variances of the
# dependent variable must equal for all groups.
# Typically can ignore this assumption if roughly equal sample sizes for each group.
leveneTest(scholar_evulations$b ~ scholar_evulations$group)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 1.2162 0.2959
## 10
# the assumptions of t-test have been met.
# Perform t-test
t.test(scholar_evulations$c ~ scholar_evulations$group, paired = TRUE,
alternative = "greater")
##
## Paired t-test
##
## data: scholar_evulations$c by scholar_evulations$group
## t = 5.8387, df = 5, p-value = 0.001042
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 1.637208 Inf
## sample estimates:
## mean of the differences
## 2.5
# repeat for non-parametric test
wilcox.test(scholar_evulations$c ~ scholar_evulations$group, paired = TRUE,
alternative = "greater")
## Warning in wilcox.test.default(x = c(4L, 5L, 6L, 5L, 6L, 6L), y = c(0L, : cannot
## compute exact p-value with ties
##
## Wilcoxon signed rank test with continuity correction
##
## data: scholar_evulations$c by scholar_evulations$group
## V = 21, p-value = 0.01751
## alternative hypothesis: true location shift is greater than 0
# plot results
ggboxplot(scholar_evulations, x = "group", y = "c",
color = "group", palette = "jco",
order = c("before", "after"),
ylab = "confidence value", xlab = "Developing a logical rationale for a particular research idea") +
geom_point(colour="gray", size=2.5, alpha=0.8) +
geom_line(aes(x=as.numeric(group), y=as.numeric(c), group=ID), colour="gray", linetype="11", size = 1) +
stat_compare_means(method = "t.test", paired = TRUE, method.args = list(alternative = "greater"), label= "p.format")
melt_scholar_eval <- melt(scholar_evulations, id.vars = c("ID", "group"),
measure.vars = c("a", "b", "c"))
melt_scholar_eval
## ID group variable value
## 1 1 before a 0
## 2 2 before a 3
## 3 3 before a 2
## 4 4 before a 1
## 5 5 before a 3
## 6 6 before a 4
## 7 1 after a 4
## 8 2 after a 5
## 9 3 after a 5
## 10 4 after a 5
## 11 5 after a 6
## 12 6 after a 6
## 13 1 before b 0
## 14 2 before b 2
## 15 3 before b 2
## 16 4 before b 2
## 17 5 before b 4
## 18 6 before b 3
## 19 1 after b 4
## 20 2 after b 6
## 21 3 after b 5
## 22 4 after b 5
## 23 5 after b 5
## 24 6 after b 5
## 25 1 before c 0
## 26 2 before c 3
## 27 3 before c 5
## 28 4 before c 2
## 29 5 before c 4
## 30 6 before c 3
## 31 1 after c 4
## 32 2 after c 5
## 33 3 after c 6
## 34 4 after c 5
## 35 5 after c 6
## 36 6 after c 6
eval_names <- c(`a` = "Selecting a suitable
topic area for study",
`b` = "Developing a logical
rationale for a
particular research idea",
`c` = "Articulating a clear
purpose for the research")
# Box plot facetted by topic
ggboxplot(melt_scholar_eval, x = "group", y = "value",
color = "group", palette = "jco",
facet.by = "variable") +
facet_wrap(~variable, labeller = as_labeller(eval_names)) +
geom_point(colour="gray", size=2.5, alpha=0.8) +
geom_line(aes(x=as.numeric(group), group=ID),
colour="gray", linetype="11", size = 1) +
stat_compare_means(method = "t.test", paired = TRUE,
method.args = list(alternative = "greater"), label= "p.format")