load libraries

library(dplyr)
library(ggplot2)
library(ggpubr)
library(purrr)
library(car)
library(reshape)
library(ggforce)
library(knitr)
library(PairedData)

set working directory

# Set knitr's root directory for chunk evaluation to ".", so the relative
# data path used when reading the evaluation file is resolved from there.
knitr::opts_knit$set(
  root.dir = "."
)

read in evaluation data

The score columns are: a = Selecting a suitable topic area for study; b = Developing a logical rationale for a particular research idea; c = Articulating a clear purpose for the research.

# Read the tab-delimited evaluation table (columns ID, group, a, b, c).
# The arguments spell out the defaults that read.delim() applies.
scholar_evulations <- read.table(
  file = "scholar_eval_2022.txt",
  header = TRUE,
  sep = "\t",
  quote = "\"",
  dec = ".",
  fill = TRUE,
  comment.char = ""
)
# Show the full table so the raw scores appear in the report.
print(scholar_evulations)
##    ID  group a b c
## 1   1 before 0 0 0
## 2   2 before 3 2 3
## 3   3 before 2 2 5
## 4   4 before 1 2 2
## 5   5 before 3 4 4
## 6   6 before 4 3 3
## 7   1  after 4 4 4
## 8   2  after 5 6 5
## 9   3  after 5 5 6
## 10  4  after 5 5 5
## 11  5  after 6 5 6
## 12  6  after 6 5 6

a) Selecting a suitable topic area for study

# Descriptive statistics for topic a, per time point: number of scholars,
# median score and interquartile range (missing scores ignored).
scholar_evulations %>%
  group_by(group) %>%
  summarise(
    count = n(),
    median = median(a, na.rm = TRUE),
    IQR = IQR(a, na.rm = TRUE)
  )
## # A tibble: 2 × 4
##   group  count median   IQR
##   <chr>  <int>  <dbl> <dbl>
## 1 after      6    5    0.75
## 2 before     6    2.5  1.75
# Split the table into the "before" rows and the "after" rows so each time
# point can be inspected on its own.
before <- scholar_evulations[which(scholar_evulations$group == "before"), ]
after <- scholar_evulations[which(scholar_evulations$group == "after"), ]

# Visual normality check: normal Q-Q plots drawn side by side,
# "before" on the left and "after" on the right, each with a red reference line.
par(mfrow = c(1, 2))
qqnorm(before$a)
qqline(before$a, col = "red")
qqnorm(after$a)
qqline(after$a, col = "red")

# Formal normality check: Shapiro-Wilk test on the before-minus-after
# differences in topic a scores.
d <- scholar_evulations$a[scholar_evulations$group == "before"] -
  scholar_evulations$a[scholar_evulations$group == "after"]
shapiro.test(d)
## 
##  Shapiro-Wilk normality test
## 
## data:  d
## W = 0.85319, p-value = 0.167
# Spread of the topic a scores at each time point, ahead of the
# equal-variance test: keep the sample variance and show its square root
# (the standard deviation).
before_var <- var(x = before$a)
print(sqrt(before_var))
## [1] 1.47196
after_var <- var(x = after$a)
print(sqrt(after_var))
## [1] 0.7527727
# Levene's test for homogeneity of variance of the topic a scores between the
# "before" and "after" groups (null hypothesis: equal population variances).
# The two groups have the same size here, which is the situation in which
# this assumption is typically treated as less critical.
levTest <- leveneTest(a ~ group, data = scholar_evulations)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
print(levTest)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  1  2.7586 0.1277
##       10
# Both tests below pair observations by position, so the "before" and "after"
# subsets must list the same scholars in the same order.
stopifnot(identical(before$ID, after$ID))

# The assumptions of the t-test have been met.
# Paired, one-sided t-test: are the "after" scores for topic a greater than
# the "before" scores? The two vectors are passed explicitly ("after" first)
# instead of `a ~ group` with `paired = TRUE`: R >= 4.4.0 rejects `paired` in
# the formula method, and the formula only tested after - before because
# "after" sorts ahead of "before" alphabetically.
t.test(after$a, before$a, paired = TRUE, alternative = "greater")
## 
##  Paired t-test
## 
## data:  after$a and before$a
## t = 8.2158, df = 5, p-value = 0.0002175
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  2.264208      Inf
## sample estimates:
## mean of the differences 
##                       3
# Non-parametric counterpart: paired, one-sided Wilcoxon signed-rank test on
# the same after/before vectors.
wilcox.test(after$a, before$a, paired = TRUE, alternative = "greater")
## Warning in wilcox.test.default(after$a, before$a, paired = TRUE, alternative =
## "greater"): cannot compute exact p-value with ties
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  after$a and before$a
## V = 21, p-value = 0.01725
## alternative hypothesis: true location shift is greater than 0
# Before/after box plot for topic a. Each scholar's two scores are drawn as
# grey points joined by a dotted grey line (grouped by ID), and the p-value of
# a paired one-sided t-test is printed on the plot.
# NOTE(review): confirm the printed one-sided p-value agrees with the paired
# t-test reported for this topic; its direction depends on which group
# stat_compare_means() treats as the first one.
ggboxplot(
  data = scholar_evulations,
  x = "group",
  y = "a",
  order = c("before", "after"),
  color = "group",
  palette = "jco",
  xlab = "Selecting a suitable topic area for study",
  ylab = "confidence value"
) +
  geom_point(alpha = 0.8, size = 2.5, colour = "gray") +
  geom_line(
    mapping = aes(x = as.numeric(group), y = as.numeric(a), group = ID),
    linetype = "11", size = 1, colour = "gray"
  ) +
  stat_compare_means(
    method = "t.test",
    paired = TRUE,
    method.args = list(alternative = "greater"),
    label = "p.format"
  )

b) Developing a logical rationale for a particular research idea

# Keep the "before" rows and the "after" rows in separate data frames.
before <- subset(scholar_evulations, subset = group %in% "before")
after <- subset(scholar_evulations, subset = group %in% "after")

# Side-by-side normal Q-Q plots of the topic b scores ("before" on the left,
# "after" on the right), each with a red reference line.
par(mfrow = c(1, 2))
qqnorm(before$b)
qqline(before$b, col = "red")
qqnorm(after$b)
qqline(after$b, col = "red")

# Shapiro-Wilk normality test on the before-minus-after differences in
# topic b scores.
d <- scholar_evulations$b[scholar_evulations$group == "before"] -
  scholar_evulations$b[scholar_evulations$group == "after"]
shapiro.test(d)
## 
##  Shapiro-Wilk normality test
## 
## data:  d
## W = 0.90758, p-value = 0.4207
# Sample variance of the topic b scores at each time point; the square root
# printed after each one is the corresponding standard deviation.
before_var <- var(x = before$b)
print(sqrt(before_var))
## [1] 1.32916
after_var <- var(x = after$b)
print(sqrt(after_var))
## [1] 0.6324555
# Levene's test for homogeneity of variance of the topic b scores between the
# "before" and "after" groups (null hypothesis: equal population variances).
# Group sizes are equal here, the case in which this assumption is typically
# treated as less critical.
levTest <- leveneTest(b ~ group, data = scholar_evulations)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
print(levTest)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  1  1.2162 0.2959
##       10
# Both tests below pair observations by position, so the "before" and "after"
# subsets must list the same scholars in the same order.
stopifnot(identical(before$ID, after$ID))

# The assumptions of the t-test have been met.
# Paired, one-sided t-test: are the "after" scores for topic b greater than
# the "before" scores? The two vectors are passed explicitly ("after" first)
# instead of `b ~ group` with `paired = TRUE`: R >= 4.4.0 rejects `paired` in
# the formula method, and the formula only tested after - before because
# "after" sorts ahead of "before" alphabetically.
t.test(after$b, before$b, paired = TRUE, alternative = "greater")
## 
##  Paired t-test
## 
## data:  after$b and before$b
## t = 5.9367, df = 5, p-value = 0.0009679
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  1.87163     Inf
## sample estimates:
## mean of the differences 
##                2.833333
# Non-parametric counterpart: paired, one-sided Wilcoxon signed-rank test on
# the same after/before vectors.
wilcox.test(after$b, before$b, paired = TRUE, alternative = "greater")
## Warning in wilcox.test.default(after$b, before$b, paired = TRUE, alternative =
## "greater"): cannot compute exact p-value with ties
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  after$b and before$b
## V = 21, p-value = 0.01751
## alternative hypothesis: true location shift is greater than 0
# Before/after box plot for topic b. Grey points mark each scholar's scores,
# dotted grey lines join the two scores that share an ID, and the p-value of
# a paired one-sided t-test is printed on the plot.
# NOTE(review): confirm the printed one-sided p-value agrees with the paired
# t-test reported for this topic; its direction depends on which group
# stat_compare_means() treats as the first one.
ggboxplot(
  data = scholar_evulations,
  x = "group",
  y = "b",
  order = c("before", "after"),
  color = "group",
  palette = "jco",
  xlab = "Developing a logical rationale for a particular research idea",
  ylab = "confidence value"
) +
  geom_point(alpha = 0.8, size = 2.5, colour = "gray") +
  geom_line(
    mapping = aes(x = as.numeric(group), y = as.numeric(b), group = ID),
    linetype = "11", size = 1, colour = "gray"
  ) +
  stat_compare_means(
    method = "t.test",
    paired = TRUE,
    method.args = list(alternative = "greater"),
    label = "p.format"
  )

c) Articulating a clear purpose for the research

# Descriptive statistics for topic c, per time point: number of scholars,
# median score and interquartile range (missing scores ignored).
scholar_evulations %>%
  group_by(group) %>%
  summarise(
    count = n(),
    median = median(c, na.rm = TRUE),
    IQR = IQR(c, na.rm = TRUE)
  )
## # A tibble: 2 × 4
##   group  count median   IQR
##   <chr>  <int>  <dbl> <dbl>
## 1 after      6    5.5   1  
## 2 before     6    3     1.5
# One data frame per time point: rows labelled "before" and rows labelled
# "after".
before <- scholar_evulations[scholar_evulations$group %in% "before", , drop = FALSE]
after <- scholar_evulations[scholar_evulations$group %in% "after", , drop = FALSE]

# Normal Q-Q plots of the topic c scores, drawn side by side ("before" on the
# left, "after" on the right) with a red reference line on each.
par(mfrow = c(1, 2))
qqnorm(before$c)
qqline(before$c, col = "red")
qqnorm(after$c)
qqline(after$c, col = "red")

# Shapiro-Wilk normality test on the before-minus-after differences in
# topic c scores.
d <- scholar_evulations$c[scholar_evulations$group == "before"] -
  scholar_evulations$c[scholar_evulations$group == "after"]
shapiro.test(d)
## 
##  Shapiro-Wilk normality test
## 
## data:  d
## W = 0.96004, p-value = 0.8201
# Sample variance of the topic c scores at each time point; the square root
# printed after each one is the corresponding standard deviation.
before_var <- var(x = before$c)
print(sqrt(before_var))
## [1] 1.722401
after_var <- var(x = after$c)
print(sqrt(after_var))
## [1] 0.8164966
# Levene's test for homogeneity of variance of the topic c scores between the
# "before" and "after" groups (null hypothesis: equal population variances).
# Typically this assumption can be ignored when the groups have roughly equal
# sample sizes.
# The response is column `c`; the earlier version of this section tested
# column `b` by mistake and so repeated the topic b result.
leveneTest(scholar_evulations$c ~ scholar_evulations$group)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  1  0.9783  0.346
##       10
# Both tests below pair observations by position, so the "before" and "after"
# subsets must list the same scholars in the same order.
stopifnot(identical(before$ID, after$ID))

# The assumptions of the t-test have been met.
# Paired, one-sided t-test: are the "after" scores for topic c greater than
# the "before" scores? The two vectors are passed explicitly ("after" first)
# instead of `c ~ group` with `paired = TRUE`: R >= 4.4.0 rejects `paired` in
# the formula method, and the formula only tested after - before because
# "after" sorts ahead of "before" alphabetically.
t.test(after$c, before$c, paired = TRUE, alternative = "greater")
## 
##  Paired t-test
## 
## data:  after$c and before$c
## t = 5.8387, df = 5, p-value = 0.001042
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  1.637208      Inf
## sample estimates:
## mean of the differences 
##                     2.5
# Non-parametric counterpart: paired, one-sided Wilcoxon signed-rank test on
# the same after/before vectors.
wilcox.test(after$c, before$c, paired = TRUE, alternative = "greater")
## Warning in wilcox.test.default(after$c, before$c, paired = TRUE, alternative =
## "greater"): cannot compute exact p-value with ties
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  after$c and before$c
## V = 21, p-value = 0.01751
## alternative hypothesis: true location shift is greater than 0
# Before/after box plot for topic c. Grey points mark each scholar's scores,
# dotted grey lines join the two scores that share an ID, and the p-value of
# a paired one-sided t-test is printed on the plot.
# The x-axis title names topic c; it previously repeated the topic b title.
# NOTE(review): confirm the printed one-sided p-value agrees with the paired
# t-test reported for this topic; its direction depends on which group
# stat_compare_means() treats as the first one.
ggboxplot(scholar_evulations, x = "group", y = "c",
          color = "group", palette = "jco",
          order = c("before", "after"),
          ylab = "confidence value",
          xlab = "Articulating a clear purpose for the research") +
  geom_point(colour = "gray", size = 2.5, alpha = 0.8) +
  geom_line(aes(x = as.numeric(group), y = as.numeric(c), group = ID),
            colour = "gray", linetype = "11", size = 1) +
  stat_compare_means(method = "t.test", paired = TRUE,
                     method.args = list(alternative = "greater"),
                     label = "p.format")

plot together

# Reshape to long format: ID and group stay as identifiers, and the a/b/c
# score columns are stacked into a `variable`/`value` pair so the topics can
# be plotted as facets.
melt_scholar_eval <- melt(
  data = scholar_evulations,
  id.vars = c("ID", "group"),
  measure.vars = c("a", "b", "c")
)
print(melt_scholar_eval)
##    ID  group variable value
## 1   1 before        a     0
## 2   2 before        a     3
## 3   3 before        a     2
## 4   4 before        a     1
## 5   5 before        a     3
## 6   6 before        a     4
## 7   1  after        a     4
## 8   2  after        a     5
## 9   3  after        a     5
## 10  4  after        a     5
## 11  5  after        a     6
## 12  6  after        a     6
## 13  1 before        b     0
## 14  2 before        b     2
## 15  3 before        b     2
## 16  4 before        b     2
## 17  5 before        b     4
## 18  6 before        b     3
## 19  1  after        b     4
## 20  2  after        b     6
## 21  3  after        b     5
## 22  4  after        b     5
## 23  5  after        b     5
## 24  6  after        b     5
## 25  1 before        c     0
## 26  2 before        c     3
## 27  3 before        c     5
## 28  4 before        c     2
## 29  5 before        c     4
## 30  6 before        c     3
## 31  1  after        c     4
## 32  2  after        c     5
## 33  3  after        c     6
## 34  4  after        c     5
## 35  5  after        c     6
## 36  6  after        c     6
# Facet titles keyed by topic column name. The line breaks inside the strings
# are intentional: they wrap the long titles across several lines.
eval_names <- c(
  a = "Selecting a suitable 
topic area for study",
  b = "Developing a logical 
rationale for a 
particular research idea",
  c = "Articulating a clear 
purpose for the research"
)

# All three topics in one figure: a box plot of score by group in each facet,
# with facets titled from `eval_names`. Grey points mark the individual
# scores, dotted grey lines join the scores that share an ID, and the p-value
# of a paired one-sided t-test is printed in each panel.
ggboxplot(
  data = melt_scholar_eval,
  x = "group",
  y = "value",
  color = "group",
  palette = "jco",
  facet.by = "variable"
) +
  facet_wrap(~variable, labeller = as_labeller(eval_names)) +
  geom_point(alpha = 0.8, size = 2.5, colour = "gray") +
  geom_line(
    mapping = aes(x = as.numeric(group), group = ID),
    linetype = "11", size = 1, colour = "gray"
  ) +
  stat_compare_means(
    method = "t.test",
    paired = TRUE,
    method.args = list(alternative = "greater"),
    label = "p.format"
  )