## R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Define student names and their submission counts per module.
# Position i of every count vector belongs to student[i].
student <- c("Paige", "Casey", "Olivia", "Molly", "Michael", "Lillian")
module2 <- c(41, 10, 21, 8, 2, 8)
module3 <- c(36, 24, 7, 7, 3, 26)
module4 <- c(19, 20, 5, 1, 2, 11)

# Fail early if a count vector does not cover every student.
stopifnot(
  length(module2) == length(student),
  length(module3) == length(student),
  length(module4) == length(student)
)

# Convert to long-format data.
# c(module2, module3, module4) is module-major: every student's Module2 count,
# then every Module3 count, then every Module4 count. The label vectors must
# follow that same order, so the student names cycle within each module
# (times = number of modules) and each module label is repeated once per
# student (each = number of students). The opposite arrangement pairs counts
# with the wrong student and module.
module_labels <- c("Module2", "Module3", "Module4")
submission_vector <- c(module2, module3, module4)
student_vector <- rep(student, times = length(module_labels))
module_vector <- rep(module_labels, each = length(student))

# NOTE(review): the printed results recorded further down this document
# (ANOVA table, module means, Levene and Shapiro-Wilk output) were produced
# with misaligned labels; re-knit the document to refresh them.
submission.data <- data.frame(
  student = student_vector,
  module = module_vector,
  submissions = submission_vector
)

# Repeated-measures ANOVA of submission counts.
# `module` is the effect being tested; Error(student/module) declares the
# error strata "student" and "student:module", so the module effect is
# assessed in the within-student stratum.
submission.aov <- aov(
  formula = submissions ~ module + Error(student/module),
  data = submission.data
)
summary(submission.aov)
## 
## Error: student
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals  5  974.9     195               
## 
## Error: student:module
##           Df Sum Sq Mean Sq F value Pr(>F)
## module     2  224.8   112.4    0.92   0.43
## Residuals 10 1221.2   122.1
# Box plot of submission counts for each module.
boxplot(
  submissions ~ module,
  data = submission.data,
  main = "Submission Count per Module",
  xlab = "Module",
  ylab = "Submission Count",
  col = "lightblue"
)

# Draw the individual observations over the boxes (add = TRUE); horizontal
# jitter keeps points with the same count from hiding each other.
stripchart(
  submissions ~ module,
  data = submission.data,
  add = TRUE,
  vertical = TRUE,
  method = "jitter",
  col = "red",
  pch = 19
)

## Including Plots

You can also embed plots, for example:

# Profile plot: one connected line per student across the three modules.
module_labels <- c("Module2", "Module3", "Module4")
module_positions <- seq_along(module_labels)

# Empty canvas (type = "n") sized to hold every count; the default x-axis is
# suppressed so it can be relabelled with module names below.
plot(1, type = "n",
     xlim = range(module_positions),
     ylim = range(submission_vector),
     xlab = "Module",
     ylab = "Submissions",
     xaxt = "n",
     main = "Student Submissions Across Modules")

axis(1, at = module_positions, labels = module_labels)

# seq_along() yields an empty sequence for an empty `student`, whereas
# 1:length(student) would iterate over c(1, 0).
for (i in seq_along(student)) {
  lines(module_positions,
        c(module2[i], module3[i], module4[i]),
        type = "b", pch = 19, col = i)
}

# Legend colours use the same indices as the lines drawn above.
legend("topright", legend = student, col = seq_along(student),
       pch = 19, lty = 1)

# Histogram of every submission count, drawn on the density scale
# (freq = FALSE) so the kernel density curve added below shares its y-axis.
hist(
  submission.data[["submissions"]],
  freq = FALSE,
  breaks = 10,
  main = "Histogram of All Submissions",
  xlab = "Submission Count",
  col = "skyblue"
)

# Kernel density estimate of the same counts, overlaid on the histogram.
lines(density(submission.data[["submissions"]]), lwd = 2, col = "black")

# Mean submission count within each module (named by module).
module_means <- with(submission.data, tapply(submissions, module, mean))
module_means
##  Module2  Module3  Module4 
## 18.66667 10.16667 13.00000
# Mean over all submission counts, ignoring module.
overall_mean <- mean(submission.data[["submissions"]])
overall_mean
## [1] 13.94444
# Bar chart of the per-module means with the overall mean as a reference line
# and a density curve of the raw counts on a secondary (right-hand) axis.
barplot_heights <- as.vector(module_means)
bar_names <- names(module_means)

# Widen the right margin before plotting: the secondary-axis title is written
# on margin line 3 of side 4, which does not fit in the default right margin
# of 2.1 lines. The previous setting is kept so it can be restored afterwards.
old_par <- par(mar = c(5, 4, 4, 4) + 0.1)

barplot(barplot_heights,
        names.arg = bar_names,
        ylim = c(0, max(barplot_heights) + 5),
        col = "lightblue",
        main = "Average Submission Counts by Module",
        ylab = "Submission Count")

abline(h = overall_mean, col = "red", lty = 2)

legend("topright", legend = "Overall Mean", col = "red", lty = 2, bty = "n")

# Add a density plot on a secondary y-axis.
# par(new = TRUE) makes the next plot() draw over the bar chart instead of
# starting a new page. That plot sets up its own coordinate system, so the
# curve's x positions are submission counts, not bar positions; no x-axis is
# drawn for it (axes = FALSE).
par(new = TRUE)
plot(density(submission.data$submissions),
     col = "black",
     axes = FALSE,
     xlab = "",
     ylab = "",
     main = "",
     lwd = 2)

axis(4)
mtext("Density", side = 4, line = 3)

# Restore the margin setting that was in effect before this figure.
par(old_par)

# Equal variance across modules
# `module` is stored as character; converting it to a factor in the formula
# avoids the "group coerced to factor" warning that leveneTest() emits when it
# has to do the conversion itself. The grouping, and therefore the test
# result, is unchanged.
leveneTest(submissions ~ factor(module), data = submission.data)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2  1.4641 0.2625
##       15
# Normality of residuals
# Fit the one-way model of submissions on module and keep its residuals
# (each observation minus the fitted value for its module).
submission.model <- lm(formula = submissions ~ module, data = submission.data)
residuals <- resid(submission.model)
residuals
##           1           2           3           4           5           6 
##  22.3333333  -0.1666667   8.0000000 -10.6666667  -8.1666667  -5.0000000 
##           7           8           9          10          11          12 
##  17.3333333  13.8333333  -6.0000000 -11.6666667  -7.1666667  13.0000000 
##          13          14          15          16          17          18 
##   0.3333333   9.8333333  -8.0000000 -17.6666667  -8.1666667  -2.0000000
# Formal test of normality on the residuals.
shapiro.test(residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals
## W = 0.93268, p-value = 0.2164
# Visual check: normal Q-Q plot with a red reference line.
qqnorm(residuals)
qqline(residuals, col = "red")

Note that adding the `echo = FALSE` parameter to a code chunk prevents printing of the R code that generated the plot.