Moodle Log Analysis

Setup and Packages

library(tidyverse)
library(lubridate)
library(chron)
library(rebus)
library(stringr)
library(corrplot)
library(ggpubr)
library(ggthemes)
library(scales)
library(plotly)
library(ggrepel)
library(readxl)


# Base classes. Trailing notes give enrolment and meeting pattern.
class1 <- readr::read_csv(file = "class1.csv") # 25 students T/TR 2:30 - 3:50
class2 <- readr::read_csv(file = "class2.csv") # 25 students M/W  2:00 - 3:50
class3 <- readr::read_csv(file = "class3.csv") # 2 students  online
class4 <- readr::read_csv(file = "class4.csv") # 24 students T/TR 2:30 - 3:50
class5 <- readr::read_csv(file = "class5.csv") # 22 students online

# Plug-in classes. class6 is the only log exported as an Excel workbook.
class6 <- readxl::read_excel(path = "class6.xlsx") # 7 students MWF 9:00 - 9:50
class7 <- readr::read_csv(file = "class7.csv") # 8 students W 5:00 - 7:50

class8 <- readr::read_csv(file = "class8.csv") # 14 students T/TR 12:30 - 2:20
class9 <- readr::read_csv(file = "class9.csv") # 20 students MW 2:00 - 3:50

class10 <- readr::read_csv(file = "class10.csv") # 23 students arranged
class11 <- readr::read_csv(file = "class11.csv") # 25 students MWF 10:00 - 10:50
class12 <- readr::read_csv(file = "class12.csv") # 13 students MWF 2:00 - 2:50
class13 <- readr::read_csv(file = "class13.csv") # 17 students MWF 9:00 - 9:50

#' Prepare a raw Moodle log export for analysis.
#'
#' @param dataframe A data frame holding at least the columns `Event context`,
#'   `Event name`, `Time` and `Description`.
#' @return `dataframe` with `Event context` / `Event name` renamed to
#'   `Eventcontext` / `Eventname`, `Time` converted to POSIXct, and the added
#'   columns `roundedtime` ("HH:MM" of the nearest hour), `month`, `day`,
#'   `year`, `weekday`, `week` and `userid` (first quoted number found in
#'   `Description`, as text).
clean <- function(dataframe) {

  # fix column names so later code does not need backticks
  dataframe <- dataframe %>%
    rename(
      Eventcontext = `Event context`,
      Eventname = `Event name`
    )

  # make time column usable
  # NOTE(review): no tz is given, so the session time zone is used - confirm
  # that matches the zone the logs were exported in.
  dataframe$Time <- as.POSIXct(dataframe$Time, format = "%m/%d/%Y, %H:%M")

  # Nearest-hour bucket. Rounding can cross midnight (23:45 becomes 00:00 of
  # the next day), so every calendar field below is taken from the rounded
  # timestamp. Mixing raw and rounded timestamps would file a 23:45 Saturday
  # event under "Saturday 00:00".
  rounded <- round_date(dataframe$Time, unit = "hour")

  dataframe$roundedtime <- format(rounded, "%H:%M")
  dataframe$month <- as.numeric(format(rounded, "%m"))
  dataframe$day <- as.numeric(format(rounded, "%d"))
  dataframe$year <- as.numeric(format(rounded, "%Y"))

  dataframe$weekday <- wday(rounded, label = TRUE)
  dataframe$week <- epiweek(rounded) # new week starts sunday

  # extract Moodle assigned userid to be an identifier: the first run of
  # digits wrapped in single quotes, with the quotes stripped afterwards
  dataframe$userid <- str_extract(dataframe$Description, pattern =
                                    one_or_more("'") %R%
                                    capture(one_or_more(DGT)) %R%
                                    one_or_more("'")
  )
  dataframe$userid <- gsub("'", "", dataframe$userid)

  dataframe
}

#clean classes

# Clean one class log and attach its class-level metadata.
#   log          raw log as loaded from disk
#   number       class number; yields classname "class<number>" and
#                class "Class<number>"
#   testgroup    0 for base classes, 1 for classes using the test plug-ins
#   num_students enrolment, stored in numStudents
# Returns the cleaned log with classname, testgroup, numStudents and class
# appended (in that order).
prepare_class <- function(log, number, testgroup, num_students) {
  log <- clean(log)
  log$classname <- paste0("class", number)
  log$testgroup <- testgroup
  log$numStudents <- num_students
  log$class <- paste0("Class", number)
  log
}

# base classes
class1 <- prepare_class(class1, 1, testgroup = 0, num_students = 25)
class2 <- prepare_class(class2, 2, testgroup = 0, num_students = 25)
class3 <- prepare_class(class3, 3, testgroup = 0, num_students = 2)
class4 <- prepare_class(class4, 4, testgroup = 0, num_students = 24)
class5 <- prepare_class(class5, 5, testgroup = 0, num_students = 22)

# plug-in classes
class6 <- prepare_class(class6, 6, testgroup = 1, num_students = 7)
class7 <- prepare_class(class7, 7, testgroup = 1, num_students = 8)

class8 <- prepare_class(class8, 8, testgroup = 1, num_students = 14)
class9 <- prepare_class(class9, 9, testgroup = 1, num_students = 20)

class10 <- prepare_class(class10, 10, testgroup = 1, num_students = 23)
class11 <- prepare_class(class11, 11, testgroup = 1, num_students = 25)
class12 <- prepare_class(class12, 12, testgroup = 1, num_students = 13)
class13 <- prepare_class(class13, 13, testgroup = 1, num_students = 17)

# Stack every cleaned class log into one table, in class-number order.
alldata <- bind_rows(
  class1, class2, class3, class4, class5,
  class6, class7, class8, class9,
  class10, class11, class12, class13
)

Project Introduction

The objective of this analysis is to research how effective the new plug-ins are that are currently being tested during the Spring 2021 semester and research how students have been using Moodle during the 2020-2021 year. The goal is for this analysis to give faculty and instructors a better idea of how students are using Moodle and how they can structure classes to work seamlessly with students. It also provides information on how the test plug-ins were used, to aid in deciding which plug-ins should be implemented in future classes.

The Data

This research was done with 13 total classes, 5 classes from the Fall 2020 semester, and 8 classes from the Spring 2021 semester that were using some of the test plug-ins. The majority of these classes had 10-25 students and class start times from 9:00 - 2:30.

How Long Does it Take for Students to View Their Feedback?

#data setup function

#' Pair each first grading event with the graded student's first feedback view.
#'
#' @param dataframe One class log as returned by `clean()`; the code reads the
#'   columns `Eventname`, `Description`, `Time` and `userid`.
#' @return A data frame grouped by `assignmentid` and `usergraded`, one row per
#'   graded submission whose feedback was viewed. Columns present on both
#'   sides of the join carry `.x` (grading event) and `.y` (view event)
#'   suffixes. `fbviewed` holds the number of returned rows and `fbnotviewed`
#'   the number of graded submissions that had no matching view.
q1fun <- function(dataframe) {

  # keep only the two event types needed (this filters rows, not columns)
  dataframe <- dataframe %>%
    filter(Eventname %in% c("Feedback viewed", "The submission has been graded."))

  # every quoted number in the description, in order of appearance
  quoted_ids <- str_extract_all(dataframe$Description, pattern =
                                  one_or_more("'") %R%
                                  capture(one_or_more(DGT)) %R%
                                  one_or_more("'")
  )

  # vapply() always returns a character vector, even when no events matched
  dataframe$tempassignmentid <- vapply(quoted_ids, paste, character(1), collapse = ",")
  dataframe$tempassignmentid <- str_replace_all(dataframe$tempassignmentid, "'", "")

  # the two event types carry a different number of ids, so short rows are
  # padded with NA on the right; saying so explicitly avoids a warning per class
  dataframe <- separate(dataframe, tempassignmentid,
                        into = c("c1", "c2", "c3", "c4"),
                        fill = "right", extra = "drop")

  # gather assignmentid and the userid that was graded
  dataframe <- dataframe %>%
    mutate(
      assignmentid = if_else(Eventname == "Feedback viewed", c3, c4),
      # "NA" is deliberately a string, not a missing value: the view-side copy
      # of this column survives the join, and a real NA there would make every
      # joined row incomplete for the na.omit() applied to the combined result
      usergraded = if_else(Eventname == "The submission has been graded.", c3, "NA")
    ) %>%
    select(-c1, -c2, -c3, -c4)

  #graded assignments
  dataframe11 <- dataframe %>%
    filter(Eventname == "The submission has been graded.") %>%
    group_by(usergraded, assignmentid) %>%
    slice(which.min(Time)) #only the first time an instructor graded an assignment (removes changes)

  #feedbacks viewed
  dataframe12 <- dataframe %>%
    filter(Eventname == "Feedback viewed") %>%
    group_by(userid, assignmentid) %>%
    slice(which.min(Time)) #only the first time a student views feedback (removes extras)

  #join data: the graded student on the left is the viewing user on the right
  dataframe <- left_join(dataframe11, dataframe12, by = c('assignmentid' = 'assignmentid','usergraded' = 'userid'))

  # which.min() is empty when Time.y is NA, so this also drops graded
  # submissions whose feedback was never viewed
  dataframe <- dataframe %>%
    group_by(assignmentid, usergraded) %>%
    slice(which.min(Time.y))

  fbviewedna <- as.numeric(nrow(dataframe11)) - as.numeric(nrow(dataframe))
  dataframe$fbnotviewed <- fbviewedna
  dataframe$fbviewed <- as.numeric(nrow(dataframe))

  return(dataframe)
}

# q1fun() runs on each class separately to eliminate duplication errors from
# the left join inside it
q1 <- q1fun(class1)
q2 <- q1fun(class2)
q3 <- q1fun(class3)
q4 <- q1fun(class4)
q5 <- q1fun(class5)
q6 <- q1fun(class6)
q7 <- q1fun(class7)
q8 <- q1fun(class8)
q9 <- q1fun(class9)
q10 <- q1fun(class10)
q11 <- q1fun(class11)
q12 <- q1fun(class12)
q13 <- q1fun(class13)

# stack the per-class results, then drop every row that has a missing value
# in any column
q1data <- na.omit(
  bind_rows(q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, q13)
)

Most students viewed their feedback within a couple days but almost every class had outliers where a student would not view their feedback for days.

Note: This data does not include feedback that was never checked.

# hours from the grading event (Time.x) to the first feedback view (Time.y);
# rows where the view does not come after the grading are data anomalies and
# are dropped
q1datac <- q1data %>%
  mutate(timetoviewfb = -1 * difftime(Time.x, Time.y, units = "hours")) %>%
  filter(timetoviewfb > 0)

ggplot(q1datac, aes(x = class.x, y = timetoviewfb)) +
  geom_jitter(color = "#2438f0", alpha = 0.3, width = 0.2) +
  labs(
    title = "How Long it Takes for Students to View Feedback",
    x = "Class Name",
    y = "Number of Hours Between Feedback Posted and Viewed"
  ) +
  scale_y_continuous(breaks = pretty_breaks(n = 8)) +
  theme_clean()

Despite the outliers, 80% of student feedback was viewed within about 50 hours from the time their feedback was posted.

# 80th percentile of the time to view feedback, in hours
q1quantile <- quantile(as.numeric(q1datac$timetoviewfb), .8)

ggplot(q1datac, aes(x = timetoviewfb)) +
  geom_histogram(binwidth = 20, colour = "white", fill = "#c9401e") +
  geom_vline(xintercept = q1quantile, color = "blue", linetype = "dashed", size = 1) +
  scale_x_continuous(breaks = pretty_breaks(n = 15)) +
  theme_clean() +
  # annotate() draws the label once; geom_text() with constant aesthetics
  # would redraw it on top of itself for every row of q1datac
  annotate("text", x = q1quantile + 15, y = 75, label = "80% of Students",
           colour = "blue", angle = 90) +
  labs(x = "Time Before Feedback was Viewed (Hours)",
       y = "Count of Students",
       title = "Time Between Feedback Posted and Viewed (Hours)")

When including all classes, the median time to view feedback was 17.6 hours and the mean was 51.1 hours. Here it is best to focus on the median time to view feedback to lessen the impact of the outliers.

For most classes, the median time to view feedback was at or under 20 hours. This includes many of the classes with the largest sample sizes suggesting that the median time to view feedback is a better representation of how long it takes for students to view feedback than the mean.

# Both summaries start from q1datac so they use the same positive,
# anomaly-filtered durations as the plots; computing them from q1data would
# let feedback "viewed" before it was graded pull the statistics down.
# NOTE(review): the narrative quotes an overall median of 17.6 hours and a
# mean of 51.1 hours - re-check those figures against this output.

#per-class median, mean and sample size
q1datasum <- q1datac %>%
  group_by(class.x) %>%
  summarise(med = median(timetoviewfb), avg = mean(timetoviewfb), SampleSize = n())

#overall median and mean across all classes
q1dataAvgMed <- q1datac %>%
  ungroup() %>%
  summarise(med = round(median(timetoviewfb), 1), avg = round(mean(timetoviewfb), 1), SampleSize = n())

ggplot(q1datasum, aes(x = class.x, y = med, fill = SampleSize)) +
  geom_col() +
  theme_clean() +
  labs(x = "Class Name",
       y = "Median Time for Students to View Feedback (Hours)",
       title = "Median Time for Students to View Feedback",
       subtitle = paste("Overall: Median = ", q1dataAvgMed$med, "Mean = ", q1dataAvgMed$avg)) +
  guides(fill = guide_legend(title = "Sample Size"))

Which Moodle Resources get the Most and Least Number of Interactions?

# set up function
# For one class log, relate how often each module type was viewed to how many
# distinct modules of that type appear in the log and to the class size.
# Returns one row per module type with TotalModuleviews, ModulesAdded,
# viewsperadd and viewsperstudent, sorted by viewsperstudent (highest first).
q2fun <- function(dataframe) {
  class_size <- dataframe$numStudents[1]

  # module type = the word characters directly in front of a colon in the
  # event context (first such match), without the colon
  module_type <- function(context) {
    labelled <- str_extract(context, pattern =
                              capture(one_or_more(WRD)) %R%
                              ":"
    )
    gsub(":", "", labelled)
  }

  # views per module type
  views_by_type <- dataframe %>%
    mutate(Moduletype = module_type(Eventcontext)) %>%
    filter(Eventname == 'Course module viewed') %>%
    group_by(Moduletype) %>%
    summarise(TotalModuleviews = n())

  # modules per type: the course was restored, so "Course module created" is
  # not very helpful; count the distinct event contexts of each type instead
  modules_by_type <- dataframe %>%
    distinct(Eventcontext) %>%
    mutate(Moduletype = module_type(Eventcontext)) %>%
    group_by(Moduletype) %>%
    summarise(ModulesAdded = n())

  inner_join(views_by_type, modules_by_type, by = "Moduletype") %>%
    mutate(
      viewsperadd = TotalModuleviews / ModulesAdded,
      viewsperstudent = viewsperadd / class_size
    ) %>%
    arrange(desc(viewsperstudent))
}

# per-class module summaries
q21 <- q2fun(class1)
q22 <- q2fun(class2)
q23 <- q2fun(class3)
q24 <- q2fun(class4)
q25 <- q2fun(class5)
q26 <- q2fun(class6)
q27 <- q2fun(class7)
q28 <- q2fun(class8)
q29 <- q2fun(class9)
q210 <- q2fun(class10)
q211 <- q2fun(class11)
q212 <- q2fun(class12)
q213 <- q2fun(class13)

# stack the classes, give the "tool" type its full name, and remove the NA
# module types that result from some rarely used module types
q2data <- bind_rows(
  q21, q22, q23, q24, q25, q26, q27,
  q28, q29, q210, q211, q212, q213
) %>%
  mutate(Moduletype = str_replace(Moduletype, "tool", "External Tool")) %>%
  filter(!is.na(Moduletype))

The assignment module type received the most views as expected. The next highest viewed module was files, which also had the highest number of modules added so its high view count is expected.

The viewership of URL is surprising since it had a large number of modules added but a low number of views. The other module types having low total viewership is expected given that fewer modules of those types were posted.

# totals across all classes per module type, plus views per module added
q2datasumtotals <- q2data %>%
  group_by(Moduletype) %>%
  summarise(
    TotalModuleviews = sum(TotalModuleviews),
    TotalModulesAdded = sum(ModulesAdded),
    ViewsPerModule = TotalModuleviews / TotalModulesAdded
  )

ggplot(
  q2datasumtotals,
  aes(x = Moduletype, y = TotalModuleviews, fill = TotalModulesAdded)
) +
  geom_col(position = "dodge") +
  geom_text(aes(label = TotalModuleviews), vjust = -0.125) +
  labs(
    title = "How Much Each Moodle Resource is Interacted",
    subtitle = "Total Views for each Module Type",
    x = "Module Type",
    y = "Views"
  ) +
  guides(fill = guide_legend(title = "Total Modules Added")) +
  theme_clean() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10))

When looking at views per module, glossary is clearly at the top. Only one glossary module was posted in the sample of logs but 450 views is very high and suggests it was used by students frequently. The choice module was similar: it was only used in one course but received quite a few views.

Outside of glossary and choice modules, assignment and quiz modules received the most views per module posted. This is likely because students have to interact with those modules for a grade as opposed to viewing a file or URL that are not always required.

Forums also had an amount of views per module posted similar to quizzes suggesting that when forums are used, students are willing to interact with them as much as they do with important module types like quizzes.

# same chart as the totals, but with views divided by the modules added
ggplot(
  q2datasumtotals,
  aes(x = Moduletype, y = ViewsPerModule, fill = TotalModulesAdded)
) +
  geom_col(position = "dodge") +
  geom_text(aes(label = round(ViewsPerModule)), vjust = -0.125) +
  labs(
    title = "How Much Each Moodle Resource is Interacted",
    subtitle = "Views Per Module Added for each Module Type",
    x = "Module Type",
    y = "Views"
  ) +
  guides(fill = guide_legend(title = "Total Modules Added")) +
  theme_clean() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10))

Students viewed most modules on average 1-2 times with the exception of assignments, choices, folders, and quizzes that were viewed an average of 3-7 times.

File and URL module types were both viewed on average about 1 time per student. Most lecture materials are posted as those module types so I would have expected them to have more viewership. Two possible reasons to explain this are that students could be returning to downloaded copies of files which would eliminate the need to view the module again. Similarly they could be going directly to a URL instead of clicking on the module. The other possible reason could be that students are leaving files and URLs open until they are done working with them.

How to Read - The assignment module received an average of 103.5 views on each module posted, each student viewed an assignment module on average 6.7 times, and each class had an average of 9.8 assignment modules added

# per module type: averages of the per-class figures
q2datasum <- q2data %>%
  group_by(Moduletype) %>%
  summarise(
    AVGTotalModuleviews = mean(TotalModuleviews),
    AVGModulesAdded = mean(ModulesAdded),
    AVGViewsPerModule = mean(viewsperadd),
    AVGModuleViewsPerStudent = mean(viewsperstudent)
  )

# long format: one row per module type and measure
q2datalong <- pivot_longer(
  q2datasum,
  cols = starts_with("AVG"),
  names_to = "Measure",
  values_to = "Value"
)

# cleaner version for plotting: total views and Glossary left out
q2datalongnoviews <- q2datalong %>%
  filter(Measure != "AVGTotalModuleviews", Moduletype != "Glossary")

ggplot(q2datalongnoviews, aes(x = Moduletype, y = Value, fill = Measure)) +
  geom_col() +
  geom_text(
    aes(label = round(Value, 1)),
    size = 3,
    position = position_stack(vjust = .5)
  ) +
  labs(
    title = "How Much Each Moodle Resource is Interacted",
    subtitle = "Average of Class Totals, Glossary Removed",
    x = "Module Type",
    y = "Count"
  ) +
  theme_clean() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10))

# table of the averages without the total view count
select(q2datasum, -AVGTotalModuleviews)

## # A tibble: 12 x 4
##    Moduletype    AVGModulesAdded AVGViewsPerModule AVGModuleViewsPerStudent
##    <chr>                   <dbl>             <dbl>                    <dbl>
##  1 Assignment               9.85             103.                     6.69 
##  2 Chat                     1                 20                      1    
##  3 Checklist                8                 38.3                    2.30 
##  4 Choice                   1                 87                      3.48 
##  5 External Tool            8.75              25.4                    1.34 
##  6 File                    23.4               18.9                    1.20 
##  7 Folder                   3                 66.8                    4.36 
##  8 Forum                    4.23              33.7                    1.97 
##  9 Glossary                 1                453                     19.7  
## 10 Page                    11.3               23.2                    2.65 
## 11 Quiz                     4.75              48.8                    3.22 
## 12 URL                     23.7               12.5                    0.700

How Often are Students Viewing Recorded Lectures?

# views per lecture recording in class4, with the recording date parsed from
# the module title and turned into an ordered "mm-dd" factor
q31 <- class4 %>%
  filter(str_detect(Eventcontext, "URL: Lecture Video") & Eventname == 'Course module viewed') %>%
  group_by(Eventcontext) %>%
  summarise(Views = n()) %>%
  mutate(Eventcontext = str_replace(Eventcontext, "URL: Lecture Video for ", ""),
         Eventcontext = str_replace(Eventcontext, "URL: Lecture Video For ", ""),
         Date = as.Date(Eventcontext, "%a, %B %d, %y")) %>%
  arrange(Date) %>%
  # factor() takes its levels from the sorted distinct non-missing dates, so
  # the labels are built from that same set; labelling from the raw column
  # fails as soon as two recordings share a date or a title does not parse
  mutate(Date = factor(Date, labels = format(sort(unique(Date)), "%m-%d"), ordered = TRUE))

Only one class in the sample of logs posted recorded lectures consistently so this is a small sample size. In that class, viewership mostly stayed the same throughout the semester, each recording was viewed about 3-9 times out of a class size of 24.

Most students viewed a recorded lecture within about 5 days of a recording being posted. However another decently sized group viewed recorded lectures 13+ days after the recording was posted.

#how many days after post do students watch

#gather module added dates
q321 <- class4 %>%
  filter(str_detect(Eventcontext, "URL: Lecture Video") & Eventname == 'Course module created')

#gather module viewed dates
q322 <- class4 %>%
  filter(str_detect(Eventcontext, "URL: Lecture Video") & Eventname == 'Course module viewed')

#join data: each view (.x) is paired with the creation event (.y) of the
#module that has the same event context
q32 <- inner_join(q322, q321, by = c("Eventcontext" = "Eventcontext"))

#days between the module being created and each view
q32 <- q32 %>%
  mutate(timetoview = difftime(Time.x, Time.y, units = "days"))

q3quantile <- quantile(as.numeric(q32$timetoview), .8)

ggplot(q32, aes(x = timetoview)) +
  geom_histogram(binwidth = 1, colour = "white", fill = "#c9401e") +
  geom_vline(xintercept = q3quantile, color = "blue", linetype = "dashed", size = 1) +
  scale_x_continuous(breaks = pretty_breaks(n = 16)) +
  theme_clean() +
  # annotate() draws the label once; geom_text() with constant aesthetics
  # would redraw it on top of itself for every row of q32
  annotate("text", x = q3quantile + 0.5, y = 30, label = "80% of Students",
           colour = "blue", angle = 90) +
  labs(x = "Time Between Lecture Recording Post and View (Days)",
       y = "Count of Students",
       title = "When Students View Lecture Recordings")

Examining further, it does seem that there may be two groups of views for recorded lectures. The first group views recordings a few days after the recording was posted, likely for homework, missing the class, or just going over it again. A second group views the recorded lecture days after they were posted, likely for test preparation. This trend is strongest in November. Most November lectures received more views 5+ days after the lecture was posted than other months suggesting that the views may have been for preparing for end of semester exams or finals.

# shorten the module titles to the date text and parse that text as a Date
q32 <- q32 %>%
  mutate(
    Eventcontext = str_replace(
      str_replace(Eventcontext, "URL: Lecture Video for ", ""),
      "URL: Lecture Video For ",
      ""
    ),
    Date = as.Date(Eventcontext, "%a, %B %d, %y")
  )

# one row of points per recording, ordered by when the module was created
ggplot(q32, aes(x = reorder(Eventcontext, Time.y), y = timetoview)) +
  geom_jitter(width = 0.1, color = "#c9401e", size = 2, alpha = 0.75) +
  coord_flip() +
  labs(
    title = "When Students View Lecture Recordings",
    x = "Lecture Recording",
    y = "Time Between Lecture Recording Post and View (Days)"
  ) +
  theme_minimal()

How Long Does it Take For Students to Submit an Assignment After First Viewing the Assignment?

#data setup function

#' Pair each student's first submission of an assignment with their first view.
#'
#' @param dataframe One class log as returned by `clean()`; the code reads the
#'   columns `Eventname`, `Description`, `Time` and `userid`.
#' @return A data frame grouped by `userid` and `assignmentid`, one row per
#'   first submission. Columns present on both sides of the join carry `.x`
#'   (submission event) and `.y` (first view event) suffixes; the `.y` columns
#'   are NA when no view of that module by that user was logged.
q4fun <- function(dataframe) {

  #filters to only views and submissions
  dataframe <- dataframe %>%
    filter(Eventname %in% c("Course module viewed", "A submission has been submitted."))

  # every quoted number in the description, in order of appearance
  quoted_ids <- str_extract_all(dataframe$Description, pattern =
                                  one_or_more("'") %R%
                                  capture(one_or_more(DGT)) %R%
                                  one_or_more("'")
  )

  # vapply() always returns a character vector, even when no events matched
  dataframe$tempassignmentid <- vapply(quoted_ids, paste, character(1), collapse = ",")
  dataframe$tempassignmentid <- str_replace_all(dataframe$tempassignmentid, "'", "")

  # the two event types carry a different number of ids, so short rows are
  # padded with NA on the right; saying so explicitly avoids a warning per class
  dataframe <- separate(dataframe, tempassignmentid,
                        into = c("c1", "c2", "c3", "c4"),
                        fill = "right", extra = "drop")

  #gather assignmentid: third quoted number for submissions, second for views
  dataframe <- dataframe %>%
    mutate(
      assignmentid = if_else(Eventname == "A submission has been submitted.", c3, c2)
    ) %>%
    select(-c1, -c2, -c3, -c4)

  #gather submissions
  dataframe11 <- dataframe %>%
    filter(Eventname == "A submission has been submitted.") %>%
    group_by(userid, assignmentid) %>%
    slice(which.min(Time)) #only the first time a student turned in an assignment (removes changes)

  #gather views
  dataframe12 <- dataframe %>%
    filter(Eventname == "Course module viewed") %>%
    group_by(userid, assignmentid) %>%
    slice(which.min(Time)) #only the first time a student viewed an assignment

  #join
  dataframe <- left_join(dataframe11, dataframe12, by = c('userid' = 'userid', 'assignmentid' = 'assignmentid'))

  return(dataframe)
}

# q4fun() runs on each class separately to eliminate duplication errors from
# the left join inside it; q1 ... q13 are reassigned here
q1 <- q4fun(class1)
q2 <- q4fun(class2)
q3 <- q4fun(class3)
q4 <- q4fun(class4)
q5 <- q4fun(class5)
q6 <- q4fun(class6)
q7 <- q4fun(class7)
q8 <- q4fun(class8)
q9 <- q4fun(class9)
q10 <- q4fun(class10)
q11 <- q4fun(class11)
q12 <- q4fun(class12)
q13 <- q4fun(class13)

# stack the per-class results
q4data <- bind_rows(
  q1, q2, q3, q4, q5, q6, q7,
  q8, q9, q10, q11, q12, q13
)

Students turned in their assignments on average 3.8 days after first viewing them. Similar to the time it took for students to view feedback, the median of 3 days is a good representation of the data as a result of outliers from assignments that were viewed many days before submitting.

The two highest values of turn in times were 1 and 2 days suggesting that students like to complete assignments quickly once they do look at them. This data is well distributed however and there is a decent number of students in most numbers of days from view to submission.

Note - I removed any assignment that was submitted within 1.5 hours from the first view to remove any in-class assignments or extremely short assignments from the dataset

#removes short assignments
q41 <- q4data %>%
  mutate(timeview_submit = difftime(Time.x, Time.y, units = "days")) %>%
  filter(timeview_submit > 0.0625) ## 0.0625 days = 1 1/2 hours; removes anything turned in faster to avoid in class or extremely short assignments

# 80th percentile of the view-to-submission time, in days
q4quantile <- quantile(as.numeric(q41$timeview_submit), .8)

ggplot(q41, aes(x = timeview_submit)) +
  geom_histogram(color = "white", binwidth = 1, fill = "#c9401e") +
  scale_x_continuous(breaks = pretty_breaks(n = 32)) +
  geom_vline(xintercept = q4quantile, color = "blue", linetype = "dashed", size = 1) +
  # annotate() draws the label once; geom_text() with constant aesthetics
  # would redraw it on top of itself for every row of q41
  annotate("text", x = q4quantile + 0.75, y = 125, label = "80% of Students",
           colour = "blue", angle = 90) +
  theme_clean() +
  labs(x = "Time Between First View and Submission (Days)",
       y = "Count",
       title = "How Long it Took Students to Submit Assignments After Viewing",
       subtitle = paste("Mean =", round(mean(q41$timeview_submit),1), "Days   Median =", round(median(q41$timeview_submit),1), "Days"))

Looking at the average time from first view to submission for each student by class, it is clear that the times vary between classes. Some classes like class 10 and 11 are grouped tightly while others like class 1 and 9 have turn in times spread out widely.

The diverse levels of submission times between classes suggest that class structure may have a strong impact on how long students spend between viewing an assignment and submitting it but there is not quite enough data here to confirm that assumption.

#gathers mean and median view-to-submission time for each student in each class
q42 <- q4data %>%
  mutate(timeview_submit = difftime(Time.x, Time.y, units = "days")) %>%
  filter(timeview_submit > 0.0625) %>% ## removes turned in less than 1 1/2 hours to avoid in class or extremely short assignments
  group_by(class.x, userid) %>%
  summarise(meantime = mean(timeview_submit), medtime = median(timeview_submit))

# one point per student, placed in the column of that student's class
ggplot(q42, aes(x = class.x, y = meantime, color = class.x)) +
  geom_jitter(size = 2, alpha = 0.45, width = 0.2) +
  theme_clean() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, size = 10)) +
  labs(x = "Class", # the axis shows classes; each point is a student
       y = "Average Time Between First View and Submission (Days)",
       title = "Average Time Between First View and Submission For Each Student",
       color = "Class")

Looking at the average time from first view to submission by assignment rather than class shows that the bulk of the data is under 10 days. Additionally, assignments with higher average view to submission times have a more spread out group of submissions while the lower average assignments tend to have tighter groupings of submissions.

The logs give no clear answer to why the groupings differ because they do not capture assignment due dates. One possible reason could be that when assignments have due dates shortly after they are posted, students are forced to complete the assignment not long after they view it, leading to tighter groups. Assignments posted with longer due dates means that students are not forced to complete the assignment shortly after viewing, leading to more flexibility in when they can complete the assignment and more spread out groupings.

# every submission with its view-to-submission time; anything under
# 0.0625 days (1 1/2 hours) is removed as a short assignment
q43 <- q4data %>%
  mutate(timeview_submit = difftime(Time.x, Time.y, units = "days")) %>%
  filter(timeview_submit > 0.0625)

# mean of those times for each assignment
q431 <- q43 %>%
  group_by(assignmentid) %>%
  summarise(meantime = mean(timeview_submit))

ggplot() +
  geom_point(
    data = q43,
    aes(
      x = reorder(assignmentid, timeview_submit),
      y = timeview_submit,
      color = "Submission"
    ),
    alpha = 0.5
  ) +
  geom_point(
    data = q431,
    aes(x = assignmentid, y = meantime, color = "Average Submission Time"),
    size = 1.5
  ) +
  coord_flip() +
  scale_y_continuous(breaks = pretty_breaks(n = 15)) +
  scale_color_fivethirtyeight() +
  labs(
    title = "Time Between First View and Submission For Each Assignment",
    subtitle = "Time of Each Submission and Average Submission Time Plotted",
    x = "Assignment ID",
    y = "Time Between First View and Submission (Days)",
    color = 'Measure'
  ) +
  theme_minimal() +
  theme(axis.text.y = element_text(size = 4))

How Are Students Interacting With Video Content Posted on Moodle?

# views of modules whose title mentions a video or a recording
# assignments are left out because they are mandatory = not a good representation
q51 <- alldata %>%
  filter(
    str_detect(Eventcontext, "Video") |
      str_detect(Eventcontext, "video") |
      str_detect(Eventcontext, "Recording") |
      str_detect(Eventcontext, "recording"),
    !str_detect(Eventcontext, "Assignment:"),
    Eventname == "Course module viewed"
  )

# pull every quoted number out of the description; the second one is used as
# the module id
q51$tempassignmentid <- str_extract_all(q51$Description, pattern =
                                          one_or_more("'") %R%
                                          capture(one_or_more(DGT)) %R%
                                          one_or_more("'")
)
q51$tempassignmentid <- sapply(q51$tempassignmentid, paste, collapse = ",")
q51$tempassignmentid <- str_replace_all(q51$tempassignmentid, "'", "")

q51 <- q51 %>%
  separate(tempassignmentid, into = c("c1", "c2")) %>%
  rename(assignmentid = c2) %>%
  select(-c1) %>%
  # removes one misleading outlier that contains all info students need for a
  # week (including a video)
  filter(assignmentid != "571150")

Almost all video content was viewed by a proportionate amount of students in comparison to class size.

Note - I cannot see if a student actually watched a video or how long they watched a video, just that they viewed the module containing the video

Note - There may be missing data here. The logs do not capture what file type a module is, to gather this data I used any module with “Video” or “Recording” in the title which means that videos without clear naming were not used in this visualization.

#gather sums: number of views of each video module, kept with its class and class size
q52 <- q51 %>%
  group_by(class, numStudents, assignmentid) %>%
  summarise(count = n())

# The x position is only the row number of the module in q52. seq_len() is
# used because 1:nrow() yields c(1, 0) for an empty table. The legend title
# is set through labs(color = ...), since colour is the mapped aesthetic.
ggplot(q52, aes(x = seq_len(nrow(q52)), y = count, color = numStudents)) +
  geom_point(size = 3) +
  theme_clean() +
  labs(x = "Moodle Video ID",
       y = "Number of Views",
       title = "How Many Students Watched Video Content on Moodle",
       color = "Number of Students in Class")

When Are Students Using Moodle?

# number of events of one type for every weekday / rounded-hour combination
hourly_event_counts <- function(event_name) {
  alldata %>%
    filter(Eventname == event_name) %>%
    group_by(weekday, roundedtime) %>%
    summarise(count = n())
}

#heat map with views
activityheatmap <- hourly_event_counts("Course viewed")

#heat map with submissions
activityheatmap2 <- hourly_event_counts("A submission has been submitted.")

This visualization shows how many course views occurred on Moodle during each time of the day, for each day of the week.

Most activity between 8:00 - 15:00 is due to classes occurring during that time range which brings students on to Moodle.

During the weekdays, with the exception of Friday, student views on Moodle after normal class times peak at about 20:00 and views stay high until 01:00 in the morning. During those weekdays, Tuesday and Wednesday night received more views than Thursday and Friday night suggesting that students spend less time on Moodle at the end of the week.

Friday receives by far the least amount of views with the number lowering right after 14:00 when the main class times start to end.

Sunday is, as expected, the day with the most activity. Starting at about 9:00, students stay fairly active on Sunday until 01:00 in the morning on Monday.

Another interesting point is that there is a large number of students that used Moodle late Saturday night.

# Course view heat map: rounded hour across, weekday down, each tile filled and labelled with its count
ggplot(activityheatmap, aes(x = roundedtime, y = weekday)) +
  geom_tile(aes(fill = count), colour = "white", na.rm = TRUE) +
  geom_text(aes(label = count), size = 3) +
  scale_fill_gradient(low = "#d8e1cf", high = "#438484", na.value = "grey50") +
  guides(fill = guide_legend(title = "Total Views")) +
  labs(title = "Moodle Views by Day of Week and Hour",
       x = "Views Per Hour",
       y = "Day of Week") +
  theme_minimal() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.y = element_text(face = "bold"),
        # bold hour labels, rotated so they do not overlap
        axis.text.x = element_text(face = "bold", angle = 90, hjust = 1, size = 10))

Here is the visualization with assignment submissions rather than course views.

Like in the graph above, the results from 8:00 - 15:00 are likely not indicative of any trends since assignments turned in at that time are likely in-class assignments.

The trend of extra activity on Sunday and Wednesday nights continues here. Along with a large number of submissions late Saturday night (Sunday 12:00 AM - 01:00 AM).

# Submission heat map: rounded hour across, weekday down, each tile filled and labelled with its count
ggplot(activityheatmap2, aes(x = roundedtime, y = weekday)) +
  geom_tile(aes(fill = count), colour = "white", na.rm = TRUE) +
  geom_text(aes(label = count), size = 3) +
  scale_fill_gradient(low = "#d8e1cf", high = "#438484", na.value = "grey50") +
  guides(fill = guide_legend(title = "Total Submissions")) +
  labs(title = "Moodle Submissions by Day of Week and Hour",
       x = "Submissions Per Hour",
       y = "Day of Week") +
  theme_minimal() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.y = element_text(face = "bold"),
        # bold hour labels, rotated so they do not overlap
        axis.text.x = element_text(face = "bold", angle = 90, hjust = 1, size = 10))

#only views and submissions
# keeps the two event types that the time-of-day and weekday bar charts compare
timedata <- alldata %>%
  filter(Eventname %in% c("Course viewed", "A submission has been submitted."))

Viewing interactions by only time also shows high usage during the class times from about 08:00 - 15:00.

After 14:00, activity steadily drops until 20:00 suggesting further that this is when a large amount of students start going on Moodle again after class hours. There is also a larger number of assignment submissions starting at 20:00 until 01:00 the next day.

# Side by side counts of views and submissions for each rounded hour
ggplot(timedata, aes(x = roundedtime, fill = Eventname)) +
  stat_count(position = "dodge") +
  theme_clean() +
  guides(fill = guide_legend(title = "Event Name")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, size = 10)) +
  # bars dodge over their default width of 0.9; the labels must use the same width to sit centred above them
  geom_text(aes(label = ..count..), stat = "count",
            position = position_dodge(width = 0.9), size = 2, vjust = -0.75) +
  labs(x = "Rounded Time",
       y = "Count",
       title = "Moodle Interactions by Time of Day")

Similarly looking at totals by day of the week, the activity of a week peaks on Tuesday and then falls each day until Sunday. Sunday also has by far the most assignment submissions, further reinforcing that students do a large amount of work on Sundays.

# Side by side counts of views and submissions for each day of the week
ggplot(timedata, aes(x = weekday, fill = Eventname)) +
  stat_count(position = "dodge") +
  theme_clean() +
  # bars dodge over their default width of 0.9; the labels must use the same width to sit centred above them
  geom_text(aes(label = ..count..), stat = "count",
            position = position_dodge(width = 0.9), size = 3, vjust = -0.75) +
  guides(fill = guide_legend(title = "Event Name")) +
  labs(x = "Weekday",
       y = "Count",
       title = "Moodle Interactions by Day of Week")

How Long Are Students Using Moodle?

#gathers only users that have submitted an assignment or taken a quiz
#this is used to filter out most of the moodle administrative userids that do not reflect students
# one row per matching event, so a userid can appear many times; it is only used for %in% lookups
q6students <- alldata %>%
  filter(Eventname %in% c("A submission has been submitted.", "Quiz attempt started")) %>%
  select(userid)


#function to increment values
# Adds 1 to the variable named in the call, in the caller's environment,
# and returns the new value (so `y <- inc(v)` updates v and sets y to the new v).
inc <- function(x){
  target <- substitute(x)                      # the expression the caller passed, e.g. `v`
  bump <- bquote(.(target) <- .(target) + 1)   # builds the call `v <- v + 1`
  eval(bump, parent.frame())                   # run it where inc() was called
}

#classifies activity into sessions
#
# Arguments:
#   x     - log of one class; must contain the columns userid and Time
#   value - session id to give to the first session of this class
#           (a plain number, or a 1x1 data frame cell holding one)
#
# Returns x restricted to the userids found in q6students, sorted by userid and Time,
# with three columns appended in this order:
#   sessionid - id of the session the row belongs to
#   tdiff     - time since the previous row, as a difftime in seconds (NA on the first row)
#   value     - the last session id handed out, repeated on every row so the caller can continue from it
#
# A new session starts when the userid changes or when 3600 seconds or more have passed
# since the previous row. Rows whose gap is NA also start a new session.
q6fun <- function(x, value) {

  #callers may hand over a 1x1 data frame cell, reduce it to a plain number
  value <- as.numeric(unlist(value, use.names = FALSE))[1]

  #filters out non student ids
  x <- x %>%
    filter(userid %in% q6students$userid)

  #created before tdiff so the appended columns keep the order sessionid, tdiff, value
  x$sessionid <- 0

  #arranges by userid/time, removes na
  #units are fixed to seconds: POSIXct subtraction picks its units from the data,
  #so the 3600 cutoff below could otherwise be compared against minutes or hours
  x <- x %>%
    arrange(userid, Time) %>%
    filter(!is.na(userid)) %>%
    mutate(tdiff = difftime(Time, lag(Time), units = "secs"))

  nval <- nrow(x)

  if (nval > 0) {
    gap <- as.numeric(x$tdiff, units = "secs")

    #TRUE where a row opens a new session; the first row continues the starting id
    #(it used to be left at 0 for every class, which lumped the first rows of all classes together)
    #NOTE(review): userid is now read by name instead of column position 14 - confirm they are the same column
    newsession <- c(
      FALSE,
      is.na(gap[-1]) | gap[-1] >= 3600 | x$userid[-1] != x$userid[-nval]
    )

    #running count of session breaks on top of the starting id
    x$sessionid <- value + cumsum(newsession)
    value <- value + sum(newsession)
  }

  x$value <- value
  return(x)
}

#runs function on each class individually because a few userid intersect between classes
#each call starts one past the last session id used by the previous class
#the counter is read from the value column by name (it was column position 21) and
#passed on as a plain number rather than a 1x1 data frame cell
q661 <- q6fun(class1, 0)
q662 <- q6fun(class2, q661$value[1] + 1)
q663 <- q6fun(class3, q662$value[1] + 1)
q664 <- q6fun(class4, q663$value[1] + 1)
q665 <- q6fun(class5, q664$value[1] + 1)
q666 <- q6fun(class6, q665$value[1] + 1)
q667 <- q6fun(class7, q666$value[1] + 1)
q668 <- q6fun(class8, q667$value[1] + 1)
q669 <- q6fun(class9, q668$value[1] + 1)
q6610 <- q6fun(class10, q669$value[1] + 1)
q6611 <- q6fun(class11, q6610$value[1] + 1)
q6612 <- q6fun(class12, q6611$value[1] + 1)
q6613 <- q6fun(class13, q6612$value[1] + 1)

#join data
# stacks the thirteen per-class session tables, in class order, into one table
q61 <- do.call(
  rbind,
  list(q661, q662, q663, q664, q665, q666, q667,
       q668, q669, q6610, q6611, q6612, q6613)
)

Most students are not on a Moodle course very long. The average session length was about 13.5 minutes, while about 73% of Moodle sessions were under 10 minutes.

Note - These logs only capture how long students spent on one Moodle course. If a student were to leave one of the Moodle courses used in this analysis and spend time in one of their other Moodle courses, it will not be tracked. This means that we can assume that students are likely spending the average 13.5 minutes in their other Moodle courses as well.

Note - A Moodle session’s length is defined as the time from the first action taken in a session to the last action taken. If there is more than an hour between a student’s last two actions, the current session ends and a new session begins. An hour was chosen because students may be spending a long time viewing a single resource in a module, creating no new log files despite them being on Moodle for an extended period of time.

#gather sums
# one row per session: first action, last action and the minutes between them
# start is the earliest Time and end the latest (they used to be swapped, which made
# everything derived from `start` further down describe when a session ended)
q62 <- q61 %>%
  group_by(sessionid) %>%
  summarise(start = min(Time), end = max(Time)) %>%
  mutate(sessionlength = difftime(end, start, units = "mins")) %>%
  filter(sessionlength < 300) #removes a few extremely long sessions created by moodle admin functions

#find percent less than 10 minutes
# two row table: percent of sessions that are ("yes") / are not ("no") 10 minutes or shorter
percentless10 <- q62 %>%
  mutate(less10 = if_else(as.numeric(sessionlength) <= 10, "yes", "no")) %>%
  group_by(less10) %>%
  summarise( percent = 100 * n() / nrow(q62))

# the same "yes" percentage computed directly, so it does not depend on which row of
# percentless10 holds it (row 2 is only "yes" when a "no" row exists)
percentunder10 <- 100 * mean(as.numeric(q62$sessionlength) <= 10)

# session length below which that share of sessions falls; the share is taken from the
# data instead of the previously hard-coded 0.7326612
q62quantile <- quantile(as.numeric(q62$sessionlength), percentunder10 / 100)

ggplot(q62, aes(x = as.numeric(sessionlength))) +
  geom_histogram(binwidth = 5, color = "white", fill="#c9401e") +
  scale_x_continuous(breaks = pretty_breaks(n = 15)) +
  theme_clean() +
  geom_vline(xintercept=q62quantile, color="blue", linetype="dashed", size=1) +
  # annotate() draws the label once; geom_text() with constant aesthetics drew it once per session
  annotate("text", x = q62quantile + 5, y = 4000,
           label = paste(round(percentunder10, 1), "% of Sessions"),
           colour = "blue", angle = 90) +
  labs(x = "Session Length (Minutes)",
       y = "Count of Sessions",
       title = "How Long Students Were On Moodle",
       subtitle = paste(round(percentunder10, 1), "% of Moodle Sessions are Under 10 Minutes"))

Here, sessions less than 10 minutes are removed to get a closer look at the medium to long length sessions. The two most common lengths were 20 and 25 minute sessions. After reaching 60 minute sessions, the number of sessions longer than 60 minutes decreases rapidly. This suggests that when students are doing work on Moodle, most will either switch to working on a different course or stop using Moodle after about 60 minutes during long sessions.

# removes quick hop ons that are less than 10 minutes
# keeps sessions strictly longer than 10 and strictly shorter than 300 minutes
q63 <- q62 %>%
  filter(sessionlength > 10,
         sessionlength < 300) #removes a few extremely long sessions created by moodle admin functions

# Histogram of session lengths above 10 minutes, in 5 minute bins
# sessionlength is a difftime in minutes; it is converted to plain numbers so that it
# matches scale_x_continuous(), the same way the full histogram of q62 does
ggplot(q63, aes(x = as.numeric(sessionlength))) +
  geom_histogram(binwidth = 5, color = "White", fill="#c9401e") +
  scale_x_continuous(breaks = pretty_breaks(n = 15)) +
  theme_clean() +
  labs(x = "Session Length (Minutes)",
       y = "Count of Sessions",
       title = "How Long Students Were On Moodle",
       subtitle = "Sessions Under 10 Minutes Removed")

Returning to the time and week visualization with amount of sessions, we can see when students are having short and long Moodle sessions.

On Monday, Tuesday, and Wednesday there is a similar amount of long Moodle sessions after classes. On Sunday, long Moodle sessions start about 12:00 and continue until 01:00 on Monday.

With the short sessions, one interesting point is that there is a large amount of late night short sessions, likely checking due dates or assignments due in next class.

#under 10 minute sessions
# "short" means 10 minutes or less, the same cutoff percentless10 uses; with `< 10` here and
# `> 10` below, sessions of exactly 10 minutes were missing from both tables
# roundedstart is the hour label ("%H:%M") of the session's start column, weekday its day of week
q64 <- q62 %>%
  filter(sessionlength <= 10) %>%
  mutate(roundedstart = format(round_date(start,unit="hour"),"%H:%M"),
         weekday = wday(start, label=TRUE))

#over 10 minute sessions
q65 <- q62 %>%
  filter(sessionlength > 10) %>%
  mutate(roundedstart = format(round_date(start,unit="hour"),"%H:%M"),
         weekday = wday(start, label=TRUE))

#under 10 minute sessions
# number of sessions per weekday / rounded start hour
q64heatmap <- q64 %>%
  group_by(weekday, roundedstart) %>%
  summarise(count = n()) %>%
  ungroup() #drop the leftover weekday grouping

#over 10 minute sessions
q65heatmap <- q65 %>%
  group_by(weekday, roundedstart) %>%
  summarise(count = n()) %>%
  ungroup()

## OLD COLORS =   scale_fill_gradient(low = "#d8e1cf", high = "#438484", na.value = "grey50") 
# Heat map of sessions longer than 10 minutes by weekday and rounded start hour
# x label says Sessions: "Submissions Per Hour" was carried over from the submissions heat map
ggplot(q65heatmap, aes(roundedstart, weekday)) + geom_tile(aes(fill = count),colour = "white", na.rm = TRUE) +
  scale_fill_gradient_tableau() +
  guides(fill=guide_legend(title="Total Sessions")) +
  theme_minimal() +
  labs(title = "Over 10 Minute Moodle Sessions by Day of Week and Hour", x = "Sessions Per Hour", y = "Day of Week") +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.y = element_text(face="bold"), axis.text.x = element_text(face="bold"))  +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 10)) +
  geom_text(aes(label = count), size = 3)

# Heat map of sessions in the short (under 10 minute) table by weekday and rounded start hour
# x label says Sessions: "Submissions Per Hour" was carried over from the submissions heat map
ggplot(q64heatmap, aes(roundedstart, weekday)) + geom_tile(aes(fill = count),colour = "white", na.rm = TRUE) +
  scale_fill_gradient_tableau() +
  guides(fill=guide_legend(title="Total Sessions")) +
  theme_minimal() +
  labs(title = "Under 10 Minute Moodle Sessions by Day of Week and Hour", x = "Sessions Per Hour", y = "Day of Week") +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.y = element_text(face="bold"), axis.text.x = element_text(face="bold"))  +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 10)) +
  geom_text(aes(label = count), size = 3)

Looking at the average by class, there is quite a difference between classes. The large differences suggest that class structure has an impact on the length of Moodle sessions as well. I would expect classes that commonly have quizzes on Moodle or have a high number of weekly modules to have longer Moodle sessions on average, because those activities keep students on Moodle for longer.

# most of this data is already shown in the other visuals - probably not needed, keeping here for future use
# average session length in minutes for each userid
q66 <- q61 %>%
  group_by(userid, sessionid) %>%
  # minutes between the first and the last action of the session
  summarise(sessionlength = difftime(max(Time), min(Time), units = "mins")) %>%
  filter(sessionlength < 500) %>% #removes a few extremely long sessions created by moodle admin function
  group_by(userid) %>%
  summarise(avg = mean(sessionlength))

## average per class
# average session length in minutes for each class
q67 <- q61 %>%
  group_by(class, sessionid) %>%
  # minutes between the first and the last action of the session
  summarise(sessionlength = difftime(max(Time), min(Time), units = "mins")) %>%
  filter(sessionlength < 300) %>% #removes a few extremely long sessions created by moodle admin function
  group_by(class) %>%
  summarise(avg = mean(sessionlength))

# One bar per class, ordered from shortest to longest average session
ggplot(q67, aes(x = reorder(class, avg), y = avg, fill = class)) +
  geom_col() +
  labs(x = "Class Name",
       y = "Average Moodle Session Length (Minutes)",
       title = "Average Length of Moodle Session For Each Class") +
  guides(fill = guide_legend(title = "Class")) +
  theme_clean() +
  # class names are rotated so they do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10))

Students visited their Moodle course on average 1.8 times a day. Since the logs only capture student activity for one course, we can assume students are checking each of the classes they need to 1.8 times a day.

##gathers sessions per day
# number of sessions each userid had on each calendar day (day + month) with at least one session
# start is the earliest Time of the session (it used to be max(Time), so a session that
# ran past midnight was counted on the day it ended)
q69 <- q61 %>%
  group_by(userid, sessionid) %>%
  summarise(start = min(Time), end = max(Time)) %>%
  mutate(sessionlength = difftime(end, start, units = "mins")) %>%
  filter(sessionlength < 300) %>% #removes a few extremely long sessions created by moodle admin function
  mutate(day = day(start),
         month = month(start)) %>%
  ungroup() %>%
  group_by(day, month, userid) %>%
  summarise(sessions = n())

# Bar chart of how many user-days had 1, 2, 3, ... sessions; the mean goes in the subtitle
ggplot(q69, aes(x = as.factor(sessions))) +
  geom_bar(fill = "#c9401e") +
  labs(
    x = "Number of Sessions Per Day",
    y = "Count",
    title = "How Many Times Students Visited Moodle Each Day",
    subtitle = paste("Average = ", round(mean(q69$sessions), 1), "Sessions Per Day")
  ) +
  theme_clean()

Checklist Module Analysis

#events dropped from the checklist analysis
#edit page and report are done by instructor only
checklistskipped <- c("Edit page viewed",
                      "Report viewed",
                      "Course module instance list viewed",
                      "Checklist complete",
                      "Teacher checks updated")

#filters to only checklists and removes some unneeded events and removes instructor from the data
checklistevents <- function(logs) {
  logs %>%
    filter(Component == 'Checklist',
           userid != 6989,
           !(Eventname %in% checklistskipped))
}

#weekly count of each remaining checklist event in class8
checklist <- checklistevents(class8) %>%
  group_by(week, Eventname) %>%
  summarise(count = n())

#weekly count of each remaining checklist event in class9
checklist2 <- checklistevents(class9) %>%
  group_by(week, Eventname) %>%
  summarise(count = n())

#join
#total remaining checklist events per userid across both classes
checklist3 <- checklistevents(bind_rows(class9, class8)) %>%
  group_by(userid) %>%
  summarise(count = n())

Overall, the new checklist plug-in modules received a good amount of usage in both of the classes they were used in. In each class, the amount of times students looked at checklists seems to relate to how often they actually “checked” activities.

Both classes were very “up and down” in their usage but I assume this is just due to fluctuations in workload each week leading to less checks needing to be made for the weekly list.

# Weekly count of each checklist event for the first checklist class, one line per event
# y label spelling fixed ("Occurances")
ggplot(checklist, aes(x=week, y = count, color = Eventname)) +
  geom_line(size = 2) +
  labs(x = "Week",
       y = "Number of Occurrences",
       title ="Class 1 Checklist Activity",
       color = "Event Name") +
  theme_minimal()

# Weekly count of each checklist event for the second checklist class, one line per event
# y label spelling fixed ("Occurances")
ggplot(checklist2, aes(x=week, y = count, color = Eventname)) +
  geom_line(size = 2)  +
  labs(x = "Week",
       y = "Number of Occurrences",
       title ="Class 2 Checklist Activity",
       color = "Event Name") +
  theme_minimal()

Although there were students that almost completely ignored the checklists, most used the checklists. The average number of views + checks for a student was almost 30 and a decent number of students used the checklist extensively.

# One point per student (ordered by total), with a dashed line at the mean
ggplot(checklist3, aes(x=reorder(userid, count), y = count)) +
  geom_point(color = "red", size = 4) +
  coord_flip() +
  scale_y_continuous(breaks = pretty_breaks(n = 8)) +
  theme_minimal() +
  geom_hline(yintercept=mean(checklist3$count), color="blue", linetype="dashed", size=1)  +
  # annotate() draws the label once; geom_text() with constant aesthetics drew it once per student
  annotate("text", x = 10, y = 30,
           label = paste("Average: ", round(mean(checklist3$count),1)),
           colour = "blue", angle = 90) +
  labs(x="Student",
       y = "Total Views + Checks",
       title = "Number of Checklist Views + Checks Made by Each Student")

Looking at the weekly view, most students had around 5 views + checks each week. Every week has some students using the checklist at high numbers and some barely using it in the same week. I think this suggests that the usage of the checklist is dependent on how much the student wants to use the checklist rather than the number of tasks on the checklists. Some will always check off all of their checklist while some will check activities off from time to time.

We do see that the usage of the checklists trends down slightly over the course of the semester; however, it is not by a significant amount.

#gathers checklist data again
# same checklist filter as above, this time counted per week and userid
checklist4 <- bind_rows(class9, class8) %>%
  filter(
    Component == 'Checklist',
    userid != 6989,
    !(Eventname %in% c("Edit page viewed",
                       "Report viewed",
                       "Course module instance list viewed",
                       "Checklist complete",
                       "Teacher checks updated"))
  ) %>%
  group_by(week, userid) %>%
  summarise(count = n())

  

#activity did trend down but not by a lot
# jittered weekly counts per student with a loess trend line on top
ggplot(checklist4, aes(x = week, y = count)) +
  geom_jitter(width = 0.1, alpha = 0.5, color = "red", size = 2) +
  geom_smooth(se = FALSE, method = "loess") +
  labs(
    x = "Week",
    y = "Views + Checks",
    title = "Number of Checklist Views + Checks Made by Each Student Weekly"
  ) +
  theme_clean()

Here is the usage of each student by week. There is a variety of patterns on display here. The checklist usage for some students was very volatile and for others it stayed the same over the weeks. Some students also used the checklist less as the semester went on and some used it more.

Unfortunately there is not enough data to fully investigate why there are so many different patterns of usage, but checklists were used considerably by students.

Note - Graphs with no line are students that used the checklist only 0 or 1 times.

#facet for each students activity
#blank graphs only used the checklist once
# one panel per userid: weekly count as a line plus a straight-line (lm) fit
ggplot(checklist4, aes(x = week, y = count)) +
  geom_line(size = 1.25) +
  geom_smooth(se = FALSE, method = "lm") +
  facet_wrap(~userid)

Perusall Module Analysis

#gathers only perusall data and views
# class8: "Course module viewed" events of the External tool component, except context "Other"
# the context text gets an " a" suffix so it differs from the class9 one
p1 <- class8 %>%
  filter(Component == 'External tool',
         Eventname == 'Course module viewed',
         Eventcontext != "Other") %>%
  mutate(Eventcontext = str_replace(Eventcontext, "External tool: Perusall",
                                    "External tool: Perusall a"))

#gathers only perusall data and views
# class9: same filter, context text gets a " b" suffix
p2 <- class9 %>%
  filter(Component == 'External tool',
         Eventname == 'Course module viewed',
         Eventcontext != "Other") %>%
  mutate(Eventcontext = str_replace(Eventcontext, "External tool: Perusall",
                                    "External tool: Perusall b"))

#join data
p <- bind_rows(p1, p2)

There were two classes that used Perusall in this dataset. The first class had a more balanced amount of activity over the weeks while the second class had a much greater amount of activity in the second half of this dataset.

#sum of views for each week
# number of "Course module viewed" rows per class, week and module context
p3 <- p %>%
  filter(Eventname %in% "Course module viewed") %>%
  group_by(class, week, Eventcontext) %>%
  summarise(count = n())

# Weekly Perusall views, one panel per class
# labs() argument is now lowercase `title`; `Title` is not the plot title, so none was shown
ggplot(p3, aes(x = week, y = count, fill = class)) +
  geom_col() +
  facet_wrap(~class) +
  theme_clean() +
  labs(x = "Week",
       y = "Number of Views",
       title = "Perusall Module Views by Week") +
  guides(fill=guide_legend(title="Class"))

The views per student of each Perusall module was fairly spread out. Almost every Perusall module had at least 1 view per student and the average views per student was 2.3.

## views per student for each module
# views of each module divided by the number of students in its class
p4 <- p %>%
  group_by(class, numStudents, Eventcontext) %>%
  summarise(count = n()) %>%
  mutate(Viewsperstudent = count / numStudents)

# anonymized assignment names
# fct_anon replaces each module context with a "Reading " + number label
p4[["names"]] <- fct_anon(as.factor(p4[["Eventcontext"]]), prefix = "Reading ")

# Horizontal bars of views per student for each Perusall module, dashed line at the mean
ggplot(p4, aes(x = reorder(names, Viewsperstudent), y = Viewsperstudent, fill = class)) +
  geom_col() +
  coord_flip() +
  geom_hline(yintercept=mean(p4$Viewsperstudent), color="blue", linetype="dashed", size=1)  +
  # annotate() draws the label once; geom_text() with constant aesthetics drew it once per module
  annotate("text", x = 10, y = mean(p4$Viewsperstudent) + 0.2,
           label = paste("Average: ", round(mean(p4$Viewsperstudent),1)),
           colour = "blue", angle = 90) +
  theme_clean() +
  # x is the module name and y the views; the two labels used to be swapped
  labs(x = "Perusall Module Name",
       y = "Views Per Student",
       title = "Views Per Student for Each Perusall Module") +
  guides(fill=guide_legend(title="Class"))

Perusall modules were viewed by each student an average of 2.9 times. This is a positive result: as noted previously, students viewed URL modules 0.7 times on average and file modules 1.2 times on average. The Perusall module outperformed both of those module types significantly.

#mean views per student
# first count views per class / userid / module, then average those counts per class / module
p5 <- p %>%
  group_by(class, userid, Eventcontext) %>%
  summarise(count = n()) %>%
  group_by(class, Eventcontext) %>% #replaces the previous grouping
  summarise(AVGStudentVisits = mean(count))

# anonymized assignment names
# fct_anon replaces each module context with a "Reading " + number label
p5[["names"]] <- fct_anon(as.factor(p5[["Eventcontext"]]), prefix = "Reading ")

# Horizontal bars of the average views per viewing student for each module, dashed line at the mean
ggplot(p5, aes(x = reorder(names, AVGStudentVisits), y = AVGStudentVisits, fill = class)) +
  geom_col() +
  coord_flip() +
  geom_hline(yintercept=mean(p5$AVGStudentVisits), color="blue", linetype="dashed", size=1)  +
  # annotate() draws the label once; geom_text() with constant aesthetics drew it once per module
  annotate("text", x = 10, y = mean(p5$AVGStudentVisits) + 0.2,
           label = paste("Average: ", round(mean(p5$AVGStudentVisits),1)),
           colour = "blue", angle = 90) +
  theme_clean() +
  labs(x = "Perusall Module Name",
       y = "Average Number of Times Viewed by Students",
       title = "Average Number of Times Each Student Viewed Perusall Module") +
  guides(fill=guide_legend(title="Class"))

Overall Comparisons Between Test Classes and Regular Classes

#course views per student
# "Course viewed" events per class divided by the class size
compare <- alldata %>%
  filter(Eventname %in% "Course viewed") %>%
  group_by(testgroup, class, numStudents) %>%
  summarise(count = n()) %>%
  mutate(countstudentratio = count / numStudents)

#simple table for viewing
# median and mean of the per-class ratio for each test group
comparedatatable <- compare %>%
  group_by(testgroup) %>% #replaces the grouping left over from compare
  summarise(median = median(countstudentratio),
            avg = mean(countstudentratio))

While only the Perusall and Checklist plug-ins were captured by the logs, these plug-ins combined with the plug-ins that could not be captured by the logs did seem to have an effect on the test group of classes.

Comparing the views per student of the test classes and the regular classes, the test classes had more views per student. The regular classes had an average of 127 views per student and the test classes had an average of 141 views per student, an increase of 14 views per student.

While we do not have enough data here to confirm that the increase was the result of the new plug-ins rather than normal class variation, having an increase in Moodle activity in the test classes is good news for the effectiveness of the plug-ins.

# One bar per class ordered by views per student, coloured by test group, value printed above each bar
ggplot(compare,
       aes(x = reorder(class, countstudentratio),
           y = countstudentratio,
           fill = as.factor(testgroup))) +
  geom_col() +
  geom_text(aes(label = round(countstudentratio, 1)), vjust = -0.75, size = 3) +
  labs(
    x = "Class",
    y = "Course Views Per Student",
    title = "Course Views Per Student For Each Class"
  ) +
  guides(fill = guide_legend(title = "Test Group Label")) +
  theme_clean() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10))

comparedatatable

## # A tibble: 2 x 3
##   testgroup median   avg
## *     <dbl>  <dbl> <dbl>
## 1         0   124.  127.
## 2         1   151.  141.