#Here, I have made up grades for each student in order to run the script in the absensence of having only the analytics data.
load("analytics_raw.rds")
Last <- analytics_raw %>%
select(c(`User ID`, `Sortable Name`)) %>%
separate(`Sortable Name`, into = c("Last", NA)) %>%
unique()
load("X241_spring19.rds")
X241_spring19_x <- X241_spring19 %>%
rename("T1" = `Exam 1`, "T2" = `Exam 2`, "T3" = `Exam 3`, "T4" = `Exam 4`) %>%
select(c(Name, T1, T2, T3, T4, Final)) %>%
mutate(Final = as.numeric(Final),
T1 = as.numeric(T1),
T2 = as.numeric(T2),
T3 = as.numeric(T3),
T4 = as.numeric(T4)) %>%
mutate(Final = Final/2) %>%
drop_na() %>%
separate(Name, into = c("Last", NA)) %>%
left_join(Last, by = "Last") %>%
select(-Last) %>%
gather(T1, T2, T3, T4, Final, key = "Exam", value = "Grade" ) %>%
mutate(Exam = Exam %>% fct_relevel("T1", "T2", "T3", "T4", "Final", "avg"))
#Here, I have made up grades for each student in order to run the script in the absensence of having only the analytics data.
analytics_full <- analytics_raw %>%
left_join(X241_spring19_x, by = "User ID")
ggplot(data = X241_spring19_x) +
geom_boxplot(mapping = aes(x = Exam, y = Grade, fill = Exam)) +
geom_jitter(mapping = aes(x = Exam, y = Grade, color = Exam), alpha = .3) +
theme(panel.background = element_blank(),
axis.line = element_line(),
legend.position="none") +
coord_cartesian(ylim = c(0:100)) +
labs(y = "Grade (%)")
It would be cool to then later ID kids as “video watchers” and “non-video watchers” and then split the fill = video_vatching_status se that T1, T2, etc. grades can be seen for those who do and don’t watch videos
test_test <- analytics_raw %>%
select(`User ID`, `Sortable Name`, `Title`, `Views`, `Last Access`, `First Access`) %>%
mutate(Category =
if_else(str_detect(Title, "T5 PRACTICE") |
str_detect(Title, "Test 2") |
str_detect(Title, "T4.pdf") |
str_detect(Title, "Test 1") |
str_detect(Title, "Test 3"), "Test/Key",
if_else(str_detect(Title, "#") |
str_detect(Title, "^Q") |
str_detect(Title, "T.Q"), "Videos", "Other"
)
),
`Last Access` = ymd_hms(`Last Access`),
`First Access` = ymd_hms(`First Access`),
Ex_num = if_else(`Last Access` < ymd_hms("2019-01-31 09:15:00"), "1", if_else(
`Last Access` < ymd_hms("2019-02-28 09:15:00"), "2", if_else(
`Last Access` < ymd_hms("2019-03-28 09:15:00"), "3", if_else(
`Last Access` < ymd_hms("2019-04-18 09:15:00"), "4", "Final"
)
)
)
)) %>%
arrange(`Sortable Name`)
# for some reason, this is the better way to annotate ggplots with geom_text. Doing so othrwise makes the labels print twice and appear blurry.
####### T1 vid, Test/answer, other----
df_T1_x_vid <- tibble(
`Sortable Name` = unique(filter(test_test, Ex_num == 1 & Category == "Videos")$`Sortable Name`),
T1_vid = tabulate(with(filter(test_test, Ex_num == 1 & Category == "Videos"), interaction(`Sortable Name`)))
)
df_T1_x_TK <- tibble(
`Sortable Name` = unique(filter(test_test, Ex_num == 1 & Category == "Test/Key")$`Sortable Name`),
T1_TK = tabulate(with(filter(test_test, Ex_num == 1 & Category == "Test/Key"), interaction(`Sortable Name`)))
)
df_T1_x_other <- tibble(
`Sortable Name` = unique(filter(test_test, Ex_num == 1 & Category == "Other")$`Sortable Name`),
T1_other = tabulate(with(filter(test_test, Ex_num == 1 & Category == "Other"), interaction(`Sortable Name`)))
)
####### T2 vid, Test/answer, other----
df_T2_x_vid <- tibble(
`Sortable Name` = unique(filter(test_test, Ex_num == 2 & Category == "Videos")$`Sortable Name`),
T2_vid = tabulate(with(filter(test_test, Ex_num == 2 & Category == "Videos"), interaction(`Sortable Name`)))
)
df_T2_x_TK <- tibble(
`Sortable Name` = unique(filter(test_test, Ex_num == 2 & Category == "Test/Key")$`Sortable Name`),
T2_TK = tabulate(with(filter(test_test, Ex_num == 2 & Category == "Test/Key"), interaction(`Sortable Name`)))
)
df_T2_x_other <- tibble(
`Sortable Name` = unique(filter(test_test, Ex_num == 2 & Category == "Other")$`Sortable Name`),
T2_other = tabulate(with(filter(test_test, Ex_num == 2 & Category == "Other"), interaction(`Sortable Name`)))
)
####### T3 vid, Test/answer, other----
df_T3_x_vid <- tibble(
`Sortable Name` = unique(filter(test_test, Ex_num == 3 & Category == "Videos")$`Sortable Name`),
T3_vid = tabulate(with(filter(test_test, Ex_num == 3 & Category == "Videos"), interaction(`Sortable Name`)))
)
df_T3_x_TK <- tibble(
`Sortable Name` = unique(filter(test_test, Ex_num == 3 & Category == "Test/Key")$`Sortable Name`),
T3_TK = tabulate(with(filter(test_test, Ex_num == 3 & Category == "Test/Key"), interaction(`Sortable Name`)))
)
df_T3_x_other <- tibble(
`Sortable Name` = unique(filter(test_test, Ex_num == 3 & Category == "Other")$`Sortable Name`),
T3_other = tabulate(with(filter(test_test, Ex_num == 3 & Category == "Other"), interaction(`Sortable Name`)))
)
####### T4 vid, Test/answer, other----
df_T4_x_vid <- tibble(
`Sortable Name` = unique(filter(test_test, Ex_num == 4 & Category == "Videos")$`Sortable Name`),
T4_vid = tabulate(with(filter(test_test, Ex_num == 4 & Category == "Videos"), interaction(`Sortable Name`)))
)
df_T4_x_TK <- tibble(
`Sortable Name` = unique(filter(test_test, Ex_num == 4 & Category == "Test/Key")$`Sortable Name`),
T4_TK = tabulate(with(filter(test_test, Ex_num == 4 & Category == "Test/Key"), interaction(`Sortable Name`)))
)
df_T4_x_other <- tibble(
`Sortable Name` = unique(filter(test_test, Ex_num == 4 & Category == "Other")$`Sortable Name`),
T4_other = tabulate(with(filter(test_test, Ex_num == 4 & Category == "Other"), interaction(`Sortable Name`)))
)
####### Final vid, Test/answer, other----
df_final_x_vid <- tibble(
`Sortable Name` = unique(filter(test_test, Ex_num == "Final" & Category == "Videos")$`Sortable Name`),
final_vid = tabulate(with(filter(test_test, Ex_num == "Final" & Category == "Videos"), interaction(`Sortable Name`)))
)
df_final_x_TK <- tibble(
`Sortable Name` = unique(filter(test_test, Ex_num == "Final" & Category == "Test/Key")$`Sortable Name`),
final_TK = tabulate(with(filter(test_test, Ex_num == "Final" & Category == "Test/Key"), interaction(`Sortable Name`)))
)
df_final_x_other <- tibble(
`Sortable Name` = unique(filter(test_test, Ex_num == "Final" & Category == "Other")$`Sortable Name`),
final_other = tabulate(with(filter(test_test, Ex_num == "Final" & Category == "Other"), interaction(`Sortable Name`)))
)
######----
usage <- test_test %>% select(`Sortable Name`) %>%
unique() %>%
left_join(df_T1_x_vid, by = "Sortable Name") %>%
left_join(df_T1_x_TK, by = "Sortable Name") %>%
left_join(df_T1_x_other, by = "Sortable Name") %>%
left_join(df_T2_x_vid, by = "Sortable Name") %>%
left_join(df_T2_x_other, by = "Sortable Name") %>%
left_join(df_T2_x_TK, by = "Sortable Name") %>%
left_join(df_T3_x_vid, by = "Sortable Name") %>%
left_join(df_T3_x_other, by = "Sortable Name") %>%
left_join(df_T3_x_TK, by = "Sortable Name") %>%
left_join(df_T4_x_vid, by = "Sortable Name") %>%
left_join(df_T4_x_other, by = "Sortable Name") %>%
left_join(df_T4_x_TK, by = "Sortable Name") %>%
left_join(df_final_x_vid, by = "Sortable Name") %>%
left_join(df_final_x_other, by = "Sortable Name") %>%
left_join(df_final_x_TK, by = "Sortable Name") %>%
gather(T1_vid, T1_TK, T1_other,
T2_vid, T2_TK, T2_other,
T3_vid, T3_TK, T3_other,
T4_vid, T4_TK, T4_other,
final_vid, final_TK, final_other,
key = view_type, value = count) %>% arrange(`Sortable Name`) %>% separate(view_type, into = c("test", "type")) %>%
mutate(count = replace_na(count, 0)) %>%
mutate(test = test %>% fct_relevel("T1", "T2", "T3", "T4", "final")) %>%
mutate(type = type %>% fct_relevel("vid", "TK", "other"))
####----
ggplot(data = usage) +
geom_boxplot(mapping = aes(x = test, y = count, fill = type)) +
theme_classic() +
labs(y = "Views", x = "View Type (by date viewed)")
################################################
Similar to boxplots but more sensitive to outliers. However, I feel like it does a better job of visualising trends in use than do the box plots due to a large number of people accumulating 0 clicks
Here, I want to explore the relationship between how far out you use the videos and your score.
Also plotly would be really nice here. Need to adjust the label output though, right now it is displaying it in raw seconds
graph_2_labs <- tibble(
x = c(ymd_hms("2019-02-01 09:15:00") + minutes(60*24*6),
ymd_hms("2019-03-01 09:15:00") + minutes(60*24*6),
ymd_hms("2019-03-29 09:15:00") + minutes(60*24*6),
ymd_hms("2019-04-19 09:15:00") + minutes(60*24*6),
ymd_hms("2019-05-3 09:15:00") + minutes(60*24*4)),
y = 250,
label = c("Exam 1",
"Exam 2",
"Exam 3",
"Exam 4",
"Final")
)
freq_dt <- test_test %>% filter(Category != "Other") %>% select(`Sortable Name`, `Last Access`) %>% unique()
ggplot(freq_dt) +
geom_freqpoly(aes(x = `Last Access`), binwidth = 86400, color = "red", size = .8) + # binwidth = on day in seconds
theme_classic() +
geom_vline(xintercept = as.numeric(c(ymd_hms("2019-02-01 09:15:00",
"2019-03-01 09:15:00",
"2019-03-29 09:15:00",
"2019-04-19 09:15:00",
"2019-05-3 09:15:00")))) + #These are the dates of one day after each exam
geom_text(data = graph_2_labs, aes(x = x, y = y, label = label)) +
labs(y = "Views", x = "Date")
get ready
corr_names <- analytics_raw %>% select(`User ID`, `Sortable Name`) %>% unique()
tot_x_vid <- tibble(
`Sortable Name` = unique(filter(test_test, Category == "Videos")$`Sortable Name`),
TOTAL_vid = tabulate(with(filter(test_test, Category == "Videos"), interaction(`Sortable Name`)))
)
tot_x_TK <- tibble(
`Sortable Name` = unique(filter(test_test, Category == "Test/Key")$`Sortable Name`),
TOTAL_TK = tabulate(with(filter(test_test, Category == "Test/Key"), interaction(`Sortable Name`)))
)
corr <- X241_spring19 %>%
rename("T1" = `Exam 1`, "T2" = `Exam 2`, "T3" = `Exam 3`, "T4" = `Exam 4`) %>%
select(c(Name, T1, T2, T3, T4, Final, Percent)) %>%
mutate(Final = as.numeric(Final),
T1 = as.numeric(T1),
T2 = as.numeric(T2),
T3 = as.numeric(T3),
T4 = as.numeric(T4),
Percent = as.numeric(Percent)) %>%
mutate(Final = Final/2) %>%
drop_na() %>%
separate(Name, into = c("Last", NA)) %>%
left_join(Last, by = "Last") %>%
left_join(corr_names, by = "User ID") %>%
select(-c(Last, `User ID`)) %>%
left_join(df_T1_x_vid, by = "Sortable Name") %>%
left_join(df_T1_x_TK, by = "Sortable Name") %>%
left_join(df_T2_x_vid, by = "Sortable Name") %>%
left_join(df_T2_x_TK, by = "Sortable Name") %>%
left_join(df_T3_x_vid, by = "Sortable Name") %>%
left_join(df_T3_x_TK, by = "Sortable Name") %>%
left_join(df_T4_x_vid, by = "Sortable Name") %>%
left_join(df_T4_x_TK, by = "Sortable Name") %>%
left_join(df_final_x_vid, by = "Sortable Name") %>%
left_join(df_final_x_TK, by = "Sortable Name") %>%
left_join(tot_x_vid, by = "Sortable Name") %>%
left_join(tot_x_TK, by = "Sortable Name") %>%
na_replace(0) %>%
mutate(T1_tot_resource = T1_vid + T1_TK,
T2_tot_resource = T2_vid + T2_TK,
T3_tot_resource = T3_vid + T3_TK,
T4_tot_resource = T4_vid + T4_TK,
final_tot_resource = final_vid + final_TK,
TOTAL = TOTAL_vid + TOTAL_TK)
### Making lm, extracting values, and assessing for transformation
PLOTS <- function(x, y, dataset) {
lm <- lm(y ~ x)
lm
p_val <- round(summary(lm)$coefficients[2,4], 3)
r_sq <- round(summary(lm)$r.squared, 3)
ad_r_sq <- summary(lm)$adj.r.squared
intercept <- round(summary(lm)$coefficients[1, 1], 3)
slope <- round(summary(lm)$coefficients[2, 1], 3)
label_1 <- paste('y = ', slope,' * x + ', intercept, '; R^2 = ', r_sq, '; p = ', p_val, sep='')
#boxcox(lm)
### Plotting graph and residuals
graph_3_labs <- tibble(
x_1 = (min(x) + (max(x)/2 - min(x)/2)),
y_1 = 100,
label_2 = paste('y = ', slope,' * x + ', intercept, '\n', 'R^2 = ', r_sq, '; p = ', p_val, sep='')
)
p2 <- ggplot(dataset, aes(x = x, y = y)) +
geom_jitter() +
stat_smooth(method = "lm") +
theme_classic() +
geom_text(data = graph_3_labs, aes(x = x_1, y = y_1, label = label_2), col = "red") +
labs(x = "Clicks", y = "Grade") +
coord_cartesian(xlim = c(0 : max(x)), ylim = 0 : 100)
res_plot_labs <- tibble(
x_2 = (min(x) + (max(x)/2 - min(x)/2)),
y_2 = 50,
label_3 = paste("Shapiro-Wilk p: ", round(shapiro.test(summary(lm)$residuals)$p.val, 3), sep = "")
)
p3 <- ggplot(dataset, aes(x = x, y = summary(lm)$residuals)) +
geom_jitter() +
geom_hline(yintercept = 0) +
theme_classic() +
geom_text(data = res_plot_labs, aes(x = x_2, y = y_2, label = label_3), col = "red") +
labs(x = "Clicks", y = "Residuals") +
coord_cartesian(xlim = c(0 : max(x)))
grid.arrange(p2, p3, nrow = 1)
}
PLOTS(corr$TOTAL, corr$Percent, corr)
PLOTS(corr$TOTAL_vid, corr$Percent, corr)
PLOTS(corr$TOTAL_TK, corr$Percent, corr)
PLOTS(corr$T1_tot_resource, corr$T1, corr)
PLOTS(corr$T1_vid, corr$T1, corr)
PLOTS(corr$T1_TK, corr$T1, corr)
PLOTS(corr$T2_tot_resource, corr$T2, corr)
PLOTS(corr$T2_vid, corr$T2, corr)
PLOTS(corr$T2_TK, corr$T2, corr)
PLOTS(corr$T3_tot_resource, corr$T3, corr)
PLOTS(corr$T3_vid, corr$T3, corr)
PLOTS(corr$T3_TK, corr$T3, corr)
PLOTS(corr$T4_tot_resource, corr$T4, corr)
PLOTS(corr$T4_vid, corr$T4, corr)
PLOTS(corr$T4_TK, corr$T4, corr)
PLOTS(corr$final_tot_resource, corr$Final, corr)
PLOTS(corr$final_vid, corr$Final, corr)
PLOTS(corr$final_TK, corr$Final, corr)
This is a plot of Total views vs. course grade for only the IQR
In other words, I chopped off the top scorers and the very bottom of the class to look at the relationship. As you can se, p-value shoots up and the trend disappears
av_bear_corr <- corr %>% filter(Percent > (mean(Percent) - sd(Percent)) &
Percent < (mean(Percent) + sd(Percent)))
lm_2 <- lm(Percent ~ TOTAL, data = av_bear_corr)
p_val_2 <- round(summary(lm_2)$coefficients[2,4], 3)
r_sq_2 <- round(summary(lm_2)$r.squared, 3)
ad_r_sq_2 <- summary(lm_2)$adj.r.squared
intercept_2 <- round(summary(lm_2)$coefficients[1, 1], 3)
slope_2 <- round(summary(lm_2)$coefficients[2, 1], 3)
graph_avg_labs <- tibble(
x_1 = 30,
y_1 = 100,
label_2 = paste('y = ', slope_2,' * x + ', intercept_2, '; R^2 = ', r_sq_2, '; p = ', p_val_2, sep='')
)
ggplot(av_bear_corr, aes(x = TOTAL, y = Percent)) +
geom_jitter() +
stat_smooth(method = "lm") +
theme_classic() +
geom_text(data = graph_avg_labs, aes(x = x_1, y = y_1, label = label_2), col = "red") +
labs(x = "Clicks", y = "Grade")
p_vals_var <-c()
n <- 0
for (i in min(corr$TOTAL) + 20 : max(corr$TOTAL) - 20){
corr_bin <- corr %>%
mutate(viewer_type = ifelse(
TOTAL > i,
"High",
"Low"
))
var_p <- var.test(filter(corr_bin, viewer_type == "High")$Percent,
filter(corr_bin, viewer_type == "Low")$Percent)$p.val
ifelse(
var_p > .05,
t_p <- t.test(filter(corr_bin, viewer_type == "High")$Percent,
filter(corr_bin, viewer_type == "Low")$Percent,
var.equal = TRUE)$p.val,
t_p <- t.test(filter(corr_bin, viewer_type == "High")$Percent,
filter(corr_bin, viewer_type == "Low")$Percent,
var.equal = FALSE)$p.val
)
n <- n + 1
p_vals_var <- c(p_vals_var, t_p)
}
newtest <- tibble(cutoff = c(min(corr$TOTAL) + 20 : max(corr$TOTAL) - 20), p = p_vals_var)
x <- ggplot(newtest) +
geom_point(aes(x = cutoff, y = p, col = p)) +
geom_hline(yintercept = 0.05) +
theme_classic()
ggplotly(x)
corr_bin <- corr %>%
mutate(viewer_type = ifelse(
TOTAL > 19,
"High",
"Low"
))
t_bin <- round(t.test(filter(corr_bin, viewer_type == "High")$Percent,
filter(corr_bin, viewer_type == "Low")$Percent,
var.equal = TRUE)$p.val, 3)
gt <- paste("p: ", t_bin, sep = "")
bin_labs <- tibble(
x = 1.5,
y = 90,
label = paste("p: ", t_bin, sep = "")
)
ggplot(corr_bin) +
geom_boxplot(aes(x = viewer_type, y = Percent, fill = viewer_type)) +
geom_jitter(aes(x = viewer_type, y = Percent, fill = viewer_type)) +
geom_text(data = bin_labs, aes(x = x, y = y, label = label), col = "red") +
theme_classic()
var.test(filter(corr_bin, viewer_type == "High")$Percent,
filter(corr_bin, viewer_type == "Low")$Percent)$p.val
## [1] 0.42679