# load packages
library(tidyverse)
library(broom)   # tidy()
library(knitr)   # kable()
library(langcog) # multi_boot_standard() for bootstrapped CIs
# source('correlation_table_plot.R') # plot_cor_table

# plot setup
theme_set(theme_classic(base_size = 10))

DATA_PATH <- "learning_quiz_dataset.csv"
raw_data <- read_csv(DATA_PATH)
tidy_data <- raw_data %>%
  mutate_if(is.character, as.factor)
count(tidy_data, user_id, tag, item_type) %>%
  ggplot(aes(x = n, fill = item_type)) +
  geom_histogram() +
  facet_grid(~ item_type) +
  ggtitle("N datapoints per user and section")
From section 1 to 2, there is no improvement in vocabulary or grammar. If anything, there is a trend toward worse performance in grammar.
prop_correct_by_user <- tidy_data %>%
  group_by(item_type, user_id, section) %>%
  summarize(mean_correct = mean(correct)) %>%
  group_by(item_type, section) %>%
  multi_boot_standard(col = "mean_correct") %>%
  ungroup() %>%
  mutate(section = as.factor(section))

ggplot(prop_correct_by_user, aes(x = section, y = mean, group = item_type, color = item_type)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  geom_line()
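To check whether the apparent grammar decline is reliable, here is a quick sketch of a paired comparison of each user's section-1 and section-2 grammar accuracy (this assumes sections are coded 1 and 2, and drops users who only appear in one section):

# sketch: paired comparison of grammar accuracy, section 1 vs. 2, within users
# users observed in only one section are removed by drop_na()
grammar_change <- tidy_data %>%
  filter(item_type == "grammar") %>%
  group_by(user_id, section) %>%
  summarize(mean_correct = mean(correct), .groups = "drop") %>%
  pivot_wider(names_from = section, values_from = mean_correct,
              names_prefix = "section_") %>%
  drop_na()

t.test(grammar_change$section_1, grammar_change$section_2, paired = TRUE) %>%
  tidy()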
Let’s see whether this is driven by particular question types. Below, performance change from section 1 to 2 is plotted as a function of grammar question type. It looks like the overall decrease we see above may be due to a few tags: futuro-proximo, oraciones-subordinadas, preterito-ar. In general these seem to be related to tense. These are also cases where there was no data from the first section, suggesting that the decrease could be an artefact of missing data.
performance_by_grammar_tag <- tidy_data %>%
  filter(item_type == "grammar") %>%
  group_by(item_type, user_id, section, tag) %>%
  summarize(mean_correct = mean(correct)) %>%
  group_by(section, tag) %>%
  multi_boot_standard(col = "mean_correct") %>%
  ungroup() %>%
  mutate(section = as.factor(section))

ggplot(performance_by_grammar_tag, aes(x = section, y = mean, group = tag, color = tag)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  facet_wrap(~ tag) +
  geom_line() +
  theme(legend.position = "none")
performance_by_grammar_tag %>%
  select(section, tag, mean) %>%
  spread(section, mean) %>%
  # mutate(`1` = case_when(is.na(`1`) ~ 0, TRUE ~ `1`),
  #        `2` = case_when(is.na(`2`) ~ 0, TRUE ~ `2`)) %>%
  mutate(learning_delta = `1` - `2`,
         learning_delta = round(learning_delta, 2)) %>%
  arrange(learning_delta) %>%
  select(tag, learning_delta) %>%
  DT::datatable()
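To check the missing-data explanation directly, a small sketch (assuming sections are coded 1 and 2, as in the table above) listing the grammar tags with no section-1 observations:

# sketch: grammar tags with no data in section 1
performance_by_grammar_tag %>%
  select(section, tag, mean) %>%
  spread(section, mean) %>%
  filter(is.na(`1`)) %>%
  pull(tag)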
Let’s filter to cases where we have data for both section 1 and 2. If we exclude the missing data, there’s no longer a decrease in performance between the two sections, but there’s also no improvement. There’s also a lot of variability across the sections.
prop_correct_by_user_no_na <- performance_by_grammar_tag %>%
  select(section, tag, mean) %>%
  spread(section, mean) %>%
  drop_na() %>%
  gather("section", "value", -tag) %>%
  group_by(section) %>%
  multi_boot_standard(col = "value") %>%
  ungroup() %>%
  mutate(section = as.factor(section)) %>%
  mutate(group = "grammar")

ggplot(prop_correct_by_user_no_na, aes(x = section, y = mean, group = group)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  geom_line()
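To put a number on the "no decrease, no improvement" reading, a sketch (under the same section-coding assumption) of a paired comparison of section-1 and section-2 means, restricted to tags observed in both sections:

# sketch: paired comparison over tags with data in both sections
complete_tags <- performance_by_grammar_tag %>%
  select(section, tag, mean) %>%
  spread(section, mean) %>%
  drop_na()

t.test(complete_tags$`1`, complete_tags$`2`, paired = TRUE) %>%
  tidy()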
Are users that are good at vocabulary questions also good at grammar questions?
by_user_data <- tidy_data %>%
  group_by(item_type, user_id) %>%
  drop_na() %>%
  summarize(mean_performance = mean(correct)) %>%
  spread(item_type, mean_performance)

ggplot(by_user_data, aes(x = grammar, y = vocabulary)) +
  geom_point() +
  geom_smooth() +
  geom_smooth(method = "lm", color = "red")
performance_cor <- cor.test(by_user_data$grammar, by_user_data$vocabulary) %>%
  tidy()

kable(performance_cor)
| estimate | statistic | p.value | parameter | conf.low | conf.high | method | alternative |
|---|---|---|---|---|---|---|---|
| 0.5658013 | 40.5033 | 0 | 3484 | 0.5427989 | 0.5879554 | Pearson’s product-moment correlation | two.sided |
There’s a pretty systematic relationship between performance on grammar and vocabulary: users who are good at one type of question are also good at the other (r = 0.57 across all users), and the relationship looks linear. The red line shows a linear fit, and the blue line shows a loess fit.
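As a robustness check (a sketch, not part of the analysis above), a rank-based correlation should give a similar estimate if the relationship is roughly monotonic rather than strictly linear:

# sketch: Spearman rank correlation as a robustness check on the Pearson estimate
# exact = FALSE because ties in per-user accuracy are likely
cor.test(by_user_data$grammar, by_user_data$vocabulary,
         method = "spearman", exact = FALSE) %>%
  tidy() %>%
  kable()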