# Load the survey workbook and pass it through clean_names() so the
# (very long) question headers can be referenced as column names below.
raw_data <- clean_names(read_excel("raw-data.xlsx"))

Q1

Q1: removing stopwords

Q1 response (Q: Having gone through these mastery problems, do you think that someone who was skilled with these competencies could get 100% for this Level 1: Free Body Diagrams cluster on their first try? Why or why not?)

# Build the document-feature matrix for the Q1 free-text responses with
# punctuation and English stopwords removed and the remaining terms stemmed.
#
# The text is tokenised explicitly and cleaned with the dfm_*() helpers:
# calling dfm() directly on a character vector with `remove` / `stem` is
# deprecated in quanteda 3 and no longer supported in quanteda 4. The order
# (lowercase -> remove stopwords -> stem) mirrors what the old one-shot
# dfm() call did.
news_dfm <- tokens(
  raw_data$q1_response_q_having_gone_through_these_mastery_problems_do_you_think_that_someone_who_was_skilled_with_these_competencies_could_get_100_percent_for_this_level_1_free_body_diagrams_cluster_on_their_first_try_why_or_why_not,
  remove_punct = TRUE
) %>%
  dfm() %>%
  dfm_remove(stopwords("english")) %>%
  dfm_wordstem()

# Keep only responses that still contain at least one token after cleaning.
news_dfm <- news_dfm[ntoken(news_dfm) > 0, ]

# Convert to the "topicmodels" format consumed by the tuning and LDA steps.
dtm <- convert(news_dfm, to = "topicmodels")

# Score candidate topic counts (5, 10, 15, 20) on the current `dtm` with four
# metrics, fitting by Gibbs sampling with a fixed seed on two cores.
candidate_k <- seq(5, 20, by = 5)
tuning_metrics <- c("Griffiths2004", "CaoJuan2009", "Arun2010", "Deveaud2014")

result <- FindTopicsNumber(
  dtm,
  topics = candidate_k,
  metrics = tuning_metrics,
  method = "Gibbs",
  control = list(seed = 77),
  mc.cores = 2L,
  verbose = TRUE
)

# Console output recorded from a previous run:
## fit models... done.
## calculate metrics:
##   Griffiths2004... done.
##   CaoJuan2009... done.
##   Arun2010... done.
##   Deveaud2014... done.

# Plot the metric curves to compare the candidate topic counts.
FindTopicsNumber_plot(result)

# Fit a 10-topic LDA model on the current `dtm`. A fixed seed (the same value
# used for the tuning run) keeps the fitted topics, and therefore the plot
# below, reproducible from one run to the next.
lda <- LDA(dtm, k = 10, control = list(seed = 77))

# Per-topic, per-term probabilities (beta) in tidy form.
ap_topics <- tidy(lda, matrix = "beta")

# Ten highest-beta terms within each topic, sorted for display.
ap_top_terms <- ap_topics %>%
  group_by(topic) %>%
  top_n(10, beta) %>%
  ungroup() %>%
  arrange(topic, -beta)

# One facet per topic; reorder_within()/scale_y_reordered() order the terms
# by beta independently inside each facet.
p1 <- ap_top_terms %>%
  mutate(term = reorder_within(term, beta, topic)) %>%
  ggplot(aes(beta, term, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free") +
  scale_y_reordered()

p1

Q1: not removing stopwords

# Q1 responses again, this time keeping stopwords: punctuation is removed and
# terms are stemmed, but no stopword filter is applied.
#
# The text is tokenised explicitly and stemmed with dfm_wordstem(): calling
# dfm() directly on a character vector with `stem` is deprecated in
# quanteda 3 and no longer supported in quanteda 4.
news_dfm <- tokens(
  raw_data$q1_response_q_having_gone_through_these_mastery_problems_do_you_think_that_someone_who_was_skilled_with_these_competencies_could_get_100_percent_for_this_level_1_free_body_diagrams_cluster_on_their_first_try_why_or_why_not,
  remove_punct = TRUE
) %>%
  dfm() %>%
  dfm_wordstem()

# Keep only responses that still contain at least one token after cleaning.
news_dfm <- news_dfm[ntoken(news_dfm) > 0, ]

# Rebuild `dtm` from the matrix that still contains stopwords. Without this
# step the model below would be fitted on the stopword-removed `dtm` left
# over from the previous section.
dtm <- convert(news_dfm, to = "topicmodels")

# Fit a 10-topic LDA model; the fixed seed keeps the result reproducible.
lda <- LDA(dtm, k = 10, control = list(seed = 77))

# Per-topic, per-term probabilities (beta) in tidy form.
ap_topics <- tidy(lda, matrix = "beta")

# Ten highest-beta terms within each topic, sorted for display.
ap_top_terms <- ap_topics %>%
  group_by(topic) %>%
  top_n(10, beta) %>%
  ungroup() %>%
  arrange(topic, -beta)

# One facet per topic; reorder_within()/scale_y_reordered() order the terms
# by beta independently inside each facet.
p1 <- ap_top_terms %>%
  mutate(term = reorder_within(term, beta, topic)) %>%
  ggplot(aes(beta, term, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free") +
  scale_y_reordered()

p1

Q3

Q3: removing stopwords

Question 3 (Q: After completing the previous mastery cluster (Level 1: Free Body Diagrams), two students disagree about whether or not someone who gets 100% on this cluster has mastered the competencies.

Alex: “Once you get 100% on this cluster, you know how to identify the forces and draw a free body diagram for any object.”

Dana: “Even if you can correctly identify forces or draw a free body diagram for 4 situations in a cluster attempt, there are other situations that you haven’t seen before that you won’t know how to do.”

Who do you agree with?)

# Build the document-feature matrix for the Question 3 free-text responses
# with punctuation and English stopwords removed and the remaining terms
# stemmed.
#
# The text is tokenised explicitly and cleaned with the dfm_*() helpers:
# calling dfm() directly on a character vector with `remove` / `stem` is
# deprecated in quanteda 3 and no longer supported in quanteda 4. The order
# (lowercase -> remove stopwords -> stem) mirrors what the old one-shot
# dfm() call did.
news_dfm <- tokens(
  raw_data$question_3_q_after_completing_the_previous_mastery_cluster_level_1_free_body_diagrams_two_students_disagree_about_whether_or_not_someone_who_gets_100_percent_on_this_cluster_has_mastered_the_competencies_alex_once_you_get_100_percent_on_this_cluster_you_know_how_to_identify_the_forces_and_draw_a_free_body_diagram_for_any_object_dana_even_if_you_can_correctly_identify_forces_or_draw_a_free_body_diagram_for_4_situations_in_a_cluster_attempt_there_are_other_situations_that_you_havent_seen_before_that_you_wont_know_how_to_do_who_do_you_agree_with,
  remove_punct = TRUE
) %>%
  dfm() %>%
  dfm_remove(stopwords("english")) %>%
  dfm_wordstem()

# Keep only responses that still contain at least one token after cleaning.
news_dfm <- news_dfm[ntoken(news_dfm) > 0, ]

# Convert to the "topicmodels" format consumed by the tuning and LDA steps.
dtm <- convert(news_dfm, to = "topicmodels")

# Score candidate topic counts (5, 10, 15, 20) on the current `dtm` with four
# metrics, fitting by Gibbs sampling with a fixed seed on two cores.
candidate_k <- seq(5, 20, by = 5)
tuning_metrics <- c("Griffiths2004", "CaoJuan2009", "Arun2010", "Deveaud2014")

result <- FindTopicsNumber(
  dtm,
  topics = candidate_k,
  metrics = tuning_metrics,
  method = "Gibbs",
  control = list(seed = 77),
  mc.cores = 2L,
  verbose = TRUE
)

# Console output recorded from a previous run:
## fit models... done.
## calculate metrics:
##   Griffiths2004... done.
##   CaoJuan2009... done.
##   Arun2010... done.
##   Deveaud2014... done.

# Plot the metric curves to compare the candidate topic counts.
FindTopicsNumber_plot(result)

# Fit a 10-topic LDA model on the current `dtm`. A fixed seed (the same value
# used for the tuning run) keeps the fitted topics, and therefore the plot
# below, reproducible from one run to the next.
lda <- LDA(dtm, k = 10, control = list(seed = 77))

# Per-topic, per-term probabilities (beta) in tidy form.
ap_topics <- tidy(lda, matrix = "beta")

# Ten highest-beta terms within each topic, sorted for display.
ap_top_terms <- ap_topics %>%
  group_by(topic) %>%
  top_n(10, beta) %>%
  ungroup() %>%
  arrange(topic, -beta)

# One facet per topic; reorder_within()/scale_y_reordered() order the terms
# by beta independently inside each facet.
p1 <- ap_top_terms %>%
  mutate(term = reorder_within(term, beta, topic)) %>%
  ggplot(aes(beta, term, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free") +
  scale_y_reordered()

p1

Q3: not removing stopwords

# Question 3 responses again, this time keeping stopwords: punctuation is
# removed and terms are stemmed, but no stopword filter is applied.
#
# The text is tokenised explicitly and stemmed with dfm_wordstem(): calling
# dfm() directly on a character vector with `stem` is deprecated in
# quanteda 3 and no longer supported in quanteda 4.
news_dfm <- tokens(
  raw_data$question_3_q_after_completing_the_previous_mastery_cluster_level_1_free_body_diagrams_two_students_disagree_about_whether_or_not_someone_who_gets_100_percent_on_this_cluster_has_mastered_the_competencies_alex_once_you_get_100_percent_on_this_cluster_you_know_how_to_identify_the_forces_and_draw_a_free_body_diagram_for_any_object_dana_even_if_you_can_correctly_identify_forces_or_draw_a_free_body_diagram_for_4_situations_in_a_cluster_attempt_there_are_other_situations_that_you_havent_seen_before_that_you_wont_know_how_to_do_who_do_you_agree_with,
  remove_punct = TRUE
) %>%
  dfm() %>%
  dfm_wordstem()

# Keep only responses that still contain at least one token after cleaning.
news_dfm <- news_dfm[ntoken(news_dfm) > 0, ]

# Convert to the "topicmodels" format consumed by the LDA step.
dtm <- convert(news_dfm, to = "topicmodels")

# Fit a 10-topic LDA model on the current `dtm`. A fixed seed keeps the
# fitted topics, and therefore the plot below, reproducible from one run to
# the next.
lda <- LDA(dtm, k = 10, control = list(seed = 77))

# Per-topic, per-term probabilities (beta) in tidy form.
ap_topics <- tidy(lda, matrix = "beta")

# Ten highest-beta terms within each topic, sorted for display.
ap_top_terms <- ap_topics %>%
  group_by(topic) %>%
  top_n(10, beta) %>%
  ungroup() %>%
  arrange(topic, -beta)

# One facet per topic; reorder_within()/scale_y_reordered() order the terms
# by beta independently inside each facet.
p1 <- ap_top_terms %>%
  mutate(term = reorder_within(term, beta, topic)) %>%
  ggplot(aes(beta, term, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free") +
  scale_y_reordered()

p1

Initial Exploration of Q1 and Q3

Q1

Q1: removing stopwords

Q1: not removing stopwords

Q3

Q3: removing stopwords

Q3: not removing stopwords