library(tidyverse)
library(lubridate)
library(tidyquant)
# Read the multiple-choice responses CSV; the path is resolved relative to the
# current working directory.
data <- read_csv(file = "./multipleChoiceResponses1.csv")
Select only the columns whose names start with LearningPlatformUsefulness, reshape them to long format (one learning_platform / usefulness pair per row), and remove rows where usefulness is NA. Remove the prefix LearningPlatformUsefulness from each string in learning_platform. Use count() so that the dataset has one row per learning_platform–usefulness pair, with a column n giving the number of entries with that pairing.
# Stack every LearningPlatformUsefulness* column into long form, keeping only
# answered cells, and tally each (learning_platform, usefulness) combination.
q1_result <- data %>%
  select(starts_with("LearningPlatformUsefulness")) %>%
  pivot_longer(
    everything(),
    names_to = "learning_platform",
    # Strip the shared prefix so only the platform name remains.
    names_prefix = "LearningPlatformUsefulness",
    values_to = "usefulness",
    # Unanswered cells (NA) are dropped while reshaping.
    values_drop_na = TRUE
  ) %>%
  count(learning_platform, usefulness)
q1_result
## # A tibble: 54 × 3
## learning_platform usefulness n
## <chr> <chr> <int>
## 1 Arxiv Not Useful 37
## 2 Arxiv Somewhat useful 1038
## 3 Arxiv Very useful 1316
## 4 Blogs Not Useful 45
## 5 Blogs Somewhat useful 2406
## 6 Blogs Very useful 2314
## 7 College Not Useful 101
## 8 College Somewhat useful 1405
## 9 College Very useful 1853
## 10 Communities Not Useful 16
## # ℹ 44 more rows
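For each learning platform, compute the number of responses other than “Not Useful” (count), the total number of responses (tot), and the proportion of useful responses (perc_usefulness = count / tot). Sort the result by count in descending order.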
# Per platform: responses rated anything other than "Not Useful" (count),
# all responses (tot) and their ratio (perc_usefulness), largest count first.
q2_result <- q1_result %>%
  group_by(learning_platform) %>%
  summarise(
    # The logical mask zeroes out the "Not Useful" tallies before summing.
    count = sum(n * (usefulness != "Not Useful")),
    tot = sum(n),
    perc_usefulness = count / tot
  ) %>%
  arrange(desc(count))
q2_result
## # A tibble: 18 × 4
## learning_platform count tot perc_usefulness
## <chr> <int> <int> <dbl>
## 1 Kaggle 6527 6583 0.991
## 2 Courses 5945 5992 0.992
## 3 SO 5576 5640 0.989
## 4 YouTube 5125 5229 0.980
## 5 Projects 4755 4794 0.992
## 6 Blogs 4720 4765 0.991
## 7 Textbook 4112 4181 0.983
## 8 College 3258 3359 0.970
## 9 Arxiv 2354 2391 0.985
## 10 Documentation 2279 2321 0.982
## 11 Conferences 2063 2182 0.945
## 12 Friends 1530 1581 0.968
## 13 Tutoring 1394 1426 0.978
## 14 Communities 1126 1142 0.986
## 15 Podcasts 1090 1214 0.898
## 16 Newsletters 1033 1089 0.949
## 17 Company 940 981 0.958
## 18 TradeBook 324 333 0.973
Show the 10 learning platforms with the most useful responses, together with their cumulative share of all useful responses (cum_pct). Group all remaining platforms into a single row labelled Other.
# Keep the ten platforms with the most useful responses, add their cumulative
# share of all useful responses, and lump every remaining platform into a
# single "Other" row.
q3_result <- q2_result %>%
  select(learning_platform, count1 = count) %>%
  # with_ties = FALSE caps the result at ten rows even when several platforms
  # share the 10th-highest count (the default would return all tied rows).
  slice_max(order_by = count1, n = 10, with_ties = FALSE) %>%
  # Denominator is the useful-response total over ALL platforms, not just the
  # ten kept here.
  mutate(cum_pct = cumsum(count1) / sum(q2_result$count)) %>%
  bind_rows(
    tibble(
      learning_platform = "Other",
      # `.` is the top-ten table piped into bind_rows(); "Other" receives
      # whatever useful responses the top ten do not account for.
      count1 = sum(q2_result$count) - sum(.$count1),
      cum_pct = 1.0
    )
  ) %>%
  mutate(
    # Levels in ascending order of count1, then "Other" forced to be the first
    # level regardless of its size.
    learning_platform = fct_reorder(learning_platform, count1, .desc = FALSE),
    learning_platform = fct_relevel(learning_platform, "Other", after = 0)
  )
q3_result
## # A tibble: 11 × 3
## learning_platform count1 cum_pct
## <fct> <int> <dbl>
## 1 Kaggle 6527 0.121
## 2 Courses 5945 0.230
## 3 SO 5576 0.333
## 4 YouTube 5125 0.428
## 5 Projects 4755 0.516
## 6 Blogs 4720 0.603
## 7 Textbook 4112 0.679
## 8 College 3258 0.739
## 9 Arxiv 2354 0.782
## 10 Documentation 2279 0.825
## 11 Other 9500 1
Based on the previous results, create the plot as follows.
# Add the rank and the multi-line annotation text used as the plot label.
# q3_result is sorted by descending count1 with "Other" as the last row, so a
# platform's row position is its rank (1 = most useful responses). "Other" is
# an aggregate and gets no rank.
q3_result <- q3_result %>%
  mutate(
    # row_number() is an integer, so both branches of if_else() share a type.
    rank = if_else(learning_platform == "Other", NA_integer_, row_number()),
    # The pieces are concatenated by str_glue() before interpolation.
    label_text = str_glue(
      "Rank: {rank}\n",
      "Useful: {scales::comma(count1)}\n",
      "CumPct: {scales::percent(cum_pct, accuracy = 0.1)}"
    )
  )
# Lollipop chart: one horizontal stem and dot per platform (dot size follows
# count1), with the pre-built label_text drawn to the right of each dot.
ggplot(q3_result, aes(x = count1, y = learning_platform)) +
  # Stem running from x = 0 to the platform's count.
  geom_segment(
    aes(xend = 0, yend = learning_platform),
    linewidth = 0.5,
    colour = "black"
  ) +
  geom_point(aes(size = count1), colour = "black") +
  # Left-aligned label, nudged right so it does not overlap the dot.
  geom_label(
    aes(label = label_text),
    size = 2.5,
    hjust = 0,
    nudge_x = 200,
    lineheight = 0.85,
    label.size = 0.25,
    label.padding = unit(0.12, "lines")
  ) +
  scale_size_continuous(range = c(2, 8)) +
  scale_x_continuous(
    labels = scales::comma,
    breaks = seq(0, 10000, by = 2500),
    # Extra room on the right-hand side for the labels.
    expand = expansion(mult = c(0.02, 0.25))
  ) +
  labs(
    title = "Top 10 learning platform",
    x = "Number of responses with at least usefulness",
    y = "Learning platform"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5, size = 12),
    axis.title = element_text(size = 10),
    axis.text = element_text(size = 9),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(color = "gray90")
  )