# Load required libraries
library(tidyverse)
library(lubridate)
library(knitr)
# Import the raw survey responses from the CSV file
data <- read_csv(file = "multipleChoiceResponses1.csv")
Select only the columns whose names start with LearningPlatformUsefulness, reshape them to long format (one row per response), and remove rows where usefulness is NA. Remove “LearningPlatformUsefulness” from each string in learning_platform. Use count() to change the dataset to have one row per learning_platform–usefulness pair.
# Reshape the wide LearningPlatformUsefulness* columns into long form and
# tally how often each usefulness rating was given to each platform.
platform_usefulness <- data %>%
  select(starts_with("LearningPlatformUsefulness")) %>%
  pivot_longer(
    everything(),
    names_to = "learning_platform",
    # strip the shared column-name prefix so only the platform name remains
    names_prefix = "LearningPlatformUsefulness",
    values_to = "usefulness"
  ) %>%
  # discard unanswered ratings before counting
  drop_na(usefulness) %>%
  count(learning_platform, usefulness)
# Render the first 10 platform/usefulness counts as a formatted table
kable(
  head(platform_usefulness, 10),
  caption = "Count of Usefulness by Learning Platform (First 10 Rows)"
)
| learning_platform | usefulness | n |
|---|---|---|
| Arxiv | Not Useful | 37 |
| Arxiv | Somewhat useful | 1038 |
| Arxiv | Very useful | 1316 |
| Blogs | Not Useful | 45 |
| Blogs | Somewhat useful | 2406 |
| Blogs | Very useful | 2314 |
| College | Not Useful | 101 |
| College | Somewhat useful | 1405 |
| College | Very useful | 1853 |
| Communities | Not Useful | 16 |
For each learning platform, compute the total number of responses and the number of responses rated at least somewhat useful (i.e., filter out “Not Useful”), along with the proportion of the latter.
# Per platform: number of ratings other than "Not Useful" (count), number of
# all ratings (tot), and their ratio; sorted with the largest count first.
platform_summary <- platform_usefulness %>%
  group_by(learning_platform) %>%
  summarize(
    # the logical mask zeroes out the "Not Useful" tallies before summing
    count = sum(n * (usefulness != "Not Useful")),
    tot = sum(n)
  ) %>%
  mutate(perc_usefulness = count / tot) %>%
  arrange(desc(count))
# Render the per-platform summary with readable headers, 3 decimal places
kable(
  platform_summary,
  col.names = c("Learning Platform", "At Least Useful", "Total", "% Usefulness"),
  digits = 3,
  caption = "Summary of Learning Platform Usefulness"
)
| Learning Platform | At Least Useful | Total | % Usefulness |
|---|---|---|---|
| Kaggle | 6527 | 6583 | 0.991 |
| Courses | 5945 | 5992 | 0.992 |
| SO | 5576 | 5640 | 0.989 |
| YouTube | 5125 | 5229 | 0.980 |
| Projects | 4755 | 4794 | 0.992 |
| Blogs | 4720 | 4765 | 0.991 |
| Textbook | 4112 | 4181 | 0.983 |
| College | 3258 | 3359 | 0.970 |
| Arxiv | 2354 | 2391 | 0.985 |
| Documentation | 2279 | 2321 | 0.982 |
| Conferences | 2063 | 2182 | 0.945 |
| Friends | 1530 | 1581 | 0.968 |
| Tutoring | 1394 | 1426 | 0.978 |
| Communities | 1126 | 1142 | 0.986 |
| Podcasts | 1090 | 1214 | 0.898 |
| Newsletters | 1033 | 1089 | 0.949 |
| Company | 940 | 981 | 0.958 |
| TradeBook | 324 | 333 | 0.973 |
Select the first two columns (learning_platform and count), keep the top 10 learning platforms by number of at-least-useful responses, and calculate each one's cumulative share of all such responses. The remaining platforms are grouped as “Other”.
# Keep the first 10 rows of the summary (already sorted by count) and attach
# the running share of the overall count across all platforms.
top_platforms <- platform_summary %>%
  select(learning_platform, count1 = count) %>%
  slice_head(n = 10) %>%
  mutate(cum_pct = cumsum(count1) / sum(platform_summary$count))
# Add "Other" category
# Sum the "at least useful" counts of every platform ranked below the top 10.
# Rows are dropped by position instead of slice_tail(n = nrow - 10): a negative
# n there means "all but |n| rows", so a summary with fewer than 10 platforms
# would re-count platforms that are already part of the top 10. With 10 or
# fewer platforms this now yields 0.
other_count <- platform_summary %>%
  filter(row_number() > 10) %>%
  summarize(count1 = sum(count)) %>%
  pull(count1)
# Append the aggregated "Other" row (cumulative share fixed at 100%) and turn
# the platform name into a factor whose levels follow row order, with "Other"
# pinned as the last level.
top_platforms_with_other <- bind_rows(
  top_platforms,
  tibble(learning_platform = "Other", count1 = other_count, cum_pct = 1.0)
) %>%
  mutate(
    learning_platform = learning_platform %>%
      fct_inorder() %>%
      fct_relevel("Other", after = Inf)
  )
# Render the top-10-plus-"Other" table with readable headers, 3 decimal places
kable(
  top_platforms_with_other,
  col.names = c("Learning Platform", "At Least Useful Count", "Cumulative %"),
  digits = 3,
  caption = "Top 10 Learning Platforms with Cumulative Percentage"
)
| Learning Platform | At Least Useful Count | Cumulative % |
|---|---|---|
| Kaggle | 6527 | 0.121 |
| Courses | 5945 | 0.230 |
| SO | 5576 | 0.333 |
| YouTube | 5125 | 0.428 |
| Projects | 4755 | 0.516 |
| Blogs | 4720 | 0.603 |
| Textbook | 4112 | 0.679 |
| College | 3258 | 0.739 |
| Arxiv | 2354 | 0.782 |
| Documentation | 2279 | 0.825 |
| Other | 9500 | 1.000 |
Create a horizontal bar chart showing the top 10 learning platforms with rank, useful count, and cumulative percentage labels.
# Prepare data for plotting
# Adds a rank (NA for the "Other" bucket) and a three-line text label to each
# row. The incoming factor already has its levels in rank order with "Other"
# last; re-sorting the levels by count1 would discard that order and could
# draw the aggregated "Other" bar above rank 1. The levels are only reversed
# here, because a discrete y axis draws the first level at the bottom: rank 1
# ends up at the top and "Other" at the bottom, so the cumulative percentages
# read top to bottom.
plot_data <- top_platforms_with_other %>%
  mutate(
    rank = if_else(learning_platform == "Other", NA_integer_, row_number()),
    label = sprintf(
      "Rank: %s\nUseful: %s\nCumPct: %.1f%%",
      if_else(is.na(rank), "NA", as.character(rank)),
      # trim = TRUE keeps format() from left-padding the shorter counts
      format(count1, big.mark = ",", trim = TRUE),
      cum_pct * 100
    ),
    learning_platform = fct_rev(learning_platform)
  )
# Draw horizontal bars of count1 per platform, with the prepared text label
# printed just right of each bar; the x range is widened by 40% so the labels
# are not clipped.
ggplot(plot_data, aes(count1, learning_platform)) +
  geom_col(alpha = 0.8, fill = "steelblue") +
  geom_text(
    aes(label = label),
    size = 3.5,
    lineheight = 0.9,
    hjust = -0.1
  ) +
  labs(
    title = "Learning Platform Usefulness Analysis",
    x = "Number of responses with at least usefulness",
    y = NULL
  ) +
  scale_x_continuous(
    expand = c(0, 0),
    limits = c(0, 1.4 * max(plot_data$count1))
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 11),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank()
  )