library(tidyverse)
library(lubridate)
library(nycflights13)
library(tidyquant)
# Load the survey responses from the working directory.
# If the file lives somewhere else, supply its full path instead:
# data <- read_csv("C:/path/to/your/multipleChoiceResponses1.csv")
data <- read_csv(file = "multipleChoiceResponses1.csv")
# Reshape the LearningPlatformUsefulness* columns into long form (one row per
# answered platform/rating pair), strip the shared column-name prefix, and
# tally how often each rating was given to each platform.
df_platform <- data %>%
  select(starts_with("LearningPlatformUsefulness")) %>%
  pivot_longer(
    cols = everything(),
    names_to = "learning_platform",
    values_to = "usefulness",
    values_drop_na = TRUE # unanswered cells are dropped while pivoting
  ) %>%
  mutate(
    learning_platform = str_replace(
      learning_platform,
      "LearningPlatformUsefulness",
      ""
    )
  ) %>%
  count(learning_platform, usefulness)

# Preview the first 10 platform/rating tallies
df_platform %>%
  slice_head(n = 10)
## # A tibble: 10 × 3
## learning_platform usefulness n
## <chr> <chr> <int>
## 1 Arxiv Not Useful 37
## 2 Arxiv Somewhat useful 1038
## 3 Arxiv Very useful 1316
## 4 Blogs Not Useful 45
## 5 Blogs Somewhat useful 2406
## 6 Blogs Very useful 2314
## 7 College Not Useful 101
## 8 College Somewhat useful 1405
## 9 College Very useful 1853
## 10 Communities Not Useful 16
# Per platform: `count` = responses rated anything other than "Not Useful",
# `tot` = all responses, `perc_usefulness` = count / tot.
# Platforms are listed from most to fewest useful responses.
df_summary <- df_platform %>%
  group_by(learning_platform) %>%
  summarise(
    count = sum(n[usefulness != "Not Useful"]),
    tot = sum(n),
    .groups = "drop"
  ) %>%
  mutate(perc_usefulness = count / tot) %>%
  arrange(desc(count))

df_summary
## # A tibble: 18 × 4
## learning_platform count tot perc_usefulness
## <chr> <int> <int> <dbl>
## 1 Kaggle 6527 6583 0.991
## 2 Courses 5945 5992 0.992
## 3 SO 5576 5640 0.989
## 4 YouTube 5125 5229 0.980
## 5 Projects 4755 4794 0.992
## 6 Blogs 4720 4765 0.991
## 7 Textbook 4112 4181 0.983
## 8 College 3258 3359 0.970
## 9 Arxiv 2354 2391 0.985
## 10 Documentation 2279 2321 0.982
## 11 Conferences 2063 2182 0.945
## 12 Friends 1530 1581 0.968
## 13 Tutoring 1394 1426 0.978
## 14 Communities 1126 1142 0.986
## 15 Podcasts 1090 1214 0.898
## 16 Newsletters 1033 1089 0.949
## 17 Company 940 981 0.958
## 18 TradeBook 324 333 0.973
# Keep the 10 platforms with the most useful responses and compute each one's
# running share of ALL useful responses (the denominator covers every platform
# in df_summary, not just the top 10).
df_top10 <- df_summary %>%
  # with_ties = FALSE guarantees exactly 10 rows even when counts tie at the
  # cut-off; the default (TRUE) would return every tied row.
  slice_max(order_by = count, n = 10, with_ties = FALSE) %>%
  select(learning_platform, count1 = count) %>%
  mutate(cum_pct = cumsum(count1) / sum(df_summary$count))

# Everything outside the top 10 is lumped into a single "Other" row, which by
# construction closes the cumulative share at 100%.
other_count <- sum(df_summary$count) - sum(df_top10$count1)
other_row <- tibble(
  learning_platform = "Other",
  count1 = other_count,
  cum_pct = 1.0
)

# Order the factor levels by count, then force "Other" to be the first level
# regardless of its size.
df_final <- bind_rows(df_top10, other_row) %>%
  mutate(
    learning_platform = learning_platform %>%
      fct_reorder(count1) %>%
      fct_relevel("Other", after = 0)
  )

df_final
## # A tibble: 11 × 3
## learning_platform count1 cum_pct
## <fct> <int> <dbl>
## 1 Kaggle 6527 0.121
## 2 Courses 5945 0.230
## 3 SO 5576 0.333
## 4 YouTube 5125 0.428
## 5 Projects 4755 0.516
## 6 Blogs 4720 0.603
## 7 Textbook 4112 0.679
## 8 College 3258 0.739
## 9 Arxiv 2354 0.782
## 10 Documentation 2279 0.825
## 11 Other 9500 1
# Build the text label shown next to each bar.
# Rows of df_final arrive sorted by descending count with "Other" last, so the
# row number is the platform's rank (1 = most useful responses). "Other" is an
# aggregate and gets no rank; it is printed as "NA".
df_plot <- df_final %>%
  mutate(
    rank = if_else(learning_platform == "Other", NA_integer_, row_number()),
    label_text = sprintf(
      "Rank: %s | Useful: %s | CumPct: %.1f%%",
      # Print the rank itself: the previous `11 - rank` inverted the order and
      # labelled the top platform "Rank: 10".
      if_else(is.na(rank), "NA", as.character(rank)),
      # trim = TRUE stops format() from left-padding shorter counts to a
      # common width inside the label.
      format(count1, big.mark = ",", trim = TRUE),
      cum_pct * 100
    )
  )
# Horizontal bar chart: one bar per platform (plus "Other"), bar length =
# number of useful responses, with the rank/count/cumulative-share label
# printed to the right of each bar.
ggplot(df_plot, aes(x = count1, y = learning_platform)) +
  geom_col(fill = "#27ae60", alpha = 0.75, width = 0.65) +
  geom_text(
    aes(label = label_text),
    hjust = -0.08, # negative hjust pushes the label just past the bar end
    size = 3.2,
    color = "#34495e"
  ) +
  scale_x_continuous(
    # 48% of headroom beyond the longest bar leaves room for the labels
    limits = c(0, max(df_plot$count1) * 1.48),
    breaks = seq(0, 10000, 2500),
    labels = scales::comma
  ) +
  labs(
    x = "Number of responses (at least useful)",
    y = "",
    title = "Learning Platform Usefulness Analysis"
  ) +
  theme_light(base_size = 11) +
  theme(
    plot.title = element_text(
      size = 15,
      face = "bold",
      color = "#2c3e50",
      margin = margin(b = 10)
    ),
    # Line and border widths use `linewidth`; the `size` argument of
    # element_line()/element_rect() is deprecated since ggplot2 3.4.0.
    panel.grid.major.y = element_line(color = "gray85", linewidth = 0.2),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_line(color = "gray80", linewidth = 0.4),
    panel.border = element_rect(color = "gray70", linewidth = 0.5),
    axis.text.y = element_text(size = 10.5, color = "#34495e"),
    axis.text.x = element_text(size = 9.5),
    axis.title.x = element_text(size = 11, margin = margin(t = 10)),
    plot.margin = margin(12, 12, 12, 12)
  )