Introduction

This analysis examines the usefulness of various learning platforms based on survey responses from the multipleChoiceResponses1.csv dataset.

Load Libraries

# Load required libraries
library(tidyverse)
library(lubridate)
library(knitr)

Read Data

# Load the survey responses from the CSV file in the working directory
data <- read_csv(file = "multipleChoiceResponses1.csv")

Question 1: Count the Usefulness by Learning Platform

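Select only the columns whose names start with LearningPlatformUsefulness, reshape them to long format (one row per response, with the column name in learning_platform and the answer in usefulness), and remove rows where usefulness is NA. Strip the “LearningPlatformUsefulness” prefix from each value of learning_platform, then use count() so that the dataset has one row per (learning_platform, usefulness) pair.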

# Reshape the usefulness columns to long form: one row per answered
# (platform, rating) cell. `names_prefix` strips the shared column-name prefix
# and `values_drop_na` discards unanswered cells, so no separate
# mutate()/filter() step is needed before counting.
platform_usefulness <- data %>%
  select(starts_with("LearningPlatformUsefulness")) %>%
  pivot_longer(
    cols = everything(),
    names_to = "learning_platform",
    names_prefix = "LearningPlatformUsefulness",
    values_to = "usefulness",
    values_drop_na = TRUE
  ) %>%
  count(learning_platform, usefulness)

# Preview the first 10 platform/usefulness counts
head(platform_usefulness, n = 10)
## # A tibble: 10 × 3
##    learning_platform usefulness          n
##    <chr>             <chr>           <int>
##  1 Arxiv             Not Useful         37
##  2 Arxiv             Somewhat useful  1038
##  3 Arxiv             Very useful      1316
##  4 Blogs             Not Useful         45
##  5 Blogs             Somewhat useful  2406
##  6 Blogs             Very useful      2314
##  7 College           Not Useful        101
##  8 College           Somewhat useful  1405
##  9 College           Very useful      1853
## 10 Communities       Not Useful         16

Question 2: Total Responses and At-Least-Useful Responses

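For each learning platform, compute the total number of responses (tot), the number of responses that are at least useful, i.e. everything except “Not Useful” (count), and the share of at-least-useful responses (perc_usefulness = count / tot).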

# Per platform: `count` = responses rated anything other than "Not Useful",
# `tot` = all responses, `perc_usefulness` = count / tot.
# Rows are sorted by `count`, largest first.
platform_summary <- platform_usefulness %>%
  group_by(learning_platform) %>%
  summarize(
    # Multiplying by the logical mask zeroes out the "Not Useful" rows
    count = sum(n * (usefulness != "Not Useful")),
    tot = sum(n),
    .groups = "drop"
  ) %>%
  mutate(perc_usefulness = count / tot) %>%
  arrange(desc(count))

platform_summary
## # A tibble: 18 × 4
##    learning_platform count   tot perc_usefulness
##    <chr>             <int> <int>           <dbl>
##  1 Kaggle             6527  6583           0.991
##  2 Courses            5945  5992           0.992
##  3 SO                 5576  5640           0.989
##  4 YouTube            5125  5229           0.980
##  5 Projects           4755  4794           0.992
##  6 Blogs              4720  4765           0.991
##  7 Textbook           4112  4181           0.983
##  8 College            3258  3359           0.970
##  9 Arxiv              2354  2391           0.985
## 10 Documentation      2279  2321           0.982
## 11 Conferences        2063  2182           0.945
## 12 Friends            1530  1581           0.968
## 13 Tutoring           1394  1426           0.978
## 14 Communities        1126  1142           0.986
## 15 Podcasts           1090  1214           0.898
## 16 Newsletters        1033  1089           0.949
## 17 Company             940   981           0.958
## 18 TradeBook           324   333           0.973

Question 3: Top 10 Platforms with Cumulative Percentage

Select the first two columns (learning_platform and count), show the top 10 useful learning platforms, and calculate cumulative percentage. Other platforms are grouped as “Other”.

# Ten platforms with the most at-least-useful responses (platform_summary is
# already sorted by count, descending). cum_pct is the running share of ALL
# at-least-useful responses, not just of the ten rows kept here.
top_platforms <- platform_summary %>%
  select(learning_platform, count1 = count) %>%
  slice_head(n = 10) %>%
  mutate(cum_pct = cumsum(count1) / sum(platform_summary$count))

# Add "Other" category: total at-least-useful responses of every platform
# ranked below the top 10. Rows are picked with row_number() rather than
# slice_tail(n = nrow(platform_summary) - 10): with fewer than 10 platforms
# that `n` turns negative, which slice_tail() interprets as "drop that many
# rows" and would wrongly fold top-platform counts into "Other".
# With 10 or fewer platforms the result is 0L.
other_count <- platform_summary %>%
  filter(row_number() > 10) %>%
  pull(count) %>%
  sum()

# Append the catch-all "Other" row; its cumulative share is fixed at 1 because
# it accounts for every response not covered by the top 10
top_platforms_with_other <- bind_rows(
  top_platforms,
  tibble(learning_platform = "Other", count1 = other_count, cum_pct = 1.0)
)

# Turn learning_platform into a factor whose levels follow descending count1,
# then force "Other" to be the last level regardless of its count
top_platforms_with_other <- top_platforms_with_other %>%
  mutate(
    learning_platform = fct_relevel(
      fct_reorder(learning_platform, count1, .desc = TRUE),
      "Other",
      after = Inf
    )
  )

top_platforms_with_other
## # A tibble: 11 × 3
##    learning_platform count1 cum_pct
##    <fct>              <int>   <dbl>
##  1 Kaggle              6527   0.121
##  2 Courses             5945   0.230
##  3 SO                  5576   0.333
##  4 YouTube             5125   0.428
##  5 Projects            4755   0.516
##  6 Blogs               4720   0.603
##  7 Textbook            4112   0.679
##  8 College             3258   0.739
##  9 Arxiv               2354   0.782
## 10 Documentation       2279   0.825
## 11 Other               9500   1

Question 4: Visualization

Create a horizontal bar chart showing the top 10 learning platforms with rank, useful count, and cumulative percentage labels.

# Prepare data for plotting
# - rank: row position within the table; the "Other" bucket is unranked (NA)
# - label: multi-line text drawn next to each bar
# - learning_platform: existing level order is reversed because ggplot2 draws
#   the first level of a discrete y axis at the bottom. Reversing puts the
#   top-ranked platform at the top of the chart and keeps "Other" at the
#   bottom. Re-sorting the levels by count1 here would instead discard the
#   "Other"-last ordering and place "Other" (the largest count) above Kaggle.
plot_data <- top_platforms_with_other %>%
  mutate(
    rank = ifelse(learning_platform == "Other", NA, row_number()),
    label = sprintf(
      "Rank: %s\nUseful: %s\nCumPct: %.1f%%",
      ifelse(is.na(rank), "NA", as.character(rank)),
      # trim = TRUE stops format() from left-padding shorter counts with spaces
      format(count1, big.mark = ",", trim = TRUE),
      cum_pct * 100
    ),
    learning_platform = fct_rev(learning_platform)
  )

# Draw the chart: count on x, platform on y, one boxed label per bar
plot_data %>%
  ggplot(mapping = aes(x = count1, y = learning_platform)) +
  geom_col(width = 0.7, fill = "#3d5a80") +
  # hjust = 0 left-aligns each label at its bar's end value
  geom_label(
    mapping = aes(label = label),
    hjust = 0,
    size = 2.8,
    lineheight = 0.85,
    label.padding = unit(0.15, "lines"),
    label.size = 0.2
  ) +
  # Axis starts exactly at 0 (no expansion) and runs 15% past the longest bar
  scale_x_continuous(
    breaks = seq(0, 10000, 2500),
    limits = c(0, 1.15 * max(plot_data$count1)),
    expand = c(0, 0)
  ) +
  labs(
    title = "Top 10 learning platform",
    x = "Number of responses with at least usefulness",
    y = "Learning platform"
  ) +
  theme_minimal() +
  theme(
    # Title and outer margin
    plot.title = element_text(size = 14, face = "plain", hjust = 0),
    plot.margin = margin(10, 10, 10, 10),
    # Axis titles and tick labels
    axis.title.x = element_text(size = 11, margin = margin(t = 10)),
    axis.title.y = element_text(size = 11, margin = margin(r = 10)),
    axis.text.x = element_text(size = 10, color = "black"),
    axis.text.y = element_text(size = 11, color = "black"),
    # Keep only light vertical major grid lines
    panel.grid.major.x = element_line(color = "gray90"),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_blank()
  )

Summary

The analysis reveals that:

  • Kaggle has the most “at least useful” responses (6,527), making it the top-ranked learning platform by count
  • The top 10 platforms account for 82.5% of all “at least useful” responses
  • Courses and Stack Overflow (SO) are the 2nd and 3rd most useful platforms
  • Online learning resources (Kaggle, Courses, YouTube) dominate the top rankings

Data Source: multipleChoiceResponses1.csv