Load Libraries

# Load required libraries
library(tidyverse)
library(lubridate)
library(knitr)

Read Data

# Import the multiple-choice responses from the CSV file (relative path)
data <- read_csv(file = "multipleChoiceResponses1.csv")

Question 1: Count the Usefulness by Learning Platform

Select only the columns whose names start with LearningPlatformUsefulness, reshape them to long format (one row per response and platform), and remove rows where usefulness is NA. Remove the “LearningPlatformUsefulness” prefix from each string in learning_platform. Use count() to reduce the dataset to one row per learning_platform–usefulness pair.

# Reshape the LearningPlatformUsefulness* columns into long form and tally how
# often each usefulness rating was given for each platform.
platform_usefulness <- data %>%
  select(starts_with("LearningPlatformUsefulness")) %>%
  pivot_longer(
    cols = everything(),
    names_to = "learning_platform",
    # Strip the shared column-name prefix so only the platform name remains
    names_prefix = "LearningPlatformUsefulness",
    values_to = "usefulness",
    # Skip cells that carry no rating
    values_drop_na = TRUE
  ) %>%
  count(learning_platform, usefulness)

# Render the first 10 platform/usefulness counts as a table
kable(
  head(platform_usefulness, 10),
  caption = "Count of Usefulness by Learning Platform (First 10 Rows)"
)
Count of Usefulness by Learning Platform (First 10 Rows)
learning_platform usefulness n
Arxiv Not Useful 37
Arxiv Somewhat useful 1038
Arxiv Very useful 1316
Blogs Not Useful 45
Blogs Somewhat useful 2406
Blogs Very useful 2314
College Not Useful 101
College Somewhat useful 1405
College Very useful 1853
Communities Not Useful 16

Question 2: Total Responses and At-Least-Useful Responses

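For each learning platform, compute the total number of responses (tot), the number of responses rated at least somewhat useful (count, i.e. every rating except “Not Useful”), and the share of such responses (perc_usefulness = count / tot, a proportion between 0 and 1).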

# Per platform: responses rated anything other than "Not Useful" (count), all
# responses (tot), and their ratio (perc_usefulness). Sorted so the platform
# with the most useful responses comes first.
platform_summary <- platform_usefulness %>%
  group_by(learning_platform) %>%
  summarise(
    # The logical mask zeroes out the "Not Useful" tallies before summing
    count = sum(n * (usefulness != "Not Useful")),
    tot = sum(n),
    perc_usefulness = count / tot
  ) %>%
  arrange(desc(count))

# Table of the per-platform summary, rounded to three decimals
kable(
  platform_summary,
  col.names = c(
    "Learning Platform",
    "At Least Useful",
    "Total",
    "% Usefulness"
  ),
  digits = 3,
  caption = "Summary of Learning Platform Usefulness"
)
Summary of Learning Platform Usefulness
Learning Platform At Least Useful Total % Usefulness
Kaggle 6527 6583 0.991
Courses 5945 5992 0.992
SO 5576 5640 0.989
YouTube 5125 5229 0.980
Projects 4755 4794 0.992
Blogs 4720 4765 0.991
Textbook 4112 4181 0.983
College 3258 3359 0.970
Arxiv 2354 2391 0.985
Documentation 2279 2321 0.982
Conferences 2063 2182 0.945
Friends 1530 1581 0.968
Tutoring 1394 1426 0.978
Communities 1126 1142 0.986
Podcasts 1090 1214 0.898
Newsletters 1033 1089 0.949
Company 940 981 0.958
TradeBook 324 333 0.973

Question 3: Top 10 Platforms with Cumulative Percentage

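Keep the first two columns (learning_platform and count), take the 10 platforms with the most at-least-useful responses, and calculate each one's cumulative share of all such responses (cum_pct, a proportion between 0 and 1). The remaining platforms are combined into a single “Other” row.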

# Ten platforms with the highest useful-response counts (platform_summary is
# already sorted), with the running share of the overall useful-response total.
top_platforms <- platform_summary %>%
  head(10) %>%
  select(learning_platform, count1 = count) %>%
  mutate(cum_pct = cumsum(count1) / sum(platform_summary$count))

# Add "Other" category: combined useful-response count of every platform
# ranked below the top 10.
# Rows are selected by position rather than slice_tail(n = nrow - 10): with
# fewer than 10 platforms that n would be negative, which slice_tail() reads
# as "all but the first -n rows" and would sum rows already in the top 10.
# With 10 or fewer platforms this now yields 0.
other_count <- platform_summary %>%
  filter(row_number() > 10) %>%
  summarize(count1 = sum(count)) %>%
  pull(count1)

# Append the "Other" row beneath the top 10 (its cumulative share is fixed at
# 1) and turn the platform names into a factor whose levels follow row order,
# with "Other" as the final level.
top_platforms_with_other <- bind_rows(
  top_platforms,
  tibble(learning_platform = "Other", count1 = other_count, cum_pct = 1.0)
) %>%
  mutate(
    learning_platform = fct_relevel(
      fct_inorder(learning_platform),
      "Other",
      after = Inf
    )
  )

# Table of the top 10 platforms plus "Other", rounded to three decimals
kable(
  top_platforms_with_other,
  col.names = c(
    "Learning Platform",
    "At Least Useful Count",
    "Cumulative %"
  ),
  digits = 3,
  caption = "Top 10 Learning Platforms with Cumulative Percentage"
)
Top 10 Learning Platforms with Cumulative Percentage
Learning Platform At Least Useful Count Cumulative %
Kaggle 6527 0.121
Courses 5945 0.230
SO 5576 0.333
YouTube 5125 0.428
Projects 4755 0.516
Blogs 4720 0.603
Textbook 4112 0.679
College 3258 0.739
Arxiv 2354 0.782
Documentation 2279 0.825
Other 9500 1.000

Question 4: Visualization

Create a horizontal bar chart of the top 10 learning platforms plus the “Other” group, labelling each bar with its rank, useful count, and cumulative percentage.

# Prepare data for plotting.
# The input rows are in rank order (top 10 by count, then "Other"), so the row
# number is the rank; "Other" is an aggregate and gets no rank.
plot_data <- top_platforms_with_other %>%
  mutate(
    rank = ifelse(learning_platform == "Other", NA, row_number()),
    label = sprintf(
      "Rank: %s\nUseful: %s\nCumPct: %.1f%%",
      ifelse(is.na(rank), "NA", as.character(rank)),
      # trim = TRUE stops format() from left-padding shorter counts to a
      # common width, which would put stray spaces inside the label
      format(count1, big.mark = ",", trim = TRUE),
      cum_pct * 100
    ),
    # Reverse the existing level order instead of re-sorting by count: ggplot
    # draws the first level at the bottom of a discrete y axis, so this puts
    # rank 1 at the top and keeps "Other" at the bottom. Re-sorting by count
    # would lift the unranked "Other" aggregate above the ranked platforms.
    learning_platform = fct_rev(learning_platform)
  )

# Draw the horizontal bar chart: one bar per platform, with its multi-line
# label printed just past the end of the bar.
ggplot(plot_data, aes(count1, learning_platform)) +
  geom_col(alpha = 0.8, fill = "steelblue") +
  geom_text(
    aes(label = label),
    lineheight = 0.9,
    size = 3.5,
    hjust = -0.1
  ) +
  # Extend the x axis 40% past the longest bar to leave room for the labels
  scale_x_continuous(
    expand = c(0, 0),
    limits = c(0, 1.4 * max(plot_data$count1))
  ) +
  xlab("Number of responses with at least usefulness") +
  ylab(NULL) +
  ggtitle("Learning Platform Usefulness Analysis") +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 11),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank()
  )