Question 1

Count the number of responses at each usefulness level for every learning platform.

# R code provided for user
# Load the survey responses and reshape the usefulness ratings to long form.
library(tidyverse)
df <- read_csv("multipleChoiceResponses1.csv")

# Keep only the per-platform usefulness columns.
use_cols <- select(df, starts_with("LearningPlatformUsefulness"))

# One row per (platform, rating). The shared column-name prefix is stripped so
# only the platform name remains, and unanswered (NA) cells are dropped.
long <- pivot_longer(
  use_cols,
  cols = everything(),
  names_to = "learning_platform",
  names_prefix = "LearningPlatformUsefulness",
  values_to = "usefulness",
  values_drop_na = TRUE
)

# Tally the responses for every platform / rating combination.
table1 <- count(long, learning_platform, usefulness)
table1

Result:

learning_platform      usefulness    n
            Arxiv      Not Useful   37
            Arxiv Somewhat useful 1038
            Arxiv     Very useful 1316
            Blogs      Not Useful   45
            Blogs Somewhat useful 2406
            Blogs     Very useful 2314
          College      Not Useful  101
          College Somewhat useful 1405
          College     Very useful 1853
      Communities      Not Useful   16
      Communities Somewhat useful  567
      Communities     Very useful  559
          Company      Not Useful   41
          Company Somewhat useful  502
          Company     Very useful  438
      Conferences      Not Useful  119
      Conferences Somewhat useful 1305
      Conferences     Very useful  758
          Courses      Not Useful   47
          Courses Somewhat useful 1750
          Courses     Very useful 4195
    Documentation      Not Useful   42
    Documentation Somewhat useful 1067
    Documentation     Very useful 1212
          Friends      Not Useful   51
          Friends Somewhat useful  774
          Friends     Very useful  756
           Kaggle      Not Useful   56
           Kaggle Somewhat useful 2451
           Kaggle     Very useful 4076
      Newsletters      Not Useful   56
      Newsletters Somewhat useful  686
      Newsletters     Very useful  347
         Podcasts      Not Useful  124
         Podcasts Somewhat useful  818
         Podcasts     Very useful  272
         Projects      Not Useful   39
         Projects Somewhat useful 1185
         Projects     Very useful 3570
               SO      Not Useful   64
               SO Somewhat useful 2044
               SO     Very useful 3532
         Textbook      Not Useful   69
         Textbook Somewhat useful 1796
         Textbook     Very useful 2316
        TradeBook      Not Useful    9
        TradeBook Somewhat useful  162
        TradeBook     Very useful  162
         Tutoring      Not Useful   32
         Tutoring Somewhat useful  569
         Tutoring     Very useful  825
          YouTube      Not Useful  104
          YouTube Somewhat useful 2605
          YouTube     Very useful 2520

Question 2

# Share of useful responses (any rating other than "Not Useful") per platform.

# Total number of ratings each platform received.
tot <- long %>% count(learning_platform, name = "tot")

# Number of ratings per platform that were not "Not Useful".
use <- long %>%
  filter(usefulness != "Not Useful") %>%
  count(learning_platform, name = "count")

# Join from `tot` so a platform with no useful ratings is kept with count 0
# instead of disappearing, and name the join key explicitly rather than
# relying on dplyr's natural-join guess.
merged <- tot %>%
  left_join(use, by = "learning_platform") %>%
  mutate(
    count = replace_na(count, 0L),
    # A proportion in [0, 1], not a percentage.
    perc_usefulness = count / tot
  ) %>%
  # Restore the original column order.
  select(learning_platform, count, tot, perc_usefulness)
merged

Result:

learning_platform  count  tot  perc_usefulness
            Arxiv   2354 2391         0.984525
            Blogs   4720 4765         0.990556
          College   3258 3359         0.969932
      Communities   1126 1142         0.985989
          Company    940  981         0.958206
      Conferences   2063 2182         0.945463
          Courses   5945 5992         0.992156
    Documentation   2279 2321         0.981904
          Friends   1530 1581         0.967742
           Kaggle   6527 6583         0.991493
      Newsletters   1033 1089         0.948577
         Podcasts   1090 1214         0.897858
         Projects   4755 4794         0.991865
               SO   5576 5640         0.988652
         Textbook   4112 4181         0.983497
        TradeBook    324  333         0.972973
         Tutoring   1394 1426         0.977560
          YouTube   5125 5229         0.980111

Question 3

# Useful responses summed over every platform; the denominator for cum_pct.
total_count <- sum(merged$count)

# Ten platforms with the most useful responses, with their running share of
# all useful responses.
top10 <- merged %>%
  arrange(desc(count)) %>%
  head(10) %>%
  mutate(cum_pct = cumsum(count) / total_count)

# Everything outside the top ten is collapsed into a single "Other" row, which
# brings the cumulative share to 1.
other <- total_count - sum(top10$count)
table3 <- bind_rows(
  top10,
  tibble(learning_platform = "Other", count = other, cum_pct = 1)
)
table3

Result:

learning_platform  count    tot  perc_usefulness  cum_pct
           Kaggle   6527 6583.0         0.991493 0.120533
          Courses   5945 5992.0         0.992156 0.230319
               SO   5576 5640.0         0.988652 0.333290
          YouTube   5125 5229.0         0.980111 0.427933
         Projects   4755 4794.0         0.991865 0.515743
            Blogs   4720 4765.0         0.990556 0.602907
         Textbook   4112 4181.0         0.983497 0.678842
          College   3258 3359.0         0.969932 0.739008
            Arxiv   2354 2391.0         0.984525 0.782479
    Documentation   2279 2321.0         0.981904 0.824565
            Other   9500    NaN              NaN 1.000000

Question 4

Plot the top 10 learning platforms from Question 3 by their number of useful responses.

# Horizontal bar chart of the top-10 platforms, ordered by useful-response count.
library(ggplot2)
top10 %>%
  ggplot(aes(x = fct_reorder(learning_platform, count), y = count)) +
  geom_col() +
  # Flip the axes so the platform names are listed along the vertical axis.
  coord_flip() +
  labs(
    title = "Top 10 Useful Learning Platforms",
    x = "Platform",
    y = "Count"
  )