R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

data <- read.csv("C:/Users/Badamkhand/Downloads/multipleChoiceResponses1.csv", header = TRUE)

usefulness_columns <- colnames(data)[1:18]
usefulness_counts <- data %>%
  select(all_of(usefulness_columns)) %>%
  pivot_longer(cols = everything(), names_to = "LearningPlatform", values_to = "Usefulness") %>%
  filter(!is.na(Usefulness)) %>%
  mutate(Usefulness = gsub("Not Useful", "Least Useful", Usefulness),
         LearningPlatform = gsub("LearningPlatformUsefulness", "", LearningPlatform)) %>%
  count(LearningPlatform, Usefulness)

print(usefulness_counts)
## # A tibble: 54 × 3
##    LearningPlatform Usefulness          n
##    <chr>            <chr>           <int>
##  1 Arxiv            Least Useful       37
##  2 Arxiv            Somewhat useful  1038
##  3 Arxiv            Very useful      1316
##  4 Blogs            Least Useful       45
##  5 Blogs            Somewhat useful  2406
##  6 Blogs            Very useful      2314
##  7 College          Least Useful      101
##  8 College          Somewhat useful  1405
##  9 College          Very useful      1853
## 10 Communities      Least Useful       16
## # ℹ 44 more rows
atleastuseful <- data %>%
  gather(key = "learning_platform", value = "usefulness", starts_with("LearningPlatformUsefulness")) %>%
  filter(!is.na(usefulness)) %>%
  mutate(learning_platform = gsub("LearningPlatformUsefulness", "", learning_platform)) %>%
  count(learning_platform, name = "count") %>%
  left_join(
    data %>%
      gather(key = "learning_platform", value = "usefulness", starts_with("LearningPlatformUsefulness")) %>%
      filter(!is.na(usefulness) & usefulness != "Not Useful") %>%
      mutate(learning_platform = gsub("LearningPlatformUsefulness", "", learning_platform)) %>%
      count(learning_platform, name = "at_least_useful"),
    by = "learning_platform"
  ) %>%
  mutate(
    tot = ifelse(is.na(at_least_useful), count, at_least_useful),
    perc_usefulness = tot / count
  ) %>%
  select(learning_platform, tot, count, perc_usefulness)

atleastuseful
##    learning_platform  tot count perc_usefulness
## 1              Arxiv 2354  2391       0.9845253
## 2              Blogs 4720  4765       0.9905561
## 3            College 3258  3359       0.9699315
## 4        Communities 1126  1142       0.9859895
## 5            Company  940   981       0.9582059
## 6        Conferences 2063  2182       0.9454629
## 7            Courses 5945  5992       0.9921562
## 8      Documentation 2279  2321       0.9819044
## 9            Friends 1530  1581       0.9677419
## 10            Kaggle 6527  6583       0.9914932
## 11       Newsletters 1033  1089       0.9485767
## 12          Podcasts 1090  1214       0.8978583
## 13          Projects 4755  4794       0.9918648
## 14                SO 5576  5640       0.9886525
## 15          Textbook 4112  4181       0.9834968
## 16         TradeBook  324   333       0.9729730
## 17          Tutoring 1394  1426       0.9775596
## 18           YouTube 5125  5229       0.9801109
top_platforms <- usefulness_counts %>%
  filter(Usefulness != "Not Useful") %>%
  group_by(LearningPlatform) %>%
  summarise(count = sum(n)) %>%
  arrange(desc(count)) %>%
  slice_head(n = 10)

print(top_platforms)
## # A tibble: 10 × 2
##    LearningPlatform count
##    <chr>            <int>
##  1 Kaggle            6583
##  2 Courses           5992
##  3 SO                5640
##  4 YouTube           5229
##  5 Projects          4794
##  6 Blogs             4765
##  7 Textbook          4181
##  8 College           3359
##  9 Arxiv             2391
## 10 Documentation     2321
top_platforms <- top_platforms %>%
  mutate(cum_pct = cumsum(count) / sum(count))

other_platforms <- usefulness_counts %>%
  filter(!(LearningPlatform %in% top_platforms$LearningPlatform)) %>%
  group_by(Usefulness) %>%
  summarise(count = sum(n)) %>%
  arrange(desc(count)) %>%
  slice_head(n = 1) %>%
  mutate(LearningPlatform = "Other")

final_result <- bind_rows(top_platforms, other_platforms) %>%
  select(LearningPlatform, count, cum_pct)

print(final_result)
## # A tibble: 11 × 3
##    LearningPlatform count cum_pct
##    <chr>            <int>   <dbl>
##  1 Kaggle            6583   0.145
##  2 Courses           5992   0.278
##  3 SO                5640   0.402
##  4 YouTube           5229   0.518
##  5 Projects          4794   0.624
##  6 Blogs             4765   0.729
##  7 Textbook          4181   0.822
##  8 College           3359   0.896
##  9 Arxiv             2391   0.949
## 10 Documentation     2321   1    
## 11 Other             5383  NA
 final_result %>%
 ggplot(aes(x = reorder(LearningPlatform, count), y = cum_pct, color = LearningPlatform)) +
 geom_col(width = .8) +
 geom_text(aes(label = LearningPlatform), size = 3, nudge_x = -.05) +
 scale_y_continuous(breaks = seq(0, 1, .1)) +
 scale_color_brewer(palette = "Set1") +
 coord_flip() +
 labs(title = "Cumulative Percent of Top 10 Useful Learning Platforms",
     x = "",
     y = "Cumulative Percent") +
 theme(legend.position = "none",
     axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Removed 1 rows containing missing values (`position_stack()`).
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: Removed 1 rows containing missing values (`geom_text()`).

## Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.