This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
data <- read.csv("C:/Users/Badamkhand/Downloads/multipleChoiceResponses1.csv", header = TRUE)
usefulness_columns <- colnames(data)[1:18]
usefulness_counts <- data %>%
select(all_of(usefulness_columns)) %>%
pivot_longer(cols = everything(), names_to = "LearningPlatform", values_to = "Usefulness") %>%
filter(!is.na(Usefulness)) %>%
mutate(Usefulness = gsub("Not Useful", "Least Useful", Usefulness),
LearningPlatform = gsub("LearningPlatformUsefulness", "", LearningPlatform)) %>%
count(LearningPlatform, Usefulness)
print(usefulness_counts)
## # A tibble: 54 × 3
## LearningPlatform Usefulness n
## <chr> <chr> <int>
## 1 Arxiv Least Useful 37
## 2 Arxiv Somewhat useful 1038
## 3 Arxiv Very useful 1316
## 4 Blogs Least Useful 45
## 5 Blogs Somewhat useful 2406
## 6 Blogs Very useful 2314
## 7 College Least Useful 101
## 8 College Somewhat useful 1405
## 9 College Very useful 1853
## 10 Communities Least Useful 16
## # ℹ 44 more rows
atleastuseful <- data %>%
gather(key = "learning_platform", value = "usefulness", starts_with("LearningPlatformUsefulness")) %>%
filter(!is.na(usefulness)) %>%
mutate(learning_platform = gsub("LearningPlatformUsefulness", "", learning_platform)) %>%
count(learning_platform, name = "count") %>%
left_join(
data %>%
gather(key = "learning_platform", value = "usefulness", starts_with("LearningPlatformUsefulness")) %>%
filter(!is.na(usefulness) & usefulness != "Not Useful") %>%
mutate(learning_platform = gsub("LearningPlatformUsefulness", "", learning_platform)) %>%
count(learning_platform, name = "at_least_useful"),
by = "learning_platform"
) %>%
mutate(
tot = ifelse(is.na(at_least_useful), count, at_least_useful),
perc_usefulness = tot / count
) %>%
select(learning_platform, tot, count, perc_usefulness)
atleastuseful
## learning_platform tot count perc_usefulness
## 1 Arxiv 2354 2391 0.9845253
## 2 Blogs 4720 4765 0.9905561
## 3 College 3258 3359 0.9699315
## 4 Communities 1126 1142 0.9859895
## 5 Company 940 981 0.9582059
## 6 Conferences 2063 2182 0.9454629
## 7 Courses 5945 5992 0.9921562
## 8 Documentation 2279 2321 0.9819044
## 9 Friends 1530 1581 0.9677419
## 10 Kaggle 6527 6583 0.9914932
## 11 Newsletters 1033 1089 0.9485767
## 12 Podcasts 1090 1214 0.8978583
## 13 Projects 4755 4794 0.9918648
## 14 SO 5576 5640 0.9886525
## 15 Textbook 4112 4181 0.9834968
## 16 TradeBook 324 333 0.9729730
## 17 Tutoring 1394 1426 0.9775596
## 18 YouTube 5125 5229 0.9801109
top_platforms <- usefulness_counts %>%
filter(Usefulness != "Not Useful") %>%
group_by(LearningPlatform) %>%
summarise(count = sum(n)) %>%
arrange(desc(count)) %>%
slice_head(n = 10)
print(top_platforms)
## # A tibble: 10 × 2
## LearningPlatform count
## <chr> <int>
## 1 Kaggle 6583
## 2 Courses 5992
## 3 SO 5640
## 4 YouTube 5229
## 5 Projects 4794
## 6 Blogs 4765
## 7 Textbook 4181
## 8 College 3359
## 9 Arxiv 2391
## 10 Documentation 2321
top_platforms <- top_platforms %>%
mutate(cum_pct = cumsum(count) / sum(count))
other_platforms <- usefulness_counts %>%
filter(!(LearningPlatform %in% top_platforms$LearningPlatform)) %>%
group_by(Usefulness) %>%
summarise(count = sum(n)) %>%
arrange(desc(count)) %>%
slice_head(n = 1) %>%
mutate(LearningPlatform = "Other")
final_result <- bind_rows(top_platforms, other_platforms) %>%
select(LearningPlatform, count, cum_pct)
print(final_result)
## # A tibble: 11 × 3
## LearningPlatform count cum_pct
## <chr> <int> <dbl>
## 1 Kaggle 6583 0.145
## 2 Courses 5992 0.278
## 3 SO 5640 0.402
## 4 YouTube 5229 0.518
## 5 Projects 4794 0.624
## 6 Blogs 4765 0.729
## 7 Textbook 4181 0.822
## 8 College 3359 0.896
## 9 Arxiv 2391 0.949
## 10 Documentation 2321 1
## 11 Other 5383 NA
final_result %>%
ggplot(aes(x = reorder(LearningPlatform, count), y = cum_pct, color = LearningPlatform)) +
geom_col(width = .8) +
geom_text(aes(label = LearningPlatform), size = 3, nudge_x = -.05) +
scale_y_continuous(breaks = seq(0, 1, .1)) +
scale_color_brewer(palette = "Set1") +
coord_flip() +
labs(title = "Cumulative Percent of Top 10 Useful Learning Platforms",
x = "",
y = "Cumulative Percent") +
theme(legend.position = "none",
axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Removed 1 rows containing missing values (`position_stack()`).
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: Removed 1 rows containing missing values (`geom_text()`).
## Including Plots
You can also embed plots, for example:
Note that the echo = FALSE
parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.