library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
# Load the survey responses; the first row of the CSV holds the column names.
data <- read.csv(file = "multipleChoiceResponses1.csv", header = TRUE)

# Names of the first 18 columns, used below as the set of usefulness columns.
usefulness_columns <- names(data)[seq_len(18)]
# Tally how often each usefulness rating was given for each learning platform.
#   1. keep only the usefulness columns,
#   2. reshape to one row per (platform, rating), dropping missing ratings,
#   3. strip the common column-name prefix and relabel "Not Useful",
#   4. count the (platform, rating) combinations.
usefulness_counts <- data %>%
  select(all_of(usefulness_columns)) %>%
  pivot_longer(
    cols = everything(),
    names_to = "LearningPlatform",
    values_to = "Usefulness",
    values_drop_na = TRUE
  ) %>%
  mutate(
    LearningPlatform = gsub(
      pattern = "LearningPlatformUsefulness",
      replacement = "",
      x = LearningPlatform
    ),
    Usefulness = gsub(
      pattern = "Not Useful",
      replacement = "Least Useful",
      x = Usefulness
    )
  ) %>%
  count(LearningPlatform, Usefulness)

print(usefulness_counts)
## # A tibble: 54 × 3
## LearningPlatform Usefulness n
## <chr> <chr> <int>
## 1 Arxiv Least Useful 37
## 2 Arxiv Somewhat useful 1038
## 3 Arxiv Very useful 1316
## 4 Blogs Least Useful 45
## 5 Blogs Somewhat useful 2406
## 6 Blogs Very useful 2314
## 7 College Least Useful 101
## 8 College Somewhat useful 1405
## 9 College Very useful 1853
## 10 Communities Least Useful 16
## # ℹ 44 more rows
# Per-platform summary of how many respondents found the platform useful.
#
# Only the usefulness columns are reshaped. Reshaping every column of `data`
# (as an unrestricted gather() does) also turns unrelated survey fields such
# as Age or CurrentJobTitleSelect into "platforms" with 0 % usefulness.
#
# Output columns:
#   learning_platform  platform name with the "LearningPlatformUsefulness"
#                      prefix removed
#   count              number of non-missing ratings for the platform
#   tot                number of "Very useful" or "Somewhat useful" ratings
#   perc_usefulness    tot / count, as a proportion in [0, 1]
result <- data %>%
  select(all_of(usefulness_columns)) %>%
  pivot_longer(
    cols = everything(),
    names_to = "learning_platform",
    values_to = "usefulness",
    values_drop_na = TRUE
  ) %>%
  mutate(
    learning_platform = gsub(
      "LearningPlatformUsefulness", "", learning_platform
    ),
    # 1 when the rating counts as useful, 0 otherwise; kept numeric so that
    # `tot` stays a double as before.
    perc_usefulness = as.numeric(
      usefulness %in% c("Very useful", "Somewhat useful")
    )
  ) %>%
  group_by(learning_platform) %>%
  summarise(
    count = n(),
    tot = sum(perc_usefulness),
    perc_usefulness = mean(perc_usefulness)
  )

print(result)
# Hard-coded counts of useful ratings for ten learning platforms, listed in
# descending order of count, plus the cumulative share of the total.
learning_data <- data.frame(
  learning_platform = c(
    "Kaggle", "Courses", "SO", "YouTube", "Projects",
    "Blogs", "Textbook", "College", "Arxiv", "Documentation"
  ),
  count1 = c(6527, 5945, 5576, 5125, 4755, 4720, 4112, 3258, 2354, 2279)
) %>%
  mutate(
    cum_pct = cumsum(count1) / sum(count1),
    # Order the factor levels by cumulative share.
    learning_platform = fct_reorder(learning_platform, cum_pct)
  )

# Move an "Other" bucket to the end, but only when such a level exists.
# Calling fct_relevel() with an unknown level leaves the factor unchanged and
# raises a "1 unknown level in `f`: Other" warning.
if ("Other" %in% levels(learning_data$learning_platform)) {
  learning_data <- learning_data %>%
    mutate(
      learning_platform = fct_relevel(learning_platform, "Other", after = Inf)
    )
}

print(learning_data)
## learning_platform count1 cum_pct
## 1 Kaggle 6527 0.1461781
## 2 Courses 5945 0.2793219
## 3 SO 5576 0.4042015
## 4 YouTube 5125 0.5189805
## 5 Projects 4755 0.6254731
## 6 Blogs 4720 0.7311818
## 7 Textbook 4112 0.8232738
## 8 College 3258 0.8962397
## 9 Arxiv 2354 0.9489597
## 10 Documentation 2279 1.0000000
# Horizontal bar chart of the proportion of useful ratings per platform.
# Bars are shaded by the absolute number of useful ratings (`tot`).
ggplot(result, aes(x = learning_platform, y = perc_usefulness, fill = tot)) +
  # geom_col() draws bars from the y values as given, like
  # geom_bar(stat = "identity"). `linewidth` sets the outline width; using
  # `size` for lines is deprecated since ggplot2 3.4.0.
  geom_col(width = 0.7, color = "black", linewidth = 0.5) +
  scale_fill_gradient(low = "pink", high = "blue") +
  coord_flip() +
  labs(
    title = "Percentage of Useful Responses by Learning Platform",
    x = "Learning Platform",
    y = "Percentage of Usefulness"
  ) +
  theme_minimal()
