Make-up exam

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(lubridate)
# Load the survey responses; the first line of the CSV supplies column names.
data <- read.csv(file = "multipleChoiceResponses1.csv", header = TRUE)

# The usefulness ratings are taken from the first 18 columns of the file.
usefulness_columns <- names(data)[1:18]

# Tally the answers per platform and rating:
#   1. keep only the rating columns and stack them into
#      (LearningPlatform, Usefulness) pairs,
#   2. discard unanswered (NA) ratings,
#   3. relabel "Not Useful" as "Least Useful" and strip the shared column
#      prefix so that only the platform name is left,
#   4. count every platform/rating combination.
usefulness_counts <- data %>%
  select(all_of(usefulness_columns)) %>%
  pivot_longer(
    cols = everything(),
    names_to = "LearningPlatform",
    values_to = "Usefulness"
  ) %>%
  drop_na(Usefulness) %>%
  mutate(
    Usefulness = str_replace_all(Usefulness, "Not Useful", "Least Useful"),
    LearningPlatform = str_remove_all(
      LearningPlatform,
      "LearningPlatformUsefulness"
    )
  ) %>%
  count(LearningPlatform, Usefulness)

print(usefulness_counts)

## # A tibble: 54 × 3
##    LearningPlatform Usefulness          n
##    <chr>            <chr>           <int>
##  1 Arxiv            Least Useful       37
##  2 Arxiv            Somewhat useful  1038
##  3 Arxiv            Very useful      1316
##  4 Blogs            Least Useful       45
##  5 Blogs            Somewhat useful  2406
##  6 Blogs            Very useful      2314
##  7 College          Least Useful      101
##  8 College          Somewhat useful  1405
##  9 College          Very useful      1853
## 10 Communities      Least Useful       16
## # ℹ 44 more rows

# Share of "useful" answers per learning platform.
#
# Only the `LearningPlatformUsefulness*` columns are reshaped. Reshaping every
# column of `data` would turn unrelated survey fields (Age, job title, ...)
# into bogus "platforms" with a usefulness share of 0.
#
# Output columns:
#   learning_platform  platform name (column prefix removed)
#   count              number of non-missing ratings
#   tot                ratings equal to "Very useful" or "Somewhat useful"
#   perc_usefulness    tot / count, a proportion in [0, 1]
result <- data %>%
  select(starts_with("LearningPlatformUsefulness")) %>%
  pivot_longer(
    cols = everything(),
    names_to = "learning_platform",
    values_to = "usefulness",
    values_drop_na = TRUE
  ) %>%
  mutate(
    learning_platform = gsub(
      "LearningPlatformUsefulness", "", learning_platform
    ),
    # 1 for a useful rating, 0 otherwise, so sum() counts and mean() averages.
    perc_usefulness = as.numeric(
      usefulness %in% c("Very useful", "Somewhat useful")
    )
  ) %>%
  group_by(learning_platform) %>%
  summarise(
    count = n(),
    tot = sum(perc_usefulness),
    perc_usefulness = mean(perc_usefulness)
  )

print(result)

## # A tibble: 47 × 4
##    learning_platform         count   tot perc_usefulness
##    <chr>                     <int> <dbl>           <dbl>
##  1 Age                       16385     0           0    
##  2 Arxiv                      2391  2354           0.985
##  3 Blogs                      4765  4720           0.991
##  4 College                    3359  3258           0.970
##  5 Communities                1142  1126           0.986
##  6 Company                     981   940           0.958
##  7 Conferences                2182  2063           0.945
##  8 Courses                    5992  5945           0.992
##  9 CurrentJobTitleSelect     11830     0           0    
## 10 DataScienceIdentitySelect 12671     0           0    
## # ℹ 37 more rows

# Ten learning platforms with hard-coded counts of useful responses, listed in
# descending order of count.
# NOTE(review): the counts presumably come from the `tot` column of `result`;
# confirm and derive them from `result` instead of hard-coding if so.
learning_data <- data.frame(
  learning_platform = c(
    "Kaggle", "Courses", "SO", "YouTube", "Projects",
    "Blogs", "Textbook", "College", "Arxiv", "Documentation"
  ),
  count1 = c(6527, 5945, 5576, 5125, 4755, 4720, 4112, 3258, 2354, 2279)
) %>%
  mutate(
    # Running share of the total count, in the row order given above.
    cum_pct = cumsum(count1) / sum(count1),
    # Order the factor levels by cumulative share.
    learning_platform = fct_reorder(learning_platform, cum_pct)
  )
# No "Other" level exists in this data, so the former
# fct_relevel(learning_platform, "Other", after = Inf) step did nothing except
# raise an "unknown level" warning; it has been removed.

print(learning_data)

##    learning_platform count1   cum_pct
## 1             Kaggle   6527 0.1461781
## 2            Courses   5945 0.2793219
## 3                 SO   5576 0.4042015
## 4            YouTube   5125 0.5189805
## 5           Projects   4755 0.6254731
## 6              Blogs   4720 0.7311818
## 7           Textbook   4112 0.8232738
## 8            College   3258 0.8962397
## 9              Arxiv   2354 0.9489597
## 10     Documentation   2279 1.0000000

# Horizontal bar chart of the share of useful responses per platform.
# Bars are filled by the number of useful responses (`tot`), from pink (few)
# to blue (many).
ggplot(result, aes(x = learning_platform, y = perc_usefulness, fill = tot)) +
  # geom_col() draws bars from the values in the data; `linewidth` replaces
  # `size` for the bar outline, which is deprecated since ggplot2 3.4.0.
  geom_col(width = 0.7, color = "black", linewidth = 0.5) +
  scale_fill_gradient(low = "pink", high = "blue") +
  coord_flip() +
  labs(
    title = "Percentage of Useful Responses by Learning Platform",
    x = "Learning Platform",
    y = "Percentage of Usefulness"
  ) +
  theme_minimal()

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Make-up exam

Leo Tzang

2024-01-09