# Load libraries 
library(tidyverse) 
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate) 
library(nycflights13) 
library(tidyquant)
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## 
## Attaching package: 'PerformanceAnalytics'
## 
## The following object is masked from 'package:graphics':
## 
##     legend
## 
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(tidyr)
library(forcats)
# Read the multiple-choice responses; the path is relative to the working
# directory.
data <- read.csv("multipleChoiceResponses1.csv")

# Tally how often each usefulness rating was given per learning platform.
# The shared column prefix is stripped from the column names up front, so the
# gathered key column already holds the short platform names.
result <- data %>%
  select(starts_with("LearningPlatformUsefulness")) %>%
  rename_with(~ gsub("LearningPlatformUsefulness", "", .x)) %>%
  gather(key = "learning_platform", value = "usefulness", na.rm = TRUE) %>%
  count(learning_platform, usefulness)

print(result)
##    learning_platform      usefulness    n
## 1              Arxiv      Not Useful   37
## 2              Arxiv Somewhat useful 1038
## 3              Arxiv     Very useful 1316
## 4              Blogs      Not Useful   45
## 5              Blogs Somewhat useful 2406
## 6              Blogs     Very useful 2314
## 7            College      Not Useful  101
## 8            College Somewhat useful 1405
## 9            College     Very useful 1853
## 10       Communities      Not Useful   16
## 11       Communities Somewhat useful  567
## 12       Communities     Very useful  559
## 13           Company      Not Useful   41
## 14           Company Somewhat useful  502
## 15           Company     Very useful  438
## 16       Conferences      Not Useful  119
## 17       Conferences Somewhat useful 1305
## 18       Conferences     Very useful  758
## 19           Courses      Not Useful   47
## 20           Courses Somewhat useful 1750
## 21           Courses     Very useful 4195
## 22     Documentation      Not Useful   42
## 23     Documentation Somewhat useful 1067
## 24     Documentation     Very useful 1212
## 25           Friends      Not Useful   51
## 26           Friends Somewhat useful  774
## 27           Friends     Very useful  756
## 28            Kaggle      Not Useful   56
## 29            Kaggle Somewhat useful 2451
## 30            Kaggle     Very useful 4076
## 31       Newsletters      Not Useful   56
## 32       Newsletters Somewhat useful  686
## 33       Newsletters     Very useful  347
## 34          Podcasts      Not Useful  124
## 35          Podcasts Somewhat useful  818
## 36          Podcasts     Very useful  272
## 37          Projects      Not Useful   39
## 38          Projects Somewhat useful 1185
## 39          Projects     Very useful 3570
## 40                SO      Not Useful   64
## 41                SO Somewhat useful 2044
## 42                SO     Very useful 3532
## 43          Textbook      Not Useful   69
## 44          Textbook Somewhat useful 1796
## 45          Textbook     Very useful 2316
## 46         TradeBook      Not Useful    9
## 47         TradeBook Somewhat useful  162
## 48         TradeBook     Very useful  162
## 49          Tutoring      Not Useful   32
## 50          Tutoring Somewhat useful  569
## 51          Tutoring     Very useful  825
## 52           YouTube      Not Useful  104
## 53           YouTube Somewhat useful 2605
## 54           YouTube     Very useful 2520
# Share of "useful" answers per learning platform.
# Only the LearningPlatformUsefulness* columns are reshaped. Without this
# selection every column of `data` (Age, CurrentJobTitleSelect, ...) is
# gathered too and shows up as a bogus "platform" with 0 useful responses.
result <- data %>%
  select(starts_with("LearningPlatformUsefulness")) %>%
  gather(key = "learning_platform", value = "usefulness", na.rm = TRUE) %>%
  mutate(
    learning_platform = gsub("LearningPlatformUsefulness", "", learning_platform),
    # 1 when the rating is "Very useful" or "Somewhat useful", 0 otherwise
    perc_usefulness = as.numeric(
      usefulness %in% c("Very useful", "Somewhat useful")
    )
  ) %>%
  group_by(learning_platform) %>%
  summarise(
    count = n(),                             # non-missing ratings
    tot = sum(perc_usefulness),              # ratings counted as useful
    perc_usefulness = mean(perc_usefulness)  # proportion useful, in [0, 1]
  )

print(result)
## # A tibble: 47 × 4
##    learning_platform         count   tot perc_usefulness
##    <chr>                     <int> <dbl>           <dbl>
##  1 Age                       16385     0           0    
##  2 Arxiv                      2391  2354           0.985
##  3 Blogs                      4765  4720           0.991
##  4 College                    3359  3258           0.970
##  5 Communities                1142  1126           0.986
##  6 Company                     981   940           0.958
##  7 Conferences                2182  2063           0.945
##  8 Courses                    5992  5945           0.992
##  9 CurrentJobTitleSelect     11830     0           0    
## 10 DataScienceIdentitySelect 12671     0           0    
## # ℹ 37 more rows
# Hard-coded response counts for ten learning platforms, listed from the
# largest count to the smallest.
platform_counts <- c(
  Kaggle = 6527,
  Courses = 5945,
  SO = 5576,
  YouTube = 5125,
  Projects = 4755,
  Blogs = 4720,
  Textbook = 4112,
  College = 3258,
  Arxiv = 2354,
  Documentation = 2279
)

# unname() keeps the default 1..n row names instead of the platform names.
learning_data <- data.frame(
  learning_platform = names(platform_counts),
  count1 = unname(platform_counts)
)

# Add the running share of all responses, then order the platform factor
# levels by that running share. mutate() evaluates its arguments in order,
# so `cum_pct` is already available to fct_reorder().
learning_data <- learning_data %>%
  mutate(
    cum_pct = cumsum(count1) / sum(count1),
    learning_platform = fct_reorder(learning_platform, cum_pct)
  )

# Move an 'Other' level to the end of the factor, if one exists.
# fct_relevel() warns about unknown levels, and the platforms listed here
# contain no 'Other' entry, so the relevel is guarded to avoid a spurious
# warning on every run.
learning_data <- learning_data %>%
  mutate(
    learning_platform = if ("Other" %in% levels(learning_platform)) {
      fct_relevel(learning_platform, "Other", after = Inf)
    } else {
      learning_platform
    }
  )

# Print the result
print(learning_data)
##    learning_platform count1   cum_pct
## 1             Kaggle   6527 0.1461781
## 2            Courses   5945 0.2793219
## 3                 SO   5576 0.4042015
## 4            YouTube   5125 0.5189805
## 5           Projects   4755 0.6254731
## 6              Blogs   4720 0.7311818
## 7           Textbook   4112 0.8232738
## 8            College   3258 0.8962397
## 9              Arxiv   2354 0.9489597
## 10     Documentation   2279 1.0000000
# Order platforms by their share of useful responses so the bars come out
# sorted, and make sure `tot` is numeric so it maps to a continuous fill scale.
result <- result %>%
  mutate(
    learning_platform = fct_reorder(learning_platform, perc_usefulness),
    tot = as.numeric(tot)
  )

# Horizontal bar chart: bar length is the share of useful responses and the
# fill colour encodes the number of useful responses.
ggplot(
  result,
  aes(x = learning_platform, y = perc_usefulness, fill = tot)
) +
  geom_col() +
  scale_fill_gradient(low = "pink", high = "darkred") +
  coord_flip() +
  labs(
    title = "Percentage of Useful Responses by Learning Platform",
    x = "Learning Platform",
    y = "Percentage of Usefulness"
  ) +
  theme_minimal()