Load libraries

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(lubridate)
library(nycflights13)
library(tidyquant)

## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## 
## Attaching package: 'PerformanceAnalytics'
## 
## The following object is masked from 'package:graphics':
## 
##     legend
## 
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(tidyr)

# Load the multiple-choice responses CSV into a tibble. The relative path is
# resolved against the current working directory, and readr guesses the
# column types (reported in the column specification message).
your_dataset <- readr::read_csv(file = "multipleChoiceResponses1.csv")

## Rows: 16716 Columns: 47
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (46): LearningPlatformUsefulnessArxiv, LearningPlatformUsefulnessBlogs, ...
## dbl  (1): Age
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Count how often each usefulness rating was given for each learning platform.
#
# Only the LearningPlatformUsefulness* columns are reshaped to long format.
# pivot_longer() replaces the superseded gather():
#   - names_prefix strips the shared "LearningPlatformUsefulness" prefix so
#     only the platform name remains (previously done with gsub()),
#   - values_drop_na drops cells with no rating (previously na.rm = TRUE).
# count() then yields one row per (learning_platform, usefulness) pair with
# the number of responses in column `n`.
result <- your_dataset %>%
  select(starts_with("LearningPlatformUsefulness")) %>%
  pivot_longer(
    cols = everything(),
    names_to = "learning_platform",
    names_prefix = "LearningPlatformUsefulness",
    values_to = "usefulness",
    values_drop_na = TRUE
  ) %>%
  count(learning_platform, usefulness)

print(result)

## # A tibble: 54 × 3
##    learning_platform usefulness          n
##    <chr>             <chr>           <int>
##  1 Arxiv             Not Useful         37
##  2 Arxiv             Somewhat useful  1038
##  3 Arxiv             Very useful      1316
##  4 Blogs             Not Useful         45
##  5 Blogs             Somewhat useful  2406
##  6 Blogs             Very useful      2314
##  7 College           Not Useful        101
##  8 College           Somewhat useful  1405
##  9 College           Very useful      1853
## 10 Communities       Not Useful         16
## # ℹ 44 more rows

library(dplyr)
library(tidyr)

# Share of "useful" ratings per learning platform.
#
# Restrict the reshape to the LearningPlatformUsefulness* columns first.
# Without this selection every column of the dataset (Age,
# CurrentJobTitleSelect, ...) is stacked as well and shows up as a bogus
# "platform" whose usefulness share is 0.
#
# Resulting columns:
#   learning_platform - platform name with the common column prefix removed
#   count             - number of non-missing ratings for the platform
#   tot               - number of "Very useful" or "Somewhat useful" ratings
#   perc_usefulness   - tot / count, a proportion between 0 and 1
result <- your_dataset %>%
  select(starts_with("LearningPlatformUsefulness")) %>%
  pivot_longer(
    cols = everything(),
    names_to = "learning_platform",
    names_prefix = "LearningPlatformUsefulness",
    values_to = "usefulness",
    values_drop_na = TRUE
  ) %>%
  mutate(
    # 1 when the rating counts as useful, 0 otherwise; kept numeric so that
    # sum() and mean() below return doubles.
    perc_usefulness = as.numeric(
      usefulness %in% c("Very useful", "Somewhat useful")
    )
  ) %>%
  group_by(learning_platform) %>%
  summarise(
    count = n(),
    tot = sum(perc_usefulness),
    perc_usefulness = mean(perc_usefulness)
  )

print(result)

## # A tibble: 47 × 4
##    learning_platform         count   tot perc_usefulness
##    <chr>                     <int> <dbl>           <dbl>
##  1 Age                       16385     0           0    
##  2 Arxiv                      2391  2354           0.985
##  3 Blogs                      4765  4720           0.991
##  4 College                    3359  3258           0.970
##  5 Communities                1142  1126           0.986
##  6 Company                     981   940           0.958
##  7 Conferences                2182  2063           0.945
##  8 Courses                    5992  5945           0.992
##  9 CurrentJobTitleSelect     11830     0           0    
## 10 DataScienceIdentitySelect 12671     0           0    
## # ℹ 37 more rows

library(ggplot2)
library(forcats)

# Prepare the per-platform summary for plotting:
#   - turn learning_platform into a factor ordered by perc_usefulness so the
#     bars are drawn in that order,
#   - make sure `tot` is numeric so it maps onto a continuous fill gradient.
result <- result %>%
  mutate(
    learning_platform = fct_reorder(learning_platform, perc_usefulness),
    tot = as.numeric(tot)
  )

# Horizontal bar chart: one bar per platform, bar length = share of useful
# ratings, fill colour = number of useful ratings (`tot`).
ggplot(data = result,
       mapping = aes(x = learning_platform, y = perc_usefulness, fill = tot)) +
  geom_col() +
  scale_fill_gradient(low = "aquamarine1", high = "darkgoldenrod1") +
  coord_flip() +
  labs(
    title = "Percentage of Useful Responses by Learning Platform",
    x = "Learning Platform",
    y = "Percentage of Usefulness"
  ) +
  theme_minimal()

Final exam 112035156

Batjav

2024-01-09

Load libraries