library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
# Load the survey's multiple-choice responses; the path is resolved relative
# to the current working directory.
your_dataset <- read_csv(file = "multipleChoiceResponses1.csv")
## Rows: 16716 Columns: 47
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (46): LearningPlatformUsefulnessArxiv, LearningPlatformUsefulnessBlogs, ...
## dbl (1): Age
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Tally how many respondents gave each usefulness rating to each learning
# platform.
#   1. Keep only the `LearningPlatformUsefulness*` columns.
#   2. Reshape to long form: one row per non-missing answer. `names_prefix`
#      strips the shared column-name prefix so `learning_platform` holds just
#      the platform name (e.g. "Arxiv").
#   3. Count rows per (platform, rating) pair.
# `pivot_longer()` replaces the superseded `gather()`; the counted result is
# the same.
result <- your_dataset %>%
  select(starts_with("LearningPlatformUsefulness")) %>%
  pivot_longer(
    cols = everything(),
    names_to = "learning_platform",
    values_to = "usefulness",
    names_prefix = "LearningPlatformUsefulness",
    values_drop_na = TRUE
  ) %>%
  count(learning_platform, usefulness)
print(result)
## # A tibble: 54 × 3
## learning_platform usefulness n
## <chr> <chr> <int>
## 1 Arxiv Not Useful 37
## 2 Arxiv Somewhat useful 1038
## 3 Arxiv Very useful 1316
## 4 Blogs Not Useful 45
## 5 Blogs Somewhat useful 2406
## 6 Blogs Very useful 2314
## 7 College Not Useful 101
## 8 College Somewhat useful 1405
## 9 College Very useful 1853
## 10 Communities Not Useful 16
## # ℹ 44 more rows
library(dplyr)
library(tidyr)
# For each learning platform, compute how many respondents rated it and what
# share of them found it at least somewhat useful.
#
# Only the `LearningPlatformUsefulness*` columns are reshaped. Without this
# filter every column of the survey (Age, CurrentJobTitleSelect, ...) is
# treated as a "platform" and reported with a usefulness share of 0.
#
# Output columns:
#   count           - number of non-missing ratings for the platform
#   tot             - number of "Very useful" or "Somewhat useful" ratings
#   perc_usefulness - tot / count, a proportion in [0, 1]
result <- your_dataset %>%
  select(starts_with("LearningPlatformUsefulness")) %>%
  pivot_longer(
    cols = everything(),
    names_to = "learning_platform",
    values_to = "usefulness",
    names_prefix = "LearningPlatformUsefulness",
    values_drop_na = TRUE
  ) %>%
  # 1 when the rating counts as useful, 0 otherwise; kept numeric so `tot`
  # stays a double column.
  mutate(
    perc_usefulness = as.numeric(
      usefulness %in% c("Very useful", "Somewhat useful")
    )
  ) %>%
  group_by(learning_platform) %>%
  # `tot` must be computed before `perc_usefulness` is overwritten by its mean.
  summarise(
    count = n(),
    tot = sum(perc_usefulness),
    perc_usefulness = mean(perc_usefulness)
  )
print(result)
## Platform rows from the previously recorded run (unaffected by the column
## filter; re-run to regenerate the full table and its row count):
## learning_platform count tot perc_usefulness
## <chr> <int> <dbl> <dbl>
## Arxiv 2391 2354 0.985
## Blogs 4765 4720 0.991
## College 3359 3258 0.970
## Communities 1142 1126 0.986
## Company 981 940 0.958
## Conferences 2182 2063 0.945
## Courses 5992 5945 0.992
library(ggplot2)
library(forcats)
# Prepare `result` for plotting:
#   - order the platform factor by its usefulness share so bars appear sorted;
#   - make sure `tot` is numeric so it maps onto a continuous fill gradient.
result <- mutate(
  result,
  learning_platform = fct_reorder(learning_platform, perc_usefulness),
  tot = as.numeric(tot)
)
# Horizontal bar chart: bar length is the usefulness share, bar shade is the
# absolute number of useful responses.
ggplot(
  data = result,
  mapping = aes(x = learning_platform, y = perc_usefulness, fill = tot)
) +
  geom_col() +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  coord_flip() +
  labs(
    title = "Percentage of Useful Responses by Learning Platform",
    x = "Learning Platform",
    y = "Percentage of Usefulness"
  ) +
  theme_minimal()
