final exam

library(readr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)

# Load the multiple-choice survey responses from disk into a tibble
your_dataset <- read_csv(file = "multipleChoiceResponses1.csv")

## Rows: 16716 Columns: 47

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (46): LearningPlatformUsefulnessArxiv, LearningPlatformUsefulnessBlogs, ...
## dbl  (1): Age
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Tally how often each usefulness rating was given for every learning platform.
#
# Steps:
#   1. keep only the LearningPlatformUsefulness* columns;
#   2. reshape them to long form (one row per non-missing answer), stripping
#      the common column-name prefix so only the platform name remains;
#   3. count the rows for each platform / rating combination.
#
# pivot_longer() replaces the superseded gather() + gsub() pair; the counted
# result is the same.
result <- your_dataset %>%
  select(starts_with("LearningPlatformUsefulness")) %>%
  pivot_longer(
    cols = everything(),
    names_to = "learning_platform",
    names_prefix = "LearningPlatformUsefulness",
    values_to = "usefulness",
    values_drop_na = TRUE
  ) %>%
  count(learning_platform, usefulness)

print(result)

## # A tibble: 54 × 3
##    learning_platform usefulness          n
##    <chr>             <chr>           <int>
##  1 Arxiv             Not Useful         37
##  2 Arxiv             Somewhat useful  1038
##  3 Arxiv             Very useful      1316
##  4 Blogs             Not Useful         45
##  5 Blogs             Somewhat useful  2406
##  6 Blogs             Very useful      2314
##  7 College           Not Useful        101
##  8 College           Somewhat useful  1405
##  9 College           Very useful      1853
## 10 Communities       Not Useful         16
## # ℹ 44 more rows

library(dplyr)
library(tidyr)

# Share of answers that rate each learning platform as at least somewhat
# useful.
#
# Only the LearningPlatformUsefulness* columns are reshaped. Without this
# select(), every other survey column (Age, CurrentJobTitleSelect, ...) is
# stacked as well and gets reported as a "platform" with 0 useful answers.
result <- your_dataset %>%
  select(starts_with("LearningPlatformUsefulness")) %>%
  pivot_longer(
    cols = everything(),
    names_to = "learning_platform",
    names_prefix = "LearningPlatformUsefulness",
    values_to = "usefulness",
    values_drop_na = TRUE
  ) %>%
  # 1 when the answer is "Very useful" or "Somewhat useful", 0 otherwise
  mutate(
    perc_usefulness = as.numeric(
      usefulness %in% c("Very useful", "Somewhat useful")
    )
  ) %>%
  group_by(learning_platform) %>%
  # `tot` must be computed before `perc_usefulness` is overwritten by its mean
  summarise(
    count = n(),
    tot = sum(perc_usefulness),
    perc_usefulness = mean(perc_usefulness)
  )

print(result)

## # A tibble: 47 × 4
##    learning_platform         count   tot perc_usefulness
##    <chr>                     <int> <dbl>           <dbl>
##  1 Age                       16385     0           0    
##  2 Arxiv                      2391  2354           0.985
##  3 Blogs                      4765  4720           0.991
##  4 College                    3359  3258           0.970
##  5 Communities                1142  1126           0.986
##  6 Company                     981   940           0.958
##  7 Conferences                2182  2063           0.945
##  8 Courses                    5992  5945           0.992
##  9 CurrentJobTitleSelect     11830     0           0    
## 10 DataScienceIdentitySelect 12671     0           0    
## # ℹ 37 more rows

library(ggplot2)
library(forcats)

# Prepare the summary table for plotting:
#   - order the platforms by their share of useful answers so the bars
#     come out sorted;
#   - make sure the fill variable is numeric for the continuous gradient.
result <- result %>%
  mutate(
    learning_platform = fct_reorder(learning_platform, perc_usefulness),
    tot = as.numeric(tot)
  )

# Horizontal bar chart: bar length is the share of useful answers,
# fill colour is the number of useful answers.
ggplot(data = result) +
  aes(x = learning_platform, y = perc_usefulness, fill = tot) +
  geom_col() +
  coord_flip() +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  theme_minimal() +
  labs(
    title = "Percentage of Useful Responses by Learning Platform",
    x = "Learning Platform",
    y = "Percentage of Usefulness"
  )

final exam

Uyanga Altansukh

2024-01-02