# Install and load the required packages
# Install only the packages that are not yet available, so re-running the
# script does not reinstall everything on each run. dplyr is included because
# it is attached below with library() but was never installed here.
required_packages <- c(
  "tidyr", "readr", "dplyr", "ggplot2", "forcats", "tidyquant"
)
# requireNamespace() returns FALSE (instead of erroring) for a missing package.
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_packages <- required_packages[!is_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
library('tidyr')
library('readr')
library('dplyr')
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library('ggplot2')
library('forcats')
library('tidyquant')
## Loading required package: lubridate
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# Import data
# Read the survey responses from the CSV file in the working directory;
# readr guesses the column types.
multiple_choice_responses <- read_csv(
  file = "multipleChoiceResponses.csv"
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 16716 Columns: 228
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (212): GenderSelect, Country, EmploymentStatus, StudentStatus, LearningD...
## dbl (13): Age, LearningCategorySelftTaught, LearningCategoryOnlineCourses, ...
## num (1): CompensationAmount
## lgl (2): WorkToolsFrequencyAngoss, WorkToolsFrequencyKNIMECommercial
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Part 1 ----
# 1.1
# Keep the survey columns whose names start with one of the prefixes below,
# then convert every character column among them to a factor.
library(dplyr)
selected_columns <- multiple_choice_responses %>%
  select(
    starts_with("Learning"),
    # NOTE(review): the factor_levels printout further down lists no
    # "Working..." columns, so this prefix appears to match nothing -- confirm.
    starts_with("Working"),
    starts_with("Age"),
    starts_with("EmploymentStatus"),
    starts_with("CurrentJob"),
    starts_with("MLMethod"),
    starts_with("Formal")
  )
# vapply() (unlike sapply()) always yields a logical vector, even when no
# column was selected, so the column subsetting below never receives a list.
char_columns <- vapply(selected_columns, is.character, logical(1))
selected_columns[, char_columns] <- lapply(
  selected_columns[, char_columns],
  as.factor
)
# 1.2
# Tabulate the number of factor levels of every selected column. nlevels()
# gives 0 for columns that are not factors (e.g. the numeric ones).
factor_levels <- data.frame(
  variable = names(selected_columns),
  # vapply() pins the result to exactly one integer per column; sapply()
  # would silently return a list for an empty selection. The names it keeps
  # become the row names of the data frame.
  num_levels = vapply(selected_columns, nlevels, integer(1))
)
print(factor_levels)
## variable
## LearningDataScience LearningDataScience
## LearningPlatformSelect LearningPlatformSelect
## LearningPlatformUsefulnessArxiv LearningPlatformUsefulnessArxiv
## LearningPlatformUsefulnessBlogs LearningPlatformUsefulnessBlogs
## LearningPlatformUsefulnessCollege LearningPlatformUsefulnessCollege
## LearningPlatformUsefulnessCompany LearningPlatformUsefulnessCompany
## LearningPlatformUsefulnessConferences LearningPlatformUsefulnessConferences
## LearningPlatformUsefulnessFriends LearningPlatformUsefulnessFriends
## LearningPlatformUsefulnessKaggle LearningPlatformUsefulnessKaggle
## LearningPlatformUsefulnessNewsletters LearningPlatformUsefulnessNewsletters
## LearningPlatformUsefulnessCommunities LearningPlatformUsefulnessCommunities
## LearningPlatformUsefulnessDocumentation LearningPlatformUsefulnessDocumentation
## LearningPlatformUsefulnessCourses LearningPlatformUsefulnessCourses
## LearningPlatformUsefulnessProjects LearningPlatformUsefulnessProjects
## LearningPlatformUsefulnessPodcasts LearningPlatformUsefulnessPodcasts
## LearningPlatformUsefulnessSO LearningPlatformUsefulnessSO
## LearningPlatformUsefulnessTextbook LearningPlatformUsefulnessTextbook
## LearningPlatformUsefulnessTradeBook LearningPlatformUsefulnessTradeBook
## LearningPlatformUsefulnessTutoring LearningPlatformUsefulnessTutoring
## LearningPlatformUsefulnessYouTube LearningPlatformUsefulnessYouTube
## LearningDataScienceTime LearningDataScienceTime
## LearningCategorySelftTaught LearningCategorySelftTaught
## LearningCategoryOnlineCourses LearningCategoryOnlineCourses
## LearningCategoryWork LearningCategoryWork
## LearningCategoryUniversity LearningCategoryUniversity
## LearningCategoryKaggle LearningCategoryKaggle
## LearningCategoryOther LearningCategoryOther
## Age Age
## EmploymentStatus EmploymentStatus
## CurrentJobTitleSelect CurrentJobTitleSelect
## MLMethodNextYearSelect MLMethodNextYearSelect
## FormalEducation FormalEducation
## num_levels
## LearningDataScience 3
## LearningPlatformSelect 5362
## LearningPlatformUsefulnessArxiv 3
## LearningPlatformUsefulnessBlogs 3
## LearningPlatformUsefulnessCollege 3
## LearningPlatformUsefulnessCompany 3
## LearningPlatformUsefulnessConferences 3
## LearningPlatformUsefulnessFriends 3
## LearningPlatformUsefulnessKaggle 3
## LearningPlatformUsefulnessNewsletters 3
## LearningPlatformUsefulnessCommunities 3
## LearningPlatformUsefulnessDocumentation 3
## LearningPlatformUsefulnessCourses 3
## LearningPlatformUsefulnessProjects 3
## LearningPlatformUsefulnessPodcasts 3
## LearningPlatformUsefulnessSO 3
## LearningPlatformUsefulnessTextbook 3
## LearningPlatformUsefulnessTradeBook 3
## LearningPlatformUsefulnessTutoring 3
## LearningPlatformUsefulnessYouTube 3
## LearningDataScienceTime 6
## LearningCategorySelftTaught 0
## LearningCategoryOnlineCourses 0
## LearningCategoryWork 0
## LearningCategoryUniversity 0
## LearningCategoryKaggle 0
## LearningCategoryOther 0
## Age 0
## EmploymentStatus 7
## CurrentJobTitleSelect 16
## MLMethodNextYearSelect 25
## FormalEducation 7
# 1.2
# Sort the level counts from largest to smallest and keep the first five rows.
top5_levels <- head(
  arrange(factor_levels, desc(num_levels)),
  n = 5
)
print(top5_levels)
## variable num_levels
## LearningPlatformSelect LearningPlatformSelect 5362
## MLMethodNextYearSelect MLMethodNextYearSelect 25
## CurrentJobTitleSelect CurrentJobTitleSelect 16
## EmploymentStatus EmploymentStatus 7
## FormalEducation FormalEducation 7
# 1.3
# Pull out the level-count row for the CurrentJobTitleSelect column.
current_job_levels <- filter(
  factor_levels,
  variable == "CurrentJobTitleSelect"
)
print(current_job_levels)
## variable num_levels
## CurrentJobTitleSelect CurrentJobTitleSelect 16
# 1.4
# `current_job_levels` was already computed in section 1.3 from the same
# `factor_levels` table with the same filter, so it is only printed again
# here instead of being recomputed.
print(current_job_levels)
## variable num_levels
## CurrentJobTitleSelect CurrentJobTitleSelect 16
# Bar chart of the number of responses per EmployerIndustry value, with the
# axes flipped so the bars run horizontally.
employer_industry_plot <- ggplot(
  data = multiple_choice_responses,
  mapping = aes(x = EmployerIndustry)
) +
  geom_bar() +
  coord_flip()
print(employer_industry_plot)

# Summary statistics of the built-in `cars` dataset (columns speed and dist).
# NOTE(review): this does not use the survey data -- it looks like leftover
# template code; confirm whether it is still needed.
summary(datasets::cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00