This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(readr)
library(tidyr)
library(tidyquant)
## Warning: package 'tidyquant' was built under R version 4.3.2
## Loading required package: lubridate
## Warning: package 'lubridate' was built under R version 4.3.2
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.2
library(forcats)
## Warning: package 'forcats' was built under R version 4.3.2
library(tidyquant)
#question_1
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.2
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
##
## first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Question 1.1: load the survey responses and keep the columns of interest,
# converting every character column to a factor.
# NOTE(review): read.csv() keeps blank text fields as "" rather than NA, so a
# blank answer becomes a factor level of its own -- confirm that is intended.
responses <- read.csv(
  "C:/Users/Badamkhand/Downloads/multipleChoiceResponses.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

selected_columns <- responses %>%
  select(
    starts_with("Learning"),
    starts_with("Working"),
    starts_with("Age"),
    starts_with("EmploymentStatus"),
    starts_with("CurrentJob"),
    starts_with("MLMethod"),
    starts_with("Formal")
  )

# vapply() always yields a logical vector (sapply() returns list() when no
# columns are selected, which cannot be used as a column index).
char_columns <- vapply(selected_columns, is.character, logical(1))

# List-style indexing keeps a data frame even when exactly one column is
# character; `df[, mask]` would collapse to a plain vector in that case and
# lapply() would then iterate over its elements instead of over columns.
selected_columns[char_columns] <- lapply(
  selected_columns[char_columns],
  as.factor
)
# Question 1.2: number of factor levels for every selected column.
# nlevels() is 0 for non-factor columns. vapply() pins the result to an
# integer vector (named by column), whereas sapply()'s return type depends
# on its input.
factor_levels <- data.frame(
  variable = names(selected_columns),
  num_levels = vapply(selected_columns, nlevels, integer(1))
)

print(factor_levels)
## variable
## LearningDataScience LearningDataScience
## LearningPlatformSelect LearningPlatformSelect
## LearningPlatformUsefulnessArxiv LearningPlatformUsefulnessArxiv
## LearningPlatformUsefulnessBlogs LearningPlatformUsefulnessBlogs
## LearningPlatformUsefulnessCollege LearningPlatformUsefulnessCollege
## LearningPlatformUsefulnessCompany LearningPlatformUsefulnessCompany
## LearningPlatformUsefulnessConferences LearningPlatformUsefulnessConferences
## LearningPlatformUsefulnessFriends LearningPlatformUsefulnessFriends
## LearningPlatformUsefulnessKaggle LearningPlatformUsefulnessKaggle
## LearningPlatformUsefulnessNewsletters LearningPlatformUsefulnessNewsletters
## LearningPlatformUsefulnessCommunities LearningPlatformUsefulnessCommunities
## LearningPlatformUsefulnessDocumentation LearningPlatformUsefulnessDocumentation
## LearningPlatformUsefulnessCourses LearningPlatformUsefulnessCourses
## LearningPlatformUsefulnessProjects LearningPlatformUsefulnessProjects
## LearningPlatformUsefulnessPodcasts LearningPlatformUsefulnessPodcasts
## LearningPlatformUsefulnessSO LearningPlatformUsefulnessSO
## LearningPlatformUsefulnessTextbook LearningPlatformUsefulnessTextbook
## LearningPlatformUsefulnessTradeBook LearningPlatformUsefulnessTradeBook
## LearningPlatformUsefulnessTutoring LearningPlatformUsefulnessTutoring
## LearningPlatformUsefulnessYouTube LearningPlatformUsefulnessYouTube
## LearningDataScienceTime LearningDataScienceTime
## LearningCategorySelftTaught LearningCategorySelftTaught
## LearningCategoryOnlineCourses LearningCategoryOnlineCourses
## LearningCategoryWork LearningCategoryWork
## LearningCategoryUniversity LearningCategoryUniversity
## LearningCategoryKaggle LearningCategoryKaggle
## LearningCategoryOther LearningCategoryOther
## Age Age
## EmploymentStatus EmploymentStatus
## CurrentJobTitleSelect CurrentJobTitleSelect
## MLMethodNextYearSelect MLMethodNextYearSelect
## FormalEducation FormalEducation
## num_levels
## LearningDataScience 4
## LearningPlatformSelect 5363
## LearningPlatformUsefulnessArxiv 4
## LearningPlatformUsefulnessBlogs 4
## LearningPlatformUsefulnessCollege 4
## LearningPlatformUsefulnessCompany 4
## LearningPlatformUsefulnessConferences 4
## LearningPlatformUsefulnessFriends 4
## LearningPlatformUsefulnessKaggle 4
## LearningPlatformUsefulnessNewsletters 4
## LearningPlatformUsefulnessCommunities 4
## LearningPlatformUsefulnessDocumentation 4
## LearningPlatformUsefulnessCourses 4
## LearningPlatformUsefulnessProjects 4
## LearningPlatformUsefulnessPodcasts 4
## LearningPlatformUsefulnessSO 4
## LearningPlatformUsefulnessTextbook 4
## LearningPlatformUsefulnessTradeBook 4
## LearningPlatformUsefulnessTutoring 4
## LearningPlatformUsefulnessYouTube 4
## LearningDataScienceTime 7
## LearningCategorySelftTaught 0
## LearningCategoryOnlineCourses 0
## LearningCategoryWork 0
## LearningCategoryUniversity 0
## LearningCategoryKaggle 0
## LearningCategoryOther 0
## Age 0
## EmploymentStatus 7
## CurrentJobTitleSelect 17
## MLMethodNextYearSelect 26
## FormalEducation 8
# Question 1.2 (continued): the five columns with the most factor levels.
top5_levels <- head(
  arrange(factor_levels, desc(num_levels)),
  n = 5
)

print(top5_levels)
## variable num_levels
## LearningPlatformSelect LearningPlatformSelect 5363
## MLMethodNextYearSelect MLMethodNextYearSelect 26
## CurrentJobTitleSelect CurrentJobTitleSelect 17
## FormalEducation FormalEducation 8
## LearningDataScienceTime LearningDataScienceTime 7
# Question 1.3: level count for the CurrentJobTitleSelect column only.
current_job_levels <- filter(
  factor_levels,
  variable == "CurrentJobTitleSelect"
)

print(current_job_levels)
## variable num_levels
## CurrentJobTitleSelect CurrentJobTitleSelect 17
# Question 1.4: recompute and show the CurrentJobTitleSelect row of the
# level-count table.
current_job_levels <- filter(factor_levels, variable == "CurrentJobTitleSelect")
print(current_job_levels)
## variable num_levels
## CurrentJobTitleSelect CurrentJobTitleSelect 17
library(ggplot2)

# Horizontal bar chart: number of responses per EmployerIndustry value.
employer_industry_plot <- ggplot(
  data = responses,
  mapping = aes(x = EmployerIndustry)
) +
  geom_bar(stat = "count") +
  coord_flip()

print(employer_industry_plot)
# Question 2 ----
# 2.1: bar chart of response counts per EmployerIndustry, drawn horizontally.
library(ggplot2)

responses <- read.csv(
  file = "C:/Users/Badamkhand/Downloads/multipleChoiceResponses.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

# geom_bar() counts rows per x value by default.
employer_industry_plot <- ggplot(
  data = responses,
  mapping = aes(x = EmployerIndustry)
)
employer_industry_plot <- employer_industry_plot +
  geom_bar() +
  coord_flip()

print(employer_industry_plot)
# 2.2: same bar chart, restricted to respondents who reported both Age and
# EmployerIndustry.
library(ggplot2)

responses <- read.csv(
  "C:/Users/Badamkhand/Downloads/multipleChoiceResponses.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

industry_age <- responses[c("Age", "EmployerIndustry")]

# read.csv() leaves blank text fields as "" (only blank numeric fields become
# NA), so na.omit() alone would keep respondents with no industry. Mark blank
# industries as NA first so they are dropped together with missing ages.
blank_industry <- trimws(industry_age$EmployerIndustry) %in% ""
industry_age$EmployerIndustry[blank_industry] <- NA

filtered_responses <- na.omit(industry_age)

filtered_plot <- ggplot(filtered_responses, aes(x = EmployerIndustry)) +
  geom_bar(stat = "count") +
  coord_flip()

print(filtered_plot)
# 2.3: Age by EmployerIndustry, with industries ordered from most to least
# frequent, for respondents who reported both values.
library(ggplot2)

responses <- read.csv(
  "C:/Users/Badamkhand/Downloads/multipleChoiceResponses.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

industry_age <- responses[c("Age", "EmployerIndustry")]

# Blank text fields are read as "" rather than NA, so convert them before
# na.omit(); otherwise respondents with no industry stay in the data.
blank_industry <- trimws(industry_age$EmployerIndustry) %in% ""
industry_age$EmployerIndustry[blank_industry] <- NA

filtered_responses <- na.omit(industry_age)

# Order the industry factor once by descending frequency. Doing it on a copy
# of the data (instead of repeating the reorder() call inside each aes())
# keeps the two aesthetics in sync and gives the axis a readable label.
ordered_responses <- transform(
  filtered_responses,
  EmployerIndustry = reorder(
    EmployerIndustry,
    -as.vector(table(EmployerIndustry)[EmployerIndustry])
  )
)

# One segment per respondent, from 0 to that respondent's Age.
# `linewidth` replaces `size` for line geoms (deprecated in ggplot2 3.4.0).
ordered_plot <- ggplot(ordered_responses, aes(x = EmployerIndustry, y = Age)) +
  geom_segment(aes(xend = EmployerIndustry, yend = 0), linewidth = 1) +
  coord_flip()

print(ordered_plot)
# Question 3: bar chart of WorkInternalVsExternalTools with the answers shown
# in a meaningful order (internal -> external, "Do not know" last).
responses <- read.csv(
  "C:/Users/Badamkhand/Downloads/multipleChoiceResponses.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

# The column is read as character (stringsAsFactors = FALSE), so levels()
# would return NULL; unique() lists the distinct answers actually present.
work_levels <- unique(responses$WorkInternalVsExternalTools)

custom_order <- c(
  "Entirely internal",
  "More internal than external",
  "Approximately half internal and half external",
  "More external than internal",
  "Entirely external",
  "Do not know"
)

# Any value not listed in custom_order (including blank answers) becomes NA.
responses$WorkInternalVsExternalTools <- factor(
  responses$WorkInternalVsExternalTools,
  levels = custom_order
)

# Plot only rows with a recognised answer so no "NA" bar is drawn.
answered <- responses[!is.na(responses$WorkInternalVsExternalTools), ]

work_plot <- ggplot(answered, aes(x = WorkInternalVsExternalTools)) +
  geom_bar(stat = "count") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

print(work_plot)
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.