library(tidyr)
library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(forcats)
library(tidyquant)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## ── Attaching core tidyquant packages ──────────────────────── tidyquant 1.0.9 ──
## ✔ PerformanceAnalytics 2.0.8      ✔ TTR                  0.24.4
## ✔ quantmod             0.4.26     ✔ xts                  0.14.1
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date()                 masks base::as.Date()
## ✖ zoo::as.Date.numeric()         masks base::as.Date.numeric()
## ✖ dplyr::filter()                masks stats::filter()
## ✖ xts::first()                   masks dplyr::first()
## ✖ dplyr::lag()                   masks stats::lag()
## ✖ xts::last()                    masks dplyr::last()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary()            masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the raw survey responses. Column types are guessed from the leading
# rows only, and for the two columns below those rows are all NA, so readr
# guessed logical; any later text answer would then fail to parse and be
# turned into NA. Pin them to character like the other WorkToolsFrequency*
# columns; every other column is still guessed.
# NOTE(review): other parsing issues may remain - confirm with
# problems(multipleChoiceResponses) after loading.
multipleChoiceResponses <- read_csv(
  "/cloud/project/multipleChoiceResponses.csv",
  col_types = cols(
    WorkToolsFrequencyAngoss = col_character(),
    WorkToolsFrequencyKNIMECommercial = col_character()
  )
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 16716 Columns: 228
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (212): GenderSelect, Country, EmploymentStatus, StudentStatus, LearningD...
## dbl  (13): Age, LearningCategorySelftTaught, LearningCategoryOnlineCourses, ...
## num   (1): CompensationAmount
## lgl   (2): WorkToolsFrequencyAngoss, WorkToolsFrequencyKNIMECommercial
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print each column's name, type and first few values for a quick look at
# the structure of the imported table.
multipleChoiceResponses %>% glimpse()
## Rows: 16,716
## Columns: 228
## $ GenderSelect                                <chr> "Non-binary, genderqueer, …
## $ Country                                     <chr> NA, "United States", "Cana…
## $ Age                                         <dbl> NA, 30, 28, 56, 38, 46, 35…
## $ EmploymentStatus                            <chr> "Employed full-time", "Not…
## $ StudentStatus                               <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningDataScience                         <chr> NA, NA, NA, NA, NA, NA, NA…
## $ CodeWriter                                  <chr> "Yes", NA, NA, "Yes", "Yes…
## $ CareerSwitcher                              <chr> NA, NA, NA, NA, NA, NA, NA…
## $ CurrentJobTitleSelect                       <chr> "DBA/Database Engineer", N…
## $ TitleFit                                    <chr> "Fine", NA, NA, "Poorly", …
## $ CurrentEmployerType                         <chr> "Employed by a company tha…
## $ MLToolNextYearSelect                        <chr> "SAS Base", "Python", "Ama…
## $ MLMethodNextYearSelect                      <chr> "Random Forests", "Random …
## $ LanguageRecommendationSelect                <chr> "F#", "Python", "R", "Pyth…
## $ PublicDatasetsSelect                        <chr> "Dataset aggregator/platfo…
## $ LearningPlatformSelect                      <chr> "College/University,Confer…
## $ LearningPlatformUsefulnessArxiv             <chr> NA, NA, "Very useful", NA,…
## $ LearningPlatformUsefulnessBlogs             <chr> NA, NA, NA, "Very useful",…
## $ LearningPlatformUsefulnessCollege           <chr> NA, NA, "Somewhat useful",…
## $ LearningPlatformUsefulnessCompany           <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformUsefulnessConferences       <chr> "Very useful", NA, NA, "Ve…
## $ LearningPlatformUsefulnessFriends           <chr> NA, NA, NA, "Very useful",…
## $ LearningPlatformUsefulnessKaggle            <chr> NA, "Somewhat useful", "So…
## $ LearningPlatformUsefulnessNewsletters       <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformUsefulnessCommunities       <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformUsefulnessDocumentation     <chr> NA, NA, NA, "Very useful",…
## $ LearningPlatformUsefulnessCourses           <chr> NA, NA, "Very useful", "Ve…
## $ LearningPlatformUsefulnessProjects          <chr> NA, NA, NA, "Very useful",…
## $ LearningPlatformUsefulnessPodcasts          <chr> "Very useful", NA, NA, NA,…
## $ LearningPlatformUsefulnessSO                <chr> NA, NA, NA, NA, NA, "Very …
## $ LearningPlatformUsefulnessTextbook          <chr> NA, NA, NA, NA, "Somewhat …
## $ LearningPlatformUsefulnessTradeBook         <chr> "Somewhat useful", NA, NA,…
## $ LearningPlatformUsefulnessTutoring          <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformUsefulnessYouTube           <chr> NA, NA, "Very useful", NA,…
## $ BlogsPodcastsNewslettersSelect              <chr> "Becoming a Data Scientist…
## $ LearningDataScienceTime                     <chr> NA, "1-2 years", "1-2 year…
## $ JobSkillImportanceBigData                   <chr> NA, NA, "Necessary", NA, N…
## $ JobSkillImportanceDegree                    <chr> NA, "Nice to have", NA, NA…
## $ JobSkillImportanceStats                     <chr> NA, "Unnecessary", NA, NA,…
## $ JobSkillImportanceEnterpriseTools           <chr> NA, NA, NA, NA, NA, NA, NA…
## $ JobSkillImportancePython                    <chr> NA, "Unnecessary", NA, NA,…
## $ JobSkillImportanceR                         <chr> NA, NA, "Necessary", NA, N…
## $ JobSkillImportanceSQL                       <chr> NA, "Necessary", NA, NA, N…
## $ JobSkillImportanceKaggleRanking             <chr> NA, NA, NA, NA, NA, NA, NA…
## $ JobSkillImportanceMOOC                      <chr> NA, NA, NA, NA, NA, NA, NA…
## $ JobSkillImportanceVisualizations            <chr> NA, NA, NA, NA, NA, NA, NA…
## $ JobSkillImportanceOtherSelect1              <chr> NA, NA, NA, NA, NA, NA, NA…
## $ JobSkillImportanceOtherSelect2              <chr> NA, NA, NA, NA, NA, NA, NA…
## $ JobSkillImportanceOtherSelect3              <chr> NA, NA, NA, NA, NA, NA, NA…
## $ CoursePlatformSelect                        <chr> NA, NA, "Coursera,edX", NA…
## $ HardwarePersonalProjectsSelect              <chr> NA, NA, "Basic laptop (Mac…
## $ TimeSpentStudying                           <chr> NA, "2 - 10 hours", "2 - 1…
## $ ProveKnowledgeSelect                        <chr> NA, "Master's degree", "Gi…
## $ DataScienceIdentitySelect                   <chr> "Yes", "Yes", "Yes", "Yes"…
## $ FormalEducation                             <chr> "Bachelor's degree", "Mast…
## $ MajorSelect                                 <chr> "Management information sy…
## $ Tenure                                      <chr> "More than 10 years", "Les…
## $ PastJobTitlesSelect                         <chr> "Predictive Modeler,Progra…
## $ FirstTrainingSelect                         <chr> "University courses", "Uni…
## $ LearningCategorySelftTaught                 <dbl> 0, 10, 20, 30, 60, 45, 40,…
## $ LearningCategoryOnlineCourses               <dbl> 0, 30, 50, 0, 5, 25, 0, 40…
## $ LearningCategoryWork                        <dbl> 100, 0, 0, 40, 5, 20, 0, 0…
## $ LearningCategoryUniversity                  <dbl> 0, 30, 30, 30, 30, 0, 50, …
## $ LearningCategoryKaggle                      <dbl> 0, 30, 0, 0, 0, 10, 10, 10…
## $ LearningCategoryOther                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ MLSkillsSelect                              <chr> "Computer Vision,Natural L…
## $ MLTechniquesSelect                          <chr> "Evolutionary Approaches,N…
## $ ParentsEducation                            <chr> "A doctoral degree", "A ba…
## $ EmployerIndustry                            <chr> "Internet-based", NA, NA, …
## $ EmployerSize                                <chr> "100 to 499 employees", NA…
## $ EmployerSizeChange                          <chr> "Increased slightly", NA, …
## $ EmployerMLTime                              <chr> "3-5 years", NA, NA, NA, "…
## $ EmployerSearchMethod                        <chr> "I visited the company's W…
## $ UniversityImportance                        <chr> "Not very important", NA, …
## $ JobFunctionSelect                           <chr> "Build prototypes to explo…
## $ WorkHardwareSelect                          <chr> "Gaming Laptop (Laptop + C…
## $ WorkDataTypeSelect                          <chr> "Text data,Relational data…
## $ WorkProductionFrequency                     <chr> "Rarely", NA, NA, "Always"…
## $ WorkDatasetSize                             <chr> "10GB", NA, NA, "1GB", "10…
## $ WorkAlgorithmsSelect                        <chr> "Neural Networks,Random Fo…
## $ WorkToolsSelect                             <chr> "Amazon Web services,Oracl…
## $ WorkToolsFrequencyAmazonML                  <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyAWS                       <chr> "Rarely", NA, NA, "Often",…
## $ WorkToolsFrequencyAngoss                    <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyC                         <chr> NA, NA, NA, NA, "Most of t…
## $ WorkToolsFrequencyCloudera                  <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyDataRobot                 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyFlume                     <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyGCP                       <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyHadoop                    <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyIBMCognos                 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyIBMSPSSModeler            <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyIBMSPSSStatistics         <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyIBMWatson                 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyImpala                    <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyJava                      <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyJulia                     <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyJupyter                   <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkToolsFrequencyKNIMECommercial           <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyKNIMEFree                 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyMathematica               <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyMATLAB                    <chr> NA, NA, NA, "Rarely", "Oft…
## $ WorkToolsFrequencyAzure                     <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyExcel                     <chr> NA, NA, NA, "Sometimes", N…
## $ WorkToolsFrequencyMicrosoftRServer          <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyMicrosoftSQL              <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyMinitab                   <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyNoSQL                     <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyOracle                    <chr> "Sometimes", NA, NA, NA, N…
## $ WorkToolsFrequencyOrange                    <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyPerl                      <chr> "Most of the time", NA, NA…
## $ WorkToolsFrequencyPython                    <chr> NA, NA, NA, "Rarely", "Som…
## $ WorkToolsFrequencyQlik                      <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyR                         <chr> NA, NA, NA, "Rarely", "Som…
## $ WorkToolsFrequencyRapidMinerCommercial      <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyRapidMinerFree            <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySalfrod                   <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySAPBusinessObjects        <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySASBase                   <chr> NA, NA, NA, "Sometimes", N…
## $ WorkToolsFrequencySASEnterprise             <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySASJMP                    <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencySpark                     <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySQL                       <chr> NA, NA, NA, "Often", NA, "…
## $ WorkToolsFrequencyStan                      <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyStatistica                <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyTableau                   <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyTensorFlow                <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkToolsFrequencyTIBCO                     <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyUnix                      <chr> NA, NA, NA, NA, NA, NA, "M…
## $ WorkToolsFrequencySelect1                   <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySelect2                   <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkFrequencySelect3                        <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsSelect                           <chr> "Association Rules,Collabo…
## $ `WorkMethodsFrequencyA/B`                   <chr> NA, NA, NA, "Sometimes", N…
## $ WorkMethodsFrequencyAssociationRules        <chr> "Rarely", NA, NA, NA, "Som…
## $ WorkMethodsFrequencyBayesian                <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyCNNs                    <chr> NA, NA, NA, NA, "Most of t…
## $ WorkMethodsFrequencyCollaborativeFiltering  <chr> "Often", NA, NA, NA, "Some…
## $ `WorkMethodsFrequencyCross-Validation`      <chr> NA, NA, NA, NA, NA, NA, "S…
## $ WorkMethodsFrequencyDataVisualization       <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyDecisionTrees           <chr> NA, NA, NA, "Often", "Some…
## $ WorkMethodsFrequencyEnsembleMethods         <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyEvolutionaryApproaches  <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkMethodsFrequencyGANs                    <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyGBM                     <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyHMMs                    <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyKNN                     <chr> NA, NA, NA, NA, "Most of t…
## $ WorkMethodsFrequencyLiftAnalysis            <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyLogisticRegression      <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyMLN                     <chr> NA, NA, NA, "Often", NA, N…
## $ WorkMethodsFrequencyNaiveBayes              <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyNLP                     <chr> NA, NA, NA, NA, NA, NA, "M…
## $ WorkMethodsFrequencyNeuralNetworks          <chr> "Sometimes", NA, NA, "Some…
## $ WorkMethodsFrequencyPCA                     <chr> "Often", NA, NA, NA, "Some…
## $ WorkMethodsFrequencyPrescriptiveModeling    <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyRandomForests           <chr> "Most of the time", NA, NA…
## $ WorkMethodsFrequencyRecommenderSystems      <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyRNNs                    <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkMethodsFrequencySegmentation            <chr> NA, NA, NA, NA, "Often", N…
## $ WorkMethodsFrequencySimulation              <chr> NA, NA, NA, "Often", NA, N…
## $ WorkMethodsFrequencySVMs                    <chr> NA, NA, NA, NA, "Most of t…
## $ WorkMethodsFrequencyTextAnalysis            <chr> NA, NA, NA, NA, NA, NA, "M…
## $ WorkMethodsFrequencyTimeSeriesAnalysis      <chr> NA, NA, NA, "Often", "Some…
## $ WorkMethodsFrequencySelect1                 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencySelect2                 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencySelect3                 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ TimeGatheringData                           <dbl> 0, NA, NA, 50, 30, 60, 30,…
## $ TimeModelBuilding                           <dbl> 100, NA, NA, 20, 20, 20, 1…
## $ TimeProduction                              <dbl> 0, NA, NA, 0, 15, 10, 30, …
## $ TimeVisualizing                             <dbl> 0, NA, NA, 10, 15, 10, 20,…
## $ TimeFindingInsights                         <dbl> 0, NA, NA, 20, 20, 0, 10, …
## $ TimeOtherSelect                             <dbl> 0, NA, NA, 0, 0, 0, 0, NA,…
## $ AlgorithmUnderstandingLevel                 <chr> "Enough to explain the alg…
## $ WorkChallengesSelect                        <chr> "Company politics / Lack o…
## $ WorkChallengeFrequencyPolitics              <chr> "Rarely", NA, NA, "Often",…
## $ WorkChallengeFrequencyUnusedResults         <chr> NA, NA, NA, "Often", "Some…
## $ WorkChallengeFrequencyUnusefulInstrumenting <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyDeployment            <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyDirtyData             <chr> NA, NA, NA, "Often", NA, "…
## $ WorkChallengeFrequencyExplaining            <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyPass                  <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkChallengeFrequencyIntegration           <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyTalent                <chr> NA, NA, NA, "Often", "Some…
## $ WorkChallengeFrequencyDataFunds             <chr> NA, NA, NA, "Often", "Some…
## $ WorkChallengeFrequencyDomainExpertise       <chr> NA, NA, NA, "Most of the t…
## $ WorkChallengeFrequencyML                    <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyTools                 <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyExpectations          <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyITCoordination        <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkChallengeFrequencyHiringFunds           <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyPrivacy               <chr> "Often", NA, NA, "Often", …
## $ WorkChallengeFrequencyScaling               <chr> "Most of the time", NA, NA…
## $ WorkChallengeFrequencyEnvironments          <chr> NA, NA, NA, "Often", "Some…
## $ WorkChallengeFrequencyClarity               <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyDataAccess            <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyOtherSelect           <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkDataVisualizations                      <chr> "26-50% of projects", NA, …
## $ WorkInternalVsExternalTools                 <chr> "Do not know", NA, NA, "En…
## $ WorkMLTeamSeatSelect                        <chr> "Standalone Team", NA, NA,…
## $ WorkDatasets                                <chr> NA, NA, NA, "Electricity d…
## $ WorkDatasetsChallenge                       <chr> NA, NA, NA, "Everything is…
## $ WorkDataStorage                             <chr> "Document-oriented (e.g. M…
## $ WorkDataSharing                             <chr> "Company Developed Platfor…
## $ WorkDataSourcing                            <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkCodeSharing                             <chr> "Mercurial,Subversion,Othe…
## $ RemoteWork                                  <chr> "Always", NA, NA, NA, "Rar…
## $ CompensationAmount                          <dbl> NA, NA, NA, 250000, NA, NA…
## $ CompensationCurrency                        <chr> NA, NA, NA, "USD", NA, NA,…
## $ SalaryChange                                <chr> "I am not currently employ…
## $ JobSatisfaction                             <chr> "5", NA, NA, "10 - Highly …
## $ JobSearchResource                           <chr> NA, NA, "Asking friends, f…
## $ JobHuntTime                                 <chr> NA, NA, "1-2", NA, NA, NA,…
## $ JobFactorLearning                           <chr> NA, NA, "Very Important", …
## $ JobFactorSalary                             <chr> NA, NA, "Very Important", …
## $ JobFactorOffice                             <chr> NA, NA, "Very Important", …
## $ JobFactorLanguages                          <chr> NA, NA, "Very Important", …
## $ JobFactorCommute                            <chr> NA, NA, "Very Important", …
## $ JobFactorManagement                         <chr> NA, NA, "Very Important", …
## $ JobFactorExperienceLevel                    <chr> NA, NA, "Very Important", …
## $ JobFactorDepartment                         <chr> NA, NA, "Very Important", …
## $ JobFactorTitle                              <chr> NA, NA, "Very Important", …
## $ JobFactorCompanyFunding                     <chr> NA, NA, "Very Important", …
## $ JobFactorImpact                             <chr> NA, NA, "Very Important", …
## $ JobFactorRemote                             <chr> NA, NA, "Very Important", …
## $ JobFactorIndustry                           <chr> NA, NA, "Very Important", …
## $ JobFactorLeaderReputation                   <chr> NA, "Somewhat important", …
## $ JobFactorDiversity                          <chr> NA, NA, "Very Important", …
## $ JobFactorPublishingOpportunity              <chr> NA, NA, "Very Important", …
# Keep only the columns whose names begin with one of the listed prefixes.
# Columns come out grouped in prefix order (all "Learning" matches first,
# then "Work", and so on), preserving their original order within a prefix.
multipleChoiceResponses_1.1 <- select(
  multipleChoiceResponses,
  starts_with(c(
    "Learning", "Work", "Age", "EmployerIndustry",
    "CurrentJob", "MLMethod", "Formal"
  ))
)

# Print a compact overview of the reduced table.
glimpse(multipleChoiceResponses_1.1)
## Rows: 16,716
## Columns: 155
## $ LearningDataScience                         <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformSelect                      <chr> "College/University,Confer…
## $ LearningPlatformUsefulnessArxiv             <chr> NA, NA, "Very useful", NA,…
## $ LearningPlatformUsefulnessBlogs             <chr> NA, NA, NA, "Very useful",…
## $ LearningPlatformUsefulnessCollege           <chr> NA, NA, "Somewhat useful",…
## $ LearningPlatformUsefulnessCompany           <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformUsefulnessConferences       <chr> "Very useful", NA, NA, "Ve…
## $ LearningPlatformUsefulnessFriends           <chr> NA, NA, NA, "Very useful",…
## $ LearningPlatformUsefulnessKaggle            <chr> NA, "Somewhat useful", "So…
## $ LearningPlatformUsefulnessNewsletters       <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformUsefulnessCommunities       <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformUsefulnessDocumentation     <chr> NA, NA, NA, "Very useful",…
## $ LearningPlatformUsefulnessCourses           <chr> NA, NA, "Very useful", "Ve…
## $ LearningPlatformUsefulnessProjects          <chr> NA, NA, NA, "Very useful",…
## $ LearningPlatformUsefulnessPodcasts          <chr> "Very useful", NA, NA, NA,…
## $ LearningPlatformUsefulnessSO                <chr> NA, NA, NA, NA, NA, "Very …
## $ LearningPlatformUsefulnessTextbook          <chr> NA, NA, NA, NA, "Somewhat …
## $ LearningPlatformUsefulnessTradeBook         <chr> "Somewhat useful", NA, NA,…
## $ LearningPlatformUsefulnessTutoring          <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformUsefulnessYouTube           <chr> NA, NA, "Very useful", NA,…
## $ LearningDataScienceTime                     <chr> NA, "1-2 years", "1-2 year…
## $ LearningCategorySelftTaught                 <dbl> 0, 10, 20, 30, 60, 45, 40,…
## $ LearningCategoryOnlineCourses               <dbl> 0, 30, 50, 0, 5, 25, 0, 40…
## $ LearningCategoryWork                        <dbl> 100, 0, 0, 40, 5, 20, 0, 0…
## $ LearningCategoryUniversity                  <dbl> 0, 30, 30, 30, 30, 0, 50, …
## $ LearningCategoryKaggle                      <dbl> 0, 30, 0, 0, 0, 10, 10, 10…
## $ LearningCategoryOther                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ WorkHardwareSelect                          <chr> "Gaming Laptop (Laptop + C…
## $ WorkDataTypeSelect                          <chr> "Text data,Relational data…
## $ WorkProductionFrequency                     <chr> "Rarely", NA, NA, "Always"…
## $ WorkDatasetSize                             <chr> "10GB", NA, NA, "1GB", "10…
## $ WorkAlgorithmsSelect                        <chr> "Neural Networks,Random Fo…
## $ WorkToolsSelect                             <chr> "Amazon Web services,Oracl…
## $ WorkToolsFrequencyAmazonML                  <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyAWS                       <chr> "Rarely", NA, NA, "Often",…
## $ WorkToolsFrequencyAngoss                    <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyC                         <chr> NA, NA, NA, NA, "Most of t…
## $ WorkToolsFrequencyCloudera                  <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyDataRobot                 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyFlume                     <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyGCP                       <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyHadoop                    <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyIBMCognos                 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyIBMSPSSModeler            <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyIBMSPSSStatistics         <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyIBMWatson                 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyImpala                    <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyJava                      <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyJulia                     <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyJupyter                   <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkToolsFrequencyKNIMECommercial           <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyKNIMEFree                 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyMathematica               <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyMATLAB                    <chr> NA, NA, NA, "Rarely", "Oft…
## $ WorkToolsFrequencyAzure                     <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyExcel                     <chr> NA, NA, NA, "Sometimes", N…
## $ WorkToolsFrequencyMicrosoftRServer          <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyMicrosoftSQL              <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyMinitab                   <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyNoSQL                     <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyOracle                    <chr> "Sometimes", NA, NA, NA, N…
## $ WorkToolsFrequencyOrange                    <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyPerl                      <chr> "Most of the time", NA, NA…
## $ WorkToolsFrequencyPython                    <chr> NA, NA, NA, "Rarely", "Som…
## $ WorkToolsFrequencyQlik                      <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyR                         <chr> NA, NA, NA, "Rarely", "Som…
## $ WorkToolsFrequencyRapidMinerCommercial      <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyRapidMinerFree            <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySalfrod                   <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySAPBusinessObjects        <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySASBase                   <chr> NA, NA, NA, "Sometimes", N…
## $ WorkToolsFrequencySASEnterprise             <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySASJMP                    <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencySpark                     <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySQL                       <chr> NA, NA, NA, "Often", NA, "…
## $ WorkToolsFrequencyStan                      <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyStatistica                <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyTableau                   <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyTensorFlow                <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkToolsFrequencyTIBCO                     <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyUnix                      <chr> NA, NA, NA, NA, NA, NA, "M…
## $ WorkToolsFrequencySelect1                   <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySelect2                   <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkFrequencySelect3                        <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsSelect                           <chr> "Association Rules,Collabo…
## $ `WorkMethodsFrequencyA/B`                   <chr> NA, NA, NA, "Sometimes", N…
## $ WorkMethodsFrequencyAssociationRules        <chr> "Rarely", NA, NA, NA, "Som…
## $ WorkMethodsFrequencyBayesian                <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyCNNs                    <chr> NA, NA, NA, NA, "Most of t…
## $ WorkMethodsFrequencyCollaborativeFiltering  <chr> "Often", NA, NA, NA, "Some…
## $ `WorkMethodsFrequencyCross-Validation`      <chr> NA, NA, NA, NA, NA, NA, "S…
## $ WorkMethodsFrequencyDataVisualization       <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyDecisionTrees           <chr> NA, NA, NA, "Often", "Some…
## $ WorkMethodsFrequencyEnsembleMethods         <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyEvolutionaryApproaches  <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkMethodsFrequencyGANs                    <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyGBM                     <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyHMMs                    <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyKNN                     <chr> NA, NA, NA, NA, "Most of t…
## $ WorkMethodsFrequencyLiftAnalysis            <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyLogisticRegression      <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyMLN                     <chr> NA, NA, NA, "Often", NA, N…
## $ WorkMethodsFrequencyNaiveBayes              <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyNLP                     <chr> NA, NA, NA, NA, NA, NA, "M…
## $ WorkMethodsFrequencyNeuralNetworks          <chr> "Sometimes", NA, NA, "Some…
## $ WorkMethodsFrequencyPCA                     <chr> "Often", NA, NA, NA, "Some…
## $ WorkMethodsFrequencyPrescriptiveModeling    <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyRandomForests           <chr> "Most of the time", NA, NA…
## $ WorkMethodsFrequencyRecommenderSystems      <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyRNNs                    <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkMethodsFrequencySegmentation            <chr> NA, NA, NA, NA, "Often", N…
## $ WorkMethodsFrequencySimulation              <chr> NA, NA, NA, "Often", NA, N…
## $ WorkMethodsFrequencySVMs                    <chr> NA, NA, NA, NA, "Most of t…
## $ WorkMethodsFrequencyTextAnalysis            <chr> NA, NA, NA, NA, NA, NA, "M…
## $ WorkMethodsFrequencyTimeSeriesAnalysis      <chr> NA, NA, NA, "Often", "Some…
## $ WorkMethodsFrequencySelect1                 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencySelect2                 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencySelect3                 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkChallengesSelect                        <chr> "Company politics / Lack o…
## $ WorkChallengeFrequencyPolitics              <chr> "Rarely", NA, NA, "Often",…
## $ WorkChallengeFrequencyUnusedResults         <chr> NA, NA, NA, "Often", "Some…
## $ WorkChallengeFrequencyUnusefulInstrumenting <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyDeployment            <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyDirtyData             <chr> NA, NA, NA, "Often", NA, "…
## $ WorkChallengeFrequencyExplaining            <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyPass                  <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkChallengeFrequencyIntegration           <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyTalent                <chr> NA, NA, NA, "Often", "Some…
## $ WorkChallengeFrequencyDataFunds             <chr> NA, NA, NA, "Often", "Some…
## $ WorkChallengeFrequencyDomainExpertise       <chr> NA, NA, NA, "Most of the t…
## $ WorkChallengeFrequencyML                    <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyTools                 <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyExpectations          <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyITCoordination        <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkChallengeFrequencyHiringFunds           <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyPrivacy               <chr> "Often", NA, NA, "Often", …
## $ WorkChallengeFrequencyScaling               <chr> "Most of the time", NA, NA…
## $ WorkChallengeFrequencyEnvironments          <chr> NA, NA, NA, "Often", "Some…
## $ WorkChallengeFrequencyClarity               <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyDataAccess            <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyOtherSelect           <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkDataVisualizations                      <chr> "26-50% of projects", NA, …
## $ WorkInternalVsExternalTools                 <chr> "Do not know", NA, NA, "En…
## $ WorkMLTeamSeatSelect                        <chr> "Standalone Team", NA, NA,…
## $ WorkDatasets                                <chr> NA, NA, NA, "Electricity d…
## $ WorkDatasetsChallenge                       <chr> NA, NA, NA, "Everything is…
## $ WorkDataStorage                             <chr> "Document-oriented (e.g. M…
## $ WorkDataSharing                             <chr> "Company Developed Platfor…
## $ WorkDataSourcing                            <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkCodeSharing                             <chr> "Mercurial,Subversion,Othe…
## $ Age                                         <dbl> NA, 30, 28, 56, 38, 46, 35…
## $ EmployerIndustry                            <chr> "Internet-based", NA, NA, …
## $ CurrentJobTitleSelect                       <chr> "DBA/Database Engineer", N…
## $ MLMethodNextYearSelect                      <chr> "Random Forests", "Random …
## $ FormalEducation                             <chr> "Bachelor's degree", "Mast…
# Turn every character column into a factor so that the level-based helpers
# used below (nlevels(), levels(), forcats) can work on the survey answers.
multipleChoiceResponses_fct <- mutate(
  multipleChoiceResponses_1.1,
  across(where(is.character), as.factor)
)

# Count the factor levels of every column (nlevels() is 0 for non-factors) and
# reshape the resulting one-row summary into a long table with one row per
# column: `variable` holds the column name, `num_levels` its level count.
# pivot_longer() is used in place of the superseded gather(); it yields the
# same two columns in the same row order.
# glimpse() prints a preview and returns its input, so the long table itself
# is what ends up in `number_of_levels`.
number_of_levels <- multipleChoiceResponses_fct %>%
  summarise(across(everything(), nlevels)) %>%
  pivot_longer(
    cols = everything(),
    names_to = "variable",
    values_to = "num_levels"
  ) %>%
  glimpse()
## Rows: 155
## Columns: 2
## $ variable   <chr> "LearningDataScience", "LearningPlatformSelect", "LearningP…
## $ num_levels <int> 3, 5362, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
# Show the five variables with the largest number of factor levels.
number_of_levels %>%
  arrange(desc(num_levels)) %>%
  head(n = 5)
## # A tibble: 5 × 2
##   variable               num_levels
##   <chr>                       <int>
## 1 WorkMethodsSelect            6190
## 2 LearningPlatformSelect       5362
## 3 WorkToolsSelect              5248
## 4 WorkChallengesSelect         4287
## 5 WorkDatasetsChallenge        2220
# Number of factor levels in CurrentJobTitleSelect.
multipleChoiceResponses_fct %>%
  pull(CurrentJobTitleSelect) %>%
  nlevels()
## [1] 16
# List the factor levels of CurrentJobTitleSelect in their stored order.
multipleChoiceResponses_fct %>%
  pull(CurrentJobTitleSelect) %>%
  levels()
##  [1] "Business Analyst"                    
##  [2] "Computer Scientist"                  
##  [3] "Data Analyst"                        
##  [4] "Data Miner"                          
##  [5] "Data Scientist"                      
##  [6] "DBA/Database Engineer"               
##  [7] "Engineer"                            
##  [8] "Machine Learning Engineer"           
##  [9] "Operations Research Practitioner"    
## [10] "Other"                               
## [11] "Predictive Modeler"                  
## [12] "Programmer"                          
## [13] "Researcher"                          
## [14] "Scientist/Researcher"                
## [15] "Software Developer/Software Engineer"
## [16] "Statistician"
# Tally respondents per employer industry. Missing industries are not removed
# here, so NA gets its own row in the result.
EmployerIndustry_2.1 <- multipleChoiceResponses_fct %>%
  count(EmployerIndustry, name = "count")

# Horizontal bar chart of the raw per-industry counts (default level order).
ggplot(EmployerIndustry_2.1, aes(x = EmployerIndustry, y = count)) +
  geom_col() +
  coord_flip()

# Same per-industry tally, but restricted to respondents who reported both an
# age and an employer industry.
EmployerIndustry_2.2 <- multipleChoiceResponses_fct %>%
  filter(!(is.na(Age) | is.na(EmployerIndustry))) %>%
  count(EmployerIndustry, name = "count")

# Horizontal bar chart of the filtered per-industry counts.
ggplot(EmployerIndustry_2.2, aes(x = EmployerIndustry, y = count)) +
  geom_col() +
  coord_flip()

# Order the industry levels by their count, then move "Other" to the first
# level position so it is not ranked among the named industries.
EmployerIndustry_2.3 <- EmployerIndustry_2.2 %>%
  mutate(
    EmployerIndustry = fct_relevel(
      fct_reorder(EmployerIndustry, count),
      "Other",
      after = 0
    )
  )

# Lollipop chart: a segment from 0 to each industry's count with a point at
# the end, drawn horizontally; the count axis is labelled "n".
ggplot(EmployerIndustry_2.3, aes(x = EmployerIndustry, y = count)) +
  geom_segment(aes(xend = EmployerIndustry, yend = 0)) +
  geom_point() +
  coord_flip() +
  labs(y = "n")

# Inspect the factor levels of WorkInternalVsExternalTools before reordering.
multipleChoiceResponses_fct %>%
  pull(WorkInternalVsExternalTools) %>%
  levels()
## [1] "Approximately half internal and half external"
## [2] "Do not know"                                  
## [3] "Entirely external"                            
## [4] "Entirely internal"                            
## [5] "More external than internal"                  
## [6] "More internal than external"
# Put the WorkInternalVsExternalTools levels into a meaningful order, running
# from fully internal to fully external, with "Do not know" last.
WorkInternalVsExternal_reorder <- multipleChoiceResponses_fct %>%
  mutate(
    WorkInternalVsExternalTools = fct_relevel(
      WorkInternalVsExternalTools,
      "Entirely internal",
      "More internal than external",
      "Approximately half internal and half external",
      "More external than internal",
      "Entirely external",
      "Do not know"
    )
  )
# Confirm the new level order of WorkInternalVsExternalTools.
WorkInternalVsExternal_reorder %>%
  pull(WorkInternalVsExternalTools) %>%
  levels()
## [1] "Entirely internal"                            
## [2] "More internal than external"                  
## [3] "Approximately half internal and half external"
## [4] "More external than internal"                  
## [5] "Entirely external"                            
## [6] "Do not know"
# Bar chart of response counts per WorkInternalVsExternalTools level, in the
# releveled order. Missing answers are not filtered out, so NA is counted too.
# The x-axis labels are rotated 45 degrees and right-aligned.
WorkInternalVsExternal_reorder %>%
  count(WorkInternalVsExternalTools, name = "count") %>%
  ggplot(aes(x = WorkInternalVsExternalTools, y = count)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))