library(tidyr)
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(forcats)
library(tidyquant)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching core tidyquant packages ──────────────────────── tidyquant 1.0.9 ──
## ✔ PerformanceAnalytics 2.0.8 ✔ TTR 0.24.4
## ✔ quantmod 0.4.26 ✔ xts 0.14.1
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date() masks base::as.Date()
## ✖ zoo::as.Date.numeric() masks base::as.Date.numeric()
## ✖ dplyr::filter() masks stats::filter()
## ✖ xts::first() masks dplyr::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ xts::last() masks dplyr::last()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary() masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the multiple-choice survey responses.
# `guess_max = Inf` makes readr inspect every row when guessing column types.
# With the default (first 1000 rows) the recorded run reported parsing issues
# and typed two WorkToolsFrequency* columns as logical while their sibling
# columns are character -- presumably because their first non-missing values
# appear later in the file, so the text answers were turned into NA.
# NOTE(review): confirm with `problems(multipleChoiceResponses)` after re-running.
multipleChoiceResponses <- read_csv(
  "/cloud/project/multipleChoiceResponses.csv",
  guess_max = Inf
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 16716 Columns: 228
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (212): GenderSelect, Country, EmploymentStatus, StudentStatus, LearningD...
## dbl (13): Age, LearningCategorySelftTaught, LearningCategoryOnlineCourses, ...
## num (1): CompensationAmount
## lgl (2): WorkToolsFrequencyAngoss, WorkToolsFrequencyKNIMECommercial
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print a compact, one-line-per-column preview of the raw survey table.
multipleChoiceResponses %>%
  glimpse()
## Rows: 16,716
## Columns: 228
## $ GenderSelect <chr> "Non-binary, genderqueer, …
## $ Country <chr> NA, "United States", "Cana…
## $ Age <dbl> NA, 30, 28, 56, 38, 46, 35…
## $ EmploymentStatus <chr> "Employed full-time", "Not…
## $ StudentStatus <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningDataScience <chr> NA, NA, NA, NA, NA, NA, NA…
## $ CodeWriter <chr> "Yes", NA, NA, "Yes", "Yes…
## $ CareerSwitcher <chr> NA, NA, NA, NA, NA, NA, NA…
## $ CurrentJobTitleSelect <chr> "DBA/Database Engineer", N…
## $ TitleFit <chr> "Fine", NA, NA, "Poorly", …
## $ CurrentEmployerType <chr> "Employed by a company tha…
## $ MLToolNextYearSelect <chr> "SAS Base", "Python", "Ama…
## $ MLMethodNextYearSelect <chr> "Random Forests", "Random …
## $ LanguageRecommendationSelect <chr> "F#", "Python", "R", "Pyth…
## $ PublicDatasetsSelect <chr> "Dataset aggregator/platfo…
## $ LearningPlatformSelect <chr> "College/University,Confer…
## $ LearningPlatformUsefulnessArxiv <chr> NA, NA, "Very useful", NA,…
## $ LearningPlatformUsefulnessBlogs <chr> NA, NA, NA, "Very useful",…
## $ LearningPlatformUsefulnessCollege <chr> NA, NA, "Somewhat useful",…
## $ LearningPlatformUsefulnessCompany <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformUsefulnessConferences <chr> "Very useful", NA, NA, "Ve…
## $ LearningPlatformUsefulnessFriends <chr> NA, NA, NA, "Very useful",…
## $ LearningPlatformUsefulnessKaggle <chr> NA, "Somewhat useful", "So…
## $ LearningPlatformUsefulnessNewsletters <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformUsefulnessCommunities <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformUsefulnessDocumentation <chr> NA, NA, NA, "Very useful",…
## $ LearningPlatformUsefulnessCourses <chr> NA, NA, "Very useful", "Ve…
## $ LearningPlatformUsefulnessProjects <chr> NA, NA, NA, "Very useful",…
## $ LearningPlatformUsefulnessPodcasts <chr> "Very useful", NA, NA, NA,…
## $ LearningPlatformUsefulnessSO <chr> NA, NA, NA, NA, NA, "Very …
## $ LearningPlatformUsefulnessTextbook <chr> NA, NA, NA, NA, "Somewhat …
## $ LearningPlatformUsefulnessTradeBook <chr> "Somewhat useful", NA, NA,…
## $ LearningPlatformUsefulnessTutoring <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformUsefulnessYouTube <chr> NA, NA, "Very useful", NA,…
## $ BlogsPodcastsNewslettersSelect <chr> "Becoming a Data Scientist…
## $ LearningDataScienceTime <chr> NA, "1-2 years", "1-2 year…
## $ JobSkillImportanceBigData <chr> NA, NA, "Necessary", NA, N…
## $ JobSkillImportanceDegree <chr> NA, "Nice to have", NA, NA…
## $ JobSkillImportanceStats <chr> NA, "Unnecessary", NA, NA,…
## $ JobSkillImportanceEnterpriseTools <chr> NA, NA, NA, NA, NA, NA, NA…
## $ JobSkillImportancePython <chr> NA, "Unnecessary", NA, NA,…
## $ JobSkillImportanceR <chr> NA, NA, "Necessary", NA, N…
## $ JobSkillImportanceSQL <chr> NA, "Necessary", NA, NA, N…
## $ JobSkillImportanceKaggleRanking <chr> NA, NA, NA, NA, NA, NA, NA…
## $ JobSkillImportanceMOOC <chr> NA, NA, NA, NA, NA, NA, NA…
## $ JobSkillImportanceVisualizations <chr> NA, NA, NA, NA, NA, NA, NA…
## $ JobSkillImportanceOtherSelect1 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ JobSkillImportanceOtherSelect2 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ JobSkillImportanceOtherSelect3 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ CoursePlatformSelect <chr> NA, NA, "Coursera,edX", NA…
## $ HardwarePersonalProjectsSelect <chr> NA, NA, "Basic laptop (Mac…
## $ TimeSpentStudying <chr> NA, "2 - 10 hours", "2 - 1…
## $ ProveKnowledgeSelect <chr> NA, "Master's degree", "Gi…
## $ DataScienceIdentitySelect <chr> "Yes", "Yes", "Yes", "Yes"…
## $ FormalEducation <chr> "Bachelor's degree", "Mast…
## $ MajorSelect <chr> "Management information sy…
## $ Tenure <chr> "More than 10 years", "Les…
## $ PastJobTitlesSelect <chr> "Predictive Modeler,Progra…
## $ FirstTrainingSelect <chr> "University courses", "Uni…
## $ LearningCategorySelftTaught <dbl> 0, 10, 20, 30, 60, 45, 40,…
## $ LearningCategoryOnlineCourses <dbl> 0, 30, 50, 0, 5, 25, 0, 40…
## $ LearningCategoryWork <dbl> 100, 0, 0, 40, 5, 20, 0, 0…
## $ LearningCategoryUniversity <dbl> 0, 30, 30, 30, 30, 0, 50, …
## $ LearningCategoryKaggle <dbl> 0, 30, 0, 0, 0, 10, 10, 10…
## $ LearningCategoryOther <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ MLSkillsSelect <chr> "Computer Vision,Natural L…
## $ MLTechniquesSelect <chr> "Evolutionary Approaches,N…
## $ ParentsEducation <chr> "A doctoral degree", "A ba…
## $ EmployerIndustry <chr> "Internet-based", NA, NA, …
## $ EmployerSize <chr> "100 to 499 employees", NA…
## $ EmployerSizeChange <chr> "Increased slightly", NA, …
## $ EmployerMLTime <chr> "3-5 years", NA, NA, NA, "…
## $ EmployerSearchMethod <chr> "I visited the company's W…
## $ UniversityImportance <chr> "Not very important", NA, …
## $ JobFunctionSelect <chr> "Build prototypes to explo…
## $ WorkHardwareSelect <chr> "Gaming Laptop (Laptop + C…
## $ WorkDataTypeSelect <chr> "Text data,Relational data…
## $ WorkProductionFrequency <chr> "Rarely", NA, NA, "Always"…
## $ WorkDatasetSize <chr> "10GB", NA, NA, "1GB", "10…
## $ WorkAlgorithmsSelect <chr> "Neural Networks,Random Fo…
## $ WorkToolsSelect <chr> "Amazon Web services,Oracl…
## $ WorkToolsFrequencyAmazonML <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyAWS <chr> "Rarely", NA, NA, "Often",…
## $ WorkToolsFrequencyAngoss <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyC <chr> NA, NA, NA, NA, "Most of t…
## $ WorkToolsFrequencyCloudera <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyDataRobot <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyFlume <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyGCP <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyHadoop <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyIBMCognos <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyIBMSPSSModeler <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyIBMSPSSStatistics <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyIBMWatson <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyImpala <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyJava <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyJulia <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyJupyter <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkToolsFrequencyKNIMECommercial <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyKNIMEFree <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyMathematica <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyMATLAB <chr> NA, NA, NA, "Rarely", "Oft…
## $ WorkToolsFrequencyAzure <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyExcel <chr> NA, NA, NA, "Sometimes", N…
## $ WorkToolsFrequencyMicrosoftRServer <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyMicrosoftSQL <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyMinitab <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyNoSQL <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyOracle <chr> "Sometimes", NA, NA, NA, N…
## $ WorkToolsFrequencyOrange <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyPerl <chr> "Most of the time", NA, NA…
## $ WorkToolsFrequencyPython <chr> NA, NA, NA, "Rarely", "Som…
## $ WorkToolsFrequencyQlik <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyR <chr> NA, NA, NA, "Rarely", "Som…
## $ WorkToolsFrequencyRapidMinerCommercial <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyRapidMinerFree <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySalfrod <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySAPBusinessObjects <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySASBase <chr> NA, NA, NA, "Sometimes", N…
## $ WorkToolsFrequencySASEnterprise <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySASJMP <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencySpark <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySQL <chr> NA, NA, NA, "Often", NA, "…
## $ WorkToolsFrequencyStan <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyStatistica <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyTableau <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyTensorFlow <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkToolsFrequencyTIBCO <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyUnix <chr> NA, NA, NA, NA, NA, NA, "M…
## $ WorkToolsFrequencySelect1 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySelect2 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkFrequencySelect3 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsSelect <chr> "Association Rules,Collabo…
## $ `WorkMethodsFrequencyA/B` <chr> NA, NA, NA, "Sometimes", N…
## $ WorkMethodsFrequencyAssociationRules <chr> "Rarely", NA, NA, NA, "Som…
## $ WorkMethodsFrequencyBayesian <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyCNNs <chr> NA, NA, NA, NA, "Most of t…
## $ WorkMethodsFrequencyCollaborativeFiltering <chr> "Often", NA, NA, NA, "Some…
## $ `WorkMethodsFrequencyCross-Validation` <chr> NA, NA, NA, NA, NA, NA, "S…
## $ WorkMethodsFrequencyDataVisualization <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyDecisionTrees <chr> NA, NA, NA, "Often", "Some…
## $ WorkMethodsFrequencyEnsembleMethods <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyEvolutionaryApproaches <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkMethodsFrequencyGANs <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyGBM <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyHMMs <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyKNN <chr> NA, NA, NA, NA, "Most of t…
## $ WorkMethodsFrequencyLiftAnalysis <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyLogisticRegression <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyMLN <chr> NA, NA, NA, "Often", NA, N…
## $ WorkMethodsFrequencyNaiveBayes <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyNLP <chr> NA, NA, NA, NA, NA, NA, "M…
## $ WorkMethodsFrequencyNeuralNetworks <chr> "Sometimes", NA, NA, "Some…
## $ WorkMethodsFrequencyPCA <chr> "Often", NA, NA, NA, "Some…
## $ WorkMethodsFrequencyPrescriptiveModeling <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyRandomForests <chr> "Most of the time", NA, NA…
## $ WorkMethodsFrequencyRecommenderSystems <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyRNNs <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkMethodsFrequencySegmentation <chr> NA, NA, NA, NA, "Often", N…
## $ WorkMethodsFrequencySimulation <chr> NA, NA, NA, "Often", NA, N…
## $ WorkMethodsFrequencySVMs <chr> NA, NA, NA, NA, "Most of t…
## $ WorkMethodsFrequencyTextAnalysis <chr> NA, NA, NA, NA, NA, NA, "M…
## $ WorkMethodsFrequencyTimeSeriesAnalysis <chr> NA, NA, NA, "Often", "Some…
## $ WorkMethodsFrequencySelect1 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencySelect2 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencySelect3 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ TimeGatheringData <dbl> 0, NA, NA, 50, 30, 60, 30,…
## $ TimeModelBuilding <dbl> 100, NA, NA, 20, 20, 20, 1…
## $ TimeProduction <dbl> 0, NA, NA, 0, 15, 10, 30, …
## $ TimeVisualizing <dbl> 0, NA, NA, 10, 15, 10, 20,…
## $ TimeFindingInsights <dbl> 0, NA, NA, 20, 20, 0, 10, …
## $ TimeOtherSelect <dbl> 0, NA, NA, 0, 0, 0, 0, NA,…
## $ AlgorithmUnderstandingLevel <chr> "Enough to explain the alg…
## $ WorkChallengesSelect <chr> "Company politics / Lack o…
## $ WorkChallengeFrequencyPolitics <chr> "Rarely", NA, NA, "Often",…
## $ WorkChallengeFrequencyUnusedResults <chr> NA, NA, NA, "Often", "Some…
## $ WorkChallengeFrequencyUnusefulInstrumenting <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyDeployment <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyDirtyData <chr> NA, NA, NA, "Often", NA, "…
## $ WorkChallengeFrequencyExplaining <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyPass <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkChallengeFrequencyIntegration <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyTalent <chr> NA, NA, NA, "Often", "Some…
## $ WorkChallengeFrequencyDataFunds <chr> NA, NA, NA, "Often", "Some…
## $ WorkChallengeFrequencyDomainExpertise <chr> NA, NA, NA, "Most of the t…
## $ WorkChallengeFrequencyML <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyTools <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyExpectations <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyITCoordination <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkChallengeFrequencyHiringFunds <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyPrivacy <chr> "Often", NA, NA, "Often", …
## $ WorkChallengeFrequencyScaling <chr> "Most of the time", NA, NA…
## $ WorkChallengeFrequencyEnvironments <chr> NA, NA, NA, "Often", "Some…
## $ WorkChallengeFrequencyClarity <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyDataAccess <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyOtherSelect <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkDataVisualizations <chr> "26-50% of projects", NA, …
## $ WorkInternalVsExternalTools <chr> "Do not know", NA, NA, "En…
## $ WorkMLTeamSeatSelect <chr> "Standalone Team", NA, NA,…
## $ WorkDatasets <chr> NA, NA, NA, "Electricity d…
## $ WorkDatasetsChallenge <chr> NA, NA, NA, "Everything is…
## $ WorkDataStorage <chr> "Document-oriented (e.g. M…
## $ WorkDataSharing <chr> "Company Developed Platfor…
## $ WorkDataSourcing <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkCodeSharing <chr> "Mercurial,Subversion,Othe…
## $ RemoteWork <chr> "Always", NA, NA, NA, "Rar…
## $ CompensationAmount <dbl> NA, NA, NA, 250000, NA, NA…
## $ CompensationCurrency <chr> NA, NA, NA, "USD", NA, NA,…
## $ SalaryChange <chr> "I am not currently employ…
## $ JobSatisfaction <chr> "5", NA, NA, "10 - Highly …
## $ JobSearchResource <chr> NA, NA, "Asking friends, f…
## $ JobHuntTime <chr> NA, NA, "1-2", NA, NA, NA,…
## $ JobFactorLearning <chr> NA, NA, "Very Important", …
## $ JobFactorSalary <chr> NA, NA, "Very Important", …
## $ JobFactorOffice <chr> NA, NA, "Very Important", …
## $ JobFactorLanguages <chr> NA, NA, "Very Important", …
## $ JobFactorCommute <chr> NA, NA, "Very Important", …
## $ JobFactorManagement <chr> NA, NA, "Very Important", …
## $ JobFactorExperienceLevel <chr> NA, NA, "Very Important", …
## $ JobFactorDepartment <chr> NA, NA, "Very Important", …
## $ JobFactorTitle <chr> NA, NA, "Very Important", …
## $ JobFactorCompanyFunding <chr> NA, NA, "Very Important", …
## $ JobFactorImpact <chr> NA, NA, "Very Important", …
## $ JobFactorRemote <chr> NA, NA, "Very Important", …
## $ JobFactorIndustry <chr> NA, NA, "Very Important", …
## $ JobFactorLeaderReputation <chr> NA, "Somewhat important", …
## $ JobFactorDiversity <chr> NA, NA, "Very Important", …
## $ JobFactorPublishingOpportunity <chr> NA, NA, "Very Important", …
# Keep only the columns whose names begin with one of these prefixes.
# Prefixes are matched in the order given, so the Learning* columns come
# first and the Formal* column last.
multipleChoiceResponses_1.1 <- select(
  multipleChoiceResponses,
  starts_with(c(
    "Learning",
    "Work",
    "Age",
    "EmployerIndustry",
    "CurrentJob",
    "MLMethod",
    "Formal"
  ))
)
# Preview the reduced table.
glimpse(multipleChoiceResponses_1.1)
## Rows: 16,716
## Columns: 155
## $ LearningDataScience <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformSelect <chr> "College/University,Confer…
## $ LearningPlatformUsefulnessArxiv <chr> NA, NA, "Very useful", NA,…
## $ LearningPlatformUsefulnessBlogs <chr> NA, NA, NA, "Very useful",…
## $ LearningPlatformUsefulnessCollege <chr> NA, NA, "Somewhat useful",…
## $ LearningPlatformUsefulnessCompany <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformUsefulnessConferences <chr> "Very useful", NA, NA, "Ve…
## $ LearningPlatformUsefulnessFriends <chr> NA, NA, NA, "Very useful",…
## $ LearningPlatformUsefulnessKaggle <chr> NA, "Somewhat useful", "So…
## $ LearningPlatformUsefulnessNewsletters <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformUsefulnessCommunities <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformUsefulnessDocumentation <chr> NA, NA, NA, "Very useful",…
## $ LearningPlatformUsefulnessCourses <chr> NA, NA, "Very useful", "Ve…
## $ LearningPlatformUsefulnessProjects <chr> NA, NA, NA, "Very useful",…
## $ LearningPlatformUsefulnessPodcasts <chr> "Very useful", NA, NA, NA,…
## $ LearningPlatformUsefulnessSO <chr> NA, NA, NA, NA, NA, "Very …
## $ LearningPlatformUsefulnessTextbook <chr> NA, NA, NA, NA, "Somewhat …
## $ LearningPlatformUsefulnessTradeBook <chr> "Somewhat useful", NA, NA,…
## $ LearningPlatformUsefulnessTutoring <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformUsefulnessYouTube <chr> NA, NA, "Very useful", NA,…
## $ LearningDataScienceTime <chr> NA, "1-2 years", "1-2 year…
## $ LearningCategorySelftTaught <dbl> 0, 10, 20, 30, 60, 45, 40,…
## $ LearningCategoryOnlineCourses <dbl> 0, 30, 50, 0, 5, 25, 0, 40…
## $ LearningCategoryWork <dbl> 100, 0, 0, 40, 5, 20, 0, 0…
## $ LearningCategoryUniversity <dbl> 0, 30, 30, 30, 30, 0, 50, …
## $ LearningCategoryKaggle <dbl> 0, 30, 0, 0, 0, 10, 10, 10…
## $ LearningCategoryOther <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ WorkHardwareSelect <chr> "Gaming Laptop (Laptop + C…
## $ WorkDataTypeSelect <chr> "Text data,Relational data…
## $ WorkProductionFrequency <chr> "Rarely", NA, NA, "Always"…
## $ WorkDatasetSize <chr> "10GB", NA, NA, "1GB", "10…
## $ WorkAlgorithmsSelect <chr> "Neural Networks,Random Fo…
## $ WorkToolsSelect <chr> "Amazon Web services,Oracl…
## $ WorkToolsFrequencyAmazonML <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyAWS <chr> "Rarely", NA, NA, "Often",…
## $ WorkToolsFrequencyAngoss <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyC <chr> NA, NA, NA, NA, "Most of t…
## $ WorkToolsFrequencyCloudera <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyDataRobot <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyFlume <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyGCP <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyHadoop <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyIBMCognos <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyIBMSPSSModeler <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyIBMSPSSStatistics <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyIBMWatson <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyImpala <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyJava <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyJulia <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyJupyter <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkToolsFrequencyKNIMECommercial <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyKNIMEFree <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyMathematica <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyMATLAB <chr> NA, NA, NA, "Rarely", "Oft…
## $ WorkToolsFrequencyAzure <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyExcel <chr> NA, NA, NA, "Sometimes", N…
## $ WorkToolsFrequencyMicrosoftRServer <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyMicrosoftSQL <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyMinitab <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyNoSQL <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyOracle <chr> "Sometimes", NA, NA, NA, N…
## $ WorkToolsFrequencyOrange <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyPerl <chr> "Most of the time", NA, NA…
## $ WorkToolsFrequencyPython <chr> NA, NA, NA, "Rarely", "Som…
## $ WorkToolsFrequencyQlik <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyR <chr> NA, NA, NA, "Rarely", "Som…
## $ WorkToolsFrequencyRapidMinerCommercial <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyRapidMinerFree <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySalfrod <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySAPBusinessObjects <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySASBase <chr> NA, NA, NA, "Sometimes", N…
## $ WorkToolsFrequencySASEnterprise <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySASJMP <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencySpark <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySQL <chr> NA, NA, NA, "Often", NA, "…
## $ WorkToolsFrequencyStan <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyStatistica <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyTableau <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyTensorFlow <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkToolsFrequencyTIBCO <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyUnix <chr> NA, NA, NA, NA, NA, NA, "M…
## $ WorkToolsFrequencySelect1 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySelect2 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkFrequencySelect3 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsSelect <chr> "Association Rules,Collabo…
## $ `WorkMethodsFrequencyA/B` <chr> NA, NA, NA, "Sometimes", N…
## $ WorkMethodsFrequencyAssociationRules <chr> "Rarely", NA, NA, NA, "Som…
## $ WorkMethodsFrequencyBayesian <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyCNNs <chr> NA, NA, NA, NA, "Most of t…
## $ WorkMethodsFrequencyCollaborativeFiltering <chr> "Often", NA, NA, NA, "Some…
## $ `WorkMethodsFrequencyCross-Validation` <chr> NA, NA, NA, NA, NA, NA, "S…
## $ WorkMethodsFrequencyDataVisualization <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyDecisionTrees <chr> NA, NA, NA, "Often", "Some…
## $ WorkMethodsFrequencyEnsembleMethods <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyEvolutionaryApproaches <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkMethodsFrequencyGANs <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyGBM <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyHMMs <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyKNN <chr> NA, NA, NA, NA, "Most of t…
## $ WorkMethodsFrequencyLiftAnalysis <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyLogisticRegression <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyMLN <chr> NA, NA, NA, "Often", NA, N…
## $ WorkMethodsFrequencyNaiveBayes <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyNLP <chr> NA, NA, NA, NA, NA, NA, "M…
## $ WorkMethodsFrequencyNeuralNetworks <chr> "Sometimes", NA, NA, "Some…
## $ WorkMethodsFrequencyPCA <chr> "Often", NA, NA, NA, "Some…
## $ WorkMethodsFrequencyPrescriptiveModeling <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyRandomForests <chr> "Most of the time", NA, NA…
## $ WorkMethodsFrequencyRecommenderSystems <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyRNNs <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkMethodsFrequencySegmentation <chr> NA, NA, NA, NA, "Often", N…
## $ WorkMethodsFrequencySimulation <chr> NA, NA, NA, "Often", NA, N…
## $ WorkMethodsFrequencySVMs <chr> NA, NA, NA, NA, "Most of t…
## $ WorkMethodsFrequencyTextAnalysis <chr> NA, NA, NA, NA, NA, NA, "M…
## $ WorkMethodsFrequencyTimeSeriesAnalysis <chr> NA, NA, NA, "Often", "Some…
## $ WorkMethodsFrequencySelect1 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencySelect2 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencySelect3 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkChallengesSelect <chr> "Company politics / Lack o…
## $ WorkChallengeFrequencyPolitics <chr> "Rarely", NA, NA, "Often",…
## $ WorkChallengeFrequencyUnusedResults <chr> NA, NA, NA, "Often", "Some…
## $ WorkChallengeFrequencyUnusefulInstrumenting <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyDeployment <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyDirtyData <chr> NA, NA, NA, "Often", NA, "…
## $ WorkChallengeFrequencyExplaining <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyPass <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkChallengeFrequencyIntegration <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyTalent <chr> NA, NA, NA, "Often", "Some…
## $ WorkChallengeFrequencyDataFunds <chr> NA, NA, NA, "Often", "Some…
## $ WorkChallengeFrequencyDomainExpertise <chr> NA, NA, NA, "Most of the t…
## $ WorkChallengeFrequencyML <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyTools <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyExpectations <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyITCoordination <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkChallengeFrequencyHiringFunds <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyPrivacy <chr> "Often", NA, NA, "Often", …
## $ WorkChallengeFrequencyScaling <chr> "Most of the time", NA, NA…
## $ WorkChallengeFrequencyEnvironments <chr> NA, NA, NA, "Often", "Some…
## $ WorkChallengeFrequencyClarity <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyDataAccess <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyOtherSelect <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkDataVisualizations <chr> "26-50% of projects", NA, …
## $ WorkInternalVsExternalTools <chr> "Do not know", NA, NA, "En…
## $ WorkMLTeamSeatSelect <chr> "Standalone Team", NA, NA,…
## $ WorkDatasets <chr> NA, NA, NA, "Electricity d…
## $ WorkDatasetsChallenge <chr> NA, NA, NA, "Everything is…
## $ WorkDataStorage <chr> "Document-oriented (e.g. M…
## $ WorkDataSharing <chr> "Company Developed Platfor…
## $ WorkDataSourcing <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkCodeSharing <chr> "Mercurial,Subversion,Othe…
## $ Age <dbl> NA, 30, 28, 56, 38, 46, 35…
## $ EmployerIndustry <chr> "Internet-based", NA, NA, …
## $ CurrentJobTitleSelect <chr> "DBA/Database Engineer", N…
## $ MLMethodNextYearSelect <chr> "Random Forests", "Random …
## $ FormalEducation <chr> "Bachelor's degree", "Mast…
# Convert every character column to a factor; columns of any other type are
# left as they are.
multipleChoiceResponses_fct <- mutate(
  multipleChoiceResponses_1.1,
  across(where(is.character), as.factor)
)
# Count the factor levels of every column and reshape the one-row summary
# into a long table with one row per column:
#   variable   - column name
#   num_levels - number of factor levels (nlevels() gives 0 for non-factors)
# pivot_longer() replaces the superseded gather(); on this single-row input
# it produces the same rows in the same (column) order.
number_of_levels <- multipleChoiceResponses_fct %>%
  summarise(across(everything(), nlevels)) %>%
  pivot_longer(
    cols = everything(),
    names_to = "variable",
    values_to = "num_levels"
  ) %>%
  glimpse()
## Rows: 155
## Columns: 2
## $ variable <chr> "LearningDataScience", "LearningPlatformSelect", "LearningP…
## $ num_levels <int> 3, 5362, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
# Show the five columns with the most factor levels, largest first.
slice_head(
  arrange(number_of_levels, desc(num_levels)),
  n = 5
)
## # A tibble: 5 × 2
## variable num_levels
## <chr> <int>
## 1 WorkMethodsSelect 6190
## 2 LearningPlatformSelect 5362
## 3 WorkToolsSelect 5248
## 4 WorkChallengesSelect 4287
## 5 WorkDatasetsChallenge 2220
# Number of distinct job titles recorded in the factor.
multipleChoiceResponses_fct %>%
  pull(CurrentJobTitleSelect) %>%
  nlevels()
## [1] 16
# List the job-title labels in their current level order.
multipleChoiceResponses_fct %>%
  pull(CurrentJobTitleSelect) %>%
  levels()
## [1] "Business Analyst"
## [2] "Computer Scientist"
## [3] "Data Analyst"
## [4] "Data Miner"
## [5] "Data Scientist"
## [6] "DBA/Database Engineer"
## [7] "Engineer"
## [8] "Machine Learning Engineer"
## [9] "Operations Research Practitioner"
## [10] "Other"
## [11] "Predictive Modeler"
## [12] "Programmer"
## [13] "Researcher"
## [14] "Scientist/Researcher"
## [15] "Software Developer/Software Engineer"
## [16] "Statistician"
# Tally respondents per employer industry; rows with a missing industry are
# counted as their own group.
EmployerIndustry_2.1 <- count(
  multipleChoiceResponses_fct,
  EmployerIndustry,
  name = "count"
)
# Horizontal bar chart of the tallies, one bar per industry.
ggplot(EmployerIndustry_2.1, aes(x = EmployerIndustry, y = count)) +
  geom_col() +
  coord_flip()

# Same tally as before, but restricted to respondents who reported both an
# age and an employer industry.
EmployerIndustry_2.2 <- multipleChoiceResponses_fct %>%
  filter(!(is.na(Age) | is.na(EmployerIndustry))) %>%
  count(EmployerIndustry, name = "count")
# Horizontal bar chart of the filtered tallies.
ggplot(EmployerIndustry_2.2, aes(x = EmployerIndustry, y = count)) +
  geom_col() +
  coord_flip()

# Order the industries by their respondent count, then move "Other" to the
# first level so it is drawn at the start of the axis regardless of its size.
EmployerIndustry_2.3 <- mutate(
  EmployerIndustry_2.2,
  EmployerIndustry = fct_relevel(
    fct_reorder(EmployerIndustry, count),
    "Other",
    after = 0
  )
)
# Lollipop chart: a segment from zero to the count, capped with a point.
ggplot(EmployerIndustry_2.3, aes(x = EmployerIndustry, y = count)) +
  geom_segment(aes(xend = EmployerIndustry, yend = 0)) +
  geom_point() +
  coord_flip() +
  labs(y = "n")

# Show the level order of the internal-vs-external tools factor as created
# by as.factor().
levels(multipleChoiceResponses_fct[["WorkInternalVsExternalTools"]])
## [1] "Approximately half internal and half external"
## [2] "Do not know"
## [3] "Entirely external"
## [4] "Entirely internal"
## [5] "More external than internal"
## [6] "More internal than external"
# Put the levels on a scale from fully internal to fully external, with
# "Do not know" last.
WorkInternalVsExternal_reorder <- mutate(
  multipleChoiceResponses_fct,
  WorkInternalVsExternalTools = fct_relevel(
    WorkInternalVsExternalTools,
    "Entirely internal",
    "More internal than external",
    "Approximately half internal and half external",
    "More external than internal",
    "Entirely external",
    "Do not know"
  )
)
# Confirm the new level order.
levels(WorkInternalVsExternal_reorder[["WorkInternalVsExternalTools"]])
## [1] "Entirely internal"
## [2] "More internal than external"
## [3] "Approximately half internal and half external"
## [4] "More external than internal"
## [5] "Entirely external"
## [6] "Do not know"
# Bar chart of respondents per answer, drawn in the releveled order; the
# x-axis labels are rotated 45 degrees and right-aligned.
WorkInternalVsExternal_reorder %>%
  count(WorkInternalVsExternalTools, name = "count") %>%
  ggplot(aes(x = WorkInternalVsExternalTools, y = count)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
