library('tidyr') library('readr') library('dplyr') ## ## Attaching
package: ’dplyr’ ## The following objects are masked from
’package:stats’: ## ## filter, lag ## The following objects are masked
from ’package:base’: ## ## intersect, setdiff, setequal, union
library('ggplot2') library('forcats') library('tidyquant') ## Loading
required package: lubridate ## ## Attaching package: ’lubridate’ 1 ##
The following objects are masked from ’package:base’: ## ## date,
intersect, setdiff, union ## Loading required package:
PerformanceAnalytics ## Loading required package: xts ## Loading
required package: zoo ## ## Attaching package: ’zoo’ ## The following
objects are masked from ’package:base’:
Select column names starting with Learning, Working, Age,
EmployerIndustry, CurrentJob, MLMethod and Formal. Show the results
using glimpse(). ## Rows: 16716 Columns: 228 ## – Column specification
——————————————————– ## Delimiter: “,” ## chr (212): GenderSelect,
Country, EmploymentStatus, StudentStatus, LearningD… ## dbl (13): Age,
LearningCategorySelftTaught, LearningCategoryOnlineCourses, … ## lgl
(2): WorkToolsFrequencyAngoss, WorkToolsFrequencyKNIMECommercial ## ## i
Use `spec()` to retrieve the full column specification for this data. ##
i Specify the column types or set `show_col_types = FALSE` to quiet this
message. ## Rows: 16,716 ## Columns: 228 ## $ GenderSelect
“Non-binary, genderqueer, ~ ## $ Country NA,”United States”,
“Cana~ ## $ Age NA, 30, 28, 56, 38, 46, 35~ ## $ EmploymentStatus
”Employed full-time”, “Not~ ## $ StudentStatus NA, NA, NA,
NA, NA, NA, NA~ ## $ LearningDataScience NA, NA, NA, NA, NA, NA,
NA~ ## $ CodeWriter ”Yes”, NA, NA, “Yes”, “Yes~ ## $ CareerSwitcher
NA, NA, NA, NA, NA, NA, NA~ ## $ CurrentJobTitleSelect
”DBA/Database Engineer”, N~ ## $ TitleFit “Fine”, NA, NA,
“Poorly”, ~ ## $ CurrentEmployerType “Employed by a company tha~
## $ MLToolNextYearSelect ”SAS Base”, “Python”, “Ama~ ## $
MLMethodNextYearSelect ”Random Forests”, “Random ~ ## $
LanguageRecommendationSelect ”F#“,”Python”, “R”, “Pyth~ ## $
PublicDatasetsSelect ”Dataset aggregator/platfo~ ## $
LearningPlatformSelect “College/University,Confer~ ## $
LearningPlatformUsefulnessArxiv NA, NA,”Very useful”, NA,~ ## $
LearningPlatformUsefulnessBlogs NA, NA, NA, “Very useful”,~ ## $
LearningPlatformUsefulnessCollege NA, NA, “Somewhat useful”,~ ## $
LearningPlatformUsefulnessCompany NA, NA, NA, NA, NA, NA, NA~ ## $
LearningPlatformUsefulnessConferences “Very useful”, NA, NA, “Ve~
## $ LearningPlatformUsefulnessFriends NA, NA, NA,”Very useful”,~
## $ LearningPlatformUsefulnessKaggle NA, “Somewhat useful”, “So~
## $ LearningPlatformUsefulnessNewsletters NA, NA, NA, NA, NA, NA,
NA~ ## $ LearningPlatformUsefulnessCommunities NA, NA, NA, NA, NA,
NA, NA~ ## $ LearningPlatformUsefulnessDocumentation NA, NA,
NA,”Very useful”,~ ## $ LearningPlatformUsefulnessCourses NA, NA,
“Very useful”, “Ve~ ## $ LearningPlatformUsefulnessProjects NA,
NA, NA,”Very useful”,~ ## $ LearningPlatformUsefulnessPodcasts
“Very useful”, NA, NA, NA,~ ## $ LearningPlatformUsefulnessSO NA,
NA, NA, NA, NA, “Very ~ ## $ LearningPlatformUsefulnessTextbook
NA, NA, NA, NA,”Somewhat ~ ## $ LearningPlatformUsefulnessTradeBook
“Somewhat useful”, NA, NA,~ 3 ## $
LearningPlatformUsefulnessTutoring NA, NA, NA, NA, NA, NA, NA~ ##
$ LearningPlatformUsefulnessYouTube NA, NA, “Very useful”, NA,~ ##
$ BlogsPodcastsNewslettersSelect “Becoming a Data Scientist~ ## $
LearningDataScienceTime NA,”1-2 years”, “1-2 year~ ## $
JobSkillImportanceBigData NA, NA,”Necessary”, NA, N~ ## $
JobSkillImportanceDegree NA, “Nice to have”, NA, NA~ ## $
JobSkillImportanceStats NA, “Unnecessary”, NA, NA,~ ## $
JobSkillImportanceEnterpriseTools NA, NA, NA, NA, NA, NA, NA~ ## $
JobSkillImportancePython NA, “Unnecessary”, NA, NA,~ ## $
JobSkillImportanceR NA, NA, “Necessary”, NA, N~ ## $
JobSkillImportanceSQL NA, “Necessary”, NA, NA, N~ ## $
JobSkillImportanceKaggleRanking NA, NA, NA, NA, NA, NA, NA~ ## $
JobSkillImportanceMOOC NA, NA, NA, NA, NA, NA, NA~ ## $
JobSkillImportanceVisualizations NA, NA, NA, NA, NA, NA, NA~ ## $
JobSkillImportanceOtherSelect1 NA, NA, NA, NA, NA, NA, NA~ ## $
JobSkillImportanceOtherSelect2 NA, NA, NA, NA, NA, NA, NA~ ## $
JobSkillImportanceOtherSelect3 NA, NA, NA, NA, NA, NA, NA~ ## $
CoursePlatformSelect NA, NA, “Coursera,edX”, NA~ ## $
HardwarePersonalProjectsSelect NA, NA, “Basic laptop (Mac~ ## $
TimeSpentStudying NA,”2 - 10 hours”, “2 - 1~ ## $
ProveKnowledgeSelect NA,”Master’s degree”, “Gi~ ## $
DataScienceIdentitySelect ”Yes”, “Yes”, “Yes”, “Yes”~ ## $
FormalEducation “Bachelor’s degree”, “Mast~ ## $ MajorSelect
”Management information sy~ ## $ Tenure “More than 10 years”,
“Les~ ## $ PastJobTitlesSelect ”Predictive Modeler,Progra~ ## $
FirstTrainingSelect “University courses”, “Uni~ ## $
LearningCategorySelftTaught 0, 10, 20, 30, 60, 45, 40,~ ## $
LearningCategoryOnlineCourses 0, 30, 50, 0, 5, 25, 0, 40~ ## $
LearningCategoryWork 100, 0, 0, 40, 5, 20, 0, 0~ ## $
LearningCategoryUniversity 0, 30, 30, 30, 30, 0, 50, ~ ## $
LearningCategoryKaggle 0, 30, 0, 0, 0, 10, 10, 10~ ## $
LearningCategoryOther 0, 0, 0, 0, 0, 0, 0, 0, 0,~ ## $
MLSkillsSelect ”Computer Vision,Natural L~ ## $ MLTechniquesSelect
“Evolutionary Approaches,N~ ## $ ParentsEducation ”A doctoral
degree”, “A ba~ ## $ EmployerIndustry ”Internet-based”, NA, NA, ~
## $ EmployerSize “100 to 499 employees”, NA~ ## $
EmployerSizeChange “Increased slightly”, NA, ~ ## $ EmployerMLTime
“3-5 years”, NA, NA, NA, “~ ## $ EmployerSearchMethod ”I
visited the company’s W~ ## $ UniversityImportance “Not very
important”, NA, ~ ## $ JobFunctionSelect “Build prototypes to
explo~ ## $ WorkHardwareSelect ”Gaming Laptop (Laptop + C~ ## $
WorkDataTypeSelect “Text data,Relational data~ ## $
WorkProductionFrequency ”Rarely”, NA, NA, “Always”~ ## $
WorkDatasetSize “10GB”, NA, NA, “1GB”, “10~ ## $
WorkAlgorithmsSelect ”Neural Networks,Random Fo~ ## $
WorkToolsSelect “Amazon Web services,Oracl~ ## $
WorkToolsFrequencyAmazonML NA, NA, NA,”Rarely”, NA, ~ ## $
WorkToolsFrequencyAWS “Rarely”, NA, NA, “Often”,~ ## $
WorkToolsFrequencyAngoss NA, NA, NA, NA, NA, NA, NA~ ## $
WorkToolsFrequencyC NA, NA, NA, NA, “Most of t~ ## $
WorkToolsFrequencyCloudera NA, NA, NA,”Rarely”, NA, ~ 4 ## $
WorkToolsFrequencyDataRobot NA, NA, NA, NA, NA, NA, NA~ ## $
WorkToolsFrequencyFlume NA, NA, NA, NA, NA, NA, NA~ ## $
WorkToolsFrequencyGCP NA, NA, NA, NA, NA, NA, NA~ ## $
WorkToolsFrequencyHadoop NA, NA, NA, “Rarely”, NA, ~ ## $
WorkToolsFrequencyIBMCognos NA, NA, NA, NA, NA, NA, NA~ ## $
WorkToolsFrequencyIBMSPSSModeler NA, NA, NA, NA, NA, NA, NA~ ## $
WorkToolsFrequencyIBMSPSSStatistics NA, NA, NA, NA, NA, NA, NA~ ##
$ WorkToolsFrequencyIBMWatson NA, NA, NA, NA, NA, NA, NA~ ## $
WorkToolsFrequencyImpala NA, NA, NA, “Rarely”, NA, ~ ## $
WorkToolsFrequencyJava NA, NA, NA, “Rarely”, NA, ~ ## $
WorkToolsFrequencyJulia NA, NA, NA, NA, NA, NA, NA~ ## $
WorkToolsFrequencyJupyter NA, NA, NA, NA, “Sometimes~ ## $
WorkToolsFrequencyKNIMECommercial NA, NA, NA, NA, NA, NA, NA~ ## $
WorkToolsFrequencyKNIMEFree NA, NA, NA, NA, NA, NA, NA~ ## $
WorkToolsFrequencyMathematica NA, NA, NA,”Rarely”, NA, ~ ## $
WorkToolsFrequencyMATLAB NA, NA, NA, “Rarely”, “Oft~ ## $
WorkToolsFrequencyAzure NA, NA, NA, NA, NA, NA, NA~ ## $
WorkToolsFrequencyExcel NA, NA, NA,”Sometimes”, N~ ## $
WorkToolsFrequencyMicrosoftRServer NA, NA, NA, NA, NA, NA, NA~ ##
$ WorkToolsFrequencyMicrosoftSQL NA, NA, NA, “Rarely”, NA, ~ ## $
WorkToolsFrequencyMinitab NA, NA, NA, NA, NA, NA, NA~ ## $
WorkToolsFrequencyNoSQL NA, NA, NA, “Rarely”, NA, ~ ## $
WorkToolsFrequencyOracle “Sometimes”, NA, NA, NA, N~ ## $
WorkToolsFrequencyOrange NA, NA, NA, NA, NA, NA, NA~ ## $
WorkToolsFrequencyPerl “Most of the time”, NA, NA~ ## $
WorkToolsFrequencyPython NA, NA, NA, “Rarely”, “Som~ ## $
WorkToolsFrequencyQlik NA, NA, NA, NA, NA, NA, NA~ ## $
WorkToolsFrequencyR NA, NA, NA,”Rarely”, “Som~ ## $
WorkToolsFrequencyRapidMinerCommercial NA, NA, NA, NA, NA, NA, NA~
## $ WorkToolsFrequencyRapidMinerFree NA, NA, NA, NA, NA, NA, NA~
## $ WorkToolsFrequencySalfrod NA, NA, NA, NA, NA, NA, NA~ ## $
WorkToolsFrequencySAPBusinessObjects NA, NA, NA, NA, NA, NA, NA~
## $ WorkToolsFrequencySASBase NA, NA, NA,”Sometimes”, N~ ## $
WorkToolsFrequencySASEnterprise NA, NA, NA, NA, NA, NA, NA~ ## $
WorkToolsFrequencySASJMP NA, NA, NA, “Rarely”, NA, ~ ## $
WorkToolsFrequencySpark NA, NA, NA, NA, NA, NA, NA~ ## $
WorkToolsFrequencySQL NA, NA, NA, “Often”, NA, “~ ## $
WorkToolsFrequencyStan NA, NA, NA, NA, NA, NA, NA~ ## $
WorkToolsFrequencyStatistica NA, NA, NA, NA, NA, NA, NA~ ## $
WorkToolsFrequencyTableau NA, NA, NA,”Rarely”, NA, ~ ## $
WorkToolsFrequencyTensorFlow NA, NA, NA, NA, “Sometimes~ ## $
WorkToolsFrequencyTIBCO NA, NA, NA, NA, NA, NA, NA~ ## $
WorkToolsFrequencyUnix NA, NA, NA, NA, NA, NA,”M~ ## $
WorkToolsFrequencySelect1 NA, NA, NA, NA, NA, NA, NA~ ## $
WorkToolsFrequencySelect2 NA, NA, NA, NA, NA, NA, NA~ ## $
WorkFrequencySelect3 NA, NA, NA, NA, NA, NA, NA~ ## $
WorkMethodsSelect “Association Rules,Collabo~ ## $
`WorkMethodsFrequencyA/B` NA, NA, NA,”Sometimes”, N~ ## $
WorkMethodsFrequencyAssociationRules “Rarely”, NA, NA, NA, “Som~
## $ WorkMethodsFrequencyBayesian NA, NA, NA,”Sometimes”, “~ ## $
WorkMethodsFrequencyCNNs NA, NA, NA, NA,”Most of t~ ## $
WorkMethodsFrequencyCollaborativeFiltering “Often”, NA, NA, NA,
“Some~ ## $ `WorkMethodsFrequencyCross-Validation` NA, NA, NA, NA,
NA, NA,”S~ ## $ WorkMethodsFrequencyDataVisualization NA, NA, NA,
“Sometimes”, “~ 5 ## $ WorkMethodsFrequencyDecisionTrees NA, NA,
NA,”Often”, “Some~ ## $ WorkMethodsFrequencyEnsembleMethods NA,
NA, NA,”Sometimes”, “~ ## $ WorkMethodsFrequencyEvolutionaryApproaches
NA, NA, NA, NA,”Sometimes~ ## $ WorkMethodsFrequencyGANs NA,
NA, NA, NA, NA, NA, NA~ ## $ WorkMethodsFrequencyGBM NA, NA, NA,
NA, NA, NA, NA~ ## $ WorkMethodsFrequencyHMMs NA, NA, NA, NA, NA,
NA, NA~ ## $ WorkMethodsFrequencyKNN NA, NA, NA, NA, “Most of t~
## $ WorkMethodsFrequencyLiftAnalysis NA, NA, NA, NA, NA, NA, NA~
## $ WorkMethodsFrequencyLogisticRegression NA, NA,
NA,”Sometimes”, “~ ## $ WorkMethodsFrequencyMLN NA, NA,
NA,”Often”, NA, N~ ## $ WorkMethodsFrequencyNaiveBayes NA, NA, NA,
“Sometimes”, “~ ## $ WorkMethodsFrequencyNLP NA, NA, NA, NA, NA,
NA,”M~ ## $ WorkMethodsFrequencyNeuralNetworks “Sometimes”, NA,
NA, “Some~ ## $ WorkMethodsFrequencyPCA ”Often”, NA, NA, NA, “Some~
## $ WorkMethodsFrequencyPrescriptiveModeling NA, NA, NA, NA, NA,
NA, NA~ ## $ WorkMethodsFrequencyRandomForests ”Most of the time”,
NA, NA~ ## $ WorkMethodsFrequencyRecommenderSystems NA, NA, NA,
NA, NA, NA, NA~ ## $ WorkMethodsFrequencyRNNs NA, NA, NA, NA,
“Sometimes~ ## $ WorkMethodsFrequencySegmentation NA, NA, NA,
NA,”Often”, N~ ## $ WorkMethodsFrequencySimulation NA, NA, NA,
“Often”, NA, N~ ## $ WorkMethodsFrequencySVMs NA, NA, NA, NA,
“Most of t~ ## $ WorkMethodsFrequencyTextAnalysis NA, NA, NA, NA,
NA, NA,”M~ ## $ WorkMethodsFrequencyTimeSeriesAnalysis NA, NA, NA,
“Often”, “Some~ ## $ WorkMethodsFrequencySelect1 NA, NA, NA, NA,
NA, NA, NA~ ## $ WorkMethodsFrequencySelect2 NA, NA, NA, NA, NA,
NA, NA~ ## $ WorkMethodsFrequencySelect3 NA, NA, NA, NA, NA, NA,
NA~ ## $ TimeGatheringData 0, NA, NA, 50, 30, 60, 30,~ ## $
TimeModelBuilding 100, NA, NA, 20, 20, 20, 1~ ## $ TimeProduction
0, NA, NA, 0, 15, 10, 30, ~ ## $ TimeVisualizing 0, NA, NA,
10, 15, 10, 20,~ ## $ TimeFindingInsights 0, NA, NA, 20, 20, 0,
10, ~ ## $ TimeOtherSelect 0, NA, NA, 0, 0, 0, 0, NA,~ ## $
AlgorithmUnderstandingLevel ”Enough to explain the alg~ ## $
WorkChallengesSelect “Company politics / Lack o~ ## $
WorkChallengeFrequencyPolitics ”Rarely”, NA, NA, “Often”,~ ## $
WorkChallengeFrequencyUnusedResults NA, NA, NA, “Often”, “Some~ ##
$ WorkChallengeFrequencyUnusefulInstrumenting NA, NA, NA,”Often”,
NA, N~ ## $ WorkChallengeFrequencyDeployment NA, NA, NA, “Often”,
NA, N~ ## $ WorkChallengeFrequencyDirtyData NA, NA, NA, “Often”,
NA, “~ ## $ WorkChallengeFrequencyExplaining NA, NA, NA,”Often”,
NA, N~ ## $ WorkChallengeFrequencyPass NA, NA, NA, NA, NA, NA, NA~
## $ WorkChallengeFrequencyIntegration NA, NA, NA, “Often”, NA, N~
## $ WorkChallengeFrequencyTalent NA, NA, NA, “Often”, “Some~ ## $
WorkChallengeFrequencyDataFunds NA, NA, NA,”Often”, “Some~ ## $
WorkChallengeFrequencyDomainExpertise NA, NA, NA,”Most of the t~
## $ WorkChallengeFrequencyML NA, NA, NA, “Often”, NA, N~ ## $
WorkChallengeFrequencyTools NA, NA, NA, “Often”, NA, N~ ## $
WorkChallengeFrequencyExpectations NA, NA, NA, “Often”, NA, N~ ##
$ WorkChallengeFrequencyITCoordination NA, NA, NA, NA, “Sometimes~
## $ WorkChallengeFrequencyHiringFunds NA, NA, NA,”Often”, NA, N~
## $ WorkChallengeFrequencyPrivacy “Often”, NA, NA, “Often”, ~ ##
$ WorkChallengeFrequencyScaling “Most of the time”, NA, NA~ ## $
WorkChallengeFrequencyEnvironments NA, NA, NA, “Often”, “Some~ ##
$ WorkChallengeFrequencyClarity NA, NA, NA,”Often”, NA, N~ 6 ## $
WorkChallengeFrequencyDataAccess NA, NA, NA, “Often”, NA, N~ ## $
WorkChallengeFrequencyOtherSelect NA, NA, NA, NA, NA, NA, NA~ ## $
WorkDataVisualizations “26-50% of projects”, NA, ~ ## $
WorkInternalVsExternalTools “Do not know”, NA, NA, “En~ ## $
WorkMLTeamSeatSelect ”Standalone Team”, NA, NA,~ ## $ WorkDatasets
NA, NA, NA, “Electricity d~ ## $ WorkDatasetsChallenge NA,
NA, NA,”Everything is~ ## $ WorkDataStorage “Document-oriented
(e.g. M~ ## $ WorkDataSharing ”Company Developed Platfor~ ## $
WorkDataSourcing NA, NA, NA, NA, NA, NA, NA~ ## $ WorkCodeSharing
“Mercurial,Subversion,Othe~ ## $ RemoteWork ”Always”, NA, NA,
NA, “Rar~ ## $ CompensationAmount NA, NA, NA, 250000, NA, NA~ ## $
CompensationCurrency NA, NA, NA,”USD”, NA, NA,~ ## $ SalaryChange
“I am not currently employ~ ## $ JobSatisfaction ”5”, NA, NA,
“10 - Highly ~ ## $ JobSearchResource NA, NA,”Asking friends, f~
## $ JobHuntTime NA, NA, “1-2”, NA, NA, NA,~ ## $
JobFactorLearning NA, NA, “Very Important”, ~ ## $ JobFactorSalary
NA, NA, “Very Important”, ~ ## $ JobFactorOffice NA, NA,
“Very Important”, ~ ## $ JobFactorLanguages NA, NA, “Very
Important”, ~ ## $ JobFactorCommute NA, NA, “Very Important”, ~ ##
$ JobFactorManagement NA, NA, “Very Important”, ~ ## $
JobFactorExperienceLevel NA, NA, “Very Important”, ~ ## $
JobFactorDepartment NA, NA, “Very Important”, ~ ## $
JobFactorTitle NA, NA, “Very Important”, ~ ## $
JobFactorCompanyFunding NA, NA, “Very Important”, ~ ## $
JobFactorImpact NA, NA, “Very Important”, ~ ## $ JobFactorRemote
NA, NA, “Very Important”, ~ ## $ JobFactorIndustry NA, NA,
“Very Important”, ~ ## $ JobFactorLeaderReputation NA, “Somewhat
important”, ~ ## $ JobFactorDiversity NA, NA, “Very Important”, ~
## $ JobFactorPublishingOpportunity NA, NA, “Very Important”, ~ ##
Rows: 16,716 ## Columns: 155 ## $ LearningDataScience NA, NA, NA,
NA, NA, NA, NA~ ## $ LearningPlatformSelect
“College/University,Confer~ ## $ LearningPlatformUsefulnessArxiv
NA, NA,”Very useful”, NA,~ ## $ LearningPlatformUsefulnessBlogs
NA, NA, NA, “Very useful”,~ ## $ LearningPlatformUsefulnessCollege
NA, NA, “Somewhat useful”,~ ## $ LearningPlatformUsefulnessCompany
NA, NA, NA, NA, NA, NA, NA~ ## $ LearningPlatformUsefulnessConferences
“Very useful”, NA, NA, “Ve~ ## $ LearningPlatformUsefulnessFriends
NA, NA, NA,”Very useful”,~ ## $ LearningPlatformUsefulnessKaggle
NA, “Somewhat useful”, “So~ ## $
LearningPlatformUsefulnessNewsletters NA, NA, NA, NA, NA, NA, NA~
## $ LearningPlatformUsefulnessCommunities NA, NA, NA, NA, NA, NA,
NA~ ## $ LearningPlatformUsefulnessDocumentation NA, NA, NA,”Very
useful”,~ ## $ LearningPlatformUsefulnessCourses NA, NA, “Very
useful”, “Ve~ ## $ LearningPlatformUsefulnessProjects NA, NA,
NA,”Very useful”,~ ## $ LearningPlatformUsefulnessPodcasts “Very
useful”, NA, NA, NA,~ ## $ LearningPlatformUsefulnessSO NA, NA,
NA, NA, NA, “Very ~ ## $ LearningPlatformUsefulnessTextbook NA,
NA, NA, NA,”Somewhat ~ 7 ## $ LearningPlatformUsefulnessTradeBook
“Somewhat useful”, NA, NA,~ ## $ LearningPlatformUsefulnessTutoring
NA, NA, NA, NA, NA, NA, NA~ ## $ LearningPlatformUsefulnessYouTube
NA, NA, “Very useful”, NA,~ ## $ LearningDataScienceTime NA,
“1-2 years”, “1-2 year~ ## $ LearningCategorySelftTaught 0, 10,
20, 30, 60, 45, 40,~ ## $ LearningCategoryOnlineCourses 0, 30, 50,
0, 5, 25, 0, 40~ ## $ LearningCategoryWork 100, 0, 0, 40, 5, 20,
0, 0~ ## $ LearningCategoryUniversity