library(‘tidyr’) library(‘readr’) library(‘dplyr’) ## ## Attaching package: ’dplyr’ ## The following objects are masked from ’package:stats’: ## ## filter, lag ## The following objects are masked from ’package:base’: ## ## intersect, setdiff, setequal, union library(‘ggplot2’) library(‘forcats’) library(‘tidyquant’) ## Loading required package: lubridate ## ## Attaching package: ’lubridate’ 1 ## The following objects are masked from ’package:base’: ## ## date, intersect, setdiff, union ## Loading required package: PerformanceAnalytics ## Loading required package: xts ## Loading required package: zoo ## ## Attaching package: ’zoo’ ## The following objects are masked from ’package:base’:

Select column names starting with Leaning, Working, Age, EmployerIndustry, CurrentJob, MLMethod and Formal. Show the results using glimpse(). ## Rows: 16716 Columns: 228 ## – Column specification ——————————————————– ## Delimiter: “,” ## chr (212): GenderSelect, Country, EmploymentStatus, StudentStatus, LearningD… ## dbl (13): Age, LearningCategorySelftTaught, LearningCategoryOnlineCourses, … ## lgl (2): WorkToolsFrequencyAngoss, WorkToolsFrequencyKNIMECommercial ## ## i Use ’spec()‘ to retrieve the full column specification for this data. ## i Specify the column types or set ’show_col_types = FALSE‘ to quiet this message. ## Rows: 16,716 ## Columns: 228 ## $ GenderSelect “Non-binary, genderqueer, ~ ## $ Country NA,”United States”, “Cana~ ## $ Age NA, 30, 28, 56, 38, 46, 35~ ## $ EmploymentStatus ”Employed full-time”, “Not~ ## $ StudentStatus NA, NA, NA, NA, NA, NA, NA~ ## $ LearningDataScience NA, NA, NA, NA, NA, NA, NA~ ## $ CodeWriter ”Yes”, NA, NA, “Yes”, “Yes~ ## $ CareerSwitcher NA, NA, NA, NA, NA, NA, NA~ ## $ CurrentJobTitleSelect ”DBA/Database Engineer”, N~ ## $ TitleFit “Fine”, NA, NA, “Poorly”, ~ ## $ CurrentEmployerType “Employed by a company tha~ ## $ MLToolNextYearSelect ”SAS Base”, “Python”, “Ama~ ## $ MLMethodNextYearSelect ”Random Forests”, “Random ~ ## $ LanguageRecommendationSelect ”F#“,”Python”, “R”, “Pyth~ ## $ PublicDatasetsSelect ”Dataset aggregator/platfo~ ## $ LearningPlatformSelect “College/University,Confer~ ## $ LearningPlatformUsefulnessArxiv NA, NA,”Very useful”, NA,~ ## $ LearningPlatformUsefulnessBlogs NA, NA, NA, “Very useful”,~ ## $ LearningPlatformUsefulnessCollege NA, NA, “Somewhat useful”,~ ## $ LearningPlatformUsefulnessCompany NA, NA, NA, NA, NA, NA, NA~ ## $ LearningPlatformUsefulnessConferences “Very useful”, NA, NA, “Ve~ ## $ LearningPlatformUsefulnessFriends NA, NA, NA,”Very useful”,~ ## $ LearningPlatformUsefulnessKaggle NA, “Somewhat useful”, “So~ ## $ LearningPlatformUsefulnessNewsletters NA, NA, NA, NA, NA, NA, NA~ ## $ 
LearningPlatformUsefulnessCommunities NA, NA, NA, NA, NA, NA, NA~ ## $ LearningPlatformUsefulnessDocumentation NA, NA, NA,”Very useful”,~ ## $ LearningPlatformUsefulnessCourses NA, NA, “Very useful”, “Ve~ ## $ LearningPlatformUsefulnessProjects NA, NA, NA,”Very useful”,~ ## $ LearningPlatformUsefulnessPodcasts “Very useful”, NA, NA, NA,~ ## $ LearningPlatformUsefulnessSO NA, NA, NA, NA, NA, “Very ~ ## $ LearningPlatformUsefulnessTextbook NA, NA, NA, NA,”Somewhat ~ ## $ LearningPlatformUsefulnessTradeBook “Somewhat useful”, NA, NA,~ 3 ## $ LearningPlatformUsefulnessTutoring NA, NA, NA, NA, NA, NA, NA~ ## $ LearningPlatformUsefulnessYouTube NA, NA, “Very useful”, NA,~ ## $ BlogsPodcastsNewslettersSelect “Becoming a Data Scientist~ ## $ LearningDataScienceTime NA,”1-2 years”, “1-2 year~ ## $ JobSkillImportanceBigData NA, NA,”Necessary”, NA, N~ ## $ JobSkillImportanceDegree NA, “Nice to have”, NA, NA~ ## $ JobSkillImportanceStats NA, “Unnecessary”, NA, NA,~ ## $ JobSkillImportanceEnterpriseTools NA, NA, NA, NA, NA, NA, NA~ ## $ JobSkillImportancePython NA, “Unnecessary”, NA, NA,~ ## $ JobSkillImportanceR NA, NA, “Necessary”, NA, N~ ## $ JobSkillImportanceSQL NA, “Necessary”, NA, NA, N~ ## $ JobSkillImportanceKaggleRanking NA, NA, NA, NA, NA, NA, NA~ ## $ JobSkillImportanceMOOC NA, NA, NA, NA, NA, NA, NA~ ## $ JobSkillImportanceVisualizations NA, NA, NA, NA, NA, NA, NA~ ## $ JobSkillImportanceOtherSelect1 NA, NA, NA, NA, NA, NA, NA~ ## $ JobSkillImportanceOtherSelect2 NA, NA, NA, NA, NA, NA, NA~ ## $ JobSkillImportanceOtherSelect3 NA, NA, NA, NA, NA, NA, NA~ ## $ CoursePlatformSelect NA, NA, “Coursera,edX”, NA~ ## $ HardwarePersonalProjectsSelect NA, NA, “Basic laptop (Mac~ ## $ TimeSpentStudying NA,”2 - 10 hours”, “2 - 1~ ## $ ProveKnowledgeSelect NA,”Master’s degree”, “Gi~ ## $ DataScienceIdentitySelect ”Yes”, “Yes”, “Yes”, “Yes”~ ## $ FormalEducation “Bachelor’s degree”, “Mast~ ## $ MajorSelect ”Management information sy~ ## $ Tenure “More than 10 years”, “Les~ ## $ 
PastJobTitlesSelect ”Predictive Modeler,Progra~ ## $ FirstTrainingSelect “University courses”, “Uni~ ## $ LearningCategorySelftTaught 0, 10, 20, 30, 60, 45, 40,~ ## $ LearningCategoryOnlineCourses 0, 30, 50, 0, 5, 25, 0, 40~ ## $ LearningCategoryWork 100, 0, 0, 40, 5, 20, 0, 0~ ## $ LearningCategoryUniversity 0, 30, 30, 30, 30, 0, 50, ~ ## $ LearningCategoryKaggle 0, 30, 0, 0, 0, 10, 10, 10~ ## $ LearningCategoryOther 0, 0, 0, 0, 0, 0, 0, 0, 0,~ ## $ MLSkillsSelect ”Computer Vision,Natural L~ ## $ MLTechniquesSelect “Evolutionary Approaches,N~ ## $ ParentsEducation ”A doctoral degree”, “A ba~ ## $ EmployerIndustry ”Internet-based”, NA, NA, ~ ## $ EmployerSize “100 to 499 employees”, NA~ ## $ EmployerSizeChange “Increased slightly”, NA, ~ ## $ EmployerMLTime “3-5 years”, NA, NA, NA, “~ ## $ EmployerSearchMethod ”I visited the company’s W~ ## $ UniversityImportance “Not very important”, NA, ~ ## $ JobFunctionSelect “Build prototypes to explo~ ## $ WorkHardwareSelect ”Gaming Laptop (Laptop + C~ ## $ WorkDataTypeSelect “Text data,Relational data~ ## $ WorkProductionFrequency ”Rarely”, NA, NA, “Always”~ ## $ WorkDatasetSize “10GB”, NA, NA, “1GB”, “10~ ## $ WorkAlgorithmsSelect ”Neural Networks,Random Fo~ ## $ WorkToolsSelect “Amazon Web services,Oracl~ ## $ WorkToolsFrequencyAmazonML NA, NA, NA,”Rarely”, NA, ~ ## $ WorkToolsFrequencyAWS “Rarely”, NA, NA, “Often”,~ ## $ WorkToolsFrequencyAngoss NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyC NA, NA, NA, NA, “Most of t~ ## $ WorkToolsFrequencyCloudera NA, NA, NA,”Rarely”, NA, ~ 4 ## $ WorkToolsFrequencyDataRobot NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyFlume NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyGCP NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyHadoop NA, NA, NA, “Rarely”, NA, ~ ## $ WorkToolsFrequencyIBMCognos NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyIBMSPSSModeler NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyIBMSPSSStatistics NA, NA, NA, NA, NA, NA, NA~ ## $ 
WorkToolsFrequencyIBMWatson NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyImpala NA, NA, NA, “Rarely”, NA, ~ ## $ WorkToolsFrequencyJava NA, NA, NA, “Rarely”, NA, ~ ## $ WorkToolsFrequencyJulia NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyJupyter NA, NA, NA, NA, “Sometimes~ ## $ WorkToolsFrequencyKNIMECommercial NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyKNIMEFree NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyMathematica NA, NA, NA,”Rarely”, NA, ~ ## $ WorkToolsFrequencyMATLAB NA, NA, NA, “Rarely”, “Oft~ ## $ WorkToolsFrequencyAzure NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyExcel NA, NA, NA,”Sometimes”, N~ ## $ WorkToolsFrequencyMicrosoftRServer NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyMicrosoftSQL NA, NA, NA, “Rarely”, NA, ~ ## $ WorkToolsFrequencyMinitab NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyNoSQL NA, NA, NA, “Rarely”, NA, ~ ## $ WorkToolsFrequencyOracle “Sometimes”, NA, NA, NA, N~ ## $ WorkToolsFrequencyOrange NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyPerl “Most of the time”, NA, NA~ ## $ WorkToolsFrequencyPython NA, NA, NA, “Rarely”, “Som~ ## $ WorkToolsFrequencyQlik NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyR NA, NA, NA,”Rarely”, “Som~ ## $ WorkToolsFrequencyRapidMinerCommercial NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyRapidMinerFree NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencySalfrod NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencySAPBusinessObjects NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencySASBase NA, NA, NA,”Sometimes”, N~ ## $ WorkToolsFrequencySASEnterprise NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencySASJMP NA, NA, NA, “Rarely”, NA, ~ ## $ WorkToolsFrequencySpark NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencySQL NA, NA, NA, “Often”, NA, “~ ## $ WorkToolsFrequencyStan NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyStatistica NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyTableau NA, NA, NA,”Rarely”, NA, ~ ## $ 
WorkToolsFrequencyTensorFlow NA, NA, NA, NA, “Sometimes~ ## $ WorkToolsFrequencyTIBCO NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyUnix NA, NA, NA, NA, NA, NA,”M~ ## $ WorkToolsFrequencySelect1 NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencySelect2 NA, NA, NA, NA, NA, NA, NA~ ## $ WorkFrequencySelect3 NA, NA, NA, NA, NA, NA, NA~ ## $ WorkMethodsSelect “Association Rules,Collabo~ ## $ ’WorkMethodsFrequencyA/B‘ NA, NA, NA,”Sometimes”, N~ ## $ WorkMethodsFrequencyAssociationRules “Rarely”, NA, NA, NA, “Som~ ## $ WorkMethodsFrequencyBayesian NA, NA, NA,”Sometimes”, “~ ## $ WorkMethodsFrequencyCNNs NA, NA, NA, NA,”Most of t~ ## $ WorkMethodsFrequencyCollaborativeFiltering “Often”, NA, NA, NA, “Some~ ## $ ’WorkMethodsFrequencyCross-Validation‘ NA, NA, NA, NA, NA, NA,”S~ ## $ WorkMethodsFrequencyDataVisualization NA, NA, NA, “Sometimes”, “~ 5 ## $ WorkMethodsFrequencyDecisionTrees NA, NA, NA,”Often”, “Some~ ## $ WorkMethodsFrequencyEnsembleMethods NA, NA, NA,”Sometimes”, “~ ## $ WorkMethodsFrequencyEvolutionaryApproaches NA, NA, NA, NA,”Sometimes~ ## $ WorkMethodsFrequencyGANs NA, NA, NA, NA, NA, NA, NA~ ## $ WorkMethodsFrequencyGBM NA, NA, NA, NA, NA, NA, NA~ ## $ WorkMethodsFrequencyHMMs NA, NA, NA, NA, NA, NA, NA~ ## $ WorkMethodsFrequencyKNN NA, NA, NA, NA, “Most of t~ ## $ WorkMethodsFrequencyLiftAnalysis NA, NA, NA, NA, NA, NA, NA~ ## $ WorkMethodsFrequencyLogisticRegression NA, NA, NA,”Sometimes”, “~ ## $ WorkMethodsFrequencyMLN NA, NA, NA,”Often”, NA, N~ ## $ WorkMethodsFrequencyNaiveBayes NA, NA, NA, “Sometimes”, “~ ## $ WorkMethodsFrequencyNLP NA, NA, NA, NA, NA, NA,”M~ ## $ WorkMethodsFrequencyNeuralNetworks “Sometimes”, NA, NA, “Some~ ## $ WorkMethodsFrequencyPCA ”Often”, NA, NA, NA, “Some~ ## $ WorkMethodsFrequencyPrescriptiveModeling NA, NA, NA, NA, NA, NA, NA~ ## $ WorkMethodsFrequencyRandomForests ”Most of the time”, NA, NA~ ## $ WorkMethodsFrequencyRecommenderSystems NA, NA, NA, NA, NA, NA, NA~ ## $ WorkMethodsFrequencyRNNs NA, NA, NA, NA, 
“Sometimes~ ## $ WorkMethodsFrequencySegmentation NA, NA, NA, NA,”Often”, N~ ## $ WorkMethodsFrequencySimulation NA, NA, NA, “Often”, NA, N~ ## $ WorkMethodsFrequencySVMs NA, NA, NA, NA, “Most of t~ ## $ WorkMethodsFrequencyTextAnalysis NA, NA, NA, NA, NA, NA,”M~ ## $ WorkMethodsFrequencyTimeSeriesAnalysis NA, NA, NA, “Often”, “Some~ ## $ WorkMethodsFrequencySelect1 NA, NA, NA, NA, NA, NA, NA~ ## $ WorkMethodsFrequencySelect2 NA, NA, NA, NA, NA, NA, NA~ ## $ WorkMethodsFrequencySelect3 NA, NA, NA, NA, NA, NA, NA~ ## $ TimeGatheringData 0, NA, NA, 50, 30, 60, 30,~ ## $ TimeModelBuilding 100, NA, NA, 20, 20, 20, 1~ ## $ TimeProduction 0, NA, NA, 0, 15, 10, 30, ~ ## $ TimeVisualizing 0, NA, NA, 10, 15, 10, 20,~ ## $ TimeFindingInsights 0, NA, NA, 20, 20, 0, 10, ~ ## $ TimeOtherSelect 0, NA, NA, 0, 0, 0, 0, NA,~ ## $ AlgorithmUnderstandingLevel ”Enough to explain the alg~ ## $ WorkChallengesSelect “Company politics / Lack o~ ## $ WorkChallengeFrequencyPolitics ”Rarely”, NA, NA, “Often”,~ ## $ WorkChallengeFrequencyUnusedResults NA, NA, NA, “Often”, “Some~ ## $ WorkChallengeFrequencyUnusefulInstrumenting NA, NA, NA,”Often”, NA, N~ ## $ WorkChallengeFrequencyDeployment NA, NA, NA, “Often”, NA, N~ ## $ WorkChallengeFrequencyDirtyData NA, NA, NA, “Often”, NA, “~ ## $ WorkChallengeFrequencyExplaining NA, NA, NA,”Often”, NA, N~ ## $ WorkChallengeFrequencyPass NA, NA, NA, NA, NA, NA, NA~ ## $ WorkChallengeFrequencyIntegration NA, NA, NA, “Often”, NA, N~ ## $ WorkChallengeFrequencyTalent NA, NA, NA, “Often”, “Some~ ## $ WorkChallengeFrequencyDataFunds NA, NA, NA,”Often”, “Some~ ## $ WorkChallengeFrequencyDomainExpertise NA, NA, NA,”Most of the t~ ## $ WorkChallengeFrequencyML NA, NA, NA, “Often”, NA, N~ ## $ WorkChallengeFrequencyTools NA, NA, NA, “Often”, NA, N~ ## $ WorkChallengeFrequencyExpectations NA, NA, NA, “Often”, NA, N~ ## $ WorkChallengeFrequencyITCoordination NA, NA, NA, NA, “Sometimes~ ## $ WorkChallengeFrequencyHiringFunds NA, NA, NA,”Often”, NA, N~ ## $ 
WorkChallengeFrequencyPrivacy “Often”, NA, NA, “Often”, ~ ## $ WorkChallengeFrequencyScaling “Most of the time”, NA, NA~ ## $ WorkChallengeFrequencyEnvironments NA, NA, NA, “Often”, “Some~ ## $ WorkChallengeFrequencyClarity NA, NA, NA,”Often”, NA, N~ 6 ## $ WorkChallengeFrequencyDataAccess NA, NA, NA, “Often”, NA, N~ ## $ WorkChallengeFrequencyOtherSelect NA, NA, NA, NA, NA, NA, NA~ ## $ WorkDataVisualizations “26-50% of projects”, NA, ~ ## $ WorkInternalVsExternalTools “Do not know”, NA, NA, “En~ ## $ WorkMLTeamSeatSelect ”Standalone Team”, NA, NA,~ ## $ WorkDatasets NA, NA, NA, “Electricity d~ ## $ WorkDatasetsChallenge NA, NA, NA,”Everything is~ ## $ WorkDataStorage “Document-oriented (e.g. M~ ## $ WorkDataSharing ”Company Developed Platfor~ ## $ WorkDataSourcing NA, NA, NA, NA, NA, NA, NA~ ## $ WorkCodeSharing “Mercurial,Subversion,Othe~ ## $ RemoteWork ”Always”, NA, NA, NA, “Rar~ ## $ CompensationAmount NA, NA, NA, 250000, NA, NA~ ## $ CompensationCurrency NA, NA, NA,”USD”, NA, NA,~ ## $ SalaryChange “I am not currently employ~ ## $ JobSatisfaction ”5”, NA, NA, “10 - Highly ~ ## $ JobSearchResource NA, NA,”Asking friends, f~ ## $ JobHuntTime NA, NA, “1-2”, NA, NA, NA,~ ## $ JobFactorLearning NA, NA, “Very Important”, ~ ## $ JobFactorSalary NA, NA, “Very Important”, ~ ## $ JobFactorOffice NA, NA, “Very Important”, ~ ## $ JobFactorLanguages NA, NA, “Very Important”, ~ ## $ JobFactorCommute NA, NA, “Very Important”, ~ ## $ JobFactorManagement NA, NA, “Very Important”, ~ ## $ JobFactorExperienceLevel NA, NA, “Very Important”, ~ ## $ JobFactorDepartment NA, NA, “Very Important”, ~ ## $ JobFactorTitle NA, NA, “Very Important”, ~ ## $ JobFactorCompanyFunding NA, NA, “Very Important”, ~ ## $ JobFactorImpact NA, NA, “Very Important”, ~ ## $ JobFactorRemote NA, NA, “Very Important”, ~ ## $ JobFactorIndustry NA, NA, “Very Important”, ~ ## $ JobFactorLeaderReputation NA, “Somewhat important”, ~ ## $ JobFactorDiversity NA, NA, “Very Important”, ~ ## $ 
JobFactorPublishingOpportunity NA, NA, “Very Important”, ~ ## Rows: 16,716 ## Columns: 155 ## $ LearningDataScience NA, NA, NA, NA, NA, NA, NA~ ## $ LearningPlatformSelect “College/University,Confer~ ## $ LearningPlatformUsefulnessArxiv NA, NA,”Very useful”, NA,~ ## $ LearningPlatformUsefulnessBlogs NA, NA, NA, “Very useful”,~ ## $ LearningPlatformUsefulnessCollege NA, NA, “Somewhat useful”,~ ## $ LearningPlatformUsefulnessCompany NA, NA, NA, NA, NA, NA, NA~ ## $ LearningPlatformUsefulnessConferences “Very useful”, NA, NA, “Ve~ ## $ LearningPlatformUsefulnessFriends NA, NA, NA,”Very useful”,~ ## $ LearningPlatformUsefulnessKaggle NA, “Somewhat useful”, “So~ ## $ LearningPlatformUsefulnessNewsletters NA, NA, NA, NA, NA, NA, NA~ ## $ LearningPlatformUsefulnessCommunities NA, NA, NA, NA, NA, NA, NA~ ## $ LearningPlatformUsefulnessDocumentation NA, NA, NA,”Very useful”,~ ## $ LearningPlatformUsefulnessCourses NA, NA, “Very useful”, “Ve~ ## $ LearningPlatformUsefulnessProjects NA, NA, NA,”Very useful”,~ ## $ LearningPlatformUsefulnessPodcasts “Very useful”, NA, NA, NA,~ ## $ LearningPlatformUsefulnessSO NA, NA, NA, NA, NA, “Very ~ ## $ LearningPlatformUsefulnessTextbook NA, NA, NA, NA,”Somewhat ~ 7 ## $ LearningPlatformUsefulnessTradeBook “Somewhat useful”, NA, NA,~ ## $ LearningPlatformUsefulnessTutoring NA, NA, NA, NA, NA, NA, NA~ ## $ LearningPlatformUsefulnessYouTube NA, NA, “Very useful”, NA,~ ## $ LearningDataScienceTime NA, “1-2 years”, “1-2 year~ ## $ LearningCategorySelftTaught 0, 10, 20, 30, 60, 45, 40,~ ## $ LearningCategoryOnlineCourses 0, 30, 50, 0, 5, 25, 0, 40~ ## $ LearningCategoryWork 100, 0, 0, 40, 5, 20, 0, 0~ ## $ LearningCategoryUniversity 0, 30, 30, 30, 30, 0, 50, ~ ## $ LearningCategoryKaggle 0, 30, 0, 0, 0, 10, 10, 10~ ## $ LearningCategoryOther 0, 0, 0, 0, 0, 0, 0, 0, 0,~ ## $ WorkHardwareSelect ”Gaming Laptop (Laptop + C~ ## $ WorkDataTypeSelect “Text data,Relational data~ ## $ WorkProductionFrequency ”Rarely”, NA, NA, “Always”~ ## $ 
WorkDatasetSize “10GB”, NA, NA, “1GB”, “10~ ## $ WorkAlgorithmsSelect ”Neural Networks,Random Fo~ ## $ WorkToolsSelect “Amazon Web services,Oracl~ ## $ WorkToolsFrequencyAmazonML NA, NA, NA,”Rarely”, NA, ~ ## $ WorkToolsFrequencyAWS “Rarely”, NA, NA, “Often”,~ ## $ WorkToolsFrequencyAngoss NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyC NA, NA, NA, NA, “Most of t~ ## $ WorkToolsFrequencyCloudera NA, NA, NA,”Rarely”, NA, ~ ## $ WorkToolsFrequencyDataRobot NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyFlume NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyGCP NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyHadoop NA, NA, NA, “Rarely”, NA, ~ ## $ WorkToolsFrequencyIBMCognos NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyIBMSPSSModeler NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyIBMSPSSStatistics NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyIBMWatson NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyImpala NA, NA, NA, “Rarely”, NA, ~ ## $ WorkToolsFrequencyJava NA, NA, NA, “Rarely”, NA, ~ ## $ WorkToolsFrequencyJulia NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyJupyter NA, NA, NA, NA, “Sometimes~ ## $ WorkToolsFrequencyKNIMECommercial NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyKNIMEFree NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyMathematica NA, NA, NA,”Rarely”, NA, ~ ## $ WorkToolsFrequencyMATLAB NA, NA, NA, “Rarely”, “Oft~ ## $ WorkToolsFrequencyAzure NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyExcel NA, NA, NA,”Sometimes”, N~ ## $ WorkToolsFrequencyMicrosoftRServer NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyMicrosoftSQL NA, NA, NA, “Rarely”, NA, ~ ## $ WorkToolsFrequencyMinitab NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyNoSQL NA, NA, NA, “Rarely”, NA, ~ ## $ WorkToolsFrequencyOracle “Sometimes”, NA, NA, NA, N~ ## $ WorkToolsFrequencyOrange NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyPerl “Most of the time”, NA, NA~ ## $ WorkToolsFrequencyPython NA, NA, NA, “Rarely”, “Som~ ## $ 
WorkToolsFrequencyQlik NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyR NA, NA, NA,”Rarely”, “Som~ ## $ WorkToolsFrequencyRapidMinerCommercial NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyRapidMinerFree NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencySalfrod NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencySAPBusinessObjects NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencySASBase NA, NA, NA,”Sometimes”, N~ 8 ## $ WorkToolsFrequencySASEnterprise NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencySASJMP NA, NA, NA, “Rarely”, NA, ~ ## $ WorkToolsFrequencySpark NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencySQL NA, NA, NA, “Often”, NA, “~ ## $ WorkToolsFrequencyStan NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyStatistica NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyTableau NA, NA, NA,”Rarely”, NA, ~ ## $ WorkToolsFrequencyTensorFlow NA, NA, NA, NA, “Sometimes~ ## $ WorkToolsFrequencyTIBCO NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencyUnix NA, NA, NA, NA, NA, NA,”M~ ## $ WorkToolsFrequencySelect1 NA, NA, NA, NA, NA, NA, NA~ ## $ WorkToolsFrequencySelect2 NA, NA, NA, NA, NA, NA, NA~ ## $ WorkFrequencySelect3 NA, NA, NA, NA, NA, NA, NA~ ## $ WorkMethodsSelect “Association Rules,Collabo~ ## $ ’WorkMethodsFrequencyA/B‘ NA, NA, NA,”Sometimes”, N~ ## $ WorkMethodsFrequencyAssociationRules “Rarely”, NA, NA, NA, “Som~ ## $ WorkMethodsFrequencyBayesian NA, NA, NA,”Sometimes”, “~ ## $ WorkMethodsFrequencyCNNs NA, NA, NA, NA,”Most of t~ ## $ WorkMethodsFrequencyCollaborativeFiltering “Often”, NA, NA, NA, “Some~ ## $ ’WorkMethodsFrequencyCross-Validation‘ NA, NA, NA, NA, NA, NA,”S~ ## $ WorkMethodsFrequencyDataVisualization NA, NA, NA, “Sometimes”, “~ ## $ WorkMethodsFrequencyDecisionTrees NA, NA, NA,”Often”, “Some~ ## $ WorkMethodsFrequencyEnsembleMethods NA, NA, NA,”Sometimes”, “~ ## $ WorkMethodsFrequencyEvolutionaryApproaches NA, NA, NA, NA,”Sometimes~ ## $ WorkMethodsFrequencyGANs NA, NA, NA, NA, NA, NA, NA~ ## $ WorkMethodsFrequencyGBM 
NA, NA, NA, NA, NA, NA, NA~ ## $ WorkMethodsFrequencyHMMs NA, NA, NA, NA, NA, NA, NA~ ## $ WorkMethodsFrequencyKNN NA, NA, NA, NA, “Most of t~ ## $ WorkMethodsFrequencyLiftAnalysis NA, NA, NA, NA, NA, NA, NA~ ## $ WorkMethodsFrequencyLogisticRegression NA, NA, NA,”Sometimes”, “~ ## $ WorkMethodsFrequencyMLN NA, NA, NA,”Often”, NA, N~ ## $ WorkMethodsFrequencyNaiveBayes NA, NA, NA, “Sometimes”, “~ ## $ WorkMethodsFrequencyNLP NA, NA, NA, NA, NA, NA,”M~ ## $ WorkMethodsFrequencyNeuralNetworks “Sometimes”, NA, NA, “Some~ ## $ WorkMethodsFrequencyPCA ”Often”, NA, NA, NA, “Some~ ## $ WorkMethodsFrequencyPrescriptiveModeling NA, NA, NA, NA, NA, NA, NA~ ## $ WorkMethodsFrequencyRandomForests ”Most of the time”, NA, NA~ ## $ WorkMethodsFrequencyRecommenderSystems NA, NA, NA, NA, NA, NA, NA~ ## $ WorkMethodsFrequencyRNNs NA, NA, NA, NA, “Sometimes~ ## $ WorkMethodsFrequencySegmentation NA, NA, NA, NA,”Often”, N~ ## $ WorkMethodsFrequencySimulation NA, NA, NA, “Often”, NA, N~ ## $ WorkMethodsFrequencySVMs NA, NA, NA, NA, “Most of t~ ## $ WorkMethodsFrequencyTextAnalysis NA, NA, NA, NA, NA, NA,”M~ ## $ WorkMethodsFrequencyTimeSeriesAnalysis NA, NA, NA, “Often”, “Some~ ## $ WorkMethodsFrequencySelect1 NA, NA, NA, NA, NA, NA, NA~ ## $ WorkMethodsFrequencySelect2 NA, NA, NA, NA, NA, NA, NA~ ## $ WorkMethodsFrequencySelect3 NA, NA, NA, NA, NA, NA, NA~ ## $ WorkChallengesSelect ”Company politics / Lack o~ ## $ WorkChallengeFrequencyPolitics “Rarely”, NA, NA, “Often”,~ ## $ WorkChallengeFrequencyUnusedResults NA, NA, NA, “Often”, “Some~ ## $ WorkChallengeFrequencyUnusefulInstrumenting NA, NA, NA,”Often”, NA, N~ ## $ WorkChallengeFrequencyDeployment NA, NA, NA, “Often”, NA, N~ ## $ WorkChallengeFrequencyDirtyData NA, NA, NA, “Often”, NA, “~ ## $ WorkChallengeFrequencyExplaining NA, NA, NA,”Often”, NA, N~ 9 ## $ WorkChallengeFrequencyPass NA, NA, NA, NA, NA, NA, NA~ ## $ WorkChallengeFrequencyIntegration NA, NA, NA, “Often”, NA, N~ ## $ WorkChallengeFrequencyTalent NA, NA, NA, 
“Often”, “Some~ ## $ WorkChallengeFrequencyDataFunds NA, NA, NA,”Often”, “Some~ ## $ WorkChallengeFrequencyDomainExpertise NA, NA, NA,”Most of the t~ ## $ WorkChallengeFrequencyML NA, NA, NA, “Often”, NA, N~ ## $ WorkChallengeFrequencyTools NA, NA, NA, “Often”, NA, N~ ## $ WorkChallengeFrequencyExpectations NA, NA, NA, “Often”, NA, N~ ## $ WorkChallengeFrequencyITCoordination NA, NA, NA, NA, “Sometimes~ ## $ WorkChallengeFrequencyHiringFunds NA, NA, NA,”Often”, NA, N~ ## $ WorkChallengeFrequencyPrivacy “Often”, NA, NA, “Often”, ~ ## $ WorkChallengeFrequencyScaling “Most of the time”, NA, NA~ ## $ WorkChallengeFrequencyEnvironments NA, NA, NA, “Often”, “Some~ ## $ WorkChallengeFrequencyClarity NA, NA, NA,”Often”, NA, N~ ## $ WorkChallengeFrequencyDataAccess NA, NA, NA, “Often”, NA, N~ ## $ WorkChallengeFrequencyOtherSelect NA, NA, NA, NA, NA, NA, NA~ ## $ WorkDataVisualizations “26-50% of projects”, NA, ~ ## $ WorkInternalVsExternalTools “Do not know”, NA, NA, “En~ ## $ WorkMLTeamSeatSelect ”Standalone Team”, NA, NA,~ ## $ WorkDatasets NA, NA, NA, “Electricity d~ ## $ WorkDatasetsChallenge NA, NA, NA,”Everything is~ ## $ WorkDataStorage “Document-oriented (e.g. M~ ## $ WorkDataSharing ”Company Developed Platfor~ ## $ WorkDataSourcing NA, NA, NA, NA, NA, NA, NA~ ## $ WorkCodeSharing “Mercurial,Subversion,Othe~ ## $ Age NA, 30, 28, 56, 38, 46, 35~ ## $ EmployerIndustry ”Internet-based”, NA, NA, ~ ## $ CurrentJobTitleSelect “DBA/Database Engineer”, N~ ## $ MLMethodNextYearSelect “Random Forests”, “Random ~ ## $ FormalEducation ”Bachelor’s degree”, “Mast~ 1.2 Change all the character columns to factors. Find the number of levels for each factor. Hint: nlevels(). 
## Rows: 155 ## Columns: 2 ## $ variable ”LearningDataScience”, “LearningPlatformSelect”, “LearningP~ ## $ num_levels 3, 5362, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,~ 1.3 Select the 5 rows with the highest number of levels ## # A tibble: 5 x 2 ## variable num_levels ## ## 1 WorkMethodsSelect 6190 ## 2 LearningPlatformSelect 5362 ## 3 WorkToolsSelect 5248 ## 4 WorkChallengesSelect 4287 ## 5 WorkDatasetsChallenge 2220 10 1.4 Filter for where the column called variable equals CurrentJobTitleSelect. Show its levels and number of levels. ## [1] 16 ## [1]”Business Analyst” ## [2] “Computer Scientist” ## [3] “Data Analyst” ## [4] “Data Miner” ## [5] “Data Scientist” ## [6] “DBA/Database Engineer” ## [7] “Engineer” ## [8] “Machine Learning Engineer” ## [9] “Operations Research Practitioner” ## [10] “Other” ## [11] “Predictive Modeler” ## [12] “Programmer” ## [13] “Researcher” ## [14] “Scientist/Researcher” ## [15] “Software Developer/Software Engineer” ## [16] “Statistician” ## ## as.Date, as.Date.numeric ## ## ################################### WARNING ###################################

# Bar plot of respondent counts per employer industry.
#
# Reads a CSV file into a data frame, converts its EmployerIndustry column
# to a factor, and draws one bar per factor level with ggplot2.

# Load necessary library
library(ggplot2)

# Assuming your dataset is stored as a CSV file.
# Replace "your_file.csv" with the name of your file.
data <- read.csv("your_file.csv")

# Ensure the column is treated as a factor
data$EmployerIndustry <- as.factor(data$EmployerIndustry)

# Create a bar plot
ggplot(data, aes(x = EmployerIndustry)) +
  # geom_bar() uses its default count statistic: bar height = rows per level.
  geom_bar(fill = "blue", color = "black") +
  theme_minimal() +
  labs(
    title = "Bar Plot of Employer Industry",
    x = "Industry",
    y = "Count"
  ) +
  # theme() comes after theme_minimal() so the complete theme does not
  # override this setting; x-axis labels are angled 45 degrees and
  # right-aligned (hjust = 1).
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# We noticed you have dplyr installed. The dplyr lag() function breaks how

# base R’s lag() function is supposed to work, which breaks lag(my_xts).

#

# Calls to lag(my_xts) that you enter or source() into this session won’t

# work correctly.

#

# All package code is unaffected because it is protected by the R namespace

# mechanism.

#

# Set ’options(xts.warn_dplyr_breaks_lag = FALSE)‘ to suppress this warning.

#

# You can use stats::lag() to make sure you’re not using dplyr::lag(), or you

# can add conflictRules(’dplyr’, exclude = ’lag’) to your .Rprofile to stop

# dplyr from breaking base R’s lag() function.

################################### WARNING

Attaching package: ’xts’

The following objects are masked from ’package:dplyr’:

first, last

Attaching package: ’PerformanceAnalytics’

The following object is masked from ’package:graphics’:

legend

Loading required package: quantmod

Loading required package: TTR

Registered S3 method overwritten by ’quantmod’:

method from

as.zoo.data.frame zoo