# Load the survey's multiple-choice responses from the local CSV export.
# Column types are guessed by read_csv(); see the parsing summary printed below.
multipleChoiceResponses <- read_csv(
  file = "C:/Users/Admin/OneDrive - 亞洲大學[Asia University]/Financial Database Mana & Application/Data/multipleChoiceResponses.csv"
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 16716 Columns: 228
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (212): GenderSelect, Country, EmploymentStatus, StudentStatus, LearningD...
## dbl (13): Age, LearningCategorySelftTaught, LearningCategoryOnlineCourses, ...
## num (1): CompensationAmount
## lgl (2): WorkToolsFrequencyAngoss, WorkToolsFrequencyKNIMECommercial
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print a compact, one-line-per-column overview of the raw data.
multipleChoiceResponses %>%
  glimpse()
## Rows: 16,716
## Columns: 228
## $ GenderSelect <chr> "Non-binary, genderqueer, …
## $ Country <chr> NA, "United States", "Cana…
## $ Age <dbl> NA, 30, 28, 56, 38, 46, 35…
## $ EmploymentStatus <chr> "Employed full-time", "Not…
## $ StudentStatus <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningDataScience <chr> NA, NA, NA, NA, NA, NA, NA…
## $ CodeWriter <chr> "Yes", NA, NA, "Yes", "Yes…
## $ CareerSwitcher <chr> NA, NA, NA, NA, NA, NA, NA…
## $ CurrentJobTitleSelect <chr> "DBA/Database Engineer", N…
## $ TitleFit <chr> "Fine", NA, NA, "Poorly", …
## $ CurrentEmployerType <chr> "Employed by a company tha…
## $ MLToolNextYearSelect <chr> "SAS Base", "Python", "Ama…
## $ MLMethodNextYearSelect <chr> "Random Forests", "Random …
## $ LanguageRecommendationSelect <chr> "F#", "Python", "R", "Pyth…
## $ PublicDatasetsSelect <chr> "Dataset aggregator/platfo…
## $ LearningPlatformSelect <chr> "College/University,Confer…
## $ LearningPlatformUsefulnessArxiv <chr> NA, NA, "Very useful", NA,…
## $ LearningPlatformUsefulnessBlogs <chr> NA, NA, NA, "Very useful",…
## $ LearningPlatformUsefulnessCollege <chr> NA, NA, "Somewhat useful",…
## $ LearningPlatformUsefulnessCompany <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformUsefulnessConferences <chr> "Very useful", NA, NA, "Ve…
## $ LearningPlatformUsefulnessFriends <chr> NA, NA, NA, "Very useful",…
## $ LearningPlatformUsefulnessKaggle <chr> NA, "Somewhat useful", "So…
## $ LearningPlatformUsefulnessNewsletters <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformUsefulnessCommunities <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformUsefulnessDocumentation <chr> NA, NA, NA, "Very useful",…
## $ LearningPlatformUsefulnessCourses <chr> NA, NA, "Very useful", "Ve…
## $ LearningPlatformUsefulnessProjects <chr> NA, NA, NA, "Very useful",…
## $ LearningPlatformUsefulnessPodcasts <chr> "Very useful", NA, NA, NA,…
## $ LearningPlatformUsefulnessSO <chr> NA, NA, NA, NA, NA, "Very …
## $ LearningPlatformUsefulnessTextbook <chr> NA, NA, NA, NA, "Somewhat …
## $ LearningPlatformUsefulnessTradeBook <chr> "Somewhat useful", NA, NA,…
## $ LearningPlatformUsefulnessTutoring <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformUsefulnessYouTube <chr> NA, NA, "Very useful", NA,…
## $ BlogsPodcastsNewslettersSelect <chr> "Becoming a Data Scientist…
## $ LearningDataScienceTime <chr> NA, "1-2 years", "1-2 year…
## $ JobSkillImportanceBigData <chr> NA, NA, "Necessary", NA, N…
## $ JobSkillImportanceDegree <chr> NA, "Nice to have", NA, NA…
## $ JobSkillImportanceStats <chr> NA, "Unnecessary", NA, NA,…
## $ JobSkillImportanceEnterpriseTools <chr> NA, NA, NA, NA, NA, NA, NA…
## $ JobSkillImportancePython <chr> NA, "Unnecessary", NA, NA,…
## $ JobSkillImportanceR <chr> NA, NA, "Necessary", NA, N…
## $ JobSkillImportanceSQL <chr> NA, "Necessary", NA, NA, N…
## $ JobSkillImportanceKaggleRanking <chr> NA, NA, NA, NA, NA, NA, NA…
## $ JobSkillImportanceMOOC <chr> NA, NA, NA, NA, NA, NA, NA…
## $ JobSkillImportanceVisualizations <chr> NA, NA, NA, NA, NA, NA, NA…
## $ JobSkillImportanceOtherSelect1 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ JobSkillImportanceOtherSelect2 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ JobSkillImportanceOtherSelect3 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ CoursePlatformSelect <chr> NA, NA, "Coursera,edX", NA…
## $ HardwarePersonalProjectsSelect <chr> NA, NA, "Basic laptop (Mac…
## $ TimeSpentStudying <chr> NA, "2 - 10 hours", "2 - 1…
## $ ProveKnowledgeSelect <chr> NA, "Master's degree", "Gi…
## $ DataScienceIdentitySelect <chr> "Yes", "Yes", "Yes", "Yes"…
## $ FormalEducation <chr> "Bachelor's degree", "Mast…
## $ MajorSelect <chr> "Management information sy…
## $ Tenure <chr> "More than 10 years", "Les…
## $ PastJobTitlesSelect <chr> "Predictive Modeler,Progra…
## $ FirstTrainingSelect <chr> "University courses", "Uni…
## $ LearningCategorySelftTaught <dbl> 0, 10, 20, 30, 60, 45, 40,…
## $ LearningCategoryOnlineCourses <dbl> 0, 30, 50, 0, 5, 25, 0, 40…
## $ LearningCategoryWork <dbl> 100, 0, 0, 40, 5, 20, 0, 0…
## $ LearningCategoryUniversity <dbl> 0, 30, 30, 30, 30, 0, 50, …
## $ LearningCategoryKaggle <dbl> 0, 30, 0, 0, 0, 10, 10, 10…
## $ LearningCategoryOther <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ MLSkillsSelect <chr> "Computer Vision,Natural L…
## $ MLTechniquesSelect <chr> "Evolutionary Approaches,N…
## $ ParentsEducation <chr> "A doctoral degree", "A ba…
## $ EmployerIndustry <chr> "Internet-based", NA, NA, …
## $ EmployerSize <chr> "100 to 499 employees", NA…
## $ EmployerSizeChange <chr> "Increased slightly", NA, …
## $ EmployerMLTime <chr> "3-5 years", NA, NA, NA, "…
## $ EmployerSearchMethod <chr> "I visited the company's W…
## $ UniversityImportance <chr> "Not very important", NA, …
## $ JobFunctionSelect <chr> "Build prototypes to explo…
## $ WorkHardwareSelect <chr> "Gaming Laptop (Laptop + C…
## $ WorkDataTypeSelect <chr> "Text data,Relational data…
## $ WorkProductionFrequency <chr> "Rarely", NA, NA, "Always"…
## $ WorkDatasetSize <chr> "10GB", NA, NA, "1GB", "10…
## $ WorkAlgorithmsSelect <chr> "Neural Networks,Random Fo…
## $ WorkToolsSelect <chr> "Amazon Web services,Oracl…
## $ WorkToolsFrequencyAmazonML <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyAWS <chr> "Rarely", NA, NA, "Often",…
## $ WorkToolsFrequencyAngoss <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyC <chr> NA, NA, NA, NA, "Most of t…
## $ WorkToolsFrequencyCloudera <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyDataRobot <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyFlume <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyGCP <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyHadoop <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyIBMCognos <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyIBMSPSSModeler <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyIBMSPSSStatistics <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyIBMWatson <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyImpala <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyJava <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyJulia <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyJupyter <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkToolsFrequencyKNIMECommercial <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyKNIMEFree <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyMathematica <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyMATLAB <chr> NA, NA, NA, "Rarely", "Oft…
## $ WorkToolsFrequencyAzure <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyExcel <chr> NA, NA, NA, "Sometimes", N…
## $ WorkToolsFrequencyMicrosoftRServer <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyMicrosoftSQL <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyMinitab <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyNoSQL <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyOracle <chr> "Sometimes", NA, NA, NA, N…
## $ WorkToolsFrequencyOrange <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyPerl <chr> "Most of the time", NA, NA…
## $ WorkToolsFrequencyPython <chr> NA, NA, NA, "Rarely", "Som…
## $ WorkToolsFrequencyQlik <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyR <chr> NA, NA, NA, "Rarely", "Som…
## $ WorkToolsFrequencyRapidMinerCommercial <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyRapidMinerFree <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySalfrod <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySAPBusinessObjects <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySASBase <chr> NA, NA, NA, "Sometimes", N…
## $ WorkToolsFrequencySASEnterprise <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySASJMP <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencySpark <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySQL <chr> NA, NA, NA, "Often", NA, "…
## $ WorkToolsFrequencyStan <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyStatistica <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyTableau <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyTensorFlow <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkToolsFrequencyTIBCO <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyUnix <chr> NA, NA, NA, NA, NA, NA, "M…
## $ WorkToolsFrequencySelect1 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySelect2 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkFrequencySelect3 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsSelect <chr> "Association Rules,Collabo…
## $ `WorkMethodsFrequencyA/B` <chr> NA, NA, NA, "Sometimes", N…
## $ WorkMethodsFrequencyAssociationRules <chr> "Rarely", NA, NA, NA, "Som…
## $ WorkMethodsFrequencyBayesian <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyCNNs <chr> NA, NA, NA, NA, "Most of t…
## $ WorkMethodsFrequencyCollaborativeFiltering <chr> "Often", NA, NA, NA, "Some…
## $ `WorkMethodsFrequencyCross-Validation` <chr> NA, NA, NA, NA, NA, NA, "S…
## $ WorkMethodsFrequencyDataVisualization <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyDecisionTrees <chr> NA, NA, NA, "Often", "Some…
## $ WorkMethodsFrequencyEnsembleMethods <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyEvolutionaryApproaches <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkMethodsFrequencyGANs <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyGBM <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyHMMs <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyKNN <chr> NA, NA, NA, NA, "Most of t…
## $ WorkMethodsFrequencyLiftAnalysis <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyLogisticRegression <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyMLN <chr> NA, NA, NA, "Often", NA, N…
## $ WorkMethodsFrequencyNaiveBayes <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyNLP <chr> NA, NA, NA, NA, NA, NA, "M…
## $ WorkMethodsFrequencyNeuralNetworks <chr> "Sometimes", NA, NA, "Some…
## $ WorkMethodsFrequencyPCA <chr> "Often", NA, NA, NA, "Some…
## $ WorkMethodsFrequencyPrescriptiveModeling <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyRandomForests <chr> "Most of the time", NA, NA…
## $ WorkMethodsFrequencyRecommenderSystems <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyRNNs <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkMethodsFrequencySegmentation <chr> NA, NA, NA, NA, "Often", N…
## $ WorkMethodsFrequencySimulation <chr> NA, NA, NA, "Often", NA, N…
## $ WorkMethodsFrequencySVMs <chr> NA, NA, NA, NA, "Most of t…
## $ WorkMethodsFrequencyTextAnalysis <chr> NA, NA, NA, NA, NA, NA, "M…
## $ WorkMethodsFrequencyTimeSeriesAnalysis <chr> NA, NA, NA, "Often", "Some…
## $ WorkMethodsFrequencySelect1 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencySelect2 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencySelect3 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ TimeGatheringData <dbl> 0, NA, NA, 50, 30, 60, 30,…
## $ TimeModelBuilding <dbl> 100, NA, NA, 20, 20, 20, 1…
## $ TimeProduction <dbl> 0, NA, NA, 0, 15, 10, 30, …
## $ TimeVisualizing <dbl> 0, NA, NA, 10, 15, 10, 20,…
## $ TimeFindingInsights <dbl> 0, NA, NA, 20, 20, 0, 10, …
## $ TimeOtherSelect <dbl> 0, NA, NA, 0, 0, 0, 0, NA,…
## $ AlgorithmUnderstandingLevel <chr> "Enough to explain the alg…
## $ WorkChallengesSelect <chr> "Company politics / Lack o…
## $ WorkChallengeFrequencyPolitics <chr> "Rarely", NA, NA, "Often",…
## $ WorkChallengeFrequencyUnusedResults <chr> NA, NA, NA, "Often", "Some…
## $ WorkChallengeFrequencyUnusefulInstrumenting <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyDeployment <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyDirtyData <chr> NA, NA, NA, "Often", NA, "…
## $ WorkChallengeFrequencyExplaining <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyPass <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkChallengeFrequencyIntegration <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyTalent <chr> NA, NA, NA, "Often", "Some…
## $ WorkChallengeFrequencyDataFunds <chr> NA, NA, NA, "Often", "Some…
## $ WorkChallengeFrequencyDomainExpertise <chr> NA, NA, NA, "Most of the t…
## $ WorkChallengeFrequencyML <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyTools <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyExpectations <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyITCoordination <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkChallengeFrequencyHiringFunds <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyPrivacy <chr> "Often", NA, NA, "Often", …
## $ WorkChallengeFrequencyScaling <chr> "Most of the time", NA, NA…
## $ WorkChallengeFrequencyEnvironments <chr> NA, NA, NA, "Often", "Some…
## $ WorkChallengeFrequencyClarity <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyDataAccess <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyOtherSelect <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkDataVisualizations <chr> "26-50% of projects", NA, …
## $ WorkInternalVsExternalTools <chr> "Do not know", NA, NA, "En…
## $ WorkMLTeamSeatSelect <chr> "Standalone Team", NA, NA,…
## $ WorkDatasets <chr> NA, NA, NA, "Electricity d…
## $ WorkDatasetsChallenge <chr> NA, NA, NA, "Everything is…
## $ WorkDataStorage <chr> "Document-oriented (e.g. M…
## $ WorkDataSharing <chr> "Company Developed Platfor…
## $ WorkDataSourcing <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkCodeSharing <chr> "Mercurial,Subversion,Othe…
## $ RemoteWork <chr> "Always", NA, NA, NA, "Rar…
## $ CompensationAmount <dbl> NA, NA, NA, 250000, NA, NA…
## $ CompensationCurrency <chr> NA, NA, NA, "USD", NA, NA,…
## $ SalaryChange <chr> "I am not currently employ…
## $ JobSatisfaction <chr> "5", NA, NA, "10 - Highly …
## $ JobSearchResource <chr> NA, NA, "Asking friends, f…
## $ JobHuntTime <chr> NA, NA, "1-2", NA, NA, NA,…
## $ JobFactorLearning <chr> NA, NA, "Very Important", …
## $ JobFactorSalary <chr> NA, NA, "Very Important", …
## $ JobFactorOffice <chr> NA, NA, "Very Important", …
## $ JobFactorLanguages <chr> NA, NA, "Very Important", …
## $ JobFactorCommute <chr> NA, NA, "Very Important", …
## $ JobFactorManagement <chr> NA, NA, "Very Important", …
## $ JobFactorExperienceLevel <chr> NA, NA, "Very Important", …
## $ JobFactorDepartment <chr> NA, NA, "Very Important", …
## $ JobFactorTitle <chr> NA, NA, "Very Important", …
## $ JobFactorCompanyFunding <chr> NA, NA, "Very Important", …
## $ JobFactorImpact <chr> NA, NA, "Very Important", …
## $ JobFactorRemote <chr> NA, NA, "Very Important", …
## $ JobFactorIndustry <chr> NA, NA, "Very Important", …
## $ JobFactorLeaderReputation <chr> NA, "Somewhat important", …
## $ JobFactorDiversity <chr> NA, NA, "Very Important", …
## $ JobFactorPublishingOpportunity <chr> NA, NA, "Very Important", …
1.1 Select the columns whose names start with Learning, Work, Age, EmployerIndustry, CurrentJob, MLMethod, or Formal.
Show the results using glimpse().
# Keep only the columns whose names begin with one of the requested prefixes.
# A vector of prefixes selects the union of the matches, taken prefix by prefix.
multipleChoiceResponses_1.1 <- select(
  multipleChoiceResponses,
  starts_with(
    c(
      "Learning",
      "Work",
      "Age",
      "EmployerIndustry",
      "CurrentJob",
      "MLMethod",
      "Formal"
    )
  )
)

# Print a compact overview of the selected columns.
glimpse(multipleChoiceResponses_1.1)
## Rows: 16,716
## Columns: 155
## $ LearningDataScience <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformSelect <chr> "College/University,Confer…
## $ LearningPlatformUsefulnessArxiv <chr> NA, NA, "Very useful", NA,…
## $ LearningPlatformUsefulnessBlogs <chr> NA, NA, NA, "Very useful",…
## $ LearningPlatformUsefulnessCollege <chr> NA, NA, "Somewhat useful",…
## $ LearningPlatformUsefulnessCompany <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformUsefulnessConferences <chr> "Very useful", NA, NA, "Ve…
## $ LearningPlatformUsefulnessFriends <chr> NA, NA, NA, "Very useful",…
## $ LearningPlatformUsefulnessKaggle <chr> NA, "Somewhat useful", "So…
## $ LearningPlatformUsefulnessNewsletters <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformUsefulnessCommunities <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformUsefulnessDocumentation <chr> NA, NA, NA, "Very useful",…
## $ LearningPlatformUsefulnessCourses <chr> NA, NA, "Very useful", "Ve…
## $ LearningPlatformUsefulnessProjects <chr> NA, NA, NA, "Very useful",…
## $ LearningPlatformUsefulnessPodcasts <chr> "Very useful", NA, NA, NA,…
## $ LearningPlatformUsefulnessSO <chr> NA, NA, NA, NA, NA, "Very …
## $ LearningPlatformUsefulnessTextbook <chr> NA, NA, NA, NA, "Somewhat …
## $ LearningPlatformUsefulnessTradeBook <chr> "Somewhat useful", NA, NA,…
## $ LearningPlatformUsefulnessTutoring <chr> NA, NA, NA, NA, NA, NA, NA…
## $ LearningPlatformUsefulnessYouTube <chr> NA, NA, "Very useful", NA,…
## $ LearningDataScienceTime <chr> NA, "1-2 years", "1-2 year…
## $ LearningCategorySelftTaught <dbl> 0, 10, 20, 30, 60, 45, 40,…
## $ LearningCategoryOnlineCourses <dbl> 0, 30, 50, 0, 5, 25, 0, 40…
## $ LearningCategoryWork <dbl> 100, 0, 0, 40, 5, 20, 0, 0…
## $ LearningCategoryUniversity <dbl> 0, 30, 30, 30, 30, 0, 50, …
## $ LearningCategoryKaggle <dbl> 0, 30, 0, 0, 0, 10, 10, 10…
## $ LearningCategoryOther <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ WorkHardwareSelect <chr> "Gaming Laptop (Laptop + C…
## $ WorkDataTypeSelect <chr> "Text data,Relational data…
## $ WorkProductionFrequency <chr> "Rarely", NA, NA, "Always"…
## $ WorkDatasetSize <chr> "10GB", NA, NA, "1GB", "10…
## $ WorkAlgorithmsSelect <chr> "Neural Networks,Random Fo…
## $ WorkToolsSelect <chr> "Amazon Web services,Oracl…
## $ WorkToolsFrequencyAmazonML <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyAWS <chr> "Rarely", NA, NA, "Often",…
## $ WorkToolsFrequencyAngoss <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyC <chr> NA, NA, NA, NA, "Most of t…
## $ WorkToolsFrequencyCloudera <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyDataRobot <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyFlume <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyGCP <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyHadoop <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyIBMCognos <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyIBMSPSSModeler <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyIBMSPSSStatistics <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyIBMWatson <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyImpala <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyJava <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyJulia <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyJupyter <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkToolsFrequencyKNIMECommercial <lgl> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyKNIMEFree <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyMathematica <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyMATLAB <chr> NA, NA, NA, "Rarely", "Oft…
## $ WorkToolsFrequencyAzure <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyExcel <chr> NA, NA, NA, "Sometimes", N…
## $ WorkToolsFrequencyMicrosoftRServer <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyMicrosoftSQL <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyMinitab <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyNoSQL <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyOracle <chr> "Sometimes", NA, NA, NA, N…
## $ WorkToolsFrequencyOrange <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyPerl <chr> "Most of the time", NA, NA…
## $ WorkToolsFrequencyPython <chr> NA, NA, NA, "Rarely", "Som…
## $ WorkToolsFrequencyQlik <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyR <chr> NA, NA, NA, "Rarely", "Som…
## $ WorkToolsFrequencyRapidMinerCommercial <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyRapidMinerFree <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySalfrod <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySAPBusinessObjects <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySASBase <chr> NA, NA, NA, "Sometimes", N…
## $ WorkToolsFrequencySASEnterprise <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySASJMP <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencySpark <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySQL <chr> NA, NA, NA, "Often", NA, "…
## $ WorkToolsFrequencyStan <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyStatistica <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyTableau <chr> NA, NA, NA, "Rarely", NA, …
## $ WorkToolsFrequencyTensorFlow <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkToolsFrequencyTIBCO <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencyUnix <chr> NA, NA, NA, NA, NA, NA, "M…
## $ WorkToolsFrequencySelect1 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkToolsFrequencySelect2 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkFrequencySelect3 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsSelect <chr> "Association Rules,Collabo…
## $ `WorkMethodsFrequencyA/B` <chr> NA, NA, NA, "Sometimes", N…
## $ WorkMethodsFrequencyAssociationRules <chr> "Rarely", NA, NA, NA, "Som…
## $ WorkMethodsFrequencyBayesian <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyCNNs <chr> NA, NA, NA, NA, "Most of t…
## $ WorkMethodsFrequencyCollaborativeFiltering <chr> "Often", NA, NA, NA, "Some…
## $ `WorkMethodsFrequencyCross-Validation` <chr> NA, NA, NA, NA, NA, NA, "S…
## $ WorkMethodsFrequencyDataVisualization <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyDecisionTrees <chr> NA, NA, NA, "Often", "Some…
## $ WorkMethodsFrequencyEnsembleMethods <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyEvolutionaryApproaches <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkMethodsFrequencyGANs <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyGBM <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyHMMs <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyKNN <chr> NA, NA, NA, NA, "Most of t…
## $ WorkMethodsFrequencyLiftAnalysis <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyLogisticRegression <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyMLN <chr> NA, NA, NA, "Often", NA, N…
## $ WorkMethodsFrequencyNaiveBayes <chr> NA, NA, NA, "Sometimes", "…
## $ WorkMethodsFrequencyNLP <chr> NA, NA, NA, NA, NA, NA, "M…
## $ WorkMethodsFrequencyNeuralNetworks <chr> "Sometimes", NA, NA, "Some…
## $ WorkMethodsFrequencyPCA <chr> "Often", NA, NA, NA, "Some…
## $ WorkMethodsFrequencyPrescriptiveModeling <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyRandomForests <chr> "Most of the time", NA, NA…
## $ WorkMethodsFrequencyRecommenderSystems <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencyRNNs <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkMethodsFrequencySegmentation <chr> NA, NA, NA, NA, "Often", N…
## $ WorkMethodsFrequencySimulation <chr> NA, NA, NA, "Often", NA, N…
## $ WorkMethodsFrequencySVMs <chr> NA, NA, NA, NA, "Most of t…
## $ WorkMethodsFrequencyTextAnalysis <chr> NA, NA, NA, NA, NA, NA, "M…
## $ WorkMethodsFrequencyTimeSeriesAnalysis <chr> NA, NA, NA, "Often", "Some…
## $ WorkMethodsFrequencySelect1 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencySelect2 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkMethodsFrequencySelect3 <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkChallengesSelect <chr> "Company politics / Lack o…
## $ WorkChallengeFrequencyPolitics <chr> "Rarely", NA, NA, "Often",…
## $ WorkChallengeFrequencyUnusedResults <chr> NA, NA, NA, "Often", "Some…
## $ WorkChallengeFrequencyUnusefulInstrumenting <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyDeployment <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyDirtyData <chr> NA, NA, NA, "Often", NA, "…
## $ WorkChallengeFrequencyExplaining <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyPass <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkChallengeFrequencyIntegration <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyTalent <chr> NA, NA, NA, "Often", "Some…
## $ WorkChallengeFrequencyDataFunds <chr> NA, NA, NA, "Often", "Some…
## $ WorkChallengeFrequencyDomainExpertise <chr> NA, NA, NA, "Most of the t…
## $ WorkChallengeFrequencyML <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyTools <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyExpectations <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyITCoordination <chr> NA, NA, NA, NA, "Sometimes…
## $ WorkChallengeFrequencyHiringFunds <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyPrivacy <chr> "Often", NA, NA, "Often", …
## $ WorkChallengeFrequencyScaling <chr> "Most of the time", NA, NA…
## $ WorkChallengeFrequencyEnvironments <chr> NA, NA, NA, "Often", "Some…
## $ WorkChallengeFrequencyClarity <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyDataAccess <chr> NA, NA, NA, "Often", NA, N…
## $ WorkChallengeFrequencyOtherSelect <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkDataVisualizations <chr> "26-50% of projects", NA, …
## $ WorkInternalVsExternalTools <chr> "Do not know", NA, NA, "En…
## $ WorkMLTeamSeatSelect <chr> "Standalone Team", NA, NA,…
## $ WorkDatasets <chr> NA, NA, NA, "Electricity d…
## $ WorkDatasetsChallenge <chr> NA, NA, NA, "Everything is…
## $ WorkDataStorage <chr> "Document-oriented (e.g. M…
## $ WorkDataSharing <chr> "Company Developed Platfor…
## $ WorkDataSourcing <chr> NA, NA, NA, NA, NA, NA, NA…
## $ WorkCodeSharing <chr> "Mercurial,Subversion,Othe…
## $ Age <dbl> NA, 30, 28, 56, 38, 46, 35…
## $ EmployerIndustry <chr> "Internet-based", NA, NA, …
## $ CurrentJobTitleSelect <chr> "DBA/Database Engineer", N…
## $ MLMethodNextYearSelect <chr> "Random Forests", "Random …
## $ FormalEducation <chr> "Bachelor's degree", "Mast…
1.2 Change all the character columns to factors. Find the number of
levels for each factor. Hint: nlevels().
# Convert every character column to a factor so that its levels can be counted.
multipleChoiceResponses_fct <- multipleChoiceResponses_1.1 %>%
  mutate(across(where(is.character), as.factor))

# Count the levels of each column, then reshape the one-row summary into a
# long table with one row per column (variable, num_levels).
# nlevels() returns 0 for columns that are not factors (numeric, logical).
# pivot_longer() replaces the superseded gather() and yields the same table.
number_of_levels <- multipleChoiceResponses_fct %>%
  summarise(across(everything(), nlevels)) %>%
  pivot_longer(
    cols = everything(),
    names_to = "variable",
    values_to = "num_levels"
  ) %>%
  glimpse()
## Rows: 155
## Columns: 2
## $ variable <chr> "LearningDataScience", "LearningPlatformSelect", "LearningP…
## $ num_levels <int> 3, 5362, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
1.3 Select the 5 rows with the highest number of levels
# Sort by level count, largest first, and keep the first five rows.
number_of_levels %>%
  arrange(desc(num_levels)) %>%
  slice_head(n = 5)
## # A tibble: 5 × 2
## variable num_levels
## <chr> <int>
## 1 WorkMethodsSelect 6190
## 2 LearningPlatformSelect 5362
## 3 WorkToolsSelect 5248
## 4 WorkChallengesSelect 4287
## 5 WorkDatasetsChallenge 2220
1.4 Filter for the rows where the column called variable equals CurrentJobTitleSelect.
Show its levels and number of levels.
# Number of distinct job titles recorded in the CurrentJobTitleSelect factor.
nlevels(multipleChoiceResponses_fct[["CurrentJobTitleSelect"]])
## [1] 16
# The job-title labels themselves, in factor-level order.
levels(multipleChoiceResponses_fct[["CurrentJobTitleSelect"]])
## [1] "Business Analyst"
## [2] "Computer Scientist"
## [3] "Data Analyst"
## [4] "Data Miner"
## [5] "Data Scientist"
## [6] "DBA/Database Engineer"
## [7] "Engineer"
## [8] "Machine Learning Engineer"
## [9] "Operations Research Practitioner"
## [10] "Other"
## [11] "Predictive Modeler"
## [12] "Programmer"
## [13] "Researcher"
## [14] "Scientist/Researcher"
## [15] "Software Developer/Software Engineer"
## [16] "Statistician"