# URL of the raw multiple-choice responses CSV on GitHub.
linkMC <- "https://raw.githubusercontent.com/betsyrosalen/DATA_607_Project_3/master/project3_master/rawdata/multipleChoiceResponses.csv"

# Download and parse the CSV. readr guesses the column types and reports the
# guessed specification as a message.
MC <- read_csv(file = linkMC)
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   Age = col_integer(),
##   LearningCategorySelftTaught = col_integer(),
##   LearningCategoryOnlineCourses = col_integer(),
##   LearningCategoryWork = col_integer(),
##   LearningCategoryUniversity = col_double(),
##   LearningCategoryKaggle = col_double(),
##   LearningCategoryOther = col_integer(),
##   TimeGatheringData = col_integer(),
##   TimeModelBuilding = col_integer(),
##   TimeProduction = col_integer(),
##   TimeVisualizing = col_integer(),
##   TimeFindingInsights = col_integer(),
##   TimeOtherSelect = col_integer()
## )
## See spec(...) for full column specifications.
# Keep only the two columns of interest and drop every row in which either of
# them is NA, because missing answers are not meaningful for this analysis.
subset <- MC %>%
  select(FormalEducation, MLMethodNextYearSelect) %>%
  filter(!(is.na(FormalEducation) | is.na(MLMethodNextYearSelect)))
First, we plot the distribution of formal education in the dataset.
# Horizontal bar chart of the number of rows per FormalEducation value. The
# fill colour repeats the axis variable, so the legend is hidden.
ggplot(data = subset) +
  geom_bar(
    aes(x = FormalEducation, fill = FormalEducation),
    show.legend = FALSE
  ) +
  coord_flip()

The dataset predominantly contains candidates with a Master's degree.   
Now let's look at the different ML/DS methods in the dataset:
# List the distinct values of MLMethodNextYearSelect in the filtered data.
unique(subset[["MLMethodNextYearSelect"]])
##  [1] "Random Forests"                             
##  [2] "Deep learning"                              
##  [3] "Neural Nets"                                
##  [4] "Text Mining"                                
##  [5] "Genetic & Evolutionary Algorithms"          
##  [6] "Link Analysis"                              
##  [7] "Rule Induction"                             
##  [8] "Regression"                                 
##  [9] "Proprietary Algorithms"                     
## [10] "I don't plan on learning a new ML/DS method"
## [11] "Ensemble Methods (e.g. boosting, bagging)"  
## [12] "Factor Analysis"                            
## [13] "Social Network Analysis"                    
## [14] "Monte Carlo Methods"                        
## [15] "Time Series Analysis"                       
## [16] "Other"                                      
## [17] "Bayesian Methods"                           
## [18] "Survival Analysis"                          
## [19] "MARS"                                       
## [20] "Anomaly Detection"                          
## [21] "Cluster Analysis"                           
## [22] "Decision Trees"                             
## [23] "Association Rules"                          
## [24] "Uplift Modeling"                            
## [25] "Support Vector Machines (SVM)"
Now we can show the distribution of ML/DS methods within each bar of the formal education chart.
# Stacked bars normalised to 1 (position = "fill"): each bar shows how the
# rows of one FormalEducation value split across MLMethodNextYearSelect.
subset %>%
  ggplot() +
  geom_bar(
    mapping = aes(x = FormalEducation, fill = MLMethodNextYearSelect),
    position = "fill"
  ) +
  # position = "fill" rescales every bar to a 0-1 share, but the axis title
  # would still default to "count"; relabel it so the scale is not misread.
  labs(y = "proportion") +
  coord_flip() +
  theme(legend.position = "bottom")

# Same breakdown drawn with side-by-side bars (position = "dodge"), so the
# bars show counts per method within each FormalEducation value.
ggplot(data = subset) +
  geom_bar(
    aes(x = FormalEducation, fill = MLMethodNextYearSelect),
    position = "dodge"
  ) +
  coord_flip() +
  theme(legend.position = "bottom")

# Method(s) whose share is reported for each education level.
algo <- "Deep learning"

# Count rows per (FormalEducation, MLMethodNextYearSelect) pair, convert the
# counts into shares within each FormalEducation value, then keep only the
# methods listed in `algo`. The result stays grouped by FormalEducation.
subset %>%
  count(FormalEducation, MLMethodNextYearSelect) %>%
  group_by(FormalEducation) %>%
  mutate(freq = n / sum(n)) %>%
  filter(MLMethodNextYearSelect %in% algo)
## # A tibble: 7 x 4
## # Groups: FormalEducation [7]
##   FormalEducation                              MLMethodNextYe…     n  freq
##   <chr>                                        <chr>           <int> <dbl>
## 1 Bachelor's degree                            Deep learning    1339 0.401
## 2 Doctoral degree                              Deep learning     764 0.436
## 3 I did not complete any formal education pas… Deep learning      68 0.386
## 4 I prefer not to answer                       Deep learning      28 0.500
## 5 Master's degree                              Deep learning    1825 0.397
## 6 Professional degree                          Deep learning     118 0.377
## 7 Some college/university study without earni… Deep learning     212 0.375