Data Source and Description

Kaggle conducted an industry-wide survey to establish a comprehensive view of the state of data science and machine learning. The survey received over 16,000 responses and we learned a ton about who is working with data, what’s happening at the cutting edge of machine learning across industries, and how new data scientists can best break into the field.

You can download the data here.

Most Used ML Models and Best Tools Next Year

R code for this graph:

# NOTE: the workspace is intentionally NOT cleared with rm(list = ls()) here.
# That call deletes whatever objects the person sourcing this script already
# has, yet still leaves attached packages and options in place, so it does not
# give a clean session. Restart R instead if a clean session is needed.
library(tidyverse)

# Multiple-choice survey responses. stringsAsFactors = FALSE keeps the answer
# columns as plain character vectors for the stringr calls further down.
# The path is machine-specific; adjust it to where the CSV was downloaded.
df_survey <- read.csv("E:\\R_project\\Kaggle\\ds_survey\\multipleChoiceResponses.csv", stringsAsFactors = FALSE)


# Tally the MLToolNextYearSelect answers, most frequent first, and keep a
# short, display-ready table with columns `tool` and `n`.
df2 <- df_survey %>%
  as_tibble() %>%
  count(MLToolNextYearSelect) %>%
  arrange(desc(n)) %>%
  # Discard the single most frequent row, then every "I don..." answer.
  slice(-1) %>%
  filter(!str_detect(MLToolNextYearSelect, "I don")) %>%
  # Keep at most the 14 highest-ranked remaining rows.
  head(14) %>%
  select(tool = MLToolNextYearSelect, n) %>%
  mutate(
    # Replace long answer texts with short labels; anything unmatched is
    # kept as-is.
    tool = case_when(
      str_detect(tool, "Spa") ~ "Spark",
      str_detect(tool, "Jup") ~ "Jupyter Notebooks",
      str_detect(tool, "Web") ~ "Amazon Web Services",
      str_detect(tool, "IBM") ~ "IBM Watson Analytics",
      str_detect(tool, "Micro") ~ "Microsoft Azure ML",
      TRUE ~ tool
    )
  )


# Count how often each term appears in the MLTechniquesSelect answers.
#
# Every respondent's answer is split, not just the distinct answer strings:
# collapsing the column to one row per distinct answer first would make N the
# number of different answer combinations containing a term instead of the
# number of respondents who selected it.
df3 <- df_survey %>%
  pull(MLTechniquesSelect) %>%
  # One answer holds several comma-separated selections. simplify = TRUE
  # returns a matrix padded with "" so it can be flattened with as.vector().
  str_split("\\,", simplify = TRUE) %>%
  as.vector() %>%
  # Selections are split once more on "-", so both sides of a hyphenated
  # selection are counted as separate terms.
  str_split("-", simplify = TRUE) %>%
  as.vector() %>%
  str_squish() %>%
  table() %>%
  as.data.frame()

# Column 1 is the term, column 2 its frequency. The padding makes the empty
# string "" one of the rows of this table.
names(df3) <- c("Model", "N")


# Clean the raw term counts: drop the top row and leftover fragments, map
# abbreviations to full names, then re-aggregate because several raw terms can
# end up with the same label.
df3 <- df3 %>%
  arrange(desc(N)) %>%
  # Remove the single most frequent row.
  slice(-1) %>%
  filter(
    !str_detect(Model, "colon"),
    !str_detect(Model, "Other")
  ) %>%
  mutate(
    Model = as.character(Model),
    Model = case_when(
      str_detect(Model, "Gradient") ~ "Gradient Boosting Machine",
      str_detect(Model, "Support") ~ "Support Vector Machines",
      str_detect(Model, "CNN") ~ "Convolutional Neural Networks",
      str_detect(Model, "RNN") ~ "Recurrent Neural Networks",
      str_detect(Model, "GAN") ~ "Generative Adversary Networks",
      TRUE ~ Model
    )
  ) %>%
  group_by(Model) %>%
  summarise(n = sum(N), .groups = "drop") %>%
  arrange(desc(n))


library(hrbrthemes)
library(extrafont)
library(gridExtra)
# Single colour used by my_bar() for both the fill and the outline of the bars.
my_colors <- c("#3E606F")
# Font family used by my_bar() for the value labels and the category axis text.
my_font <- "Roboto Condensed"


# Draw a horizontal bar chart with the value printed inside each bar.
#
# df_selected: a two-column data frame; the first column holds the category
#   labels and the second the numeric values. Columns are renamed by position,
#   so their original names do not matter.
# Returns: a ggplot object, so callers can keep adding layers such as labs().
# Uses the globals my_colors (bar fill/outline) and my_font (text family).
my_bar <- function(df_selected) {

  names(df_selected) <- c("Model", "value")

  # Fix the factor levels in ascending order of value so the bars are drawn in
  # that order rather than alphabetically. unique() keeps factor() from
  # failing with "duplicated level" when two rows share the same label.
  m <- df_selected %>%
    arrange(value) %>%
    mutate(Model = factor(Model, levels = unique(Model)))

  ggplot(m, aes(Model, value)) +
    geom_col(fill = my_colors, color = my_colors, width = 0.8) +
    coord_flip() +
    # hjust > 1 right-aligns the label just inside the end of its bar.
    geom_text(aes(label = value), hjust = 1.1, color = "white", size = 6, family = my_font) +
    theme_ft_rc() +
    theme(
      panel.grid = element_blank(),
      axis.text.x = element_blank(),
      axis.text.y = element_text(color = "white", size = 14, family = my_font),
      plot.title = element_text(size = 20)
    ) +
    # `value` is numeric, so the y aesthetic needs a continuous scale (a
    # discrete scale was used here before). The expansion is unchanged.
    scale_y_continuous(expand = c(0.01, 0)) +
    labs(x = NULL, y = NULL)
}


# Build one bar chart per summary table and give each its figure title.
p1 <- my_bar(df3) + labs(title = "Figure 1: Most Used ML Models")
p2 <- my_bar(df2) + labs(title = "Figure 2: Best Tools Next Year")

# Place the two figures side by side in a single row.
grid.arrange(grobs = list(p1, p2), nrow = 1)
LS0tDQp0aXRsZTogIkthZ2dsZSBEYXRhIFNjaWVuY2UgU3VydmV5IChNb3N0IFVzZWQgTUwgTW9kZWxzKSINCmF1dGhvcjogIk5ndXllbiBDaGkgRHVuZyINCnN1YnRpdGxlOiAiRGFpbHkgR3JhcGggU2VyaWVzIg0Kb3V0cHV0Og0KICBodG1sX2RvY3VtZW50Og0KICAgIGNvZGVfZG93bmxvYWQ6IHllcw0KICAgICMgY29kZV9mb2xkaW5nOiBoaWRlDQogICAgaGlnaGxpZ2h0OiB6ZW5idXJuDQogICAgdGhlbWU6IGZsYXRseQ0KICAgIHRvYzogeWVzDQogICAgdG9jX2Zsb2F0OiB5ZXMNCiAgd29yZF9kb2N1bWVudDoNCiAgICB0b2M6IHllcw0KLS0tDQoNCmBgYHtyIHNldHVwLGluY2x1ZGU9RkFMU0V9DQprbml0cjo6b3B0c19jaHVuayRzZXQoZWNobyA9IFRSVUUsIHdhcm5pbmcgPSBGQUxTRSwgbWVzc2FnZSA9IEZBTFNFLCBmaWcucmV0aW5hPTIpDQpgYGANCg0KIyBEYXRhIFNvdXJjZSBhbmQgRGVzY3JpcHRpb24NCg0KS2FnZ2xlIGNvbmR1Y3RlZCBhbiBpbmR1c3RyeS13aWRlIHN1cnZleSB0byBlc3RhYmxpc2ggYSBjb21wcmVoZW5zaXZlIHZpZXcgb2YgdGhlIHN0YXRlIG9mIGRhdGEgc2NpZW5jZSBhbmQgbWFjaGluZSBsZWFybmluZy4gVGhlIHN1cnZleSByZWNlaXZlZCBvdmVyIDE2LDAwMCByZXNwb25zZXMgYW5kIHdlIGxlYXJuZWQgYSB0b24gYWJvdXQgd2hvIGlzIHdvcmtpbmcgd2l0aCBkYXRhLCB3aGF04oCZcyBoYXBwZW5pbmcgYXQgdGhlIGN1dHRpbmcgZWRnZSBvZiBtYWNoaW5lIGxlYXJuaW5nIGFjcm9zcyBpbmR1c3RyaWVzLCBhbmQgaG93IG5ldyBkYXRhIHNjaWVudGlzdHMgY2FuIGJlc3QgYnJlYWsgaW50byB0aGUgZmllbGQuDQoNCllvdSBjYW4gZG93bG9hZCBkYXRhIFtoZXJlXShodHRwczovL3d3dy5rYWdnbGUuY29tL2thZ2dsZS9rYWdnbGUtc3VydmV5LTIwMTcvZGF0YSkuIA0KDQojIE1vc3QgVXNlZCBNTCBNb2RlbHMgYW5kIEJlc3QgVG9vbHMgTmV4dCBZZWFyDQoNCiFbXShDOlxVc2Vyc1xaYm9va1xEZXNrdG9wXHBpY1xwNS5qcGcpDQoNClIgY29kZXMgZm9yIHRoaXMgZ3JhcGg6IA0KDQpgYGB7ciwgZXZhbD1GQUxTRX0NCg0KDQpybShsaXN0ID0gbHMoKSkNCmxpYnJhcnkodGlkeXZlcnNlKQ0KDQpkZl9zdXJ2ZXkgPC0gcmVhZC5jc3YoIkU6XFxSX3Byb2plY3RcXEthZ2dsZVxcZHNfc3VydmV5XFxtdWx0aXBsZUNob2ljZVJlc3BvbnNlcy5jc3YiLCBzdHJpbmdzQXNGYWN0b3JzID0gRkFMU0UpDQoNCg0KZGZfc3VydmV5ICU+JSANCiAgZ3JvdXBfYnkoTUxUb29sTmV4dFllYXJTZWxlY3QpICU+JSANCiAgY291bnQoKSAlPiUgDQogIHVuZ3JvdXAoKSAlPiUgDQogIGFycmFuZ2UoLW4pICU+JSANCiAgc2xpY2UoLTEpICU+JSANCiAgZmlsdGVyKCFzdHJfZGV0ZWN0KE1MVG9vbE5leHRZZWFyU2VsZWN0LCAiSSBkb24iKSkgJT4lIA0KICBzbGljZSgxOjE0KSAlPiUgDQogIHJlbmFtZSh0b29sID0gTUxUb29sTmV4dFllYXJTZWxlY3QpICU+JSANCiAgbXV0YXRlKHRvb2wgPSBjYXNlX3doZW4oc3RyX2Rl
dGVjdCh0b29sLCAiU3BhIikgfiAiU3BhcmsiLCANCiAgICAgICAgICAgICAgICAgICAgICAgICAgc3RyX2RldGVjdCh0b29sLCAiSnVwIikgfiAiSnVweXRlciBOb3RlYm9va3MiLCANCiAgICAgICAgICAgICAgICAgICAgICAgICAgc3RyX2RldGVjdCh0b29sLCAiV2ViIikgfiAiQW1hem9uIFdlYiBTZXJ2aWNlcyIsIA0KICAgICAgICAgICAgICAgICAgICAgICAgICBzdHJfZGV0ZWN0KHRvb2wsICJJQk0iKSB+ICJJQk0gV2F0c29uIEFuYWx5dGljcyIsIA0KICAgICAgICAgICAgICAgICAgICAgICAgICBzdHJfZGV0ZWN0KHRvb2wsICJNaWNybyIpIH4gIk1pY3Jvc29mdCBBenVyZSBNTCIsIFRSVUUgfiB0b29sKSkgLT4gZGYyDQoNCg0KZGZfc3VydmV5ICU+JSANCiAgZ3JvdXBfYnkoTUxUZWNobmlxdWVzU2VsZWN0KSAlPiUgDQogIGNvdW50KCkgJT4lIA0KICB1bmdyb3VwKCkgJT4lIA0KICBhcnJhbmdlKC1uKSAlPiUgDQogIHB1bGwoTUxUZWNobmlxdWVzU2VsZWN0KSAlPiUgDQogIHN0cl9zcGxpdCgiXFwsIiwgc2ltcGxpZnkgPSBUUlVFKSAlPiUgDQogIGFzLnZlY3RvcigpICU+JSANCiAgc3RyX3NwbGl0KCItIiwgc2ltcGxpZnkgPSBUUlVFKSAlPiUgDQogIGFzLnZlY3RvcigpICU+JSANCiAgc3RyX3NxdWlzaCgpICU+JSANCiAgdGFibGUoKSAlPiUgDQogIGFzLmRhdGEuZnJhbWUoKSAtPiBkZjMNCg0KbmFtZXMoZGYzKSA8LSBjKCJNb2RlbCIsICJOIikNCg0KDQpkZjMgJT4lIA0KICBhcnJhbmdlKC1OKSAlPiUgDQogIHNsaWNlKC0xKSAlPiUgDQogIGZpbHRlcighc3RyX2RldGVjdChNb2RlbCwgImNvbG9uIikpICU+JSANCiAgZmlsdGVyKCFzdHJfZGV0ZWN0KE1vZGVsLCAiT3RoZXIiKSkgJT4lIA0KICBtdXRhdGUoTW9kZWwgPSBhcy5jaGFyYWN0ZXIoTW9kZWwpKSAlPiUgDQogIG11dGF0ZShNb2RlbCA9IGNhc2Vfd2hlbihzdHJfZGV0ZWN0KE1vZGVsLCAiR3JhZGllbnQiKSB+ICJHcmFkaWVudCBCb29zdGluZyBNYWNoaW5lIiwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICBzdHJfZGV0ZWN0KE1vZGVsLCAiU3VwcG9ydCIpIH4gIlN1cHBvcnQgVmVjdG9yIE1hY2hpbmVzIiwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICBzdHJfZGV0ZWN0KE1vZGVsLCAiQ05OIikgfiAiQ29udm9sdXRpb25hbCBOZXVyYWwgTmV0d29ya3MiLCANCiAgICAgICAgICAgICAgICAgICAgICAgICAgIHN0cl9kZXRlY3QoTW9kZWwsICJSTk4iKSB+ICJSZWN1cnJlbnQgTmV1cmFsIE5ldHdvcmtzIiwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICBzdHJfZGV0ZWN0KE1vZGVsLCAiR0FOIikgfiAiR2VuZXJhdGl2ZSBBZHZlcnNhcnkgTmV0d29ya3MiLCANCiAgICAgICAgICAgICAgICAgICAgICAgICAgIFRSVUUgfiBNb2RlbCkpICU+JSANCiAgZ3JvdXBfYnkoTW9kZWwpICU+JSANCiAgc3VtbWFyaXNlKG4gPSBzdW0oTikpICU+JSANCiAgdW5ncm91cCgpICU+JSANCiAgYXJyYW5nZSgtbikgLT4gZGYzDQoNCg0KbGlicmFyeSho
cmJydGhlbWVzKQ0KbGlicmFyeShleHRyYWZvbnQpDQpsaWJyYXJ5KGdyaWRFeHRyYSkNCm15X2NvbG9ycyA8LSBjKCIjM0U2MDZGIikNCm15X2ZvbnQgPC0gIlJvYm90byBDb25kZW5zZWQiDQoNCg0KbXlfYmFyIDwtIGZ1bmN0aW9uKGRmX3NlbGVjdGVkKSB7DQogIA0KICBuYW1lcyhkZl9zZWxlY3RlZCkgPC0gYygiTW9kZWwiLCAidmFsdWUiKQ0KICBkZl9zZWxlY3RlZCAlPiUgDQogICAgYXJyYW5nZSh2YWx1ZSkgJT4lDQogICAgbXV0YXRlKE1vZGVsID0gZmFjdG9yKE1vZGVsLCBsZXZlbHMgPSBNb2RlbCkpIC0+IG0NCiAgDQogIG0gJT4lIA0KICAgIGdncGxvdChhZXMoTW9kZWwsIHZhbHVlKSkgKw0KICAgIGdlb21fY29sKGZpbGwgPSBteV9jb2xvcnMsIGNvbG9yID0gbXlfY29sb3JzLCB3aWR0aCA9IDAuOCkgKw0KICAgIGNvb3JkX2ZsaXAoKSArDQogICAgZ2VvbV90ZXh0KGRhdGEgPSBtLCBhZXMobGFiZWwgPSB2YWx1ZSksIGhqdXN0ID0gMS4xLCBjb2xvciA9ICJ3aGl0ZSIsIHNpemUgPSA2LCBmYW1pbHkgPSBteV9mb250KSArIA0KICAgIHRoZW1lX2Z0X3JjKCkgKyANCiAgICB0aGVtZShwYW5lbC5ncmlkID0gZWxlbWVudF9ibGFuaygpKSArIA0KICAgIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF9ibGFuaygpKSArIA0KICAgIHRoZW1lKGF4aXMudGV4dC55ID0gZWxlbWVudF90ZXh0KGNvbG9yID0gIndoaXRlIiwgc2l6ZSA9IDE0LCBmYW1pbHkgPSBteV9mb250KSkgKyANCiAgICB0aGVtZShwbG90LnRpdGxlID0gZWxlbWVudF90ZXh0KHNpemUgPSAyMCkpICsgDQogICAgc2NhbGVfeV9kaXNjcmV0ZShleHBhbmQgPSBjKDAuMDEsIDApKSArIA0KICAgIGxhYnMoeCA9IE5VTEwsIHkgPSBOVUxMKQ0KfQ0KDQoNCm15X2JhcihkZjMpICsgDQogIGxhYnModGl0bGUgPSAiRmlndXJlIDE6IE1vc3QgVXNlZCBNTCBNb2RlbHMiKSAtPiBwMQ0KDQpteV9iYXIoZGYyKSArIA0KICBsYWJzKHRpdGxlID0gIkZpZ3VyZSAyOiBCZXN0IFRvb2xzIE5leHQgWWVhciIpIC0+IHAyDQoNCg0KZ3JpZC5hcnJhbmdlKHAxLCBwMiwgbnJvdyA9IDEpDQpgYGANCg0K