Data Source and Description

You can download the data here: https://www.kaggle.com/kaggle/kaggle-survey-2017/data

R code for this graph:

library(tidyverse)

# Location of the multiple-choice responses CSV from the Kaggle 2017 survey.
# NOTE(review): this is an absolute, machine-specific path -- point it at your
# own copy of the file (ideally via a project-relative path) before running.
survey_path <- "/Users/jennynguyen/Documents/r-programming/kaggle-survey-2017/multipleChoiceResponses.csv"

# stringsAsFactors = FALSE keeps the answers as plain character columns
# (it is the default from R 4.0, but is spelled out for older versions).
df_survey <- read.csv(survey_path, stringsAsFactors = FALSE)

# Suffixes of the "WorkToolsFrequency<Tool>" survey columns to analyse.
# Every tool must be listed exactly once.
tool_used <- c("IBMSPSSModeler", "IBMSPSSStatistics", "C", "MATLAB", "Statistica",
               "Java", "Julia", "Excel", "MicrosoftSQL", "TensorFlow", "Spark",
               "NoSQL", "Oracle", "Perl", "Python", "R", "DataRobot",
               "RapidMinerCommercial", "RapidMinerFree", "SASBase", "Mathematica",
               "SASEnterprise", "SASJMP", "SQL", "Tableau")

# Build `df1`: one row per (tool, response) pair with the number of survey
# respondents who gave that answer in column `n`.
tool_prefix <- "WorkToolsFrequency"
response_levels <- c("Rarely", "Sometimes", "Often", "Most of the time")

df1 <- df_survey %>%
  # Keep only the usage-frequency columns of the tools of interest.
  # unique() guards against a tool being listed more than once in `tool_used`.
  select(all_of(unique(paste0(tool_prefix, tool_used)))) %>%
  # Reshape to long format; names_prefix strips the shared column prefix so
  # `tool` holds the bare tool name (e.g. "Python").
  pivot_longer(
    everything(),
    names_to = "tool",
    values_to = "response",
    names_prefix = tool_prefix
  ) %>%
  # Drop blank answers before counting.
  filter(response != "") %>%
  # count() returns an ungrouped tibble, so nothing downstream inherits a
  # leftover grouping.
  count(tool, response) %>%
  # Order the answers from least to most frequent use for plotting.
  mutate(response = factor(response, levels = response_levels))


library(extrafont)

# Palette: [1] accent fill for the highlighted tools, [2] default bar fill.
my_colors <- c("#8C3F4D", "#3E606F")
# NOTE(review): assumes this font is installed and registered with extrafont.
my_font <- "Roboto Condensed"
# Tools whose bars are drawn in the accent colour.
highlight_tools <- c("Python", "R", "SQL")

library(ggplot2)

# The one bar whose label is drawn inside the bar (in white) instead of next
# to it. NOTE(review): presumably the longest bar in the figure, which leaves
# no room for an outside label because the count axis has no expansion --
# confirm against the data.
is_inside_label <- function(tool, response) {
  tool == "Python" & response == "Most of the time"
}

# Faceted horizontal bar chart: one panel per tool, one bar per response.
ggplot(df1, aes(response, n)) +
  geom_col(fill = my_colors[2]) +
  # Redraw the highlighted tools on top in the accent colour.
  geom_col(
    data = filter(df1, tool %in% highlight_tools),
    fill = my_colors[1]
  ) +
  facet_wrap(~ tool, strip.position = "top") +
  coord_flip() +
  # Count labels just past the end of each bar ...
  geom_text(
    data = filter(df1, tool != "Python" | response != "Most of the time"),
    aes(label = n),
    hjust = -0.1,
    family = my_font
  ) +
  # ... except for the single bar that gets its label inside.
  geom_text(
    data = filter(df1, is_inside_label(tool, response)),
    aes(label = n),
    hjust = 1.2,
    color = "white",
    family = my_font
  ) +
  theme(
    panel.grid = element_blank(),
    axis.text.x = element_blank(),
    plot.margin = unit(c(1.2, 1.2, 1.2, 1.2), "cm")
  ) +
  # Bars start flush against the axis.
  scale_y_continuous(expand = c(0, 0)) +
  labs(
    x = NULL,
    y = NULL,
    title = "Figure 1: Most Common Tools for Data Science",
    caption = "Data Source: Kaggle Data Science Survey"
  )

LS0tCnRpdGxlOiAiS2FnZ2xlIERhdGEgU2NpZW5jZSBTdXJ2ZXkgKE1vc3QgY29tbW9uIHRvb2xzIGZvciBEYXRhIFNjaWVuY2UpIgphdXRob3I6ICItSmVubi0iCnN1YnRpdGxlOiAiRGFpbHkgR3JhcGggU2VyaWVzIgpvdXRwdXQ6CiAgaHRtbF9kb2N1bWVudDoKICAgIGNvZGVfZG93bmxvYWQ6IHllcwogICAgIyBjb2RlX2ZvbGRpbmc6IGhpZGUKICAgIGhpZ2hsaWdodDogemVuYnVybgogICAgdGhlbWU6IGZsYXRseQogICAgdG9jOiB5ZXMKICAgIHRvY19mbG9hdDogeWVzCiAgd29yZF9kb2N1bWVudDoKICAgIHRvYzogeWVzCi0tLQoKYGBge3Igc2V0dXAsaW5jbHVkZT1GQUxTRX0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFLCB3YXJuaW5nID0gRkFMU0UsIG1lc3NhZ2UgPSBGQUxTRSwgZmlnLnJldGluYT0yKQpgYGAKCiMgRGF0YSBTb3VyY2UgYW5kIERlc2NyaXB0aW9uCgpZb3UgY2FuIGRvd2xvYWQgZGF0YSBbaGVyZV0oaHR0cHM6Ly93d3cua2FnZ2xlLmNvbS9rYWdnbGUva2FnZ2xlLXN1cnZleS0yMDE3L2RhdGEpLiAKCgpSIGNvZGVzIGZvciB0aGlzIGdyYXBoOiAKCmBgYHtyIH0KCgoKbGlicmFyeSh0aWR5dmVyc2UpCgpkZl9zdXJ2ZXkgPC0gcmVhZC5jc3YoIi9Vc2Vycy9qZW5ueW5ndXllbi9Eb2N1bWVudHMvci1wcm9ncmFtbWluZy9rYWdnbGUtc3VydmV5LTIwMTcvbXVsdGlwbGVDaG9pY2VSZXNwb25zZXMuY3N2Iiwgc3RyaW5nc0FzRmFjdG9ycyA9IEZBTFNFKQoKdG9vbF91c2VkIDwtIGMoIklCTVNQU1NNb2RlbGVyIiwgIklCTVNQU1NTdGF0aXN0aWNzIiwgIkMiLCAiTUFUTEFCIiwgIlN0YXRpc3RpY2EiLCAKICAgICAgICAgICAgICAgIkphdmEiLCAiSnVsaWEiLCAiRXhjZWwiLCAiTWljcm9zb2Z0U1FMIiwgIlRlbnNvckZsb3ciLCAiU3BhcmsiLCAKICAgICAgICAgICAgICAgIk5vU1FMIiwgIk9yYWNsZSIsICJQZXJsIiwgIlB5dGhvbiIsICJSIiwgIkRhdGFSb2JvdCIsICJOb1NRTCIsIAogICAgICAgICAgICAgICAiUmFwaWRNaW5lckNvbW1lcmNpYWwiLCAiUmFwaWRNaW5lckZyZWUiLCAiU0FTQmFzZSIsICJNYXRoZW1hdGljYSIsIAogICAgICAgICAgICAgICAiU0FTRW50ZXJwcmlzZSIsICJTQVNKTVAiLCAiU1FMIiwgIlRhYmxlYXUiKQoKZGZfc3VydmV5ICU+JSAKICBzZWxlY3QocGFzdGUwKCJXb3JrVG9vbHNGcmVxdWVuY3kiLCB0b29sX3VzZWQpKSAlPiUgCiAgZ2F0aGVyKHRvb2wsIHJlc3BvbnNlKSAlPiUgCiAgZ3JvdXBfYnkodG9vbCwgcmVzcG9uc2UpICU+JSAKICBjb3VudCgpICU+JSAKICB1bmdyb3VwKCkgJT4lIAogIGZpbHRlcihyZXNwb25zZSAhPSAiIikgJT4lIAogIG11dGF0ZSh0b29sID0gc3RyX3N1Yih0b29sLCAxOSwgc3RyX2NvdW50KHRvb2wpKSkgJT4lIAogIGdyb3VwX2J5KHRvb2wsIHJlc3BvbnNlKSAlPiUgCiAgc3VtbWFyaXNlKG4gPSBzdW0obikpICU+JSAKICBtdXRhdGUocmVzcG9uc2UgPSBmYWN0b3IocmVzcG9uc2UsIGxldmVscyA9IGMoIlJhcmVseSIsICJT
b21ldGltZXMiLCAiT2Z0ZW4iLCAiTW9zdCBvZiB0aGUgdGltZSIpKSkgLT4gZGYxCgoKbGlicmFyeShleHRyYWZvbnQpCm15X2NvbG9ycyA8LSBjKCIjOEMzRjREIiwgIiMzRTYwNkYiKQpteV9mb250IDwtICJSb2JvdG8gQ29uZGVuc2VkIgoKbGlicmFyeShnZ3Bsb3QyKQpkZjEgJT4lIAogIGdncGxvdChhZXMocmVzcG9uc2UsIG4pKSArIAogIGdlb21fY29sKGZpbGwgPSBteV9jb2xvcnNbMl0pICsKICBnZW9tX2NvbChkYXRhID0gZGYxICU+JSBmaWx0ZXIodG9vbCA9PSAiUHl0aG9uIiksIGFlcyhyZXNwb25zZSwgbiksIGZpbGwgPSBteV9jb2xvcnNbMV0pICsgCiAgZ2VvbV9jb2woZGF0YSA9IGRmMSAlPiUgZmlsdGVyKHRvb2wgPT0gIlIiKSwgYWVzKHJlc3BvbnNlLCBuKSwgZmlsbCA9IG15X2NvbG9yc1sxXSkgKyAKICBnZW9tX2NvbChkYXRhID0gZGYxICU+JSBmaWx0ZXIodG9vbCA9PSAiU1FMIiksIGFlcyhyZXNwb25zZSwgbiksIGZpbGwgPSBteV9jb2xvcnNbMV0pICsgCiAgZmFjZXRfd3JhcCh+IHRvb2wsIHN0cmlwLnBvc2l0aW9uID0gInRvcCIpICsgCiAgY29vcmRfZmxpcCgpICsgCiAgZ2VvbV90ZXh0KGRhdGEgPSBkZjEgJT4lIGZpbHRlcih0b29sICE9ICJQeXRob24iKSwgYWVzKHJlc3BvbnNlLCBuLCBsYWJlbCA9IG4pLCBoanVzdCA9IC0wLjEsIGZhbWlseSA9IG15X2ZvbnQpICsgCiAgZ2VvbV90ZXh0KGRhdGEgPSBkZjEgJT4lIGZpbHRlcih0b29sID09ICJQeXRob24iLCByZXNwb25zZSA9PSAiTW9zdCBvZiB0aGUgdGltZSIpLCBhZXMocmVzcG9uc2UsIG4sIGxhYmVsID0gbiksIGhqdXN0ID0gMS4yLCBjb2xvciA9ICJ3aGl0ZSIpICsgCiAgZ2VvbV90ZXh0KGRhdGEgPSBkZjEgJT4lIGZpbHRlcih0b29sID09ICJQeXRob24iLCByZXNwb25zZSAhPSAiTW9zdCBvZiB0aGUgdGltZSIpLCBhZXMocmVzcG9uc2UsIG4sIGxhYmVsID0gbiksIGhqdXN0ID0gLTAuMSkgKwogIHRoZW1lKHBhbmVsLmdyaWQgPSBlbGVtZW50X2JsYW5rKCkpICsgCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X2JsYW5rKCkpICsgCiAgdGhlbWUocGxvdC5tYXJnaW4gPSB1bml0KGMoMS4yLCAxLjIsIDEuMiwgMS4yKSwgImNtIikpICsgCiAgc2NhbGVfeV9jb250aW51b3VzKGV4cGFuZCA9IGMoMCwgMCkpICsgCiAgbGFicyh4ID0gTlVMTCwgeSA9IE5VTEwsIAogICAgICAgdGl0bGUgPSAiRmlndXJlIDE6IE1vc3QgQ29tbW9uIFRvb2xzIGZvciBEYXRhIFNjaWVuY2UiLCAKICAgICAgIGNhcHRpb24gPSAiRGF0YSBTb3VyY2U6IEthZ2dsZSBEYXRhIFNjaWVuY2UgU3VydmV5IikKCmBgYAoKCg==