Data Source and Description

Kaggle conducted an industry-wide survey to establish a comprehensive view of the state of data science and machine learning. The survey received over 16,000 responses and we learned a ton about who is working with data, what’s happening at the cutting edge of machine learning across industries, and how new data scientists can best break into the field.

You can download the data here.

Hardware Used for Data Science Projects

R code for this graph:

# Load the survey responses and keep only the columns of interest.
# The previous `rm(list = ls())` was dropped: it silently wipes the caller's
# workspace, and every object used below is (re)assigned by this script anyway.
library(tidyverse)

# NOTE(review): absolute, machine-specific Windows path -- the script only runs
# where this exact file location exists; consider a project-relative path.
df_survey <- read.csv(
  "E:\\R_project\\Kaggle\\ds_survey\\multipleChoiceResponses.csv",
  stringsAsFactors = FALSE
)

# Subset of survey columns carried forward; only the two hardware columns are
# used by the code visible in this section.
df_profile <- df_survey %>%
  select(
    HardwarePersonalProjectsSelect,
    WorkHardwareSelect,
    WorkDatasetSize,
    MajorSelect,
    WorkDataTypeSelect,
    FirstTrainingSelect
  )


# Tally every hardware option mentioned for work projects.
# Each answer is split on commas, then on " or ", so that a multi-part answer
# contributes one token per part; tokens are whitespace-normalised and counted.
# str_split(simplify = TRUE) pads short rows with "", so empty strings are
# counted as a token too.
df_work_hard <- df_profile$WorkHardwareSelect %>%
  str_split("\\,", simplify = TRUE) %>%
  as.vector() %>%
  str_split(" or ", simplify = TRUE) %>%
  as.vector() %>%
  str_squish() %>%
  table() %>%
  as.data.frame() %>%
  setNames(c("w_hardware", "n"))

# Collapse the raw tokens into broader hardware groups and total the counts.
# NOTE(review): rows 1 and 13 of the frequency-sorted table are dropped by
# position (row 1 is presumably the empty-string token -- confirm), and the
# 11 labels below are matched to the remaining rows purely by position, so
# this depends on the exact ordering produced by this particular dataset.
df_phancung_lamviec <- df_work_hard %>%
  arrange(desc(n)) %>%
  slice(-c(1, 13)) %>%
  mutate(
    phan_cung = c(
      "Laptop",
      "Macbook",
      "Workstation + Private Datacenters",
      "Laptop + Cloud Servers",
      "Laptop + Cloud Servers",
      "Laptop + Cloud Servers",
      "Workstation + Local IT Servers",
      "Traditional Workstation",
      "Workstation + Cloud Servers",
      "GPU accelerated Workstation",
      "CUDA/GPU Gaming Laptop"
    )
  ) %>%
  group_by(phan_cung) %>%
  summarise(n = sum(n)) %>%
  arrange(desc(n))
  


# Tally every hardware option mentioned for personal projects.
# Same tokenisation as for the work-hardware column: split on commas, then on
# " or ", normalise whitespace, and count each distinct token.
# Note that this overwrites `df_work_hard` from the work-hardware step.
df_work_hard <- df_profile$HardwarePersonalProjectsSelect %>%
  str_split("\\,", simplify = TRUE) %>%
  as.vector() %>%
  str_split(" or ", simplify = TRUE) %>%
  as.vector() %>%
  str_squish() %>%
  table() %>%
  as.data.frame()

names(df_work_hard) <- c("w_hardware", "n")

# Collapse the raw tokens into broader hardware groups and total the counts.
# NOTE(review): rows 1 and 12 of the frequency-sorted table are dropped by
# position, and the 10 labels are matched to the remaining rows by position,
# so this depends on the ordering produced by this particular dataset.
df_phancung_canhan <- df_work_hard %>%
  arrange(-n) %>%
  slice(-1, -12) %>%
  mutate(phan_cung = c("Macbook", "Laptop + Cloud Servers", "Laptop + Cloud Servers", "Laptop + Cloud Servers",
                       "CUDA/GPU Gaming Laptop", "Traditional Workstation", "Laptop", "Workstation + Local IT Servers",
                       "GPU accelerated Workstation", "Workstation + Cloud Servers")) %>%
  group_by(phan_cung) %>%
  summarise(n = sum(n)) %>%
  arrange(-n) %>%
  # Add the one group that has no row here with a zero count, so this table
  # lists the same groups as the work-hardware table.
  # tibble() replaces the deprecated data_frame().
  bind_rows(tibble(phan_cung = "Workstation + Private Datacenters", n = 0))


library(hrbrthemes)
library(gridExtra)
# Shared plot styling: font family for text layers/axis labels, and the single
# colour used for both bar fill and bar outline.
my_font <- "Roboto Condensed"
my_colors <- "#3E606F"


# Draw a horizontal bar chart with bars sorted by value and each value printed
# inside its bar.
#
# Args:
#   df_selected: a two-column data frame; the first column holds the category
#     labels and the second the numeric values (columns are renamed by
#     position, so their original names do not matter).
#
# Returns:
#   A ggplot object, so callers can add titles/captions with `+`.
#
# Uses the script-level `my_colors` and `my_font`, and needs dplyr, ggplot2
# and hrbrthemes (for theme_ft_rc()) to be attached.
my_bar <- function(df_selected) {

  names(df_selected) <- c("Model", "value")

  # Sort ascending and freeze that order in the factor levels, so that after
  # coord_flip() the largest bar ends up at the top. unique() keeps factor()
  # from erroring if the same label appears on more than one row.
  m <- df_selected %>%
    arrange(value) %>%
    mutate(Model = factor(Model, levels = unique(Model)))

  ggplot(m, aes(Model, value)) +
    geom_col(fill = my_colors, color = my_colors, width = 0.6) +
    coord_flip() +
    # hjust > 1 right-aligns the label slightly before the bar end, i.e.
    # inside the bar. The layer inherits `m` from ggplot().
    geom_text(aes(label = value), hjust = 1.1, color = "white", size = 6, family = my_font) +
    theme_ft_rc() +
    theme(panel.grid = element_blank()) +
    theme(axis.text.x = element_blank()) +
    theme(axis.text.y = element_text(color = "white", size = 15, family = my_font)) +
    theme(plot.title = element_text(size = 22)) +
    theme(plot.caption = element_text(size = 14, face = "italic")) +
    # `value` is numeric, so its axis takes a continuous scale (the original
    # used scale_y_discrete here); the expansion setting is unchanged.
    scale_y_continuous(expand = c(0.01, 0)) +
    labs(x = NULL, y = NULL)
}


# Build one panel per hardware table and show them side by side.
# The left panel gets an empty caption; presumably this reserves the same
# caption space as the right panel so the two plots line up -- confirm.
p1 <- my_bar(df_phancung_canhan) +
  labs(title = "Hardware for Personal Project", caption = "")

# Caption fixed: the original text repeated the "Data Source:" prefix.
p2 <- my_bar(df_phancung_lamviec) +
  labs(title = "Hardware for Company Project",
       caption = "Data Source: Kaggle Data Science Survey")

grid.arrange(p1, p2, nrow = 1)
LS0tDQp0aXRsZTogIkthZ2dsZSBEYXRhIFNjaWVuY2UgU3VydmV5IChIYXJkd2FyZSBVc2luZyBmb3IgRGF0YSBTY2llbmNlIFByb2plY3RzKSINCmF1dGhvcjogIk5ndXllbiBDaGkgRHVuZyINCnN1YnRpdGxlOiAiRGFpbHkgR3JhcGggU2VyaWVzIg0Kb3V0cHV0Og0KICBodG1sX2RvY3VtZW50Og0KICAgIGNvZGVfZG93bmxvYWQ6IHllcw0KICAgICMgY29kZV9mb2xkaW5nOiBoaWRlDQogICAgaGlnaGxpZ2h0OiB6ZW5idXJuDQogICAgdGhlbWU6IGZsYXRseQ0KICAgIHRvYzogeWVzDQogICAgdG9jX2Zsb2F0OiB5ZXMNCiAgd29yZF9kb2N1bWVudDoNCiAgICB0b2M6IHllcw0KLS0tDQoNCmBgYHtyIHNldHVwLGluY2x1ZGU9RkFMU0V9DQprbml0cjo6b3B0c19jaHVuayRzZXQoZWNobyA9IFRSVUUsIHdhcm5pbmcgPSBGQUxTRSwgbWVzc2FnZSA9IEZBTFNFLCBmaWcucmV0aW5hPTIpDQpgYGANCg0KIyBEYXRhIFNvdXJjZSBhbmQgRGVzY3JpcHRpb24NCg0KS2FnZ2xlIGNvbmR1Y3RlZCBhbiBpbmR1c3RyeS13aWRlIHN1cnZleSB0byBlc3RhYmxpc2ggYSBjb21wcmVoZW5zaXZlIHZpZXcgb2YgdGhlIHN0YXRlIG9mIGRhdGEgc2NpZW5jZSBhbmQgbWFjaGluZSBsZWFybmluZy4gVGhlIHN1cnZleSByZWNlaXZlZCBvdmVyIDE2LDAwMCByZXNwb25zZXMgYW5kIHdlIGxlYXJuZWQgYSB0b24gYWJvdXQgd2hvIGlzIHdvcmtpbmcgd2l0aCBkYXRhLCB3aGF04oCZcyBoYXBwZW5pbmcgYXQgdGhlIGN1dHRpbmcgZWRnZSBvZiBtYWNoaW5lIGxlYXJuaW5nIGFjcm9zcyBpbmR1c3RyaWVzLCBhbmQgaG93IG5ldyBkYXRhIHNjaWVudGlzdHMgY2FuIGJlc3QgYnJlYWsgaW50byB0aGUgZmllbGQuDQoNCllvdSBjYW4gZG93bG9hZCBkYXRhIFtoZXJlXShodHRwczovL3d3dy5rYWdnbGUuY29tL2thZ2dsZS9rYWdnbGUtc3VydmV5LTIwMTcvZGF0YSkuIA0KDQojIEhhcmR3YXJlIFVzaW5nIGZvciBEYXRhIFNjaWVuY2UgUHJvamVjdHMNCg0KIVtdKEM6XFVzZXJzXFpib29rXERlc2t0b3BccGljXHAxNS5qcGcpDQoNClIgY29kZXMgZm9yIHRoaXMgZ3JhcGg6IA0KDQpgYGB7ciwgZXZhbD1GQUxTRX0NCg0KDQpybShsaXN0ID0gbHMoKSkNCmxpYnJhcnkodGlkeXZlcnNlKQ0KDQpkZl9zdXJ2ZXkgPC0gcmVhZC5jc3YoIkU6XFxSX3Byb2plY3RcXEthZ2dsZVxcZHNfc3VydmV5XFxtdWx0aXBsZUNob2ljZVJlc3BvbnNlcy5jc3YiLCBzdHJpbmdzQXNGYWN0b3JzID0gRkFMU0UpDQpkZl9zdXJ2ZXkgJT4lIA0KICBzZWxlY3QoSGFyZHdhcmVQZXJzb25hbFByb2plY3RzU2VsZWN0LCBXb3JrSGFyZHdhcmVTZWxlY3QsIFdvcmtEYXRhc2V0U2l6ZSwgTWFqb3JTZWxlY3QsIFdvcmtEYXRhVHlwZVNlbGVjdCwgRmlyc3RUcmFpbmluZ1NlbGVjdCkgLT4gZGZfcHJvZmlsZQ0KDQoNCmRmX3Byb2ZpbGUkV29ya0hhcmR3YXJlU2VsZWN0ICU+JSANCiAgc3RyX3NwbGl0KCJcXCwiLCBzaW1wbGlmeSA9IFRSVUUpICU+JSANCiAgYXMudmVjdG9yKCkgJT4lIA0KICBz
dHJfc3BsaXQoIiBvciAiLCBzaW1wbGlmeSA9IFRSVUUpICU+JSANCiAgYXMudmVjdG9yKCkgJT4lIA0KICBzdHJfc3F1aXNoKCkgJT4lIA0KICB0YWJsZSgpICU+JSANCiAgYXMuZGF0YS5mcmFtZSgpIC0+IGRmX3dvcmtfaGFyZA0KDQpuYW1lcyhkZl93b3JrX2hhcmQpIDwtIGMoIndfaGFyZHdhcmUiLCAibiIpICANCg0KZGZfd29ya19oYXJkICU+JSANCiAgYXJyYW5nZSgtbikgJT4lIA0KICBzbGljZSgtMSwgLTEzKSAlPiUgDQogIG11dGF0ZShwaGFuX2N1bmcgPSBjKCJMYXB0b3AiLCAiTWFjYm9vayIsICJXb3Jrc3RhdGlvbiArIFByaXZhdGUgRGF0YWNlbnRlcnMiLCAiTGFwdG9wICsgQ2xvdWQgU2VydmVycyIsIA0KICAgICAgICAgICAgICAgICAgICAgICAiTGFwdG9wICsgQ2xvdWQgU2VydmVycyIsICJMYXB0b3AgKyBDbG91ZCBTZXJ2ZXJzIiwgIldvcmtzdGF0aW9uICsgTG9jYWwgSVQgU2VydmVycyIsIA0KICAgICAgICAgICAgICAgICAgICAgICAiVHJhZGl0aW9uYWwgV29ya3N0YXRpb24iLCAiV29ya3N0YXRpb24gKyBDbG91ZCBTZXJ2ZXJzIiwgIkdQVSBhY2NlbGVyYXRlZCBXb3Jrc3RhdGlvbiIsICJDVURBL0dQVSBHYW1pbmcgTGFwdG9wIikpICU+JSANCiAgZ3JvdXBfYnkocGhhbl9jdW5nKSAlPiUgDQogIHN1bW1hcmlzZShuID0gc3VtKG4pKSAlPiUgDQogIGFycmFuZ2UoLW4pIC0+IGRmX3BoYW5jdW5nX2xhbXZpZWMNCiAgDQoNCg0KZGZfcHJvZmlsZSRIYXJkd2FyZVBlcnNvbmFsUHJvamVjdHNTZWxlY3QgJT4lIA0KICBzdHJfc3BsaXQoIlxcLCIsIHNpbXBsaWZ5ID0gVFJVRSkgJT4lIA0KICBhcy52ZWN0b3IoKSAlPiUgDQogIHN0cl9zcGxpdCgiIG9yICIsIHNpbXBsaWZ5ID0gVFJVRSkgJT4lIA0KICBhcy52ZWN0b3IoKSAlPiUgDQogIHN0cl9zcXVpc2goKSAlPiUgDQogIHRhYmxlKCkgJT4lIA0KICBhcy5kYXRhLmZyYW1lKCkgLT4gZGZfd29ya19oYXJkDQoNCm5hbWVzKGRmX3dvcmtfaGFyZCkgPC0gYygid19oYXJkd2FyZSIsICJuIikgDQoNCmRmX3dvcmtfaGFyZCAlPiUgDQogIGFycmFuZ2UoLW4pICU+JSANCiAgc2xpY2UoLTEsIC0xMikgJT4lIA0KICBtdXRhdGUocGhhbl9jdW5nID0gYygiTWFjYm9vayIsICJMYXB0b3AgKyBDbG91ZCBTZXJ2ZXJzIiwgIkxhcHRvcCArIENsb3VkIFNlcnZlcnMiLCAiTGFwdG9wICsgQ2xvdWQgU2VydmVycyIsIA0KICAgICAgICAgICAgICAgICAgICAgICAiQ1VEQS9HUFUgR2FtaW5nIExhcHRvcCIsICJUcmFkaXRpb25hbCBXb3Jrc3RhdGlvbiIsICJMYXB0b3AiLCAiV29ya3N0YXRpb24gKyBMb2NhbCBJVCBTZXJ2ZXJzIiwgDQogICAgICAgICAgICAgICAgICAgICAgICJHUFUgYWNjZWxlcmF0ZWQgV29ya3N0YXRpb24iLCAiV29ya3N0YXRpb24gKyBDbG91ZCBTZXJ2ZXJzIikpICU+JSANCiAgZ3JvdXBfYnkocGhhbl9jdW5nKSAlPiUgDQogIHN1bW1hcmlzZShuID0gc3VtKG4pKSAlPiUgDQogIGFycmFuZ2UoLW4pICU+JSANCiAgYmluZF9yb3dzKGRhdGFfZnJhbWUo
cGhhbl9jdW5nID0gIldvcmtzdGF0aW9uICsgUHJpdmF0ZSBEYXRhY2VudGVycyIsIG4gPSAwKSkgLT4gZGZfcGhhbmN1bmdfY2FuaGFuDQoNCg0KbGlicmFyeShocmJydGhlbWVzKQ0KbGlicmFyeShncmlkRXh0cmEpDQpteV9jb2xvcnMgPC0gYygiIzNFNjA2RiIpDQpteV9mb250IDwtICJSb2JvdG8gQ29uZGVuc2VkIg0KDQoNCm15X2JhciA8LSBmdW5jdGlvbihkZl9zZWxlY3RlZCkgew0KICANCiAgbmFtZXMoZGZfc2VsZWN0ZWQpIDwtIGMoIk1vZGVsIiwgInZhbHVlIikNCiAgZGZfc2VsZWN0ZWQgJT4lIA0KICAgIGFycmFuZ2UodmFsdWUpICU+JQ0KICAgIG11dGF0ZShNb2RlbCA9IGZhY3RvcihNb2RlbCwgbGV2ZWxzID0gTW9kZWwpKSAtPiBtDQogIA0KICBtICU+JSANCiAgICBnZ3Bsb3QoYWVzKE1vZGVsLCB2YWx1ZSkpICsNCiAgICBnZW9tX2NvbChmaWxsID0gbXlfY29sb3JzLCBjb2xvciA9IG15X2NvbG9ycywgd2lkdGggPSAwLjYpICsNCiAgICBjb29yZF9mbGlwKCkgKw0KICAgIGdlb21fdGV4dChkYXRhID0gbSwgYWVzKGxhYmVsID0gdmFsdWUpLCBoanVzdCA9IDEuMSwgY29sb3IgPSAid2hpdGUiLCBzaXplID0gNiwgZmFtaWx5ID0gbXlfZm9udCkgKyANCiAgICB0aGVtZV9mdF9yYygpICsgDQogICAgdGhlbWUocGFuZWwuZ3JpZCA9IGVsZW1lbnRfYmxhbmsoKSkgKyANCiAgICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfYmxhbmsoKSkgKyANCiAgICB0aGVtZShheGlzLnRleHQueSA9IGVsZW1lbnRfdGV4dChjb2xvciA9ICJ3aGl0ZSIsIHNpemUgPSAxNSwgZmFtaWx5ID0gbXlfZm9udCkpICsgDQogICAgdGhlbWUocGxvdC50aXRsZSA9IGVsZW1lbnRfdGV4dChzaXplID0gMjIpKSArIA0KICAgIHRoZW1lKHBsb3QuY2FwdGlvbiA9IGVsZW1lbnRfdGV4dChzaXplID0gMTQsIGZhY2UgPSAiaXRhbGljIikpICsgDQogICAgc2NhbGVfeV9kaXNjcmV0ZShleHBhbmQgPSBjKDAuMDEsIDApKSArIA0KICAgIGxhYnMoeCA9IE5VTEwsIHkgPSBOVUxMKQ0KfQ0KDQoNCm15X2JhcihkZl9waGFuY3VuZ19jYW5oYW4pICsgDQogIGxhYnModGl0bGUgPSAiSGFyZHdhcmUgZm9yIFBlcnNvbmFsIFByb2plY3QiLCBjYXB0aW9uID0gIiIpIC0+IHAxDQoNCm15X2JhcihkZl9waGFuY3VuZ19sYW12aWVjKSArIA0KICBsYWJzKHRpdGxlID0gIkhhcmR3YXJlIGZvciBDb21wYW55IFByb2plY3QiLCBjYXB0aW9uID0gIkRhdGEgU291cmNlOiBEYXRhIFNvdXJjZTogS2FnZ2xlIERhdGEgU2NpZW5jZSBTdXJ2ZXkiKSAtPiBwMg0KDQoNCmdyaWQuYXJyYW5nZShwMSwgcDIsIG5yb3cgPSAxKQ0KDQoNCmBgYA0KDQo=