Data Source and Description

Kaggle conducted an industry-wide survey to establish a comprehensive view of the state of data science and machine learning. The survey received over 16,000 responses and we learned a ton about who is working with data, what’s happening at the cutting edge of machine learning across industries, and how new data scientists can best break into the field.

You can download the data here.

Median Compensation by Degree

Degree Level

R code for the two graphs:

# Setup ----
# NOTE: the original `rm(list = ls())` was removed on purpose. It only deletes
# user-created objects (attached packages and options are left as they were),
# so it never produced a truly clean session, and it silently wipes the
# workspace of anyone who sources this script. Restart R for a fresh session.
library(tidyverse)
library(hrbrthemes)

# Location of the Kaggle survey responses file.
# The default is the original author's local path; set the KAGGLE_SURVEY_CSV
# environment variable to use your own copy without editing the script.
survey_path <- Sys.getenv("KAGGLE_SURVEY_CSV", unset = "")
if (!nzchar(survey_path)) {
  survey_path <- "E:\\R_project\\Kaggle\\ds_survey\\multipleChoiceResponses.csv"
}

# Fail early with an actionable message instead of a bare connection error.
if (!file.exists(survey_path)) {
  stop(
    "Survey file not found: ", survey_path,
    ". Set KAGGLE_SURVEY_CSV to the path of multipleChoiceResponses.csv.",
    call. = FALSE
  )
}

# Text columns are kept as character vectors (not factors) so that they can be
# compared and cleaned as strings further down.
df_survey <- read.csv(survey_path, stringsAsFactors = FALSE)

# Distinct education answers, in order of first appearance in the raw survey.
educ <- unique(df_survey$FormalEducation)

# US respondents only, with a short degree label (`Educ`) and the compensation
# as a number in thousands (`Com`).
# NOTE(review): the labels are tied to the *position* of each answer in `educ`,
# i.e. to the row order of the CSV. Presumably educ[1] is the bachelor's
# answer, educ[2] the master's answer, and so on -- confirm with print(educ)
# before trusting the labels on a different extract of the data.
survey_from_us <- df_survey %>%
  filter(Country == "United States") %>%
  select(FormalEducation, CompensationAmount) %>%
  mutate(
    Educ = case_when(
      FormalEducation == educ[1] ~ "Bachelor",
      FormalEducation == educ[2] ~ "Master",
      FormalEducation == educ[3] ~ "Doctoral",
      FormalEducation == educ[4] ~ "Unknown",
      FormalEducation == educ[5] ~ "No Degree",
      FormalEducation == educ[6] ~ "No Degree",
      FormalEducation == educ[7] ~ "Professional",
      # Anything else (including a missing answer) is treated as unknown.
      TRUE ~ "Unknown"
    ),
    # Strip the commas, convert to numeric (values that cannot be parsed
    # become NA), then rescale to thousands.
    Com = as.numeric(str_replace_all(CompensationAmount, "\\,", "")) / 1000
  )


# Number of US respondents per degree label, largest group first.
df_degree <- survey_from_us %>%
  group_by(Educ) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  arrange(desc(n))


# Median compensation (in thousands) per degree label, highest median first.
# Only rows with Com <= 500 are used; the comparison also discards rows whose
# compensation is NA, because filter() keeps only rows where it is TRUE.
df_median <- survey_from_us %>%
  filter(Com <= 500) %>%
  group_by(Educ) %>%
  summarise(med_income = median(Com, na.rm = TRUE)) %>%
  ungroup() %>%
  arrange(desc(med_income))


# Compensation density per degree: one facet per degree, with the group median
# drawn as a dashed white line.
# The facet label is rebuilt as "<degree>: <median>" for both the density data
# and the median-line data, so that the two layers land in the same facets.
survey_from_us %>%
  filter(Com <= 500) %>%
  right_join(df_median, by = "Educ") %>%
  mutate(Educ = paste0(Educ, ": ", med_income)) %>%
  ggplot(aes(x = Com, colour = Educ, fill = Educ)) +
  geom_density(alpha = 0.15, show.legend = FALSE) +
  geom_vline(
    data = mutate(df_median, Educ = paste0(Educ, ": ", med_income)),
    mapping = aes(xintercept = med_income),
    colour = "white",
    linetype = "dashed"
  ) +
  # Free scales: every facet gets its own x and y range.
  facet_wrap(~ Educ, scales = "free") +
  scale_x_continuous(expand = c(0, 0)) +
  scale_fill_ft() +
  scale_color_ft() +
  theme_ft_rc() +
  # Tweaks must come after theme_ft_rc(), otherwise it would override them.
  theme(
    strip.text.x = element_text(colour = "white", size = 15),
    plot.title = element_text(size = 23),
    plot.subtitle = element_text(size = 14, colour = "grey90"),
    plot.caption = element_text(size = 12, face = "italic"),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_blank()
  ) +
  # NOTE(review): the 122.5 in the subtitle is hard-coded, not read from
  # df_median -- re-check it whenever the input data changes.
  labs(
    x = NULL,
    y = NULL,
    title = "Compensation Distribution by Degree for Data Scientists in The United States",
    subtitle = "Data Scientists with professional degree are earning best: about 50% of them have a median compensation of 122.5 (in thousands)",
    caption = "Data Source: Kaggle Data Science Survey"
  )




# Single colour used for both the bar fill and the bar outline.
my_colors <- c("#3E606F")

# Sort the degree labels by ascending count and freeze that order as the
# factor levels, so the bars are drawn in count order rather than
# alphabetically.
df_degee <- df_degree %>%
  arrange(n) %>%
  mutate(Educ = factor(Educ, levels = Educ))

# Horizontal bar chart of respondent counts per degree label.
ggplot(df_degee, aes(x = Educ, y = n)) +
  geom_col(fill = my_colors, colour = my_colors, width = 0.6) +
  coord_flip() +
  # Count labels sit just inside the end of each bar ...
  geom_text(
    data = filter(df_degee, Educ != "Professional"),
    mapping = aes(label = n),
    hjust = 1.1,
    colour = "white",
    size = 6
  ) +
  # ... except for "Professional", whose label is placed just past the bar end
  # (presumably because that bar is too short to hold the text -- confirm).
  geom_text(
    data = filter(df_degee, Educ == "Professional"),
    mapping = aes(label = n),
    hjust = -0.15,
    colour = "white",
    size = 6
  ) +
  scale_y_continuous(expand = c(0, 0)) +
  theme_ft_rc() +
  # Tweaks must come after theme_ft_rc(), otherwise it would override them.
  theme(
    panel.grid = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_text(size = 16, colour = "white"),
    plot.title = element_text(size = 23),
    plot.subtitle = element_text(size = 14, colour = "grey90"),
    plot.caption = element_text(size = 12, face = "italic")
  ) +
  labs(
    x = NULL,
    y = NULL,
    title = "Degree Level Required in The United States for Data Science",
    subtitle = "Master and Bachelor Degree are dominant among data scientists.",
    caption = "Data Source: Kaggle Data Science Survey"
  )
LS0tDQp0aXRsZTogIkthZ2dsZSBEYXRhIFNjaWVuY2UgU3VydmV5IChDb21wZW5zYXRpb24gYW5kIERlZ3JlZSkiDQphdXRob3I6ICJOZ3V5ZW4gQ2hpIER1bmciDQpzdWJ0aXRsZTogIkRhaWx5IEdyYXBoIFNlcmllcyINCm91dHB1dDoNCiAgaHRtbF9kb2N1bWVudDoNCiAgICBjb2RlX2Rvd25sb2FkOiB5ZXMNCiAgICAjIGNvZGVfZm9sZGluZzogaGlkZQ0KICAgIGhpZ2hsaWdodDogemVuYnVybg0KICAgIHRoZW1lOiBmbGF0bHkNCiAgICB0b2M6IHllcw0KICAgIHRvY19mbG9hdDogeWVzDQogIHdvcmRfZG9jdW1lbnQ6DQogICAgdG9jOiB5ZXMNCi0tLQ0KDQpgYGB7ciBzZXR1cCxpbmNsdWRlPUZBTFNFfQ0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFLCB3YXJuaW5nID0gRkFMU0UsIG1lc3NhZ2UgPSBGQUxTRSwgZmlnLnJldGluYT0yKQ0KYGBgDQoNCiMgRGF0YSBTb3VyY2UgYW5kIERlc2NyaXB0aW9uDQoNCkthZ2dsZSBjb25kdWN0ZWQgYW4gaW5kdXN0cnktd2lkZSBzdXJ2ZXkgdG8gZXN0YWJsaXNoIGEgY29tcHJlaGVuc2l2ZSB2aWV3IG9mIHRoZSBzdGF0ZSBvZiBkYXRhIHNjaWVuY2UgYW5kIG1hY2hpbmUgbGVhcm5pbmcuIFRoZSBzdXJ2ZXkgcmVjZWl2ZWQgb3ZlciAxNiwwMDAgcmVzcG9uc2VzIGFuZCB3ZSBsZWFybmVkIGEgdG9uIGFib3V0IHdobyBpcyB3b3JraW5nIHdpdGggZGF0YSwgd2hhdOKAmXMgaGFwcGVuaW5nIGF0IHRoZSBjdXR0aW5nIGVkZ2Ugb2YgbWFjaGluZSBsZWFybmluZyBhY3Jvc3MgaW5kdXN0cmllcywgYW5kIGhvdyBuZXcgZGF0YSBzY2llbnRpc3RzIGNhbiBiZXN0IGJyZWFrIGludG8gdGhlIGZpZWxkLg0KDQpZb3UgY2FuIGRvd2xvYWQgZGF0YSBbaGVyZV0oaHR0cHM6Ly93d3cua2FnZ2xlLmNvbS9rYWdnbGUva2FnZ2xlLXN1cnZleS0yMDE3L2RhdGEpLiANCg0KIyBNZWRpYW4gQ29tcGVuc2F0aW9uIGJ5IERlZ3JlZQ0KDQohW10oQzpcVXNlcnNcWmJvb2tcRGVza3RvcFxwaWNccDExLmpwZykNCg0KIyBEZWdyZWUgTGV2ZWwNCg0KIVtdKEM6XFVzZXJzXFpib29rXERlc2t0b3BccGljXHAxMC5qcGcpDQoNClIgY29kZXMgZm9yIHRoZSB0d28gZ3JhcGhzOiANCg0KYGBge3IsIGV2YWw9RkFMU0V9DQpybShsaXN0ID0gbHMoKSkNCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShocmJydGhlbWVzKQ0KDQpkZl9zdXJ2ZXkgPC0gcmVhZC5jc3YoIkU6XFxSX3Byb2plY3RcXEthZ2dsZVxcZHNfc3VydmV5XFxtdWx0aXBsZUNob2ljZVJlc3BvbnNlcy5jc3YiLCBzdHJpbmdzQXNGYWN0b3JzID0gRkFMU0UpDQoNCmVkdWMgPC0gZGZfc3VydmV5JEZvcm1hbEVkdWNhdGlvbiAlPiUgdW5pcXVlKCkNCg0KZGZfc3VydmV5ICU+JSANCiAgZmlsdGVyKENvdW50cnkgPT0gIlVuaXRlZCBTdGF0ZXMiKSAlPiUgDQogIHNlbGVjdChGb3JtYWxFZHVjYXRpb24sIENvbXBlbnNhdGlvbkFtb3VudCkgJT4lIA0KICBtdXRhdGUoRWR1YyA9IGNhc2Vfd2hlbihGb3JtYWxFZHVjYXRpb24gPT0gZWR1Y1sx
XSB+ICJCYWNoZWxvciIsIA0KICAgICAgICAgICAgICAgICAgICAgICAgICBGb3JtYWxFZHVjYXRpb24gPT0gZWR1Y1syXSB+ICJNYXN0ZXIiLCANCiAgICAgICAgICAgICAgICAgICAgICAgICAgRm9ybWFsRWR1Y2F0aW9uID09IGVkdWNbM10gfiAiRG9jdG9yYWwiLCANCiAgICAgICAgICAgICAgICAgICAgICAgICAgRm9ybWFsRWR1Y2F0aW9uID09IGVkdWNbNF0gfiAiVW5rbm93biIsIA0KICAgICAgICAgICAgICAgICAgICAgICAgICBGb3JtYWxFZHVjYXRpb24gPT0gZWR1Y1s1XSB+ICJObyBEZWdyZWUiLCANCiAgICAgICAgICAgICAgICAgICAgICAgICAgRm9ybWFsRWR1Y2F0aW9uID09IGVkdWNbNl0gfiAiTm8gRGVncmVlIiwgDQogICAgICAgICAgICAgICAgICAgICAgICAgIEZvcm1hbEVkdWNhdGlvbiA9PSBlZHVjWzddIH4gIlByb2Zlc3Npb25hbCIsIA0KICAgICAgICAgICAgICAgICAgICAgICAgICBUUlVFIH4gIlVua25vd24iKSkgJT4lIA0KICBtdXRhdGUoQ29tID0gc3RyX3JlcGxhY2VfYWxsKENvbXBlbnNhdGlvbkFtb3VudCwgIlxcLCIsICIiKSAlPiUgYXMubnVtZXJpYykgJT4lIA0KICBtdXRhdGUoQ29tID0gQ29tIC8gMTAwMCkgLT4gc3VydmV5X2Zyb21fdXMNCg0KDQpzdXJ2ZXlfZnJvbV91cyAlPiUgDQogIGdyb3VwX2J5KEVkdWMpICU+JSANCiAgY291bnQoKSAlPiUgDQogIHVuZ3JvdXAoKSAlPiUgDQogIGFycmFuZ2UoLW4pIC0+IGRmX2RlZ3JlZQ0KDQoNCnN1cnZleV9mcm9tX3VzICU+JSANCiAgZmlsdGVyKENvbSA8PSA1MDApICU+JSANCiAgZ3JvdXBfYnkoRWR1YykgJT4lIA0KICBzdW1tYXJpc2UobWVkX2luY29tZSA9IG1lZGlhbihDb20sIG5hLnJtID0gVFJVRSkpICU+JSANCiAgdW5ncm91cCgpICU+JSANCiAgYXJyYW5nZSgtbWVkX2luY29tZSkgLT4gZGZfbWVkaWFuDQoNCg0Kc3VydmV5X2Zyb21fdXMgJT4lIA0KICBmaWx0ZXIoQ29tIDw9IDUwMCkgJT4lIA0KICByaWdodF9qb2luKGRmX21lZGlhbiwgYnkgPSAiRWR1YyIpICU+JSANCiAgbXV0YXRlKEVkdWMgPSBwYXN0ZTAoRWR1YywgIjogIiwgbWVkX2luY29tZSkpICU+JSANCiAgZ2dwbG90KGFlcyhDb20sIGNvbG9yID0gRWR1YywgZmlsbCA9IEVkdWMpKSArIA0KICBnZW9tX2RlbnNpdHkoYWxwaGEgPSAwLjE1LCBzaG93LmxlZ2VuZCA9IEZBTFNFKSArIA0KICBmYWNldF93cmFwKH4gRWR1Yywgc2NhbGVzID0gImZyZWUiKSArIA0KICBnZW9tX3ZsaW5lKGRhdGEgPSBkZl9tZWRpYW4gJT4lIG11dGF0ZShFZHVjID0gcGFzdGUwKEVkdWMsICI6ICIsIG1lZF9pbmNvbWUpKSwgDQogICAgICAgICAgICAgYWVzKHhpbnRlcmNlcHQgPSBtZWRfaW5jb21lKSwgY29sb3IgPSAid2hpdGUiLCBsaW5ldHlwZSA9ICJkYXNoZWQiKSArIA0KICB0aGVtZV9mdF9yYygpICsgDQogIHNjYWxlX2ZpbGxfZnQoKSArIA0KICBzY2FsZV9jb2xvcl9mdCgpICsgDQogIHNjYWxlX3hfY29udGludW91cyhleHBhbmQgPSBjKDAsIDApKSArIA0KICB0aGVtZShzdHJpcC50
ZXh0LnggPSBlbGVtZW50X3RleHQoY29sb3IgPSAid2hpdGUiLCBzaXplID0gMTUpKSArIA0KICB0aGVtZShwbG90LnRpdGxlID0gZWxlbWVudF90ZXh0KHNpemUgPSAyMykpICsgDQogIHRoZW1lKHBsb3Quc3VidGl0bGUgPSBlbGVtZW50X3RleHQoc2l6ZSA9IDE0LCBjb2xvciA9ICJncmV5OTAiKSkgKyANCiAgdGhlbWUocGxvdC5jYXB0aW9uID0gZWxlbWVudF90ZXh0KHNpemUgPSAxMiwgZmFjZSA9ICJpdGFsaWMiKSkgKyANCiAgdGhlbWUoYXhpcy50ZXh0LnkgPSBlbGVtZW50X2JsYW5rKCkpICsgDQogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KHNpemUgPSAxNCkpICsgDQogIGxhYnMoeCA9IE5VTEwsIHkgPSBOVUxMLCANCiAgICAgICB0aXRsZSA9ICJDb21wZW5zYXRpb24gRGlzdHJpYnV0aW9uIGJ5IERlZ3JlZSBmb3IgRGF0YSBTY2llbnRpc3RzIGluIFRoZSBVbml0ZWQgU3RhdGVzIiwgDQogICAgICAgc3VidGl0bGUgPSAiRGF0YSBTY2llbnRpc3RzIHdpdGggcHJvZmVzc2lvbmFsIGRlZ3JlZSBhcmUgZWFybmluZyBiZXN0OiBhYm91dCA1MCUgb2YgdGhlbSBoYXZlIGEgbWVkaWFuIGNvbXBlbnNhdGlvbiBvZiAxMjIuNSAoaW4gdGhvdXNhbmRzKSIsIA0KICAgICAgIGNhcHRpb24gPSAiRGF0YSBTb3VyY2U6IEthZ2dsZSBEYXRhIFNjaWVuY2UgU3VydmV5IikNCg0KDQoNCg0KbXlfY29sb3JzIDwtIGMoIiMzRTYwNkYiKQ0KDQpkZl9kZWdyZWUgJT4lIA0KICBhcnJhbmdlKG4pICU+JSANCiAgbXV0YXRlKEVkdWMgPSBmYWN0b3IoRWR1YywgbGV2ZWxzID0gRWR1YykpIC0+IGRmX2RlZ2VlDQoNCmRmX2RlZ2VlICU+JSANCiAgZ2dwbG90KGFlcyhFZHVjLCBuKSkgKw0KICBnZW9tX2NvbChmaWxsID0gbXlfY29sb3JzLCBjb2xvciA9IG15X2NvbG9ycywgd2lkdGggPSAwLjYpICsNCiAgY29vcmRfZmxpcCgpICsNCiAgZ2VvbV90ZXh0KGFlcyhsYWJlbCA9IG4pLCBoanVzdCA9IDEuMSwgY29sb3IgPSAid2hpdGUiLCBzaXplID0gNiwgZGF0YSA9IGRmX2RlZ2VlICU+JSBmaWx0ZXIoRWR1YyAhPSAiUHJvZmVzc2lvbmFsIikpICsgDQogIGdlb21fdGV4dChhZXMobGFiZWwgPSBuKSwgaGp1c3QgPSAtMC4xNSwgY29sb3IgPSAid2hpdGUiLCBzaXplID0gNiwgZGF0YSA9IGRmX2RlZ2VlICU+JSBmaWx0ZXIoRWR1YyA9PSAiUHJvZmVzc2lvbmFsIikpICsgDQogIHRoZW1lX2Z0X3JjKCkgKyANCiAgdGhlbWUocGFuZWwuZ3JpZCA9IGVsZW1lbnRfYmxhbmsoKSkgKyANCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X2JsYW5rKCkpICsgDQogIHRoZW1lKGF4aXMudGV4dC55ID0gZWxlbWVudF90ZXh0KHNpemUgPSAxNiwgY29sb3IgPSAid2hpdGUiKSkgKyANCiAgdGhlbWUocGxvdC50aXRsZSA9IGVsZW1lbnRfdGV4dChzaXplID0gMjMpKSArIA0KICB0aGVtZShwbG90LnN1YnRpdGxlID0gZWxlbWVudF90ZXh0KHNpemUgPSAxNCwgY29sb3IgPSAiZ3JleTkwIikpICsgDQogIHRoZW1lKHBsb3QuY2FwdGlvbiA9IGVs
ZW1lbnRfdGV4dChzaXplID0gMTIsIGZhY2UgPSAiaXRhbGljIikpICsgDQogIHNjYWxlX3lfY29udGludW91cyhleHBhbmQgPSBjKDAsIDApKSArIA0KICBsYWJzKHggPSBOVUxMLCB5ID0gTlVMTCwgDQogICAgICAgdGl0bGUgPSAiRGVncmVlIExldmVsIFJlcXVpcmVkIGluIFRoZSBVbml0ZWQgU3RhdGVzIGZvciBEYXRhIFNjaWVuY2UiLCANCiAgICAgICBzdWJ0aXRsZSA9ICJNYXN0ZXIgYW5kIEJhY2hlbG9yIERlZ3JlZSBhcmUgZG9taW5hbnQgYW1vbmcgZGF0YSBzY2llbnRpc3RzLiIsIA0KICAgICAgIGNhcHRpb24gPSAiRGF0YSBTb3VyY2U6IEthZ2dsZSBEYXRhIFNjaWVuY2UgU3VydmV5IikNCiAgDQoNCmBgYA0KDQoNCg==