suppressPackageStartupMessages(library('XML'))
suppressPackageStartupMessages(library('rvest'))
library(stringr)
library(ggplot2)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(RMySQL)
## Loading required package: DBI
Load the data into R:
# Read the cleaned job-skills CSV from GitHub and convert it to a tibble.
# check.names = FALSE keeps headers containing spaces (e.g. `Skill Set`)
# exactly as written in the file; stringsAsFactors = FALSE keeps the text
# columns as character vectors.
# as_tibble() is used instead of tbl_df(), which is deprecated in
# dplyr >= 1.0.0.
jobdata <- as_tibble(
  read.csv(
    "https://raw.githubusercontent.com/ntlrs/Data-607-Project-3/master/cleandata",
    stringsAsFactors = FALSE,
    check.names = FALSE
  )
)
# Preview the first rows to confirm the columns were read as expected.
head(jobdata)
## # A tibble: 6 x 3
## Industry `Skill Set` `Skill Type`
## <chr> <chr> <chr>
## 1 Marketing Soft Skill Team Player
## 2 Marketing Programming/Technical Database Management
## 3 Marketing Programming/Technical Java
## 4 Marketing Programming/Technical SPSS
## 5 Marketing Programming/Technical SAS
## 6 Marketing Programming/Technical STATA
Filter the data to the Marketing industry and count how often each skill type appears:
# Tally how often each `Skill Type` occurs among the rows whose Industry is
# "Marketing"; sort = TRUE orders the result from most to least frequent.
mrktdata_count <- count(
  filter(jobdata, Industry == "Marketing"),
  `Skill Type`,
  sort = TRUE
)
mrktdata_count
## # A tibble: 47 x 2
## `Skill Type` n
## <chr> <int>
## 1 Predictive Anaytics 8
## 2 SQL 8
## 3 Python 7
## 4 Build Statisical Learning Models 6
## 5 Good Communication 6
## 6 R 6
## 7 Tableau 5
## 8 Hadoop 4
## 9 SAS 4
## 10 Team Player 4
## # ... with 37 more rows
# Keep the ten most frequent skills. mrktdata_count is sorted by descending
# count, so its first rows are the top skills. head() simply returns fewer
# rows when fewer than ten skill types exist, whereas [1:10, ] would index
# past the end of the table.
marketingdata <- head(mrktdata_count, 10)
marketingdata
## # A tibble: 10 x 2
## `Skill Type` n
## <chr> <int>
## 1 Predictive Anaytics 8
## 2 SQL 8
## 3 Python 7
## 4 Build Statisical Learning Models 6
## 5 Good Communication 6
## 6 R 6
## 7 Tableau 5
## 8 Hadoop 4
## 9 SAS 4
## 10 Team Player 4
Plot the top 10 skills for marketing:
# Bar chart of the top marketing skills: one bar per skill type, bar height
# equal to its count.
# Columns are referenced by bare name inside aes() so ggplot2 resolves them
# from the `data` argument; using marketingdata$col there bypasses the data
# argument and is discouraged by ggplot2.
ggplot(marketingdata, aes(x = `Skill Type`, y = n, fill = `Skill Type`)) +
  # geom_col() draws bars from the y values directly
  # (equivalent to geom_bar(stat = "identity")).
  geom_col() +
  xlab("Skills") +
  ylab("Frequency") +
  theme(
    # The fill colour duplicates the x axis, so the legend is hidden.
    legend.position = "none",
    # Rotate the skill names so long labels do not overlap.
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  ggtitle("Top Skills of Data Scientist in Marketing")
Conclusion:
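The top 10 skills for marketing include both technical skills (e.g., SQL, Python, R, Tableau) and soft skills (e.g., Good Communication, Team Player), so a good mix of soft skills and technical skills is required to be a data scientist in marketing.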