This part analyses which skills are most important for data scientists in Australia.
Using the data from: https://www.seek.com.au/
library(dplyr)
library(tidyr)
library(knitr)
library(ggplot2)
library(plotly)
# Read the seekJobs.csv file from the home directory and preview its columns.
australiajob <- utils::read.csv(file = "~/seekJobs.csv")
dplyr::glimpse(australiajob)
## Observations: 440
## Variables: 11
## $ X <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,...
## $ detailUrl <fctr> https://www.seek.com.au/job/33028220, https://www...
## $ jobTitle <fctr> Data Scientist, Data Scientist, Data Scientist, D...
## $ company <fctr> Charterhouse, Charterhouse, Charterhouse, Charter...
## $ location <fctr> Melbourne, Melbourne, Melbourne, Melbourne, Melbo...
## $ date <fctr> 2017-03-15T04:01:41Z, 2017-03-15T04:01:41Z, 2017-...
## $ Salary.From <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Salary.To <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Id <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,...
## $ Skill <fctr> Python, programming, analytics, mathematics, mach...
## $ Count <int> 1, 0, 2, 0, 1, 2, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,...
# List the distinct values found in the Skill column.
unique(australiajob[["Skill"]])
## [1] Python programming analytics mathematics
## [5] machine learning team Cooperation statistics
## [9] SQL communication big data design
## [13] visualization Hadoop Java research
## [17] creative MATLAB SAS R
## [21] Modelling Phd
## 22 Levels: R analytics big data communication Cooperation ... visualization
# List the distinct values found in the Count column.
unique(australiajob[["Count"]])
## [1] 1 0 2 3 6 5 4 11 7 8
# Sum Count per Skill, then rank the skills from the largest total downwards.
audf <- aggregate(australiajob$Count, list(australiajob$Skill), sum)
colnames(audf) <- c("Skills", "TotalNumber")
audftop <- arrange(audf, desc(TotalNumber))
# Drop the top-ranked row and keep every remaining one. The original upper
# bound `len` was never defined in this script, so the slice failed with
# "object 'len' not found"; negative indexing also stays correct when the
# table has fewer than two rows (a `2:n` range would count backwards).
# NOTE(review): the recorded table that follows lists 19 rows, so `len` was
# presumably 20 -- confirm whether the trailing rows should be dropped too.
audftop <- audftop[-1, , drop = FALSE]
# Renumber the rows from 1; the order is already descending by TotalNumber.
rownames(audftop) <- NULL
audftop
## Skills TotalNumber
## 1 team 51
## 2 SQL 38
## 3 Python 31
## 4 machine learning 18
## 5 Hadoop 15
## 6 communication 13
## 7 Java 10
## 8 big data 9
## 9 design 9
## 10 SAS 9
## 11 statistics 9
## 12 R 8
## 13 creative 8
## 14 research 4
## 15 Modelling 2
## 16 programming 2
## 17 mathematics 1
## 18 Cooperation 0
## 19 MATLAB 0
# Horizontal bar chart of TotalNumber for each skill in `audftop`,
# labelled with the skill names.
names <- audftop[["Skills"]]
barplot(
  height = audftop[["TotalNumber"]],
  names.arg = names,
  horiz = TRUE,
  las = 1,
  main = "Total number of skills",
  cex.axis = 0.5,
  cex.names = 0.5
)
# The top three skills are "team", "SQL" and "Python".
# Bar chart of the first three rows of the ranked skill table.
# head() returns fewer rows instead of NA-filled ones when the table is short.
audftop3 <- head(audftop, 3)
# Columns are referenced by bare name inside aes() so they are looked up in
# the plot data (not via `audftop3$...`), and `fill` is mapped to the Skills
# column so each bar gets its own colour instead of the constant string
# 'Skills'.
ggplot(data = audftop3, aes(x = Skills, y = TotalNumber, fill = Skills)) +
  geom_bar(stat = "identity")
# Total Count per location, sorted from the largest total to the smallest.
audflo <- setNames(
  aggregate(
    x = australiajob[["Count"]],
    by = list(australiajob[["location"]]),
    FUN = sum
  ),
  c("Location", "TotalNumber")
)
audflo <- dplyr::arrange(audflo, dplyr::desc(TotalNumber))
audflo
## Location TotalNumber
## 1 Sydney 122
## 2 Brisbane 88
## 3 Melbourne 63
## 4 Perth 26
## 5 Adelaide 9
# Sydney has the largest total of any Australian location (TotalNumber is the sum of Count per location, not a count of distinct jobs).
# Horizontal bar chart of TotalNumber for each location in `audflo`,
# labelled with the location names.
names <- audflo[["Location"]]
barplot(
  height = audflo[["TotalNumber"]],
  names.arg = names,
  horiz = TRUE,
  las = 1,
  main = "Location for number of jobs",
  cex.axis = 0.5,
  cex.names = 0.5
)
# Bar chart of TotalNumber per location. `fill` is mapped to the Location
# column so each bar gets its own colour; the original mapped the constant
# string 'Location', which gave every bar the same colour and a one-entry
# legend.
ggplot(data = audflo, aes(x = Location, y = TotalNumber, fill = Location)) +
  geom_bar(stat = "identity")
Result: 1. The top three skills required for data scientists are “analytics”, “team” and “SQL”.