This part analyses which skills are most important for data scientists in Australia.

Using job postings collected from: https://www.seek.com.au/

library(dplyr)
library(tidyr)
library(knitr)
library(ggplot2)
library(plotly)

# Load the Seek job-postings CSV from the home directory and print a compact
# overview of its columns.
australiajob <- read.csv(file = "~/seekJobs.csv")
dplyr::glimpse(australiajob)
## Observations: 440
## Variables: 11
## $ X           <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,...
## $ detailUrl   <fctr> https://www.seek.com.au/job/33028220, https://www...
## $ jobTitle    <fctr> Data Scientist, Data Scientist, Data Scientist, D...
## $ company     <fctr> Charterhouse, Charterhouse, Charterhouse, Charter...
## $ location    <fctr> Melbourne, Melbourne, Melbourne, Melbourne, Melbo...
## $ date        <fctr> 2017-03-15T04:01:41Z, 2017-03-15T04:01:41Z, 2017-...
## $ Salary.From <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Salary.To   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Id          <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,...
## $ Skill       <fctr> Python, programming, analytics, mathematics, mach...
## $ Count       <int> 1, 0, 2, 0, 1, 2, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,...

Aggregation

What are the most important skills required for data scientists?

# Distinct skill labels present in the data.
unique(australiajob$Skill)
##  [1] Python           programming      analytics        mathematics     
##  [5] machine learning team             Cooperation      statistics      
##  [9] SQL              communication    big data         design          
## [13] visualization    Hadoop           Java             research        
## [17] creative         MATLAB           SAS               R              
## [21] Modelling        Phd             
## 22 Levels:  R  analytics big data communication Cooperation ... visualization
# Distinct per-row mention counts.
unique(australiajob$Count)
##  [1]  1  0  2  3  6  5  4 11  7  8
# Sum the mention counts for each skill.
audf <- aggregate(australiajob$Count, list(australiajob$Skill), sum)
colnames(audf) <- c("Skills", "TotalNumber")

# Rank skills by total mentions (descending) and drop the top-ranked row.
# The earlier `audftop[2:len, ]` slice relied on `len`, which is never defined
# in this script; `slice(-1)` drops the same first row without needing an
# upper bound, keeps every remaining skill (including zero-count ones), and
# renumbers the rows from 1, so no second sort is required.
# NOTE(review): excluding the top-ranked skill looks deliberate - confirm.
audftop <- slice(arrange(audf, desc(TotalNumber)), -1)
audftop
##              Skills TotalNumber
## 1              team          51
## 2               SQL          38
## 3            Python          31
## 4  machine learning          18
## 5            Hadoop          15
## 6     communication          13
## 7              Java          10
## 8          big data           9
## 9            design           9
## 10              SAS           9
## 11       statistics           9
## 12               R            8
## 13         creative           8
## 14         research           4
## 15        Modelling           2
## 16      programming           2
## 17      mathematics           1
## 18      Cooperation           0
## 19           MATLAB           0

Which skills are mentioned most often in the job descriptions?

# Horizontal bar chart of the total mentions per skill, one bar per row of
# audftop, labelled with the skill name.
names <- audftop[["Skills"]]
barplot(
  height = audftop[["TotalNumber"]],
  names.arg = names,
  horiz = TRUE,
  las = 1,
  cex.names = 0.5,
  cex.axis = 0.5,
  main = "Total number of skills"
)

# The three highest-ranked skills in audftop are "team", "SQL" and "Python".
# head() returns at most three rows and never pads with NA rows when fewer
# are available.
audftop3 <- head(audftop, 3)

# Bar chart of the top three skills. Columns are referenced by bare name
# inside aes() (no `audftop3$`), and `fill` is mapped to the Skills column;
# the quoted string 'Skills' was a constant, which gave every bar the same
# colour and a meaningless legend entry.
ggplot(data = audftop3, aes(x = Skills, y = TotalNumber, fill = Skills)) +
  geom_bar(stat = "identity")

Which locations have the most jobs for data scientists in Australia?

# Sum the Count column for each location, label the two result columns, and
# rank the locations from the largest total to the smallest.
# NOTE(review): Count holds per-skill mention counts, so TotalNumber is the
# total of skill mentions per location rather than a count of distinct
# postings - confirm this is the intended measure of "number of jobs".
audflo <- setNames(
  aggregate(
    x = australiajob["Count"],
    by = list(australiajob$location),
    FUN = sum
  ),
  c("Location", "TotalNumber")
)
audflo <- arrange(audflo, desc(TotalNumber))
audflo
##    Location TotalNumber
## 1    Sydney         122
## 2  Brisbane          88
## 3 Melbourne          63
## 4     Perth          26
## 5  Adelaide           9
# The largest number of jobs is located in Sydney, Australia.

Where is the total number of jobs for data scientists in Australia highest?

# Horizontal bar chart of the per-location totals, one bar per row of audflo,
# labelled with the location name.
names <- audflo[["Location"]]
barplot(
  height = audflo[["TotalNumber"]],
  names.arg = names,
  horiz = TRUE,
  las = 1,
  cex.names = 0.5,
  cex.axis = 0.5,
  main = "Location for number of jobs"
)

# Bar chart of the per-location totals. `fill` is mapped to the Location
# column (unquoted); the quoted string 'Location' was a constant, which gave
# every bar the same colour and a meaningless legend entry.
ggplot(data = audflo, aes(x = Location, y = TotalNumber, fill = Location)) +
  geom_bar(stat = "identity")

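Result: 1. The top three skills required for data scientists are “analytics”, “team” and “SQL” (“analytics” ranks first overall; it is excluded from the ranked table above, which therefore starts at “team”).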

  2. The largest number of jobs is located in Sydney, Australia.