require(networkD3)
## Loading required package: networkD3
require(rattle)
## Loading required package: rattle
## Rattle: A free graphical interface for data mining with R.
## Version 4.1.0 Copyright (c) 2006-2015 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
require(partykit)
## Loading required package: partykit
## Loading required package: grid
require(htmlTable)
## Loading required package: htmlTable
require(ggplot2)
## Loading required package: ggplot2
require(plotly)
## Loading required package: plotly
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
require(treemap)
## Loading required package: treemap
require(data.tree)
## Loading required package: data.tree
require(knitr)
## Loading required package: knitr
require(stringr)
## Loading required package: stringr
require(RMySQL)
## Loading required package: RMySQL
## Loading required package: DBI
require(tidyr)
## Loading required package: tidyr
require(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
require(rpart)
## Loading required package: rpart
require(rpart.plot)
## Loading required package: rpart.plot
require(RColorBrewer)
## Loading required package: RColorBrewer
# Fetch the tidied Paysa salary listing, then keep only the columns used for
# the tree and the "Base Salary" rows paying at least 200,000.
f <- read.csv(
  "https://raw.githubusercontent.com/kylegilde/D607-Group-Project/master/tidyPaysa.csv"
)
df <- data.frame(f)
# Go through character first so a factor column is converted by its labels,
# not its integer codes; entries that are not numbers become NA (with a warning).
df[["Salary"]] <- as.numeric(as.character(df[["Salary"]]))
## Warning: NAs introduced by coercion
df <- df %>%
  select(Position, Company, State, City, Skills, Type, Salary) %>%
  # A comma between conditions is a logical AND; rows where Salary is NA are dropped.
  filter(Salary >= 200000, Type == "Base Salary")
# Structure overview of the filtered data (column types and factor levels).
utils::str(df)
## 'data.frame': 38 obs. of 7 variables:
## $ Position: Factor w/ 173 levels "Area Lead, Research Data Science Facilitation",..: 72 97 97 89 70 62 71 72 97 97 ...
## $ Company : Factor w/ 139 levels "","AbbVie","About.com",..: 62 9 9 74 10 1 41 62 9 9 ...
## $ State : Factor w/ 11 levels "CA","CO","CT",..: 1 1 1 11 11 10 1 1 1 1 ...
## $ City : Factor w/ 54 levels "Arlington","Atlanta",..: 24 42 42 4 44 51 38 24 42 42 ...
## $ Skills : Factor w/ 95 levels " Algorithm Design",..: 76 80 80 80 88 88 74 10 16 16 ...
## $ Type : Factor w/ 4 levels "Annual Salary",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Salary : num 253000 204000 204000 200000 265000 240000 202000 253000 204000 204000 ...
# Preview the first six rows.
utils::head(df, n = 6L)
## Position Company State
## 1 Head of SBG Data Science Engineering Intuit CA
## 2 Principal Lead Data Scientist Akamai Technologies CA
## 3 Principal Lead Data Scientist Akamai Technologies CA
## 4 Principal Data Scientist Microsoft WA
## 5 Head of Data Science and Engineering Amazon WA
## 6 Director VA
## City Skills Type Salary
## 1 Mountain View Distributed Systems Base Salary 253000
## 2 Santa Clara Hadoop Base Salary 204000
## 3 Santa Clara Hadoop Base Salary 204000
## 4 Bellevue Hadoop Base Salary 200000
## 5 Seattle Product Management Base Salary 265000
## 6 Vienna Product Management Base Salary 240000
# Build one "/"-delimited path per row. as.Node() reads the pathString column
# and turns each path segment into a tree level:
# root / State / City / Company / Position / Salary / Skill.
df$pathString <- paste(
  "Skills with Salary > 200k",
  df$State,
  df$City,
  df$Company,
  df$Position,
  # Format explicitly: paste() would otherwise coerce a round value such as
  # 200000 to "2e+05", producing a node labelled in scientific notation.
  format(df$Salary, scientific = FALSE, trim = TRUE),
  # Some skill labels carry leading whitespace; trim so duplicates collapse.
  str_trim(df$Skills),
  sep = "/"
)
pop <- as.Node(df)
#Prune(pop, pruneFun = function(x) !x$isLeaf || x$Type == "Base Salary")
# Print the hierarchy, capped at 150 rows of output.
print(pop, limit = 150)
## levelName
## 1 Skills with Salary > 200k
## 2 ¦--CA
## 3 ¦ ¦--Mountain View
## 4 ¦ ¦ °--Intuit
## 5 ¦ ¦ °--Head of SBG Data Science Engineering
## 6 ¦ ¦ °--253000
## 7 ¦ ¦ ¦--Distributed Systems
## 8 ¦ ¦ ¦--Big Data
## 9 ¦ ¦ ¦--Algorithms
## 10 ¦ ¦ ¦--Data Science
## 11 ¦ ¦ ¦--Strategy
## 12 ¦ ¦ °--Databases
## 13 ¦ ¦--Santa Clara
## 14 ¦ ¦ °--Akamai Technologies
## 15 ¦ ¦ °--Principal Lead Data Scientist
## 16 ¦ ¦ °--204000
## 17 ¦ ¦ ¦--Hadoop
## 18 ¦ ¦ ¦--Data Mining
## 19 ¦ ¦ ¦--Machine Learning
## 20 ¦ ¦ ¦--Big Data
## 21 ¦ ¦ ¦--Python
## 22 ¦ ¦ ¦--Algorithms
## 23 ¦ ¦ ¦--Matlab
## 24 ¦ ¦ °--Ruby
## 25 ¦ °--San Francisco
## 26 ¦ °--First Republic Bank
## 27 ¦ °--Head of Data Science, Liquidity
## 28 ¦ °--202000
## 29 ¦ ¦--Data Science
## 30 ¦ ¦--Analytics
## 31 ¦ °--Statistics
## 32 ¦--WA
## 33 ¦ ¦--Bellevue
## 34 ¦ ¦ °--Microsoft
## 35 ¦ ¦ °--Principal Data Scientist
## 36 ¦ ¦ °--200000
## 37 ¦ ¦ ¦--Hadoop
## 38 ¦ ¦ ¦--Data Mining
## 39 ¦ ¦ ¦--Optimization
## 40 ¦ ¦ ¦--Algorithms
## 41 ¦ ¦ ¦--MapReduce
## 42 ¦ ¦ °--C++
## 43 ¦ °--Seattle
## 44 ¦ °--Amazon
## 45 ¦ °--Head of Data Science and Engineering
## 46 ¦ °--265000
## 47 ¦ ¦--Product Management
## 48 ¦ ¦--Machine Learning
## 49 ¦ ¦--Data Science
## 50 ¦ ¦--Analytics
## 51 ¦ °--Statistics
## 52 °--VA
## 53 °--Vienna
## 54 °--Director
## 55 °--240000
## 56 ¦--Product Management
## 57 ¦--Machine Learning
## 58 ¦--AWS
## 59 ¦--Management
## 60 ¦--Relational Databases
## 61 °--Scala
# Graphviz styling for the tree: top-to-bottom layout, thick grey edges, and
# rounded green-yellow boxes with the default tooltip on every node.
SetGraphStyle(pop, rankdir = "TB")
SetEdgeStyle(pop, arrowhead = "vee", color = "grey35", penwidth = 5)
SetNodeStyle(
  pop,
  style = "filled,rounded", shape = "box", fillcolor = "GreenYellow",
  fontname = "helvetica", tooltip = GetDefaultTooltip
)
# Highlight the California subtree. Guarded because the CA child only exists
# when the filtered data contains CA rows. penwidth is a plain number in
# Graphviz, and `cex` is a base-graphics parameter with no Graphviz meaning,
# so it is not passed.
if (!is.null(pop$CA)) {
  SetNodeStyle(pop$CA, fillcolor = "LightBlue", penwidth = 5)
}
# The tree is rendered through Graphviz rather than a base graphics device, so
# no device is opened here: quartz() is macOS-only and takes sizes in inches,
# and `cex` does not apply to this plot method.
plot(pop)
# Draw the same tree as a radial network with networkD3. The tree is first
# converted to a nested list (unname = TRUE asks for unnamed child lists).
useRtreeList <- ToListExplicit(x = pop, unname = TRUE)
radialNetwork(List = useRtreeList)
# Set the knitr chunk option `echo` so chunk source is shown with its output.
# NOTE(review): chunk options normally apply only to chunks evaluated after the
# call, so at the end of the script this presumably has no effect — confirm
# whether it belongs in a setup chunk at the top.
knitr::opts_chunk$set(echo = TRUE)