require(networkD3)
## Loading required package: networkD3
require(rattle)
## Loading required package: rattle
## Rattle: A free graphical interface for data mining with R.
## Version 4.1.0 Copyright (c) 2006-2015 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
require(partykit)
## Loading required package: partykit
## Loading required package: grid
require(htmlTable)
## Loading required package: htmlTable
require(ggplot2)
## Loading required package: ggplot2
require(plotly)
## Loading required package: plotly
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
require(treemap)
## Loading required package: treemap
require(data.tree)
## Loading required package: data.tree
require(knitr)
## Loading required package: knitr
require(stringr)
## Loading required package: stringr
require(RMySQL)
## Loading required package: RMySQL
## Loading required package: DBI
require(tidyr)
## Loading required package: tidyr
require(dplyr)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
require(rpart)
## Loading required package: rpart
require(rpart.plot)
## Loading required package: rpart.plot
require(RColorBrewer)
## Loading required package: RColorBrewer
# Read the tidied Paysa salary table straight from the project repository.
f <- read.csv(
  file = "https://raw.githubusercontent.com/kylegilde/D607-Group-Project/master/tidyPaysa.csv"
)
df <- data.frame(f)

# Convert Salary via character so that, if the column was read as a factor,
# the labels (not the internal level codes) become the numbers. Entries that
# are not numeric turn into NA, which is what the recorded warning reports.
df[["Salary"]] <- as.numeric(as.character(df[["Salary"]]))
## Warning: NAs introduced by coercion

# Keep only the columns used for the tree, and only base-salary rows of at
# least 200k. filter() drops rows where a condition is NA, so the NA salaries
# produced above are removed here as well.
df <- df %>%
  select(Position, Company, State, City, Skills, Type, Salary) %>%
  filter(Salary >= 200000, Type == "Base Salary")

# Quick structural check and preview of the filtered data.
str(df)
## 'data.frame':    38 obs. of  7 variables:
##  $ Position: Factor w/ 173 levels "Area Lead, Research Data Science Facilitation",..: 72 97 97 89 70 62 71 72 97 97 ...
##  $ Company : Factor w/ 139 levels "","AbbVie","About.com",..: 62 9 9 74 10 1 41 62 9 9 ...
##  $ State   : Factor w/ 11 levels "CA","CO","CT",..: 1 1 1 11 11 10 1 1 1 1 ...
##  $ City    : Factor w/ 54 levels "Arlington","Atlanta",..: 24 42 42 4 44 51 38 24 42 42 ...
##  $ Skills  : Factor w/ 95 levels " Algorithm Design",..: 76 80 80 80 88 88 74 10 16 16 ...
##  $ Type    : Factor w/ 4 levels "Annual Salary",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ Salary  : num  253000 204000 204000 200000 265000 240000 202000 253000 204000 204000 ...
head(df)
##                               Position             Company State
## 1 Head of SBG Data Science Engineering              Intuit    CA
## 2        Principal Lead Data Scientist Akamai Technologies    CA
## 3        Principal Lead Data Scientist Akamai Technologies    CA
## 4             Principal Data Scientist           Microsoft    WA
## 5 Head of Data Science and Engineering              Amazon    WA
## 6                             Director                        VA
##            City              Skills        Type Salary
## 1 Mountain View Distributed Systems Base Salary 253000
## 2   Santa Clara              Hadoop Base Salary 204000
## 3   Santa Clara              Hadoop Base Salary 204000
## 4      Bellevue              Hadoop Base Salary 200000
## 5       Seattle  Product Management Base Salary 265000
## 6        Vienna  Product Management Base Salary 240000
# Build one "/"-delimited path per row; as.Node() reads the pathString column
# and turns each path segment into a tree level:
#   root > State > City > Company > Position > Salary > Skill
#
# Salary is formatted explicitly instead of being pasted as a raw double.
# paste() converts numbers with as.character(), which switches to scientific
# notation when that is shorter, so a 200000 salary produced a node labelled
# "2e+05". format(scientific = FALSE) keeps the plain digits and trim = TRUE
# prevents space padding inside the path. The console output recorded after
# this block comes from the earlier run and still shows the old label.
#
# NOTE(review): a "/" inside any of the pasted fields would be read as an
# extra level separator -- confirm the data contains none.
df$pathString <- paste(
  "Skills with Salary > 200k",
  df$State,
  df$City,
  df$Company,
  df$Position,
  format(df$Salary, scientific = FALSE, trim = TRUE),
  # Skills values carry leading whitespace in the raw data; trim it so the
  # same skill does not end up under two differently-spelled node names.
  str_trim(df$Skills),
  sep = "/"
)

pop <- as.Node(df)

# Disabled: pruning by Type is not needed because df was already filtered to
# "Base Salary" rows before the tree was built.
#Prune(pop, pruneFun = function(x) !x$isLeaf || x$Type == "Base Salary")

# Show up to 150 rows of the tree on the console.
print(pop, limit = 150)
##                                               levelName
## 1  Skills with Salary > 200k                           
## 2   ¦--CA                                              
## 3   ¦   ¦--Mountain View                               
## 4   ¦   ¦   °--Intuit                                  
## 5   ¦   ¦       °--Head of SBG Data Science Engineering
## 6   ¦   ¦           °--253000                          
## 7   ¦   ¦               ¦--Distributed Systems         
## 8   ¦   ¦               ¦--Big Data                    
## 9   ¦   ¦               ¦--Algorithms                  
## 10  ¦   ¦               ¦--Data Science                
## 11  ¦   ¦               ¦--Strategy                    
## 12  ¦   ¦               °--Databases                   
## 13  ¦   ¦--Santa Clara                                 
## 14  ¦   ¦   °--Akamai Technologies                     
## 15  ¦   ¦       °--Principal Lead Data Scientist       
## 16  ¦   ¦           °--204000                          
## 17  ¦   ¦               ¦--Hadoop                      
## 18  ¦   ¦               ¦--Data Mining                 
## 19  ¦   ¦               ¦--Machine Learning            
## 20  ¦   ¦               ¦--Big Data                    
## 21  ¦   ¦               ¦--Python                      
## 22  ¦   ¦               ¦--Algorithms                  
## 23  ¦   ¦               ¦--Matlab                      
## 24  ¦   ¦               °--Ruby                        
## 25  ¦   °--San Francisco                               
## 26  ¦       °--First Republic Bank                     
## 27  ¦           °--Head of Data Science, Liquidity     
## 28  ¦               °--202000                          
## 29  ¦                   ¦--Data Science                
## 30  ¦                   ¦--Analytics                   
## 31  ¦                   °--Statistics                  
## 32  ¦--WA                                              
## 33  ¦   ¦--Bellevue                                    
## 34  ¦   ¦   °--Microsoft                               
## 35  ¦   ¦       °--Principal Data Scientist            
## 36  ¦   ¦           °--2e+05                           
## 37  ¦   ¦               ¦--Hadoop                      
## 38  ¦   ¦               ¦--Data Mining                 
## 39  ¦   ¦               ¦--Optimization                
## 40  ¦   ¦               ¦--Algorithms                  
## 41  ¦   ¦               ¦--MapReduce                   
## 42  ¦   ¦               °--C++                         
## 43  ¦   °--Seattle                                     
## 44  ¦       °--Amazon                                  
## 45  ¦           °--Head of Data Science and Engineering
## 46  ¦               °--265000                          
## 47  ¦                   ¦--Product Management          
## 48  ¦                   ¦--Machine Learning            
## 49  ¦                   ¦--Data Science                
## 50  ¦                   ¦--Analytics                   
## 51  ¦                   °--Statistics                  
## 52  °--VA                                              
## 53      °--Vienna                                      
## 54          °--Director                                
## 55              °--240000                              
## 56                  ¦--Product Management              
## 57                  ¦--Machine Learning                
## 58                  ¦--AWS                             
## 59                  ¦--Management                      
## 60                  ¦--Relational Databases            
## 61                  °--Scala
# Graphviz styling for the data.tree plot: top-to-bottom layout, thick grey
# edges with "vee" arrowheads, and rounded green-yellow boxes for every node.
SetGraphStyle(pop, rankdir = "TB")
SetEdgeStyle(pop, arrowhead = "vee", color = "grey35", penwidth = 5)
SetNodeStyle(
  pop,
  style = "filled,rounded",
  shape = "box",
  fillcolor = "GreenYellow",
  fontname = "helvetica",
  tooltip = GetDefaultTooltip
)

# Highlight the California subtree. penwidth is given as a plain number
# (Graphviz expects a numeric width, not a CSS-style "5px" string) and the
# base-graphics `cex` setting is dropped because it is not a Graphviz node
# attribute.
SetNodeStyle(pop$CA, fillcolor = "LightBlue", penwidth = 5)

# Render the tree. The previous call also passed quartz(width = 1000,
# height = 800) and cex = 10: quartz() is a macOS-only graphics device whose
# size is given in inches, it was opened purely as a side effect of argument
# evaluation, and the tree plot is not drawn on a base graphics device, so
# neither argument controlled the output and the script could not run on
# other platforms.
# NOTE(review): if a larger figure is wanted, set the size through the knitr
# chunk options or the plot method's own sizing arguments -- confirm against
# the installed data.tree / DiagrammeR versions.
plot(pop)
# Radial view of the same tree with networkD3.
# ToListExplicit() converts the data.tree node into the nested list that is
# handed to radialNetwork(); unname = TRUE is passed so children are stored
# without names.
useRtreeList <- pop %>% ToListExplicit(unname = TRUE)
radialNetwork(List = useRtreeList)

# knitr chunk default: echo source code in the rendered document.
knitr::opts_chunk$set(echo = TRUE)