Data Preparation

library(XML)
## Warning: package 'XML' was built under R version 3.3.2
library(RCurl)
## Warning: package 'RCurl' was built under R version 3.3.2
## Loading required package: bitops
## Warning: package 'bitops' was built under R version 3.3.2
library(ggmap)
## Warning: package 'ggmap' was built under R version 3.3.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.3.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.3.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(RColorBrewer)
# Ask R to emit its messages in English for this session.
Sys.setenv(LANGUAGE = "EN")

# Fetch the WSJ salaries-by-region page as text and parse the HTML tables it
# contains; the table used below is the one named `mySortableTable`.
url <- getURL(
  "http://online.wsj.com/public/resources/documents/info-Salaries_for_Colleges_by_Region-sort.html"
)
data <- readHTMLTable(url)
salaries <- data$mySortableTable

# Store the school name as plain character data, then add short aliases for
# the school-name and starting-salary columns.
salaries[["School Name"]] <- as.character(salaries[["School Name"]])
salaries[["school"]] <- salaries[["School Name"]]
salaries[["starting"]] <- salaries[["Starting Median Salary"]]

Text Cleaning

# Convert the scraped mid-career salary strings into numbers.
#
# Everything except digits and the decimal point is stripped in one pass, so
# currency symbols, thousands separators and stray mis-encoded characters are
# all removed together. Keeping the decimal point means the value is parsed
# at its true magnitude: there is no need to delete the "." and divide by 100
# afterwards, which would silently shrink any value lacking a two-digit
# fractional part.
salaries$`Mid-Career Median Salary` <- as.numeric(
  stringi::stri_replace_all_regex(
    as.character(salaries$`Mid-Career Median Salary`),
    "[^0-9.]",
    ""
  )
)

# Print the parsed column as a visual sanity check.
salaries$`Mid-Career Median Salary`
##   [1] 129000 123000 122000 112000 105000 101000 101000 101000 101000  99600
##  [11]  99600  96700  95600  95000  88100  87000  86400  85200  84700  84300
##  [21]  84100  82400  81300  80400  72100  71400  71300  67500 106000  97600
##  [31]  93400  88600  86100  85300  84700  84400  84100  83300  83200  82900
##  [41]  82800  82000  81600  81500  81400  81100  80100  79500  79000  78700
##  [51]  78700  78400  77500  76000  76000  75400  73800  73400  72600  72600
##  [61]  71900  71600  70900  70900  69800  69500  67100  63900  56500  50600
##  [71] 116000 113000 103000  97800  96500  96100  95900  95800  93400  93000
##  [81]  90500  88400  88200  87800  87300  86200  85300  84800  84600  84200
##  [91]  84000  83900  83700  83500  81700  81600  81600  81000  80900  80800
## [101]  80600  80600  79000  78500  78200  77800  76600  76100  75900  75500
## [111]  74700  74600  73800  73500  73400  73400  73100  72600  72500  72100
## [121]  72100  71400  71400  70300  69500  69300  68300  68200  67100  66400
## [131]  65800  64800  64500  64300  64000  62600  60600  58500  58200  46600
## [141]  43900 110000 110000 106000 106000 104000 104000 104000 103000  97900
## [151]  96100  95800  95400  95000  94600  93900  91600  90800  88700  88600
## [161]  87900  87800  86900  86000  86000  84700  84500  83600  83300  82900
## [171]  82800  82700  81800  81500  81400  80800  80700  80000  79900  79700
## [181]  79400  79300  78300  78100  78100  77800  76300  75500  74600  74500
## [191]  74000  74000  73800  73000  72100  71700  71700  71100  71100  70700
## [201]  70100  69700  69100  68400  67500  66600  66200  64400  64300  63300
## [211]  62400  60600  60400  60200  59200  57800  55500  54900  53900  51000
## [221] 134000 131000 126000 126000 124000 120000 114000 114000 114000 111000
## [231] 110000 110000 110000 109000 108000 107000 107000 107000 107000 106000
## [241] 105000 105000 104000 103000 102000 101000  99900  97900  96700  96500
## [251]  95900  95800  95600  94600  94200  94200  93900  93500  93400  93200
## [261]  93000  92800  92700  92200  91800  89900  89700  89200  88900  88800
## [271]  88600  88200  87400  86600  86400  85900  85800  85800  85700  85300
## [281]  85200  84700  84600  84400  84200  83900  83700  83500  83400  82900
## [291]  82800  82700  81700  81300  81000  80300  80000  79200  78900  78700
## [301]  78300  78200  77800  77700  76700  76500  76200  75300  74600  74400
## [311]  74000  74000  72600  72300  72100  70300  69700  66200  63600  62600
# Copy the cleaned mid-career salaries into a column with a syntactic name so
# it can be referenced without backticks.
salaries[["Mid_Career_Median_salary"]] <- salaries[["Mid-Career Median Salary"]]

# Clean the school names: escape non-ASCII characters, remove the escaped
# stray character targeted by the first pattern, then drop any backslashes
# that are left over.
salaries[["school"]] <- gsub(
  "[\\]", "",
  gsub("\\u00c2", "", stringi::stri_escape_unicode(salaries[["school"]]))
)
library(plotly)
## Warning: package 'plotly' was built under R version 3.3.2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggmap':
## 
##     wind
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Polygon outlines of the US states, used as the map background.
states <- ggplot2::map_data(map = "state")
## Warning: package 'maps' was built under R version 3.3.3
# Six-step yellow-orange-red palette for the salary colour gradient.
yor_col <- RColorBrewer::brewer.pal(n = 6, name = "YlOrRd")

# Map of schools drawn over the US state outlines, with each point coloured
# by the school's mid-career median salary. The unofficial `text` aesthetic
# carries the school name; ggplot2 ignores it.
#
# NOTE(review): rows 55 and 59 are dropped without explanation - presumably
# schools that could not be placed on this map; confirm.
# NOTE(review): `salaries` must already have `lon` and `lat` columns; they are
# not created in the code shown here - confirm where they come from.
p <- ggplot(salaries[-c(59, 55), ]) +
  geom_polygon(
    aes(x = long, y = lat, group = group),
    data = states,
    fill = "black",
    color = "white"
  ) +
  geom_point(
    aes(x = lon, y = lat, color = Mid_Career_Median_salary, text = school)
  ) +
  # The legend describes the mapped variable (mid-career, not starting,
  # salary). Breaks are left to the scale's defaults, because a continuous
  # scale needs numeric breaks rather than a column name; labels are rendered
  # as dollar amounts.
  scale_color_gradientn(
    name = "Mid-Career\nMedian Salary",
    colors = yor_col,
    labels = function(x) {
      paste0("$", format(x, big.mark = ",", scientific = FALSE, trim = TRUE))
    }
  ) +
  coord_fixed(1.3) +
  guides(size = "none") +
  theme_bw() +
  # Strip axes, grid and border so only the map itself is shown.
  theme(
    axis.text = element_blank(),
    axis.line = element_blank(),
    axis.ticks = element_blank(),
    panel.border = element_blank(),
    panel.grid = element_blank(),
    axis.title = element_blank()
  )
## Warning: Ignoring unknown aesthetics: text
# Convert the static map into an interactive plotly widget of 800 x 500,
# restricting the hover tooltip to the two requested entries.
plotly::ggplotly(
  p = p,
  width = 800,
  height = 500,
  tooltip = c("text", "Mid_Career_Median_salary")
)