Data Preparation
library(XML)
## Warning: package 'XML' was built under R version 3.3.2
library(RCurl)
## Warning: package 'RCurl' was built under R version 3.3.2
## Loading required package: bitops
## Warning: package 'bitops' was built under R version 3.3.2
library(ggmap)
## Warning: package 'ggmap' was built under R version 3.3.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.3.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.3.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(RColorBrewer)
# Ask R to emit its messages in English for this session.
Sys.setenv("LANGUAGE"="EN")
# Download the WSJ "Salaries for Colleges by Region" page.
# Note: despite its name, `url` holds what getURL() returned (the page
# content), not the address itself.
url <- getURL("http://online.wsj.com/public/resources/documents/info-Salaries_for_Colleges_by_Region-sort.html")
# Parse the HTML tables found in the downloaded content.
data <- readHTMLTable(url)
# Fail early with a clear message if the expected table is missing (for
# example when the page has moved or an error page was returned), rather than
# letting the column assignments below fail in a confusing way.
if (is.null(data[["mySortableTable"]])) {
  stop(
    "Table 'mySortableTable' was not found in the downloaded page.",
    call. = FALSE
  )
}
salaries <- data[["mySortableTable"]]
# Keep the school name as plain character data and expose short aliases for
# the two columns used later in the script.
salaries$`School Name`<- as.character(salaries$`School Name`)
salaries$school <- salaries$`School Name`
salaries$starting <- salaries$`Starting Median Salary`
Text Cleaning
# Convert the mid-career salary column from formatted text to numbers.
#
# Every byte that is not an ASCII digit or a decimal point is dropped, which
# removes the currency symbol, thousands separators and any stray non-ASCII
# bytes in one pass. The decimal point is kept so the value is parsed as
# written; the previous approach deleted the decimal point and divided by 100,
# which is only correct when every value carries exactly two decimals.
# useBytes = TRUE makes the substitution byte-wise, so it does not depend on
# how the text's encoding was declared.
salaries$`Mid-Career Median Salary` <- as.numeric(
  gsub(
    "[^0-9.]", "",
    as.character(salaries$`Mid-Career Median Salary`),
    useBytes = TRUE
  )
)
# Print the cleaned column for a quick visual check.
salaries$`Mid-Career Median Salary`
## [1] 129000 123000 122000 112000 105000 101000 101000 101000 101000 99600
## [11] 99600 96700 95600 95000 88100 87000 86400 85200 84700 84300
## [21] 84100 82400 81300 80400 72100 71400 71300 67500 106000 97600
## [31] 93400 88600 86100 85300 84700 84400 84100 83300 83200 82900
## [41] 82800 82000 81600 81500 81400 81100 80100 79500 79000 78700
## [51] 78700 78400 77500 76000 76000 75400 73800 73400 72600 72600
## [61] 71900 71600 70900 70900 69800 69500 67100 63900 56500 50600
## [71] 116000 113000 103000 97800 96500 96100 95900 95800 93400 93000
## [81] 90500 88400 88200 87800 87300 86200 85300 84800 84600 84200
## [91] 84000 83900 83700 83500 81700 81600 81600 81000 80900 80800
## [101] 80600 80600 79000 78500 78200 77800 76600 76100 75900 75500
## [111] 74700 74600 73800 73500 73400 73400 73100 72600 72500 72100
## [121] 72100 71400 71400 70300 69500 69300 68300 68200 67100 66400
## [131] 65800 64800 64500 64300 64000 62600 60600 58500 58200 46600
## [141] 43900 110000 110000 106000 106000 104000 104000 104000 103000 97900
## [151] 96100 95800 95400 95000 94600 93900 91600 90800 88700 88600
## [161] 87900 87800 86900 86000 86000 84700 84500 83600 83300 82900
## [171] 82800 82700 81800 81500 81400 80800 80700 80000 79900 79700
## [181] 79400 79300 78300 78100 78100 77800 76300 75500 74600 74500
## [191] 74000 74000 73800 73000 72100 71700 71700 71100 71100 70700
## [201] 70100 69700 69100 68400 67500 66600 66200 64400 64300 63300
## [211] 62400 60600 60400 60200 59200 57800 55500 54900 53900 51000
## [221] 134000 131000 126000 126000 124000 120000 114000 114000 114000 111000
## [231] 110000 110000 110000 109000 108000 107000 107000 107000 107000 106000
## [241] 105000 105000 104000 103000 102000 101000 99900 97900 96700 96500
## [251] 95900 95800 95600 94600 94200 94200 93900 93500 93400 93200
## [261] 93000 92800 92700 92200 91800 89900 89700 89200 88900 88800
## [271] 88600 88200 87400 86600 86400 85900 85800 85800 85700 85300
## [281] 85200 84700 84600 84400 84200 83900 83700 83500 83400 82900
## [291] 82800 82700 81700 81300 81000 80300 80000 79200 78900 78700
## [301] 78300 78200 77800 77700 76700 76500 76200 75300 74600 74400
## [311] 74000 74000 72600 72300 72100 70300 69700 66200 63600 62600
# Syntactic (backtick-free) alias of the cleaned mid-career salary column;
# this is the name the plotting code maps to colour.
salaries[["Mid_Career_Median_salary"]] <- salaries[["Mid-Career Median Salary"]]
# Clean the school names: escape non-ASCII characters to \uXXXX form, remove
# the escaped "00c2" sequence, then remove any remaining backslashes.
salaries[["school"]] <- local({
  escaped <- stringi::stri_escape_unicode(salaries[["school"]])
  without_c2 <- gsub("\\u00c2", "", escaped)
  gsub("[\\]", "", without_c2)
})
library(plotly)
## Warning: package 'plotly' was built under R version 3.3.2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggmap':
##
## wind
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Polygon outlines of the US states, used as the base layer of the map.
states <- ggplot2::map_data(map = "state")
## Warning: package 'maps' was built under R version 3.3.3
# Six-colour yellow-orange-red ColorBrewer palette for the salary gradient.
yor_col <- RColorBrewer::brewer.pal(n = 6, name = "YlOrRd")
# Map of the US states with one point per school, coloured by mid-career
# median salary.
#
# NOTE(review): rows 59 and 55 of `salaries` are excluded; the reason is not
# shown in this script - confirm it still applies to the current data.
# NOTE(review): `lon` and `lat` are not created anywhere in the visible part
# of this script; presumably a geocoding step adds them to `salaries` - confirm
# they exist before building the plot.
p <- ggplot(salaries[-c(59, 55), ]) +
  # Base layer: state outlines.
  geom_polygon(
    aes(x = long, y = lat, group = group),
    data = states,
    fill = "black",
    color = "white"
  ) +
  # One point per school; `text` is not a ggplot2 aesthetic (hence the
  # "unknown aesthetics" warning) but is picked up by ggplotly() for tooltips.
  geom_point(
    aes(
      x = lon, y = lat,
      color = Mid_Career_Median_salary, text = school
    )
  ) +
  # The legend title names the variable that is actually mapped. Breaks are
  # left to ggplot2's defaults and labels get thousands separators; passing
  # the column name as a literal string is not a valid break/label vector for
  # a continuous scale.
  scale_color_gradientn(
    name = "Mid-Career\nMedian Salary",
    colors = yor_col,
    labels = function(x) {
      format(x, big.mark = ",", scientific = FALSE, trim = TRUE)
    }
  ) +
  coord_fixed(1.3) +
  # "none" is the non-deprecated spelling of FALSE for suppressing a guide.
  guides(size = "none") +
  theme_bw() +
  # Strip axes, grid and border so only the map remains.
  theme(
    axis.text = element_blank(),
    axis.line = element_blank(),
    axis.ticks = element_blank(),
    panel.border = element_blank(),
    panel.grid = element_blank(),
    axis.title = element_blank()
  )
## Warning: Ignoring unknown aesthetics: text
# Render the map as an interactive plotly widget (800 x 500), limiting the
# hover tooltip to the school name and the mid-career salary entry.
ggplotly(
  p = p,
  width = 800,
  height = 500,
  tooltip = c("text", "Mid_Career_Median_salary")
)