# Visualizing Median Income For All 3,000+ U.S. Counties
# Inspiration and dataset: http://www.visualcapitalist.com/interactive-median-income-u-s-counties/
# Asked myself - can I reproduce it?
# Load packages (each one must already be installed)
library(ggplot2)
library(ggthemes)
library(dplyr)
library(choroplethr)
library(choroplethrMaps)
library(plotly)
library(RColorBrewer)
library(devtools)
library(mapproj)
library(maptools)
library(sp)
library(gridExtra)
library(leaflet)
library(rgdal)
# Work from the folder that holds the input files
setwd("C:/DC/R/Cool datasets/Median income by county and state")
# Load the county-level table; the first line of the file is the header row
income <- read.csv(file = "Median income by county and state.csv",
                   header = TRUE)
# Tag every row with its state. This only attaches the grouping -
# nothing is aggregated at this point.
income.by.state <- group_by(income, State)
# Box plot of the county-level estimates, one box per state.
# coord_flip() puts the states on the vertical axis, so the state order is
# reversed to make them read alphabetically from top to bottom.
income.boxplot <- ggplot(income, aes(x = State, y = Estimate)) +
  geom_boxplot(fill = "darkorange", alpha = 0.5) +
  coord_flip() +
  # The axis order has to come from State (the discrete variable on x);
  # Estimate is numeric and has no levels. factor() handles State being
  # either a character or a factor column.
  scale_x_discrete(limits = rev(levels(factor(income$State)))) +
  labs(title = "Median household income by state",
       subtitle = "Source: 2011-2015 American Community Survey",
       x = "State",
       y = "Median household income",
       fill = "Median household income") +
  # Apply the complete theme first: adding a complete theme replaces all
  # earlier theme() settings, so every manual tweak must come after it.
  theme_fivethirtyeight() +
  theme(axis.line.x = element_line(size = .5, colour = "black"),
        axis.title = element_text(size = 14),
        legend.position = "right",
        legend.direction = "vertical",
        legend.box = "vertical",
        legend.key.size = unit(0.7, "cm"),
        legend.text = element_text(size = 10),
        text = element_text(family = "OfficinaSanITC-Book"),
        # hjust = 0.5 centres the title
        plot.title = element_text(family = "OfficinaSanITC-Book",
                                  hjust = 0.5))
income.boxplot

# Map by state
# Read the state boundaries from the cb_2016_us_state_500k shapefile.
# The sp package must be loaded to work with the resulting spatial data frame.
states <- readOGR(dsn = "C:/DC/R/Cool datasets/Median income by county and state",
                  layer = "cb_2016_us_state_500k",
                  encoding = "UTF-8", verbose = FALSE)
# Inspect both inputs before merging them. View() opens a data viewer and
# needs an interactive session, so it is skipped when the script is run
# non-interactively (e.g. via Rscript).
if (interactive()) {
  View(states)
  View(income.by.state)
}
# Collapse the county rows to one row per state: the mean of the county
# estimates within each state
income.by.state <- summarize(group_by(income, State),
                             state_mean = mean(Estimate))
# Print the state means to two decimals (display only - nothing is stored)
round(income.by.state[["state_mean"]], digits = 2)
## [1] 37973.13 64197.10 43252.20 36626.48 56013.16 51263.19 71184.12
## [8] 70848.00 58067.67 44046.48 40704.91 64879.00 43607.75 50163.44
## [15] 48745.40 50483.12 47322.21 39137.30 41411.78 46141.75 69200.38
## [22] 65974.43 44464.99 53926.99 34938.93 41755.40 44497.02 48646.13
## [29] 53689.71 60648.90 73014.10 40183.67 55275.69 41784.20 55574.87
## [36] 48446.41 44097.38 45171.22 50316.25 65783.40 39756.70 47356.85
## [43] 40168.03 46745.78 54687.03 52653.50 53083.70 50217.08 39411.82
## [50] 50649.00 57042.30
# Attach the per-state means to the state polygons, matching the shapefile's
# NAME column against the State column of the income summary
income.merged <- merge(states, income.by.state, by.x = "NAME", by.y = "State")
class(income.merged)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
# Binned orange palette over the 20,000-120,000 range; polygons whose value
# is NA are drawn in grey (#808080)
pal <- colorBin("Oranges", c(20000, 120000), na.color = "#808080",
                alpha = FALSE, reverse = FALSE)
# Create a pop-up
# Values are rounded to two decimals and given thousands separators so the
# pop-up does not show the raw, unrounded means. trim = TRUE stops format()
# from padding the strings to a common width.
state_popup <- paste0("<strong>State: </strong>",
                      income.merged$NAME,
                      "<br><strong>Median household income ($): </strong>",
                      format(round(income.merged$state_mean, digits = 2),
                             nsmall = 2, big.mark = ",", trim = TRUE))
# Choropleth: the formulas (~) are evaluated against the data passed to
# leaflet(), so columns are referenced by name
income.map <- leaflet(data = income.merged) %>%
  addPolygons(fillColor = ~pal(state_mean),
              popup = state_popup,
              fillOpacity = 0.8,
              color = "#BDBDC3",
              weight = 1) %>%
  addLegend("bottomright",
            pal = pal, values = ~state_mean,
            title = "Median household income ($)",
            opacity = 1)
income.map
# Conclusion:
# I ran into two fundamental problems here:
# 1. Boxplots cannot have a gradient palette in ggplot (according to a tweet by H. Wickham).
# 2. I do not know how to match the county shapefile with the data
#    when my county data does not contain FIPS codes.
# Please tweet me @OleksiyAnokhin, if you know how to solve it.
# Stackoverflow attempt
# https://stackoverflow.com/questions/45830150/fixing-problems-with-ggplot-palette-how-to-create-a-gradient-boxplot