US Cities by Population

Benjamin Rouillé d’Orfeuil

November 4, 2016

Getting the Data

library(plotly)
library(XML)
library(httr)
library(ggmap)
# Download the Wikipedia page listing US cities by population and keep the
# largest HTML table found on it in `data`.
url <- "https://en.wikipedia.org/wiki/List_of_United_States_cities_by_population"

# Fail immediately on an HTTP error instead of trying to parse an error page.
response <- GET(url)
stop_for_status(response)

# Decode the body explicitly as UTF-8 so that non-ASCII characters in the
# page survive parsing regardless of the local locale.
page <- htmlParse(
  httr::content(response, as = "text", encoding = "UTF-8"),
  asText = TRUE,
  encoding = "UTF-8"
)
tables <- readHTMLTable(page, stringsAsFactors = FALSE)

# Row count of every table. Entries that are not data frames (readHTMLTable
# can return NULL for a table) count as 0 rows, so `tables.size` always has
# one entry per element of `tables` and which.max() indexes the right table.
tables.size <- vapply(
  tables,
  function(t) if (is.data.frame(t)) nrow(t) else 0L,
  integer(1)
)
if (length(tables.size) == 0L || max(tables.size) == 0L) {
  stop("No usable HTML table found at ", url, call. = FALSE)
}

data <- tables[[which.max(tables.size)]]

Building the Data Frame

# Reduce the scraped table to rank / city / state / population, clean each
# column, bin the cities by population and attach coordinates.
data <- data[, 1:4]
names(data) <- c("rank", "city", "state", "population")

# Drop the "-(T)" suffix from the rank column before converting to numeric.
data$rank <- as.numeric(gsub("-\\(T\\)", "", data$rank))

# Strip numeric footnote markers such as "[12]" from the city names.
data$city <- gsub("\\[[0-9]+\\]", "", data$city)
# Force a plain-hyphen spelling for Winston-Salem. The row is matched by name
# rather than by a fixed position, so a reordered table cannot overwrite
# another city.
# NOTE(review): presumably the scraped spelling did not geocode -- confirm.
data$city[grepl("^Winston", data$city)] <- "Winston-Salem"

data$state <- as.character(data$state)
data$population <- as.numeric(gsub(",", "", data$population))

# quantile() with default probs gives 5 break points, i.e. 4 bins.
# include.lowest must be an argument of cut(), not quantile(); otherwise the
# least populous city falls outside the first interval and becomes NA.
population_breaks <- quantile(data$population, na.rm = TRUE)
data$quantile <- cut(
  data$population,
  breaks = population_breaks,
  include.lowest = TRUE
)
# One label per bin (4 bins, so no unused "5th" level).
levels(data$quantile) <- paste(c("1st", "2nd", "3rd", "4th"), "quantile")
data$quantile <- as.ordered(data$quantile)

# Geocode "City, State" rather than the bare city name so that city names
# shared between states are not ambiguous.
coordinates <- geocode(paste(data$city, data$state, sep = ", "),
                       messaging = FALSE)
data$lon <- coordinates$lon
data$lat <- coordinates$lat

Building the Map

# Geo layout options handed to layout(geo = g) below.
g <- list(
  scope = "usa",
  projection = list(type = "albers usa"),
  showland = TRUE,
  landcolor = toRGB("gray90"),
  showlakes = TRUE,
  subunitwidth = 1,
  countrywidth = 1,
  subunitcolor = toRGB("black"),
  countrycolor = toRGB("black")
)

# One marker per city: position from lon/lat, size mapped to population,
# colour mapped to the population bin. The hover text is built from the
# columns of the data bound to the plot (not from the global `data` object),
# so every label is evaluated against the same rows as the marker it belongs
# to.
p <- plot_geo(
  data,
  locationmode = "USA-states",
  width = 1000,
  sizes = c(1, 1000)
) %>%
  add_markers(
    x = ~lon,
    y = ~lat,
    size = ~population,
    color = ~quantile,
    hoverinfo = "text",
    text = ~paste(city, "<br />", population / 1e6, " million")
  ) %>%
  layout(
    title = "2015 US city populations<br>(Click legend to toggle)",
    geo = g
  )

Plotting the Map