This document fulfills the Week 3 assignment of Course 9, “Developing Data Products”.
The course is part of the Data Science Specialization offered on Coursera by Johns Hopkins University.
I will replicate a map of smoking rates in the US, available on the CDC website.
This code loads, cleans, and pre-processes the data.
library(dplyr)
# Load the CDC 2018 smoking-rate data (.csv file downloaded from the link
# provided) and reduce it to the US states only.
CDC.data <- read.csv("/Users/francescoaldo/Downloads/CDC_Smoking_2018.csv",
                     header = TRUE)
# head(CDC.data, 5) ; tail(CDC.data, 5)

# Keep only the rows whose Location is one of the names in `state.name`.
# Selecting by name instead of by row position removes the void last line,
# DC, Guam, and Puerto Rico, and stays correct even if the row order or the
# number of rows in the file changes.
CDC.data <- CDC.data[CDC.data$Location %in% state.name, ]
# Expected result: one row per state (1st col. is State Names)

# Match 2-letter code with corresponding state name
CDC.data$code <- state.abb[match(CDC.data$Location, state.name)]
Here I create the “hover labels” that will appear on the map.
# Build the text shown when hovering over a state: its name, smoking rate,
# lower/upper confidence limits, and sample size. The pieces are joined with
# single spaces, and '<br>' marks the line breaks inside the label.
CDC.data$hover <- with(
  CDC.data,
  paste(
    "", Location, "<br>",
    "Smoking Rate", Data_Value, "%", "<br>",
    "Lower Bound", Low_Confidence_Limit, "%", "<br>",
    "Upper Bound", High_Confidence_Limit, "%", "<br>",
    "N° Obs.", Sample_Size,
    sep = " "
  )
)
This code defines some of the settings that will be passed as arguments when generating the choropleth map.
library(plotly)
# Outline drawn around each state: white, with width 2.
l <- list(
  color = toRGB("white"),
  width = 2
)

# Geo options for the map: USA scope, Albers USA projection, and lakes
# shown in white.
g <- list(
  scope = "usa",
  projection = list(type = "albers usa"),
  showlakes = TRUE,
  lakecolor = toRGB("white")
)
library(plotly)
library(dplyr)
library(webshot)
# Choropleth trace: one entry per state, located by its 2-letter code and
# shaded on the 'Blues' palette according to the smoking rate. The hover text
# comes from CDC.data$hover and the state outlines use the settings in `l`.
p <- plot_ly(
  z = CDC.data$Data_Value,
  text = CDC.data$hover,
  locations = CDC.data$code,
  type = "choropleth",
  locationmode = "USA-states",
  color = CDC.data$Data_Value,
  colors = "Blues",
  marker = list(line = l)
)

# Add the title and the geo options from `g`. The title literal deliberately
# keeps its embedded line break so the string is unchanged.
p <- layout(
  p,
  title = "Smoking Rates in the US (CDC 2018)<br>
(hover for details) ",
  geo = g
)

# Display the map
p