R Leaflet 500 Cities

library(leaflet)

## Warning: package 'leaflet' was built under R version 3.4.4

library(dplyr)

## Warning: package 'dplyr' was built under R version 3.4.4

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Read the 500 Cities diabetes CSV by its full path. Addressing the file
# directly avoids setwd(), so the session's working directory is left as it
# was for whoever runs this script.
geodata <- read.csv(
  file.path("E:/Stats", "500CitiesDiabetes.csv"),
  header = TRUE
)
# List the columns and their types for the freshly loaded data frame.
str(geodata)

## 'data.frame':    500 obs. of  24 variables:
##  $ Year                      : int  2016 2016 2016 2016 2016 2016 2016 2016 2016 2016 ...
##  $ StateAbbr                 : Factor w/ 51 levels "AK","AL","AR",..: 2 2 2 2 2 2 1 4 4 4 ...
##  $ StateDesc                 : Factor w/ 51 levels "Alabama","Alaska",..: 1 1 1 1 1 1 2 3 3 3 ...
##  $ CityName                  : Factor w/ 474 levels "Abilene","Akron",..: 42 180 183 265 267 437 12 27 72 157 ...
##  $ GeographicLevel           : Factor w/ 1 level "City": 1 1 1 1 1 1 1 1 1 1 ...
##  $ DataSource                : Factor w/ 1 level "BRFSS": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Category                  : Factor w/ 1 level "Health Outcomes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ UniqueID                  : int  107000 135896 137000 150000 151000 177256 203000 404720 412000 427400 ...
##  $ Measure                   : Factor w/ 1 level "Diagnosed diabetes among adults aged >=18 Years": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Data_Value_Unit           : Factor w/ 1 level "%": 1 1 1 1 1 1 1 1 1 1 ...
##  $ DataValueTypeID           : Factor w/ 1 level "AgeAdjPrv": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Data_Value_Type           : Factor w/ 1 level "Age-adjusted prevalence": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Data_Value                : num  16.7 8 11.9 14.5 13.9 13.9 8.4 11.2 8 7.3 ...
##  $ Low_Confidence_Limit      : num  16.6 7.9 11.7 14.4 13.8 13.7 8.3 11.1 7.9 7.2 ...
##  $ High_Confidence_Limit     : num  16.8 8.2 12 14.6 14.1 14.1 8.5 11.4 8.1 7.4 ...
##  $ Data_Value_Footnote_Symbol: logi  NA NA NA NA NA NA ...
##  $ Data_Value_Footnote       : logi  NA NA NA NA NA NA ...
##  $ PopulationCount           : Factor w/ 497 levels "1,197,816","1,307,402",..: 182 394 150 162 177 448 215 348 199 178 ...
##  $ GeoLocation               : Factor w/ 500 levels "(21.4588039305, -157.973296737)",..: 117 112 199 61 74 108 500 113 109 110 ...
##  $ CategoryID                : Factor w/ 1 level "HLTHOUT": 1 1 1 1 1 1 1 1 1 1 ...
##  $ MeasureId                 : Factor w/ 1 level "DIABETES": 1 1 1 1 1 1 1 1 1 1 ...
##  $ CityFIPS                  : int  107000 135896 137000 150000 151000 177256 203000 404720 412000 427400 ...
##  $ TractFIPS                 : logi  NA NA NA NA NA NA ...
##  $ Short_Question_Text       : Factor w/ 1 level "Diabetes": 1 1 1 1 1 1 1 1 1 1 ...

# geodata <- read.csv(file = "", header = TRUE, sep = "," )

# Keep only the columns used below. Selecting by name preserves those names,
# so no separate colnames() assignment is needed afterwards.
geodata <- geodata[, c(
  "CityName", "Data_Value", "CityFIPS",
  "PopulationCount", "StateDesc", "GeoLocation"
)]
# Peek at the raw "(lat, lng)" text before it is parsed into numbers.
head(geodata[["GeoLocation"]], n = 3)

## [1] (33.5275663773, -86.7988174678) (33.3767602729, -86.8051937568)
## [3] (34.6989692671, -86.6387042882)
## 500 Levels: (21.4588039305, -157.973296737) ...

# Parse the "(lat, lng)" text with a pattern rather than fixed character
# positions. Fixed offsets (2-14 and 16-28) produced "NAs introduced by
# coercion" on some rows and cut the trailing digits off the longitude;
# presumably the coordinates are not all the same width -- TODO confirm
# against the raw file.
geo_text <- as.character(geodata$GeoLocation)
geo_pattern <- paste0(
  "^\\([[:space:]]*(-?[0-9.]+)[[:space:]]*,",
  "[[:space:]]*(-?[0-9.]+)[[:space:]]*\\)$"
)
# sub() returns the input unchanged when the pattern does not match, so a
# malformed value still becomes NA (with a warning) instead of a wrong number.
geodata$GeoLocation_Lat <- as.numeric(sub(geo_pattern, "\\1", geo_text))
geodata$GeoLocation_Lng <- as.numeric(sub(geo_pattern, "\\2", geo_text))

# Show the data frame structure, including the two new coordinate columns.
str(geodata)

## 'data.frame':    500 obs. of  8 variables:
##  $ CityName       : Factor w/ 474 levels "Abilene","Akron",..: 42 180 183 265 267 437 12 27 72 157 ...
##  $ Data_Value     : num  16.7 8 11.9 14.5 13.9 13.9 8.4 11.2 8 7.3 ...
##  $ CityFIPS       : int  107000 135896 137000 150000 151000 177256 203000 404720 412000 427400 ...
##  $ PopulationCount: Factor w/ 497 levels "1,197,816","1,307,402",..: 182 394 150 162 177 448 215 348 199 178 ...
##  $ StateDesc      : Factor w/ 51 levels "Alabama","Alaska",..: 1 1 1 1 1 1 2 3 3 3 ...
##  $ GeoLocation    : Factor w/ 500 levels "(21.4588039305, -157.973296737)",..: 117 112 199 61 74 108 500 113 109 110 ...
##  $ GeoLocation_Lat: num  33.5 33.4 34.7 30.7 32.3 ...
##  $ GeoLocation_Lng: num  -86.8 -86.8 -86.6 -88.1 -86.3 ...

# Spot-check the first three parsed latitudes.
head(geodata$GeoLocation_Lat,3)

## [1] 33.52757 33.37676 34.69897

# Spot-check the first three parsed longitudes.
head(geodata$GeoLocation_Lng,3)

## [1] -86.79882 -86.80519 -86.63870

# Text copy of the population column. R names are case-sensitive: the column
# is `PopulationCount`, and `geodata$populationCount` evaluates to NULL.
geodata$popsep <- format(geodata$PopulationCount, scientific = FALSE)
# Popup text per city: "CityName, StateDesc, PopulationCount, Data_Value".
geodata$metropop <- paste(
  geodata$CityName, geodata$StateDesc,
  geodata$PopulationCount, geodata$Data_Value,
  sep = ", "
)

# Build a clustered marker map with one marker per row of geodata; the
# initial view is set to lng -95.7129, lat 37.0902 at zoom 4.
my_map <- geodata %>%
  leaflet() %>%
  # One base tile layer is enough; a second addTiles() call only stacks an
  # identical layer on top of the first.
  addTiles() %>%
  setView(lng = -95.7129, lat = 37.0902, zoom = 4) %>%
  addMarkers(
    lng = ~GeoLocation_Lng,
    lat = ~GeoLocation_Lat,
    # metropop is passed as-is: strwrap() can return more strings than it
    # was given, which would misalign popups with their markers.
    popup = ~metropop,
    clusterOptions = markerClusterOptions()
  )

## Warning in validateCoords(lng, lat, funcName): Data contains 58 rows with
## either missing or invalid lat/lon values and will be ignored

# Evaluate the map object at top level so it is displayed.
my_map

R Leaflet 500 Cities

Thomas Marrone

February 4, 2019