# Set working directory
# Point the R session at the project folder. The relative paths used further
# down ("./DummyData.csv", "./CACountyGoal.csv", "./shapefiles") are resolved
# against this directory.
# NOTE(review): this is an absolute path on one user's machine; the script will
# stop here on any computer where that folder does not exist.
setwd("C:/Users/Stephania/Google Drive/Applied Epi Using R/Project2/")
# Print the current working directory to confirm the change took effect.
getwd()
## [1] "C:/Users/Stephania/Google Drive/Applied Epi Using R/Project2"
# Step one: import data
library(maps)
##
## # ATTENTION: maps v3.0 has an updated 'world' map. #
## # Many country borders and names have changed since 1990. #
## # Type '?world' or 'news(package="maps")'. See README_v3. #
library(leaflet)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the countyMapEnv dataset that comes with the maps package.
data(list = "countyMapEnv")
# Read the dummy survey file: unique entries from 400 hypothetical survey
# respondents spread across the state.
dummyData <- read.csv(file = "./DummyData.csv")
# Print a column-by-column summary of the imported responses.
summary(object = dummyData)
## ID RespDate County CertStatus
## Min. : 1.0 11/18/2016: 8 Alameda : 21 Min. :0.000
## 1st Qu.:100.8 10/27/2016: 6 Los Angeles : 21 1st Qu.:0.000
## Median :200.5 10/13/2016: 5 Riverside : 21 Median :0.000
## Mean :200.5 10/20/2016: 5 Sacramento : 21 Mean :0.455
## 3rd Qu.:300.2 11/12/2016: 5 San Diego : 21 3rd Qu.:1.000
## Max. :400.0 11/5/2016 : 5 San Francisco: 21 Max. :1.000
## (Other) :366 (Other) :274
# Summarize by county: collapse the respondent-level rows into one row per
# county so they can be matched with the county response goals and prepared
# for the map. Counting the IDs within each county gives the number of
# responses received there.
sumByCounty <- with(dummyData, tapply(ID, County, length))
# as.data.frame() labels the count column after the variable it is given, so
# the conversion is done on `sumByCounty` itself to keep that column name.
sumByCounty <- as.data.frame(sumByCounty)
# Copy the county names from the row names into a NAME column; NAME is the key
# used for the merges that follow.
sumByCounty[["NAME"]] <- rownames(sumByCounty)
sumByCounty
## sumByCounty NAME
## Alameda 21 Alameda
## Alpine 6 Alpine
## Amador 6 Amador
## Butte 6 Butte
## Calaveras 6 Calaveras
## Colusa 6 Colusa
## Contra Costa 6 Contra Costa
## Del Norte 6 Del Norte
## El Dorado 6 El Dorado
## Fresno 8 Fresno
## Glenn 8 Glenn
## Humboldt 8 Humboldt
## Imperial 8 Imperial
## Inyo 8 Inyo
## Kern 8 Kern
## Kings 8 Kings
## Lake 8 Lake
## Lassen 8 Lassen
## Los Angeles 21 Los Angeles
## Madera 5 Madera
## Marin 5 Marin
## Mariposa 5 Mariposa
## Mendocino 5 Mendocino
## Merced 5 Merced
## Modoc 5 Modoc
## Mono 8 Mono
## Monterey 8 Monterey
## Napa 8 Napa
## Nevada 8 Nevada
## Orange 8 Orange
## Placer 8 Placer
## Plumas 8 Plumas
## Riverside 21 Riverside
## Sacramento 21 Sacramento
## San Benito 8 San Benito
## San Bernardino 8 San Bernardino
## San Diego 21 San Diego
## San Francisco 21 San Francisco
## San Joaquin 3 San Joaquin
## San Luis Obispo 3 San Luis Obispo
## San Mateo 3 San Mateo
## Santa Barbara 3 Santa Barbara
## Santa Clara 3 Santa Clara
## Santa Cruz 3 Santa Cruz
## Shasta 3 Shasta
## Sierra 3 Sierra
## Siskiyou 3 Siskiyou
## Solano 3 Solano
## Sonoma 3 Sonoma
## Stanislaus 3 Stanislaus
## Sutter 2 Sutter
## Tehama 2 Tehama
## Trinity 2 Trinity
## Tulare 2 Tulare
## Tuolumne 2 Tuolumne
## Ventura 2 Ventura
## Yolo 2 Yolo
## Yuba 2 Yuba
# With the response counts in one-row-per-county form, read the target number
# of responses for each county (counties often have their own target) and
# standardize the two column names so the table can be merged on NAME.
goal <- setNames(read.csv("./CACountyGoal.csv"), c("NAME", "Goal"))
goal
## NAME Goal
## 1 Alameda 25
## 2 Alpine 10
## 3 Amador 10
## 4 Butte 10
## 5 Calaveras 10
## 6 Colusa 10
## 7 Contra Costa 10
## 8 Del Norte 10
## 9 El Dorado 10
## 10 Fresno 10
## 11 Glenn 10
## 12 Humboldt 10
## 13 Imperial 10
## 14 Inyo 10
## 15 Kern 15
## 16 Kings 10
## 17 Lake 10
## 18 Lassen 10
## 19 Los Angeles 30
## 20 Madera 10
## 21 Marin 10
## 22 Mariposa 10
## 23 Mendocino 10
## 24 Merced 10
## 25 Modoc 10
## 26 Mono 10
## 27 Monterey 10
## 28 Napa 10
## 29 Nevada 10
## 30 Orange 10
## 31 Placer 10
## 32 Plumas 10
## 33 Riverside 30
## 34 Sacramento 25
## 35 San Benito 10
## 36 San Bernardino 15
## 37 San Diego 25
## 38 San Francisco 20
## 39 San Joaquin 10
## 40 San Luis Obispo 10
## 41 San Mateo 10
## 42 Santa Barbara 15
## 43 Santa Clara 10
## 44 Santa Cruz 10
## 45 Shasta 10
## 46 Sierra 10
## 47 Siskiyou 10
## 48 Solano 10
## 49 Sonoma 10
## 50 Stanislaus 15
## 51 Sutter 10
## 52 Tehama 10
## 53 Trinity 10
## 54 Tulare 10
## 55 Tuolumne 10
## 56 Ventura 10
## 57 Yolo 10
## 58 Yuba 10
# Attach each county's goal to its response count, joining on NAME.
# all.y = TRUE keeps every county listed in `goal`, including any county that
# has no survey responses yet. merge()'s default inner join would drop such a
# county, and it would later appear on the map with missing progress rather
# than 0%.
sumByCounty <- merge(sumByCounty, goal, by = "NAME", all.y = TRUE)
# A county with a goal but no survey rows comes back with an NA count; record
# it as zero responses. This is a no-op when every county has responses.
sumByCounty$sumByCounty[is.na(sumByCounty$sumByCounty)] <- 0L
sumByCounty
## NAME sumByCounty Goal
## 1 Alameda 21 25
## 2 Alpine 6 10
## 3 Amador 6 10
## 4 Butte 6 10
## 5 Calaveras 6 10
## 6 Colusa 6 10
## 7 Contra Costa 6 10
## 8 Del Norte 6 10
## 9 El Dorado 6 10
## 10 Fresno 8 10
## 11 Glenn 8 10
## 12 Humboldt 8 10
## 13 Imperial 8 10
## 14 Inyo 8 10
## 15 Kern 8 15
## 16 Kings 8 10
## 17 Lake 8 10
## 18 Lassen 8 10
## 19 Los Angeles 21 30
## 20 Madera 5 10
## 21 Marin 5 10
## 22 Mariposa 5 10
## 23 Mendocino 5 10
## 24 Merced 5 10
## 25 Modoc 5 10
## 26 Mono 8 10
## 27 Monterey 8 10
## 28 Napa 8 10
## 29 Nevada 8 10
## 30 Orange 8 10
## 31 Placer 8 10
## 32 Plumas 8 10
## 33 Riverside 21 30
## 34 Sacramento 21 25
## 35 San Benito 8 10
## 36 San Bernardino 8 15
## 37 San Diego 21 25
## 38 San Francisco 21 20
## 39 San Joaquin 3 10
## 40 San Luis Obispo 3 10
## 41 San Mateo 3 10
## 42 Santa Barbara 3 15
## 43 Santa Clara 3 10
## 44 Santa Cruz 3 10
## 45 Shasta 3 10
## 46 Sierra 3 10
## 47 Siskiyou 3 10
## 48 Solano 3 10
## 49 Sonoma 3 10
## 50 Stanislaus 3 15
## 51 Sutter 2 10
## 52 Tehama 2 10
## 53 Trinity 2 10
## 54 Tulare 2 10
## 55 Tuolumne 2 10
## 56 Ventura 2 10
## 57 Yolo 2 10
## 58 Yuba 2 10
# Calculate the percentage of each county's goal that is complete, rounded to
# a whole number. Values above 100 mean the county has exceeded its goal.
sumByCounty[["progress"]] <- round(
  (sumByCounty[["sumByCounty"]] / sumByCounty[["Goal"]]) * 100,
  digits = 0
)
sumByCounty
## NAME sumByCounty Goal progress
## 1 Alameda 21 25 84
## 2 Alpine 6 10 60
## 3 Amador 6 10 60
## 4 Butte 6 10 60
## 5 Calaveras 6 10 60
## 6 Colusa 6 10 60
## 7 Contra Costa 6 10 60
## 8 Del Norte 6 10 60
## 9 El Dorado 6 10 60
## 10 Fresno 8 10 80
## 11 Glenn 8 10 80
## 12 Humboldt 8 10 80
## 13 Imperial 8 10 80
## 14 Inyo 8 10 80
## 15 Kern 8 15 53
## 16 Kings 8 10 80
## 17 Lake 8 10 80
## 18 Lassen 8 10 80
## 19 Los Angeles 21 30 70
## 20 Madera 5 10 50
## 21 Marin 5 10 50
## 22 Mariposa 5 10 50
## 23 Mendocino 5 10 50
## 24 Merced 5 10 50
## 25 Modoc 5 10 50
## 26 Mono 8 10 80
## 27 Monterey 8 10 80
## 28 Napa 8 10 80
## 29 Nevada 8 10 80
## 30 Orange 8 10 80
## 31 Placer 8 10 80
## 32 Plumas 8 10 80
## 33 Riverside 21 30 70
## 34 Sacramento 21 25 84
## 35 San Benito 8 10 80
## 36 San Bernardino 8 15 53
## 37 San Diego 21 25 84
## 38 San Francisco 21 20 105
## 39 San Joaquin 3 10 30
## 40 San Luis Obispo 3 10 30
## 41 San Mateo 3 10 30
## 42 Santa Barbara 3 15 20
## 43 Santa Clara 3 10 30
## 44 Santa Cruz 3 10 30
## 45 Shasta 3 10 30
## 46 Sierra 3 10 30
## 47 Siskiyou 3 10 30
## 48 Solano 3 10 30
## 49 Sonoma 3 10 30
## 50 Stanislaus 3 15 20
## 51 Sutter 2 10 20
## 52 Tehama 2 10 20
## 53 Trinity 2 10 20
## 54 Tulare 2 10 20
## 55 Tuolumne 2 10 20
## 56 Ventura 2 10 20
## 57 Yolo 2 10 20
## 58 Yuba 2 10 20
# Making the map
#Example of a leaflet map
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.1-1, (SVN revision 572)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 1.11.2, released 2015/02/10
## Path to GDAL shared files: C:/Users/Stephania/Documents/R/win-library/3.2/rgdal/gdal
## GDAL does not use iconv for recoding strings.
## Loaded PROJ.4 runtime: Rel. 4.9.1, 04 March 2015, [PJ_VERSION: 491]
## Path to PROJ.4 shared files: C:/Users/Stephania/Documents/R/win-library/3.2/rgdal/proj
## Linking to sp version: 1.2-1
# Load the county boundary layer from the shapefile folder under the working
# directory.
counties <- readOGR(dsn = "./shapefiles", layer = "cb_2014_us_county_20m")
## OGR data source with driver: ESRI Shapefile
## Source: "./shapefiles", layer: "cb_2014_us_county_20m"
## with 3220 features
## It has 9 fields
## Warning in readOGR("./shapefiles", layer = "cb_2014_us_county_20m"): Z-
## dimension discarded
# Keep only the California counties: state FIPS code "06".
counties <- subset(counties, counties@data[["STATEFP"]] == "06")
# Quick check: draw the California county outlines on the default basemap.
leaflet() %>%
  addTiles() %>%
  addPolygons(data = counties)
# Merge the county summary into the shapefile's attribute table.
# match() finds, for each polygon's NAME, the row of sumByCounty with the same
# NAME, so the summary rows are appended in polygon order and the attribute
# table stays aligned with the geometries.
counties@data <- data.frame(
  counties@data,
  sumByCounty[match(counties@data[["NAME"]], sumByCounty[["NAME"]]), ]
)
# Set the colour palette.
# Build a three-stop colour ramp with spline interpolation between the stops.
# The ramp constructor is called as grDevices::colorRamp because the result is
# stored in a variable that is also named `colorRamp`. On a second run in the
# same session the bare name would resolve to that stored ramp function, and
# the call would fail with an unused-argument error.
colorRamp <- grDevices::colorRamp(
  c("#2c7fb8", "#7fcdbb", "#edf8b1"),
  interpolate = "spline"
)
# Turn the ramp into a function that maps a progress percentage to a colour,
# using the observed progress values as the domain.
palette <- colorNumeric(colorRamp, counties@data$progress)
# Draw the progress map: county polygons over a light basemap, coloured by the
# percentage of the response goal met, with a popup per county and a legend.
leaflet() %>%
  addProviderTiles("Stamen.TonerLite") %>%
  addPolygons(
    data = counties,
    stroke = TRUE,
    weight = 2,
    fillOpacity = 0.65,
    # Colour each county by passing its progress value through the palette.
    color = ~palette(progress),
    # Popup text is built per county from the attribute table; `sumByCounty`
    # here is the response-count column of that table.
    popup = ~paste(
      "<strong>County:</strong>", NAME,
      "<br>",
      "<strong>Total Responses:</strong>", sumByCounty,
      "<br>",
      "<strong>Complete:</strong>", progress, "<strong>%</strong>"
    )
  ) %>%
  addLegend(
    title = "Response <br> Goal Met",
    position = "topright",
    pal = palette,
    values = counties@data$progress,
    bins = 5,
    opacity = 1,
    labFormat = labelFormat(suffix = "%")
  )