Lyme Disease Cases

lyme_count_by_county <- read.csv(("lyme_count_by_county.csv"), header = TRUE, stringsAsFactors = FALSE)
lyme_incidence_rates_by_state <- read.csv(("lyme_incidence_rates_by_state.csv"), header = TRUE, stringsAsFactors = FALSE)
lyme_reported_cases_by_state <- read.csv(("lyme_reported_cases_by_state.csv"), header = TRUE, stringsAsFactors = FALSE)
maine_lyme_count <- read.csv(("maine_lyme_count.csv"), header = TRUE, stringsAsFactors = FALSE)

Loading in the lyme .csv files I found

# Convert to continental US states only and remove STCODE and CTYCODE
lyme_count_by_county <- subset(lyme_count_by_county, STNAME != "Alaska", STNAME != "Hawaii") %>% select(-STCODE, -CTYCODE)
# Convert STNAME from full state name to state abbreviation
lyme_count_by_county$ABB <- state.abb[match(lyme_count_by_county$STNAME, state.name)]
# increase maximum printed to 20000
options(max.print = 20000)

I tried to take the coordinates from the county.map data and join it to my .csv file

data("county.map")
county_map <- as.data.frame(county.map)
county_map$CTYNAME <- paste(county_map$NAME, county_map$LSAD)
lyme_count_county_map <- lyme_count_by_county %>% inner_join(county_map)
## Joining, by = "CTYNAME"
## Then I filtered out the data where there were no cases of lyme disease. This brought the number of observations down from 97253 to 46868.
lyme_count_county_map <- lyme_count_county_map %>% filter(Cases2000 > 0, Cases2001 > 0, Cases2002 > 0, Cases2003 > 0, Cases2004 > 0, Cases2005 > 0, Cases2006 > 0, Cases2007 > 0, Cases2008 > 0, Cases2009 > 0, Cases2010 > 0, Cases2011 > 0, Cases2012 > 0, Cases2013 > 0, Cases2014 > 0)

I removed all of the duplicate values, since the data is for counties and this brought the observations down to 243.

lyme_county_count <- subset(lyme_count_county_map, !duplicated(lyme_count_county_map[,2]))
lyme_county_count_map <- leaflet() %>% addTiles() %>% addMarkers(lng = lyme_county_count$long, lat = lyme_county_count$lat)
lyme_county_count_map

I was looking at which counties had the maximum number of lyme cases in 2005 and 2014 in Maine.

maine_lyme_2014 <- maine_lyme_count[-17 , ] %>% select(County, X2014.Count) %>% arrange(desc(X2014.Count))
maine_lyme_2005 <- maine_lyme_count[-17 , ] %>% select(County, X2005.Count) %>% arrange(desc(X2005.Count))
pander(tbl_df(maine_lyme_2014))
County X2014.Count
Cumberland 337
York 262
Kennebec 136
Hancock 119
Knox 106
Androscoggin 93
Lincoln 79
Sagadahoc 63
Penobscot 50
Waldo 47
Oxford 42
Somerset 17
Washington 14
Franklin 9
Aroostook 5
Piscataquis 2
pander(tbl_df(maine_lyme_2005))
County X2005.Count
York 97
Cumberland 70
Lincoln 18
Knox 16
Kennebec 12
Hancock 7
Sagadahoc 7
Penobscot 6
Androscoggin 5
Oxford 3
Aroostook 2
Somerset 1
Waldo 1
Franklin 0
Piscataquis 0
Washington 0

Then I was trying to compare that to the rest of the country.

max(maine_lyme_2014$X2014.Count)
## [1] 337
us_lyme_count_2000 <- lyme_county_count %>% select(STNAME, CTYNAME, Cases2000, long, lat, region) %>% arrange(desc(Cases2000))
us_lyme_count_2005 <- lyme_county_count %>% select(STNAME, CTYNAME, Cases2005, long, lat, region) %>% arrange(desc(Cases2005))
us_lyme_count_2014 <- lyme_county_count %>% select(STNAME, CTYNAME, Cases2014, long, lat, region) %>% arrange(desc(Cases2014))
kable(head(us_lyme_count_2014))
STNAME CTYNAME Cases2014 long lat region
Pennsylvania Allegheny County 822 -79.70162 40.52545 42003
Massachusetts Plymouth County 789 -96.52326 42.90906 19149
Massachusetts Norfolk County 648 -71.49826 42.01722 25021
Massachusetts Bristol County 628 -71.22480 41.71050 44001
Connecticut New Haven County 459 -73.15573 41.51528 9009
Massachusetts Essex County 459 -74.16480 40.75472 34013
kable(head(us_lyme_count_2005))
STNAME CTYNAME Cases2005 long lat region
New York Dutchess County 1398 -73.95331 41.58998 36027
Pennsylvania Chester County 670 -75.36107 40.06554 42029
New York Orange County 536 -93.90145 30.24267 48361
New Jersey Morris County 524 -96.50156 38.86970 20127
Pennsylvania Bucks County 500 -75.10688 40.19228 42017
New York Westchester County 458 -73.51462 41.19843 36119
I found the max imums and their corre sponding coo rdinates
us_2000_max <- geocode("Fairfield County")
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Fairfield%20County&sensor=false
us_2000_max_map <- leaflet() %>% addTiles() %>% addMarkers(lng = us_2000_max$lon, lat = us_2000_max$lat)
us_2000_max_map
us_2014_max <- geocode("Allegheny County")
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Allegheny%20County&sensor=false
us_2014_max_map <- leaflet() %>% addTiles() %>% addMarkers(lng = us_2014_max$lon, lat = us_2014_max$lat)
us_2014_max_map
us_2005_2014_max <- leaflet() %>% addProviderTiles("Stamen.Toner") %>% addMarkers(lng = us_2000_max$lon, lat = us_2000_max$lat) %>% addMarkers(lng = us_2014_max$lon, lat = us_2014_max$lat)
us_2000_2014_max <- leaflet() %>% addProviderTiles("Stamen.Toner") %>% addMarkers(lng = us_2000_max$lon, lat = us_2000_max$lat, popup = "Hartford Connecticut") %>% addMarkers(lng = us_2014_max$lon, lat = us_2014_max$lat, popup = "Harrisburg Pennsylvania")
us_2000_2014_max

Map of US counties

us_counties <- readOGR("./cb_2015_us_county_20m", "cb_2015_us_county_20m")
## OGR data source with driver: ESRI Shapefile 
## Source: "./cb_2015_us_county_20m", layer: "cb_2015_us_county_20m"
## with 3220 features
## It has 9 fields
us_counties <- spTransform(us_counties, CRS("+proj=longlat +datum=WGS84"))
leaflet() %>% addProviderTiles("CartoDB.Positron") %>% addPolygons( data = us_counties, popup = ~NAME)