Lyme Disease Cases

# Read the four Lyme disease CSV files from the working directory.
# The first row of each file is used as the header, and text columns stay as
# character vectors instead of being converted to factors.
lyme_count_by_county <- read.csv(
  "lyme_count_by_county.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
lyme_incidence_rates_by_state <- read.csv(
  "lyme_incidence_rates_by_state.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
lyme_reported_cases_by_state <- read.csv(
  "lyme_reported_cases_by_state.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
maine_lyme_count <- read.csv(
  "maine_lyme_count.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

Loading in the lyme .csv files I found

# Keep continental US states only (drop Alaska and Hawaii), then remove the
# STCODE and CTYCODE columns.
# The two state conditions are combined with `&` into a single row filter:
# the third positional argument of subset() is `select` (a column selector),
# so passing the Hawaii condition there would not remove any rows.
lyme_count_by_county <- lyme_count_by_county %>%
  subset(STNAME != "Alaska" & STNAME != "Hawaii") %>%
  select(-STCODE, -CTYCODE)
# Convert STNAME from full state name to state abbreviation.
# Names that do not appear in state.name get NA.
lyme_count_by_county$ABB <- state.abb[match(lyme_count_by_county$STNAME, state.name)]
# Increase the maximum number of printed entries to 20000.
options(max.print = 20000)

I tried to take the coordinates from the county.map data and join them to my .csv file

# Load the county.map data set and work with it as a plain data frame.
data("county.map")
county_map <- as.data.frame(county.map)
# Build a CTYNAME column by pasting NAME and LSAD together (space-separated)
# so that it can be matched against CTYNAME in the Lyme data.
county_map[["CTYNAME"]] <- paste(county_map[["NAME"]], county_map[["LSAD"]])
# No `by` is supplied, so the join uses every column name the two tables share.
# NOTE(review): the join message reports CTYNAME as the only key, so counties
# that share a name in different states will be matched to each other; confirm
# whether a state-level key should be part of the join.
lyme_count_county_map <- inner_join(lyme_count_by_county, county_map)

## Joining, by = "CTYNAME"

## Keep only the rows that report at least one case of Lyme disease in every
## year from 2000 through 2014. This brought the number of observations down
## from 97253 to 46868.
lyme_count_county_map <- filter(
  lyme_count_county_map,
  Cases2000 > 0,
  Cases2001 > 0,
  Cases2002 > 0,
  Cases2003 > 0,
  Cases2004 > 0,
  Cases2005 > 0,
  Cases2006 > 0,
  Cases2007 > 0,
  Cases2008 > 0,
  Cases2009 > 0,
  Cases2010 > 0,
  Cases2011 > 0,
  Cases2012 > 0,
  Cases2013 > 0,
  Cases2014 > 0
)

I removed the duplicate rows so that each county appears only once, which brought the number of observations down to 243.

# Keep only the first row for each distinct value found in the second column.
# NOTE(review): the column is addressed by position; presumably it is CTYNAME,
# in which case same-named counties in different states collapse into one row.
# Confirm against the column order of the joined table.
lyme_county_count <- lyme_count_county_map[
  !duplicated(lyme_count_county_map[[2]]),
]

# Draw one marker per remaining row on the default tile layer, using the
# long/lat columns of lyme_county_count, then display the map.
lyme_county_count_map <- leaflet() %>%
  addTiles() %>%
  addMarkers(
    lng = lyme_county_count$long,
    lat = lyme_county_count$lat
  )
lyme_county_count_map

I was looking at which counties had the maximum number of lyme cases in 2005 and 2014 in Maine.

# Build per-county tables for 2014 and 2005, sorted from most to fewest cases.
# Row 17 of maine_lyme_count is dropped first.
# NOTE(review): presumably row 17 is a non-county summary row; confirm
# against the CSV.
maine_lyme_2014 <- maine_lyme_count[-17, ] %>%
  select(County, X2014.Count) %>%
  arrange(desc(X2014.Count))
maine_lyme_2005 <- maine_lyme_count[-17, ] %>%
  select(County, X2005.Count) %>%
  arrange(desc(X2005.Count))
# Print the 2014 table.
maine_lyme_2014 %>%
  tbl_df() %>%
  pander()

County	X2014.Count
Cumberland	337
York	262
Kennebec	136
Hancock	119
Knox	106
Androscoggin	93
Lincoln	79
Sagadahoc	63
Penobscot	50
Waldo	47
Oxford	42
Somerset	17
Washington	14
Franklin	9
Aroostook	5
Piscataquis	2

# Print the 2005 table.
maine_lyme_2005 %>%
  tbl_df() %>%
  pander()

County	X2005.Count
York	97
Cumberland	70
Lincoln	18
Knox	16
Kennebec	12
Hancock	7
Sagadahoc	7
Penobscot	6
Androscoggin	5
Oxford	3
Aroostook	2
Somerset	1
Waldo	1
Franklin	0
Piscataquis	0
Washington	0

Then I was trying to compare that to the rest of the country.

# Largest single-county case count in Maine for 2014.
max(maine_lyme_2014[["X2014.Count"]])

## [1] 337

# For each of 2000, 2005 and 2014, keep the state, county, that year's case
# count and the map columns, sorted from most to fewest cases.
us_lyme_count_2000 <- lyme_county_count %>%
  select(STNAME, CTYNAME, Cases2000, long, lat, region) %>%
  arrange(desc(Cases2000))
us_lyme_count_2005 <- lyme_county_count %>%
  select(STNAME, CTYNAME, Cases2005, long, lat, region) %>%
  arrange(desc(Cases2005))
us_lyme_count_2014 <- lyme_county_count %>%
  select(STNAME, CTYNAME, Cases2014, long, lat, region) %>%
  arrange(desc(Cases2014))
# Show the top rows for 2014.
us_lyme_count_2014 %>%
  head() %>%
  kable()

STNAME	CTYNAME	Cases2014	long	lat	region
Pennsylvania	Allegheny County	822	-79.70162	40.52545	42003
Massachusetts	Plymouth County	789	-96.52326	42.90906	19149
Massachusetts	Norfolk County	648	-71.49826	42.01722	25021
Massachusetts	Bristol County	628	-71.22480	41.71050	44001
Connecticut	New Haven County	459	-73.15573	41.51528	9009
Massachusetts	Essex County	459	-74.16480	40.75472	34013

# Show the top rows for 2005.
us_lyme_count_2005 %>%
  head() %>%
  kable()

STNAME	CTYNAME	Cases2005	long	lat	region
New York	Dutchess County	1398	-73.95331	41.58998	36027
Pennsylvania	Chester County	670	-75.36107	40.06554	42029
New York	Orange County	536	-93.90145	30.24267	48361
New Jersey	Morris County	524	-96.50156	38.86970	20127
Pennsylvania	Bucks County	500	-75.10688	40.19228	42017
New York	Westchester County	458	-73.51462	41.19843	36119
I found the maximums and their corresponding coordinates

# Geocode the county used as the 2000 maximum.
# The state is part of the query because more than one US state has a
# "Fairfield County"; the bare county name leaves the choice to the geocoder.
us_2000_max <- geocode("Fairfield County, Connecticut")

## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Fairfield%20County&sensor=false

# Single-marker map at the geocoded location for 2000, then display it.
us_2000_max_map <- leaflet() %>%
  addTiles() %>%
  addMarkers(
    lng = us_2000_max$lon,
    lat = us_2000_max$lat
  )
us_2000_max_map

# Geocode the county at the top of the 2014 table.
us_2014_max <- geocode(location = "Allegheny County")

## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Allegheny%20County&sensor=false

# Single-marker map at the geocoded location for 2014, then display it.
us_2014_max_map <- leaflet() %>%
  addTiles() %>%
  addMarkers(
    lng = us_2014_max$lon,
    lat = us_2014_max$lat
  )
us_2014_max_map

# Unlabelled variant: both geocoded markers on the Stamen Toner basemap.
# NOTE(review): the name says 2005, but the first marker comes from
# us_2000_max, and this object is not displayed in this section.
us_2005_2014_max <- leaflet() %>%
  addProviderTiles("Stamen.Toner") %>%
  addMarkers(lng = us_2000_max$lon, lat = us_2000_max$lat) %>%
  addMarkers(lng = us_2014_max$lon, lat = us_2014_max$lat)
# Labelled variant. The popups name the counties that were geocoded, so each
# label describes the place its marker sits on.
us_2000_2014_max <- leaflet() %>%
  addProviderTiles("Stamen.Toner") %>%
  addMarkers(
    lng = us_2000_max$lon,
    lat = us_2000_max$lat,
    popup = "Fairfield County, Connecticut"
  ) %>%
  addMarkers(
    lng = us_2014_max$lon,
    lat = us_2014_max$lat,
    popup = "Allegheny County, Pennsylvania"
  )
us_2000_2014_max

Map of US counties

# Read the US county shapefile layer from the local directory.
us_counties <- readOGR(
  dsn = "./cb_2015_us_county_20m",
  layer = "cb_2015_us_county_20m"
)

## OGR data source with driver: ESRI Shapefile 
## Source: "./cb_2015_us_county_20m", layer: "cb_2015_us_county_20m"
## with 3220 features
## It has 9 fields

# Reproject the county polygons to longitude/latitude on the WGS84 datum.
us_counties <- spTransform(
  us_counties,
  CRS("+proj=longlat +datum=WGS84")
)
# Draw the county polygons on the CartoDB Positron basemap; each polygon's
# popup shows its NAME field.
leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(data = us_counties, popup = ~NAME)

Assignment 3

Meryn Lounsbury

October 23, 2016

Lyme Disease Cases