# Read the four Lyme disease CSV files from the working directory. Each file
# has a header row, and text columns are kept as character vectors instead of
# being converted to factors.
lyme_count_by_county <- read.csv(
  file = "lyme_count_by_county.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
lyme_incidence_rates_by_state <- read.csv(
  file = "lyme_incidence_rates_by_state.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
lyme_reported_cases_by_state <- read.csv(
  file = "lyme_reported_cases_by_state.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
maine_lyme_count <- read.csv(
  file = "maine_lyme_count.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
Load the Lyme disease .csv files I found.
# Restrict to the contiguous US (drop Alaska and Hawaii) and remove the STCODE
# and CTYCODE columns. Both state conditions have to be combined into ONE
# logical expression: in `subset(x, cond1, cond2)` the third positional
# argument is `select` (the column chooser), so a second condition passed that
# way is never applied to the rows and Hawaii would be kept.
lyme_count_by_county <- subset(
  lyme_count_by_county,
  STNAME != "Alaska" & STNAME != "Hawaii"
) %>%
  select(-STCODE, -CTYCODE)
# Convert STNAME from full state name to state abbreviation. `state.name`
# contains only the 50 states, so any other STNAME value gets an NA here.
lyme_count_by_county$ABB <- state.abb[match(lyme_count_by_county$STNAME, state.name)]
# Increase the maximum number of entries printed to the console to 20000
options(max.print = 20000)
I tried to take the coordinates from the county.map data and join them to the data from my .csv file.
# Load the county polygon map and build a "<NAME> <LSAD>" label (for example
# "Allegheny County") so it can be compared with CTYNAME in the Lyme data.
data("county.map")
county_map <- as.data.frame(county.map)
county_map$CTYNAME <- paste(county_map$NAME, county_map$LSAD)
# County names repeat across states ("Plymouth County", "Essex County", ...),
# so joining on CTYNAME alone attaches coordinates from the wrong state. Look
# up the state abbreviation of every map row from its `region` code and join
# on state + county name instead.
# NOTE(review): assumes `county.regions` (the lookup table that ships with
# `county.map`) has `region` and `state.abb` columns -- confirm against the
# installed package version. Rows whose ABB is NA in the Lyme data (any STNAME
# that is not one of the 50 states) will not find a state match.
data("county.regions")
county_map$ABB <- as.character(
  county.regions$state.abb[match(county_map$region, county.regions$region)]
)
lyme_count_county_map <- lyme_count_by_county %>%
  inner_join(county_map, by = c("ABB", "CTYNAME"))
## Joining, by = "CTYNAME"
## Then I kept only the rows for counties that reported at least one case of Lyme disease in every year from 2000 to 2014. This brought the number of observations down from 97253 to 46868.
# Keep only rows with a positive case count in every single year from 2000
# through 2014; a row is dropped as soon as any one year fails the test.
lyme_count_county_map <- filter(
  lyme_count_county_map,
  Cases2000 > 0,
  Cases2001 > 0,
  Cases2002 > 0,
  Cases2003 > 0,
  Cases2004 > 0,
  Cases2005 > 0,
  Cases2006 > 0,
  Cases2007 > 0,
  Cases2008 > 0,
  Cases2009 > 0,
  Cases2010 > 0,
  Cases2011 > 0,
  Cases2012 > 0,
  Cases2013 > 0,
  Cases2014 > 0
)
I removed the duplicate rows so that each county appears only once, which brought the number of observations down to 243.
# Keep the first row for every county. A county is identified by state AND
# county name, referenced by column name: keying on the second column alone
# (presumably the county name) merged same-named counties from different
# states into a single row and depended on the column order never changing.
lyme_county_count <- lyme_count_county_map[
  !duplicated(lyme_count_county_map[, c("STNAME", "CTYNAME")]),
]
# Put one marker per remaining county on a default tile map.
# NOTE(review): the coordinate used is whichever map row came first for the
# county -- presumably a boundary vertex rather than a centroid; confirm.
lyme_county_count_map <- leaflet() %>%
  addTiles() %>%
  addMarkers(lng = lyme_county_count$long, lat = lyme_county_count$lat)
lyme_county_count_map
I looked at which counties in Maine had the highest number of Lyme disease cases in 2005 and in 2014.
# Rank the Maine counties by case count, highest first, once for 2014 and once
# for 2005. Row 17 of the source table is excluded before ranking
# (presumably a non-county summary row -- TODO confirm against the CSV).
maine_lyme_2014 <- maine_lyme_count[-17, ] %>%
  select(County, X2014.Count) %>%
  arrange(desc(X2014.Count))
maine_lyme_2005 <- maine_lyme_count[-17, ] %>%
  select(County, X2005.Count) %>%
  arrange(desc(X2005.Count))
# Render the 2014 ranking as a table
maine_lyme_2014 %>%
  tbl_df() %>%
  pander()
| County | X2014.Count |
|---|---|
| Cumberland | 337 |
| York | 262 |
| Kennebec | 136 |
| Hancock | 119 |
| Knox | 106 |
| Androscoggin | 93 |
| Lincoln | 79 |
| Sagadahoc | 63 |
| Penobscot | 50 |
| Waldo | 47 |
| Oxford | 42 |
| Somerset | 17 |
| Washington | 14 |
| Franklin | 9 |
| Aroostook | 5 |
| Piscataquis | 2 |
# Render the 2005 ranking as a table
maine_lyme_2005 %>%
  tbl_df() %>%
  pander()
| County | X2005.Count |
|---|---|
| York | 97 |
| Cumberland | 70 |
| Lincoln | 18 |
| Knox | 16 |
| Kennebec | 12 |
| Hancock | 7 |
| Sagadahoc | 7 |
| Penobscot | 6 |
| Androscoggin | 5 |
| Oxford | 3 |
| Aroostook | 2 |
| Somerset | 1 |
| Waldo | 1 |
| Franklin | 0 |
| Piscataquis | 0 |
| Washington | 0 |
Then I tried to compare that to the rest of the country.
# Largest single-county case count in Maine for 2014
max(maine_lyme_2014[["X2014.Count"]])
## [1] 337
# For 2000, 2005 and 2014 in turn: keep the state, county, that year's case
# count and the map columns (long, lat, region), ordered from the highest case
# count to the lowest.
us_lyme_count_2000 <- lyme_county_count %>%
  select(STNAME, CTYNAME, Cases2000, long, lat, region) %>%
  arrange(desc(Cases2000))
us_lyme_count_2005 <- lyme_county_count %>%
  select(STNAME, CTYNAME, Cases2005, long, lat, region) %>%
  arrange(desc(Cases2005))
us_lyme_count_2014 <- lyme_county_count %>%
  select(STNAME, CTYNAME, Cases2014, long, lat, region) %>%
  arrange(desc(Cases2014))
# Table of the first six rows (the highest 2014 counts)
us_lyme_count_2014 %>%
  head() %>%
  kable()
| STNAME | CTYNAME | Cases2014 | long | lat | region |
|---|---|---|---|---|---|
| Pennsylvania | Allegheny County | 822 | -79.70162 | 40.52545 | 42003 |
| Massachusetts | Plymouth County | 789 | -96.52326 | 42.90906 | 19149 |
| Massachusetts | Norfolk County | 648 | -71.49826 | 42.01722 | 25021 |
| Massachusetts | Bristol County | 628 | -71.22480 | 41.71050 | 44001 |
| Connecticut | New Haven County | 459 | -73.15573 | 41.51528 | 9009 |
| Massachusetts | Essex County | 459 | -74.16480 | 40.75472 | 34013 |
# Table of the first six rows (the highest 2005 counts)
us_lyme_count_2005 %>%
  head() %>%
  kable()
| STNAME | CTYNAME | Cases2005 | long | lat | region |
|---|---|---|---|---|---|
| New York | Dutchess County | 1398 | -73.95331 | 41.58998 | 36027 |
| Pennsylvania | Chester County | 670 | -75.36107 | 40.06554 | 42029 |
| New York | Orange County | 536 | -93.90145 | 30.24267 | 48361 |
| New Jersey | Morris County | 524 | -96.50156 | 38.86970 | 20127 |
| Pennsylvania | Bucks County | 500 | -75.10688 | 40.19228 | 42017 |
| New York | Westchester County | 458 | -73.51462 | 41.19843 | 36119 |
I found the maximums and their corresponding coordinates.
# Geocode the county used as the 2000 maximum. The state is included in the
# query because more than one US state has a Fairfield County, so the bare
# county name is ambiguous; Connecticut is the state this analysis labels the
# point with.
us_2000_max <- geocode("Fairfield County, Connecticut")
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Fairfield%20County&sensor=false
# Single-marker map at the geocoded location of the 2000 maximum
us_2000_max_map <- leaflet() %>%
  addTiles() %>%
  addMarkers(lng = us_2000_max$lon, lat = us_2000_max$lat)
us_2000_max_map
# Geocode the county with the most cases in 2014
us_2014_max <- geocode("Allegheny County")
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Allegheny%20County&sensor=false
# Single-marker map at the geocoded location of the 2014 maximum
us_2014_max_map <- leaflet() %>%
  addTiles() %>%
  addMarkers(lng = us_2014_max$lon, lat = us_2014_max$lat)
us_2014_max_map
# NOTE(review): despite its name this map plots the 2000 and 2014 points (no
# 2005 data) and duplicates `us_2000_2014_max` without popups. It is kept
# unchanged only in case code outside this section refers to it.
us_2005_2014_max <- leaflet() %>%
  addProviderTiles("Stamen.Toner") %>%
  addMarkers(lng = us_2000_max$lon, lat = us_2000_max$lat) %>%
  addMarkers(lng = us_2014_max$lon, lat = us_2014_max$lat)
# Combined map of the 2000 and 2014 maxima. Each popup names the county that
# was geocoded; the earlier labels named Hartford and Harrisburg, cities that
# are not located in the counties being marked.
us_2000_2014_max <- leaflet() %>%
  addProviderTiles("Stamen.Toner") %>%
  addMarkers(
    lng = us_2000_max$lon,
    lat = us_2000_max$lat,
    popup = "Fairfield County, Connecticut"
  ) %>%
  addMarkers(
    lng = us_2014_max$lon,
    lat = us_2014_max$lat,
    popup = "Allegheny County, Pennsylvania"
  )
us_2000_2014_max
Map of US counties
# Read the county boundary layer from the shapefile directory
us_counties <- readOGR(
  dsn = "./cb_2015_us_county_20m",
  layer = "cb_2015_us_county_20m"
)
## OGR data source with driver: ESRI Shapefile
## Source: "./cb_2015_us_county_20m", layer: "cb_2015_us_county_20m"
## with 3220 features
## It has 9 fields
# Reproject the county boundaries to longitude/latitude on the WGS84 datum
us_counties <- spTransform(
  us_counties,
  CRS("+proj=longlat +datum=WGS84")
)
# Draw every county polygon on a light basemap; clicking a polygon shows the
# value of its NAME field.
leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(data = us_counties, popup = ~NAME)