I decided to look at this dataset from NYC OpenData regarding asbestos complaints received by the Department of Environmental Protection (DEP) and the Department of Health and Mental Hygiene (DOHMH) from 2010 to the present. I also followed parts of the "Manipulating and mapping US Census data in R using the acs, tigris and leaflet packages" tutorial by Zev Ross, as well as the "Census Mapping Tutorial" by Laura Krull and Jeff Rosenblum.
I will be using functions from several different libraries. Each can be installed with install.packages() if it is not already on the device.
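For example, a minimal sketch for installing whichever of the packages used below are missing (the package list simply mirrors the library() calls that follow):
#install only the packages that are not already present
pkgs <- c("dplyr", "rgdal", "ggplot2", "leaflet", "lubridate", "tigris", "acs", "stringr", "plotly")
install.packages(setdiff(pkgs, rownames(installed.packages())))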
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.4-4, (SVN revision 833)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
## Path to GDAL shared files: C:/Users/murrayl/OneDrive - KCIC/Documents/R/win-library/3.6/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
## Path to PROJ.4 shared files: C:/Users/murrayl/OneDrive - KCIC/Documents/R/win-library/3.6/rgdal/proj
## Linking to sp version: 1.3-1
library(ggplot2)
library(leaflet)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(tigris)
## To enable
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
##
## Attaching package: 'tigris'
## The following object is masked from 'package:graphics':
##
## plot
library(acs)
## Loading required package: stringr
## Loading required package: XML
##
## Attaching package: 'acs'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:base':
##
## apply
library(stringr)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
#reading the data
nyc_complaints <- read.csv("Asbestos_Complaints.csv", stringsAsFactors = FALSE)
#observing nyc_complaints
str(nyc_complaints)
## 'data.frame': 19601 obs. of 39 variables:
## $ Unique.Key : int 43416920 43397586 43319486 43284428 43264741 43100492 42935089 42805214 42122223 43427094 ...
## $ Created.Date : chr "07/30/2019 10:14:00 AM" "07/27/2019 02:54:00 PM" "07/18/2019 05:04:00 PM" "07/15/2019 11:40:00 AM" ...
## $ Closed.Date : chr "07/30/2019 10:20:00 AM" "07/27/2019 03:01:00 PM" "07/18/2019 05:10:00 PM" "07/15/2019 12:30:00 PM" ...
## $ Agency : chr "DEP" "DEP" "DEP" "DEP" ...
## $ Agency.Name : chr "Department of Environmental Protection" "Department of Environmental Protection" "Department of Environmental Protection" "Department of Environmental Protection" ...
## $ Complaint.Type : chr "Asbestos" "Asbestos" "Asbestos" "Asbestos" ...
## $ Descriptor : chr "Asbestos Complaint (B1)" "Asbestos Complaint (B1)" "Asbestos Complaint (B1)" "Asbestos Complaint (B1)" ...
## $ Location.Type : chr "" "" "" "" ...
## $ Incident.Zip : chr "11411" "11217" "10006" "11692" ...
## $ Incident.Address : chr "114-28 212 STREET" "449 PACIFIC STREET" "" "328 BEACH 63 STREET" ...
## $ Street.Name : chr "212 STREET" "PACIFIC STREET" "" "BEACH 63 STREET" ...
## $ Cross.Street.1 : chr "COLFAX ST" "BOND ST" "LIBERTY ST" "DEAD END" ...
## $ Cross.Street.2 : chr "NASHVILLE BLVD" "NEVINS ST" "CHURCH ST" "BEACH CHANNEL DR" ...
## $ Intersection.Street.1 : chr "" "" "LIBERTY STREET" "" ...
## $ Intersection.Street.2 : chr "" "" "CHURCH STREET" "" ...
## $ Address.Type : chr "ADDRESS" "ADDRESS" "INTERSECTION" "ADDRESS" ...
## $ City : chr "Cambria Heights" "BROOKLYN" "NEW YORK" "Arverne" ...
## $ Landmark : chr "" "" "" "" ...
## $ Facility.Type : chr "N/A" "N/A" "N/A" "N/A" ...
## $ Status : chr "Closed" "Closed" "Closed" "Closed" ...
## $ Due.Date : chr "" "" "" "" ...
## $ Resolution.Description : chr "The Department of Environmental Protection determined that this complaint is a duplicate to another service request." "The Department of Environmental Protection determined that this complaint is a duplicate to another service request." "The Department of Environmental Protection spoke to the complainant via telephone and was able to resolve the c"| __truncated__ "The Department of Environmental Protection spoke to the complainant via telephone and was able to resolve the c"| __truncated__ ...
## $ Resolution.Action.Updated.Date: chr "07/30/2019 10:20:00 AM" "07/27/2019 03:01:00 PM" "07/18/2019 05:10:00 PM" "07/15/2019 12:30:00 PM" ...
## $ Community.Board : chr "13 QUEENS" "02 BROOKLYN" "01 MANHATTAN" "14 QUEENS" ...
## $ Borough : chr "QUEENS" "BROOKLYN" "MANHATTAN" "QUEENS" ...
## $ X.Coordinate..State.Plane. : int 1055548 988836 981014 1041953 1001675 1012468 987636 1008936 1031238 1001658 ...
## $ Y.Coordinate..State.Plane. : int 195199 189061 197835 155162 241493 211804 204662 185447 216246 213650 ...
## $ Park.Facility.Name : chr "Unspecified" "Unspecified" "Unspecified" "Unspecified" ...
## $ Park.Borough : chr "QUEENS" "BROOKLYN" "MANHATTAN" "QUEENS" ...
## $ Vehicle.Type : logi NA NA NA NA NA NA ...
## $ Taxi.Company.Borough : logi NA NA NA NA NA NA ...
## $ Taxi.Pick.Up.Location : logi NA NA NA NA NA NA ...
## $ Bridge.Highway.Name : logi NA NA NA NA NA NA ...
## $ Bridge.Highway.Direction : logi NA NA NA NA NA NA ...
## $ Road.Ramp : logi NA NA NA NA NA NA ...
## $ Bridge.Highway.Segment : logi NA NA NA NA NA NA ...
## $ Latitude : num 40.7 40.7 40.7 40.6 40.8 ...
## $ Longitude : num -73.7 -74 -74 -73.8 -73.9 ...
## $ Location : chr "(40.7021656966259, -73.74285846196767)" "(40.685604353374636, -73.98346438153149)" "(40.70968753652052, -74.01167217483717)" "(40.59237221211839, -73.79223212100901)" ...
head(nyc_complaints)
## Unique.Key Created.Date Closed.Date Agency
## 1 43416920 07/30/2019 10:14:00 AM 07/30/2019 10:20:00 AM DEP
## 2 43397586 07/27/2019 02:54:00 PM 07/27/2019 03:01:00 PM DEP
## 3 43319486 07/18/2019 05:04:00 PM 07/18/2019 05:10:00 PM DEP
## 4 43284428 07/15/2019 11:40:00 AM 07/15/2019 12:30:00 PM DEP
## 5 43264741 07/12/2019 05:45:00 PM 07/12/2019 06:00:00 PM DEP
## 6 43100492 06/26/2019 09:37:00 PM 06/27/2019 10:00:00 AM DEP
## Agency.Name Complaint.Type
## 1 Department of Environmental Protection Asbestos
## 2 Department of Environmental Protection Asbestos
## 3 Department of Environmental Protection Asbestos
## 4 Department of Environmental Protection Asbestos
## 5 Department of Environmental Protection Asbestos
## 6 Department of Environmental Protection Asbestos
## Descriptor Location.Type Incident.Zip Incident.Address
## 1 Asbestos Complaint (B1) 11411 114-28 212 STREET
## 2 Asbestos Complaint (B1) 11217 449 PACIFIC STREET
## 3 Asbestos Complaint (B1) 10006
## 4 Asbestos Complaint (B1) 11692 328 BEACH 63 STREET
## 5 Asbestos Complaint (B1) 10039 2937 8 AVENUE
## 6 Asbestos Complaint (B1) 11377 37-16 65 STREET
## Street.Name Cross.Street.1
## 1 212 STREET COLFAX ST
## 2 PACIFIC STREET BOND ST
## 3 LIBERTY ST
## 4 BEACH 63 STREET DEAD END
## 5 8 AVENUE MACOMBS DAM BRDG PEDESTRIAN PATH
## 6 65 STREET 37 AVE
## Cross.Street.2 Intersection.Street.1
## 1 NASHVILLE BLVD
## 2 NEVINS ST
## 3 CHURCH ST LIBERTY STREET
## 4 BEACH CHANNEL DR
## 5 HARLEM RIVER DRIVE SERVICE RD W
## 6 37 RD
## Intersection.Street.2 Address.Type City Landmark
## 1 ADDRESS Cambria Heights
## 2 ADDRESS BROOKLYN
## 3 CHURCH STREET INTERSECTION NEW YORK
## 4 ADDRESS Arverne
## 5 ADDRESS NEW YORK
## 6 ADDRESS Woodside
## Facility.Type Status Due.Date
## 1 N/A Closed
## 2 N/A Closed
## 3 N/A Closed
## 4 N/A Closed
## 5 N/A Closed
## 6 N/A Closed
## Resolution.Description
## 1 The Department of Environmental Protection determined that this complaint is a duplicate to another service request.
## 2 The Department of Environmental Protection determined that this complaint is a duplicate to another service request.
## 3 The Department of Environmental Protection spoke to the complainant via telephone and was able to resolve the complaint without inspection.
## 4 The Department of Environmental Protection spoke to the complainant via telephone and was able to resolve the complaint without inspection.
## 5 The Department of Environmental Protection spoke to the complainant via telephone and was able to resolve the complaint without inspection.
## 6 The Department of Environmental Protection (DEP) has investigated this complaint. This is an active asbestos abatement project that has been filed with DEP. The work is in compliance with all of DEP's rules and regulations and therefore no violation was issued.
## Resolution.Action.Updated.Date Community.Board Borough
## 1 07/30/2019 10:20:00 AM 13 QUEENS QUEENS
## 2 07/27/2019 03:01:00 PM 02 BROOKLYN BROOKLYN
## 3 07/18/2019 05:10:00 PM 01 MANHATTAN MANHATTAN
## 4 07/15/2019 12:30:00 PM 14 QUEENS QUEENS
## 5 07/12/2019 06:00:00 PM 10 MANHATTAN MANHATTAN
## 6 06/27/2019 10:00:00 AM 02 QUEENS QUEENS
## X.Coordinate..State.Plane. Y.Coordinate..State.Plane. Park.Facility.Name
## 1 1055548 195199 Unspecified
## 2 988836 189061 Unspecified
## 3 981014 197835 Unspecified
## 4 1041953 155162 Unspecified
## 5 1001675 241493 Unspecified
## 6 1012468 211804 Unspecified
## Park.Borough Vehicle.Type Taxi.Company.Borough Taxi.Pick.Up.Location
## 1 QUEENS NA NA NA
## 2 BROOKLYN NA NA NA
## 3 MANHATTAN NA NA NA
## 4 QUEENS NA NA NA
## 5 MANHATTAN NA NA NA
## 6 QUEENS NA NA NA
## Bridge.Highway.Name Bridge.Highway.Direction Road.Ramp
## 1 NA NA NA
## 2 NA NA NA
## 3 NA NA NA
## 4 NA NA NA
## 5 NA NA NA
## 6 NA NA NA
## Bridge.Highway.Segment Latitude Longitude
## 1 NA 40.70217 -73.74286
## 2 NA 40.68560 -73.98346
## 3 NA 40.70969 -74.01167
## 4 NA 40.59237 -73.79223
## 5 NA 40.82950 -73.93704
## 6 NA 40.74798 -73.89816
## Location
## 1 (40.7021656966259, -73.74285846196767)
## 2 (40.685604353374636, -73.98346438153149)
## 3 (40.70968753652052, -74.01167217483717)
## 4 (40.59237221211839, -73.79223212100901)
## 5 (40.82950076779289, -73.93703525936526)
## 6 (40.74798459930839, -73.89815981373029)
I decided to remove columns I do not need from the data set so that it will be easier to work with. I also decided to look only at complaints from the years 2010-2015, as this matches the census data I intend to pull. This required creating a year column; since complaints were generally created and closed in the same year, I derived the new "year" column from the created date.
#removing unwanted columns from the nyc_complaints dataset
sorted_nyc <- select(nyc_complaints, -Agency.Name, -Incident.Zip, -Incident.Address, -Street.Name, -Cross.Street.1, -Cross.Street.2, -Intersection.Street.1, -Intersection.Street.2, -City, -Landmark, -Facility.Type, -Due.Date, -Resolution.Action.Updated.Date, -Community.Board, -Park.Facility.Name, -Vehicle.Type, -Taxi.Company.Borough, -Taxi.Pick.Up.Location, -Bridge.Highway.Name, -Bridge.Highway.Direction, -Road.Ramp, -Bridge.Highway.Segment, -Location, -Location.Type)
#creating the POSIXct dates
sorted_nyc$created_date <- mdy_hms(sorted_nyc$Created.Date)
#creating the year column
sorted_nyc$year_created <- year(sorted_nyc$created_date)
#filtering to the years 2010-2015
years <- 2010:2015
filtered_nyc <- filter(sorted_nyc, year_created %in% years)
#checking to see changes
str(filtered_nyc)
## 'data.frame': 12128 obs. of 17 variables:
## $ Unique.Key : int 31634762 31090297 31088849 31081633 31075198 31067643 31067619 31020650 30946496 30939510 ...
## $ Created.Date : chr "09/28/2015 05:47:46 PM" "07/16/2015 09:28:00 AM" "07/16/2015 09:33:00 PM" "07/15/2015 10:32:00 AM" ...
## $ Closed.Date : chr "" "" "" "" ...
## $ Agency : chr "DOHMH" "DEP" "DEP" "DEP" ...
## $ Complaint.Type : chr "Asbestos" "Asbestos" "Asbestos" "Asbestos" ...
## $ Descriptor : chr "N/A" "Asbestos Complaint (B1)" "Asbestos Complaint (B1)" "Asbestos Complaint (B1)" ...
## $ Address.Type : chr "ADDRESS" "ADDRESS" "ADDRESS" "ADDRESS" ...
## $ Status : chr "Closed" "Started" "Started" "Started" ...
## $ Resolution.Description : chr "The Department of Health and Mental Hygiene will review your complaint to determine appropriate action. Compla"| __truncated__ "The Department of Environmental Protection has inspected your complaint and determined that further investigati"| __truncated__ "The Department of Environmental Protection has inspected your complaint and determined that further investigati"| __truncated__ "The Department of Environmental Protection has inspected your complaint and determined that further investigati"| __truncated__ ...
## $ Borough : chr "QUEENS" "BRONX" "BROOKLYN" "BROOKLYN" ...
## $ X.Coordinate..State.Plane.: int 1031529 1035502 993033 1000072 1002519 986594 987572 990338 1054844 999426 ...
## $ Y.Coordinate..State.Plane.: int 194802 240034 149494 187033 186860 201226 191296 216099 197888 232901 ...
## $ Park.Borough : chr "QUEENS" "BRONX" "BROOKLYN" "BROOKLYN" ...
## $ Latitude : num 40.7 40.8 40.6 40.7 40.7 ...
## $ Longitude : num -73.8 -73.8 -74 -73.9 -73.9 ...
## $ created_date : POSIXct, format: "2015-09-28 17:47:46" "2015-07-16 09:28:00" ...
## $ year_created : num 2015 2015 2015 2015 2015 ...
Because my data set is composed of categorical variables, I decided to use R to create frequency tables and bar charts to summarize my data.
#creating frequency tables
#table(nyc_complaints$Agency)
#table(nyc_complaints$Borough)
#creating borough table
borough_tab <- filtered_nyc %>% with(table(Borough)) %>% prop.table() %>% addmargins()
borough_tab
## Borough
## BRONX BROOKLYN MANHATTAN QUEENS STATEN ISLAND
## 0.1185686016 0.2823218997 0.3659300792 0.1974769129 0.0353726913
## Unspecified Sum
## 0.0003298153 1.0000000000
#creating agency table
agency_tab <- filtered_nyc %>% with(table(Agency)) %>% prop.table() %>% addmargins()
agency_tab
## Agency
## DEP DOHMH Sum
## 0.7441458 0.2558542 1.0000000
#creating the crosstab
cross<- filtered_nyc %>% with(table(Agency, Borough)) %>% prop.table() %>% addmargins()
cross
## Borough
## Agency BRONX BROOKLYN MANHATTAN QUEENS STATEN ISLAND
## DEP 0.0697559367 0.2055573879 0.2958443272 0.1466853562 0.0259729551
## DOHMH 0.0488126649 0.0767645119 0.0700857520 0.0507915567 0.0093997361
## Sum 0.1185686016 0.2823218997 0.3659300792 0.1974769129 0.0353726913
## Borough
## Agency Unspecified Sum
## DEP 0.0003298153 0.7441457784
## DOHMH 0.0000000000 0.2558542216
## Sum 0.0003298153 1.0000000000
#creating the percent cross table
cross2 <- filtered_nyc %>% with(table(Agency, Borough)) %>% prop.table()
round(cross2 * 100, 2)
## Borough
## Agency BRONX BROOKLYN MANHATTAN QUEENS STATEN ISLAND
## DEP 6.98 20.56 29.58 14.67 2.60
## DOHMH 4.88 7.68 7.01 5.08 0.94
## Borough
## Agency Unspecified
## DEP 0.03
## DOHMH 0.00
I can summarize this information in a quick barplot:
barplot(cross2 * 100, xlab='Borough', ylab='Percentage', main="Percentage DEP or DOHMH by Borough", beside=T, col=c("darkblue","lightcyan"), legend=rownames(cross2), args.legend = list(x = "topleft"))
complaints_visual <-ggplot(filtered_nyc, aes(Borough, fill=Borough)) +
geom_bar() +
xlab("Borough") +
ylab("Count") +
ggtitle("NYC Asbestos Complaints (2010-2015)")
complaints_visual
site_locations <- nyc_complaints %>%
leaflet() %>%
addTiles() %>%
addMarkers(lng = ~Longitude, lat = ~Latitude, popup = ~as.character(Unique.Key), label = ~as.character(Unique.Key), clusterOptions = markerClusterOptions())
## Warning in validateCoords(lng, lat, funcName): Data contains 285 rows with
## either missing or invalid lat/lon values and will be ignored
site_locations
Now let’s try it with the filtered data!
site_locations <- filtered_nyc %>%
leaflet() %>%
addTiles() %>%
addMarkers(lng = ~Longitude, lat = ~Latitude, popup = ~as.character(Unique.Key), label = ~as.character(Unique.Key), clusterOptions = markerClusterOptions())
## Warning in validateCoords(lng, lat, funcName): Data contains 63 rows with
## either missing or invalid lat/lon values and will be ignored
site_locations
Rather than importing this data from a file, I decided to use the acs and tigris packages to obtain the census data and geospatial data needed to create a map of household income.
The first step is to create a shapefile containing the census tracts. The state and county codes can be found using the geo.lookup() and lookup_code() functions.
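As a brief illustration, a sketch of those lookups for one of the five boroughs (the "NY"/"Bronx"/"New York" inputs are just example arguments; the resulting codes are hard-coded in the next chunk):
geo.lookup(state = "NY", county = "Bronx")    #acs: search census geographies by name
lookup_code(state = "New York", county = "Bronx")    #tigris: print the state and county FIPS codes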
# grab the spatial data (tigris)
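#the five NYC county FIPS codes used below: Bronx (005), Kings/Brooklyn (047), New York/Manhattan (061), Queens (081), Richmond/Staten Island (085)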
counties <- c(5, 47, 61, 81, 85)
shapefile <- tracts(state = '36', county=counties, cb=TRUE)
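Because tracts() downloads the TIGER/Line files on every run, the tigris start-up message above suggests enabling caching; a one-line sketch, to be set before the tracts() call:
options(tigris_use_cache = TRUE)    #cache downloaded shapefiles across sessions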
#visualizing what we just grabbed
plot(shapefile)
The next step in fetching the ACS data is to create a geographic set from which to pull tabular data. I used the state and county codes I found before.
geo<-geo.make(state=36, county=c(5, 47, 61, 81, 85), tract="*")
Fetching the ACS data is relatively simple, but it requires an API key, which can be obtained from https://api.census.gov/data/key_signup.html. It also requires the end year, span, and table of the ACS survey I am interested in pulling from. I chose household income data from the Census over the 5-year span ending in 2015, so that it matches the years available in the complaints dataset.
api.key.install(key = "YOUR_CENSUS_API_KEY")
mytable <- acs.lookup(endyear=2015, table.number="B19013")
## Warning in acs.lookup(endyear = 2015, table.number = "B19013"): temporarily downloading and using archived XML variable lookup files;
## since this is *much* slower, recommend running
## acs.tables.install()
str(mytable)
## Formal class 'acs.lookup' [package "acs"] with 4 slots
## ..@ endyear: num 2015
## ..@ span : num 5
## ..@ args :List of 7
## .. ..$ endyear : num 2015
## .. ..$ span : num 5
## .. ..$ dataset : chr "acs"
## .. ..$ keyword : symbol
## .. ..$ table.name : symbol
## .. ..$ table.number : chr "B19013"
## .. ..$ case.sensitive: logi TRUE
## ..@ results:'data.frame': 1 obs. of 4 variables:
## .. ..$ variable.code: chr "B19013_001"
## .. ..$ table.number : chr "B19013."
## .. ..$ table.name : chr "B19013. Median Household Income in the Past 12 Months (in 2015 Inflation-Adjusted Dollars)"
## .. ..$ variable.name: chr "Median household income in the past 12 months (in 2015 Inflation-adjusted dollars)"
Now, I will choose the column of interest.
results(mytable)$variable.name
## [1] "Median household income in the past 12 months (in 2015 Inflation-adjusted dollars)"
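The matching variable code can be pulled the same way (it also appears in the str() output above):
results(mytable)$variable.code
## [1] "B19013_001"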
I first tried to convert the table information into a dataframe directly from the lookup object:
income_df <- data.frame(paste0(str_pad(mytable@geography$state, 2, "left", pad="0"),
str_pad(mytable@geography$county, 3, "left", pad="0"),
str_pad(mytable@geography$tract, 6, "left", pad="0")),
mytable@estimate[,c("Household Income: Total:",
"Household Income: $200,000 or more")],
stringsAsFactors = FALSE)
## Error in stri_pad_left(string, width, pad = pad): no slot of name "geography" for this object of class "acs.lookup"
This fails because an acs.lookup object only describes the available variables; the geography and estimate slots exist only on the object returned by acs.fetch() (and the column names referenced here come from a different income table in the tutorial I followed). I will instead build the data frame after fetching the data below.
I will now create values to fetch my data.
myvars <- mytable[1]
myspan <- 5
myendyear <- 2015
countylist2 <- as.numeric(counties)
mygeo <- geo.make(state=36, county=countylist2, tract="*")
Now I will fetch the tabular data and join it with the geospatial information so that I can create a map.
api.key.install(key = "YOUR_CENSUS_API_KEY")
mydata <- acs.fetch(endyear=myendyear, span=myspan, geography=mygeo, variable=myvars)
## Warning in acs.fetch(endyear = endyear, span = span, geography =
## geography[[1]], : NAs introduced by coercion
## Warning in acs.fetch(endyear = endyear, span = span, geography =
## geography[[1]], : NAs introduced by coercion
## Warning in acs.fetch(endyear = endyear, span = span, geography =
## geography[[1]], : NAs introduced by coercion
#constructing a GEOID from the state, county, and tract codes so it can be matched to the shapefile
acsgeoid <- paste0(as.character(mydata@geography$state),'0',
as.character(mydata@geography$county),
as.character(mydata@geography$tract))
#creating a dataframe
mydatadf <- data.frame(acsgeoid, mydata@estimate)
colnames(mydatadf)=c("GEOID", "medianincome")
mydatadf2 <- filter(mydatadf, medianincome>0)
head(mydatadf2)
## GEOID medianincome
## 1 3605000200 72034
## 2 3605000400 74836
## 3 3605001600 32312
## 4 3605001900 37936
## 5 3605002000 18086
## 6 3605002300 14479
#joining the data
mydatamerged <- geo_join(shapefile, mydatadf2, "GEOID", "GEOID")
df <- mydatamerged
#creating the popup
mypopup <- paste0("GEOID: ", df$GEOID, "<br>", "Median Income: $", round(df$medianincome,0))
#setting the palette
mypal <- colorNumeric(
palette = "YlGnBu",
domain = df$medianincome
)
Creating the map:
mymap<-leaflet() %>%
addProviderTiles("CartoDB.Positron") %>%
addPolygons(data = df,
fillColor = ~mypal(medianincome),
color = "#b2aeae", # you need to use hex colors
fillOpacity = 0.7,
weight = 1,
smoothFactor = 0.2,
popup = mypopup) %>%
addLegend(pal = mypal,
values = df$medianincome,
position = "bottomright",
title = "Median Income",
labFormat = labelFormat(prefix = "$"))
mymap
Let's put it all together! For the final part of my project, I decided to combine both maps to create a single visualization of median household income alongside the locations of asbestos complaints from 2010 to 2015.
map4 <- leaflet() %>%
addProviderTiles("CartoDB.Positron") %>%
addMarkers(data = filtered_nyc,
lng = ~Longitude, lat = ~Latitude,
popup = ~as.character(Unique.Key), label = ~as.character(Unique.Key),
clusterOptions = markerClusterOptions(),
group = "Complaints") %>%
addPolygons(data = df,
fillColor = ~mypal(medianincome),
color = "#b2aeae", # you need to use hex colors
fillOpacity = 0.7,
weight = 1,
smoothFactor = 0.2,
popup = mypopup,
group ="Income") %>%
addLegend(pal = mypal,
values = df$medianincome,
position = "bottomright",
title = "Median Income",
labFormat = labelFormat(prefix = "$")) %>%
addLayersControl(overlayGroups = c("Complaints", "Income"), options = layersControlOptions(collapsed = FALSE))
## Warning in validateCoords(lng, lat, funcName): Data contains 63 rows with
## either missing or invalid lat/lon values and will be ignored
map4