This project is adapted from the work of Lucy Murray, a past student in the Data Visualization class.
This dataset is from NYC OpenData, https://opendata.cityofnewyork.us/ , regarding asbestos complaints received by the Department of Environmental Protection (DEP) and the Department of Health and Mental Hygiene (DOHMH) from 2010 to present. This also follows the tutorial, “Manipulating and mapping US Census data in R using the acs, tigris and leaflet packages by ZevRoss as well as the”Census Mapping Tutorial tutorial by Laura Krull lkrull and Jeff Rosenblum.
library(tidyverse)
## -- Attaching packages ---------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.4-7, (SVN revision 845)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
## Path to GDAL shared files: C:/Users/rsaidi/Documents/R/win-library/3.6/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
## Path to PROJ.4 shared files: C:/Users/rsaidi/Documents/R/win-library/3.6/rgdal/proj
## Linking to sp version: 1.3-1
library(leaflet)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(tigris)
## Warning: package 'tigris' was built under R version 3.6.2
## To enable
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
##
## Attaching package: 'tigris'
## The following object is masked from 'package:graphics':
##
## plot
library(acs)
## Warning: package 'acs' was built under R version 3.6.2
## Loading required package: XML
##
## Attaching package: 'acs'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:base':
##
## apply
library(stringr)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
Read in the Data
# Set the working directory
setwd("C:/Users/rsaidi/Dropbox/Rachel/MontColl/Datasets/Datasets")
# Read in the data
nyc_complaints <- read_csv("Asbestos_ComplaintsNY.csv")
## Parsed with column specification:
## cols(
## .default = col_character(),
## `Unique Key` = col_double(),
## `Incident Zip` = col_double(),
## `X Coordinate (State Plane)` = col_double(),
## `Y Coordinate (State Plane)` = col_double(),
## `Vehicle Type` = col_logical(),
## `Taxi Company Borough` = col_logical(),
## `Taxi Pick Up Location` = col_logical(),
## `Bridge Highway Name` = col_logical(),
## `Bridge Highway Direction` = col_logical(),
## `Road Ramp` = col_logical(),
## `Bridge Highway Segment` = col_logical(),
## Latitude = col_double(),
## Longitude = col_double()
## )
## See spec(...) for full column specifications.
## Warning: 1 parsing failure.
## row col expected actual file
## 3617 Incident Zip a double N/A 'Asbestos_ComplaintsNY.csv'
# Look at the structure of the dataset
str(nyc_complaints)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 20433 obs. of 39 variables:
## $ Unique Key : num 45646394 45642978 45644436 45645315 45642189 ...
## $ Created Date : chr "2/18/2020 13:54" "2/18/2020 9:54" "2/18/2020 7:37" "2/18/2020 12:58" ...
## $ Closed Date : chr NA NA NA "2/18/2020 13:15" ...
## $ Agency : chr "DOHMH" "DEP" "DEP" "DOHMH" ...
## $ Agency Name : chr "Department of Health and Mental Hygiene" "Department of Environmental Protection" "Department of Environmental Protection" "Department of Health and Mental Hygiene" ...
## $ Complaint Type : chr "Asbestos" "Asbestos" "Asbestos" "Asbestos" ...
## $ Descriptor : chr "N/A" "Asbestos Complaint (B1)" "Asbestos Complaint (B1)" "N/A" ...
## $ Location Type : chr "3+ Family Apartment Building" NA NA "1-2 Family Dwelling" ...
## $ Incident Zip : num 10027 11233 10026 10016 10021 ...
## $ Incident Address : chr "410 ST NICHOLAS AVENUE" "473 BAINBRIDGE STREET" "1893 7 AVENUE" "145 EAST 27 STREET" ...
## $ Street Name : chr "ST NICHOLAS AVENUE" "BAINBRIDGE STREET" "7 AVENUE" "EAST 27 STREET" ...
## $ Cross Street 1 : chr "WEST 130 STREET" "HOWARD AVE" "W 114 ST" "LEXINGTON AVENUE" ...
## $ Cross Street 2 : chr "WEST 133 STREET" "SARATOGA AVE" "W 115 ST" "BROADWAY ALLEY" ...
## $ Intersection Street 1 : chr "WEST 130 STREET" NA NA "LEXINGTON AVENUE" ...
## $ Intersection Street 2 : chr "WEST 133 STREET" NA NA "BROADWAY ALLEY" ...
## $ Address Type : chr NA "ADDRESS" "ADDRESS" "ADDRESS" ...
## $ City : chr "NEW YORK" "BROOKLYN" "NEW YORK" "NEW YORK" ...
## $ Landmark : chr "ST NICHOLAS AVENUE" NA NA "EAST 27 STREET" ...
## $ Facility Type : chr NA "N/A" "N/A" NA ...
## $ Status : chr "In Progress" "Open" "Open" "Closed" ...
## $ Due Date : chr NA NA NA NA ...
## $ Resolution Description : chr NA NA NA NA ...
## $ Resolution Action Updated Date: chr NA NA NA "2/18/2020 13:15" ...
## $ Community Board : chr "10 MANHATTAN" "03 BROOKLYN" "10 MANHATTAN" "06 MANHATTAN" ...
## $ Borough : chr "MANHATTAN" "BROOKLYN" "MANHATTAN" "MANHATTAN" ...
## $ X Coordinate (State Plane) : num 998223 1006952 997205 989069 995440 ...
## $ Y Coordinate (State Plane) : num 235925 187926 231569 209574 219561 ...
## $ Park Facility Name : chr "Unspecified" "Unspecified" "Unspecified" "Unspecified" ...
## $ Park Borough : chr "MANHATTAN" "BROOKLYN" "MANHATTAN" "MANHATTAN" ...
## $ Vehicle Type : logi NA NA NA NA NA NA ...
## $ Taxi Company Borough : logi NA NA NA NA NA NA ...
## $ Taxi Pick Up Location : logi NA NA NA NA NA NA ...
## $ Bridge Highway Name : logi NA NA NA NA NA NA ...
## $ Bridge Highway Direction : logi NA NA NA NA NA NA ...
## $ Road Ramp : logi NA NA NA NA NA NA ...
## $ Bridge Highway Segment : logi NA NA NA NA NA NA ...
## $ Latitude : num 40.8 40.7 40.8 40.7 40.8 ...
## $ Longitude : num -73.9 -73.9 -74 -74 -74 ...
## $ Location : chr "(40.814224320719624, -73.94952055422276)" "(40.68246111285433, -73.91814784809289)" "(40.802269859152865, -73.95320662932198)" "(40.74190751693278, -73.98260957594246)" ...
## - attr(*, "problems")=Classes 'tbl_df', 'tbl' and 'data.frame': 1 obs. of 5 variables:
## ..$ row : int 3617
## ..$ col : chr "Incident Zip"
## ..$ expected: chr "a double"
## ..$ actual : chr "N/A"
## ..$ file : chr "'Asbestos_ComplaintsNY.csv'"
## - attr(*, "spec")=
## .. cols(
## .. `Unique Key` = col_double(),
## .. `Created Date` = col_character(),
## .. `Closed Date` = col_character(),
## .. Agency = col_character(),
## .. `Agency Name` = col_character(),
## .. `Complaint Type` = col_character(),
## .. Descriptor = col_character(),
## .. `Location Type` = col_character(),
## .. `Incident Zip` = col_double(),
## .. `Incident Address` = col_character(),
## .. `Street Name` = col_character(),
## .. `Cross Street 1` = col_character(),
## .. `Cross Street 2` = col_character(),
## .. `Intersection Street 1` = col_character(),
## .. `Intersection Street 2` = col_character(),
## .. `Address Type` = col_character(),
## .. City = col_character(),
## .. Landmark = col_character(),
## .. `Facility Type` = col_character(),
## .. Status = col_character(),
## .. `Due Date` = col_character(),
## .. `Resolution Description` = col_character(),
## .. `Resolution Action Updated Date` = col_character(),
## .. `Community Board` = col_character(),
## .. Borough = col_character(),
## .. `X Coordinate (State Plane)` = col_double(),
## .. `Y Coordinate (State Plane)` = col_double(),
## .. `Park Facility Name` = col_character(),
## .. `Park Borough` = col_character(),
## .. `Vehicle Type` = col_logical(),
## .. `Taxi Company Borough` = col_logical(),
## .. `Taxi Pick Up Location` = col_logical(),
## .. `Bridge Highway Name` = col_logical(),
## .. `Bridge Highway Direction` = col_logical(),
## .. `Road Ramp` = col_logical(),
## .. `Bridge Highway Segment` = col_logical(),
## .. Latitude = col_double(),
## .. Longitude = col_double(),
## .. Location = col_character()
## .. )
head(nyc_complaints)
## # A tibble: 6 x 39
## `Unique Key` `Created Date` `Closed Date` Agency `Agency Name`
## <dbl> <chr> <chr> <chr> <chr>
## 1 45646394 2/18/2020 13:~ <NA> DOHMH Department o~
## 2 45642978 2/18/2020 9:54 <NA> DEP Department o~
## 3 45644436 2/18/2020 7:37 <NA> DEP Department o~
## 4 45645315 2/18/2020 12:~ 2/18/2020 13~ DOHMH Department o~
## 5 45642189 2/18/2020 15:~ <NA> DEP Department o~
## 6 45639603 2/18/2020 10:~ <NA> DOHMH Department o~
## # ... with 34 more variables: `Complaint Type` <chr>, Descriptor <chr>,
## # `Location Type` <chr>, `Incident Zip` <dbl>, `Incident Address` <chr>,
## # `Street Name` <chr>, `Cross Street 1` <chr>, `Cross Street 2` <chr>,
## # `Intersection Street 1` <chr>, `Intersection Street 2` <chr>, `Address
## # Type` <chr>, City <chr>, Landmark <chr>, `Facility Type` <chr>,
## # Status <chr>, `Due Date` <chr>, `Resolution Description` <chr>, `Resolution
## # Action Updated Date` <chr>, `Community Board` <chr>, Borough <chr>, `X
## # Coordinate (State Plane)` <dbl>, `Y Coordinate (State Plane)` <dbl>, `Park
## # Facility Name` <chr>, `Park Borough` <chr>, `Vehicle Type` <lgl>, `Taxi
## # Company Borough` <lgl>, `Taxi Pick Up Location` <lgl>, `Bridge Highway
## # Name` <lgl>, `Bridge Highway Direction` <lgl>, `Road Ramp` <lgl>, `Bridge
## # Highway Segment` <lgl>, Latitude <dbl>, Longitude <dbl>, Location <chr>
Remove spaces in the headers of the dataset and make all variables lower case
names(nyc_complaints) <- gsub(" ",".",names(nyc_complaints))
names(nyc_complaints) <- tolower(names(nyc_complaints))
head(nyc_complaints)
## # A tibble: 6 x 39
## unique.key created.date closed.date agency agency.name complaint.type
## <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 45646394 2/18/2020 1~ <NA> DOHMH Department~ Asbestos
## 2 45642978 2/18/2020 9~ <NA> DEP Department~ Asbestos
## 3 45644436 2/18/2020 7~ <NA> DEP Department~ Asbestos
## 4 45645315 2/18/2020 1~ 2/18/2020 ~ DOHMH Department~ Asbestos
## 5 45642189 2/18/2020 1~ <NA> DEP Department~ Asbestos
## 6 45639603 2/18/2020 1~ <NA> DOHMH Department~ Asbestos
## # ... with 33 more variables: descriptor <chr>, location.type <chr>,
## # incident.zip <dbl>, incident.address <chr>, street.name <chr>,
## # cross.street.1 <chr>, cross.street.2 <chr>, intersection.street.1 <chr>,
## # intersection.street.2 <chr>, address.type <chr>, city <chr>,
## # landmark <chr>, facility.type <chr>, status <chr>, due.date <chr>,
## # resolution.description <chr>, resolution.action.updated.date <chr>,
## # community.board <chr>, borough <chr>, `x.coordinate.(state.plane)` <dbl>,
## # `y.coordinate.(state.plane)` <dbl>, park.facility.name <chr>,
## # park.borough <chr>, vehicle.type <lgl>, taxi.company.borough <lgl>,
## # taxi.pick.up.location <lgl>, bridge.highway.name <lgl>,
## # bridge.highway.direction <lgl>, road.ramp <lgl>,
## # bridge.highway.segment <lgl>, latitude <dbl>, longitude <dbl>,
## # location <chr>
Remove unnecessary columns from the data set, so it will be easier to work with. Only look at complaints from the years 2010-2015, as this will match the census data we will use. Therefore, we will create a year column. Since complaints were generally created and closed in the same years, we will use date created as the new “year” column.
# Remove unwanted columns from the nyc_complaints dataset
sorted_nyc <- select(nyc_complaints, -(agency.name), -(incident.zip), -(incident.address), -(street.name), -(cross.street.1), -(cross.street.2), -(intersection.street.1), -(intersection.street.2), -(city), -(landmark), (facility.type), -(due.date), -(resolution.action.updated.date), -(community.board), -(park.facility.name), -(vehicle.type), -(taxi.company.borough), -(taxi.pick.up.location), -(bridge.highway.name), -(bridge.highway.direction), -(road.ramp), -(bridge.highway.segment), -(location), -(facility.type), -(location.type))
str(sorted_nyc)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 20433 obs. of 15 variables:
## $ unique.key : num 45646394 45642978 45644436 45645315 45642189 ...
## $ created.date : chr "2/18/2020 13:54" "2/18/2020 9:54" "2/18/2020 7:37" "2/18/2020 12:58" ...
## $ closed.date : chr NA NA NA "2/18/2020 13:15" ...
## $ agency : chr "DOHMH" "DEP" "DEP" "DOHMH" ...
## $ complaint.type : chr "Asbestos" "Asbestos" "Asbestos" "Asbestos" ...
## $ descriptor : chr "N/A" "Asbestos Complaint (B1)" "Asbestos Complaint (B1)" "N/A" ...
## $ address.type : chr NA "ADDRESS" "ADDRESS" "ADDRESS" ...
## $ status : chr "In Progress" "Open" "Open" "Closed" ...
## $ resolution.description : chr NA NA NA NA ...
## $ borough : chr "MANHATTAN" "BROOKLYN" "MANHATTAN" "MANHATTAN" ...
## $ x.coordinate.(state.plane): num 998223 1006952 997205 989069 995440 ...
## $ y.coordinate.(state.plane): num 235925 187926 231569 209574 219561 ...
## $ park.borough : chr "MANHATTAN" "BROOKLYN" "MANHATTAN" "MANHATTAN" ...
## $ latitude : num 40.8 40.7 40.8 40.7 40.8 ...
## $ longitude : num -73.9 -73.9 -74 -74 -74 ...
## - attr(*, "problems")=Classes 'tbl_df', 'tbl' and 'data.frame': 1 obs. of 5 variables:
## ..$ row : int 3617
## ..$ col : chr "Incident Zip"
## ..$ expected: chr "a double"
## ..$ actual : chr "N/A"
## ..$ file : chr "'Asbestos_ComplaintsNY.csv'"
## - attr(*, "spec")=
## .. cols(
## .. `Unique Key` = col_double(),
## .. `Created Date` = col_character(),
## .. `Closed Date` = col_character(),
## .. Agency = col_character(),
## .. `Agency Name` = col_character(),
## .. `Complaint Type` = col_character(),
## .. Descriptor = col_character(),
## .. `Location Type` = col_character(),
## .. `Incident Zip` = col_double(),
## .. `Incident Address` = col_character(),
## .. `Street Name` = col_character(),
## .. `Cross Street 1` = col_character(),
## .. `Cross Street 2` = col_character(),
## .. `Intersection Street 1` = col_character(),
## .. `Intersection Street 2` = col_character(),
## .. `Address Type` = col_character(),
## .. City = col_character(),
## .. Landmark = col_character(),
## .. `Facility Type` = col_character(),
## .. Status = col_character(),
## .. `Due Date` = col_character(),
## .. `Resolution Description` = col_character(),
## .. `Resolution Action Updated Date` = col_character(),
## .. `Community Board` = col_character(),
## .. Borough = col_character(),
## .. `X Coordinate (State Plane)` = col_double(),
## .. `Y Coordinate (State Plane)` = col_double(),
## .. `Park Facility Name` = col_character(),
## .. `Park Borough` = col_character(),
## .. `Vehicle Type` = col_logical(),
## .. `Taxi Company Borough` = col_logical(),
## .. `Taxi Pick Up Location` = col_logical(),
## .. `Bridge Highway Name` = col_logical(),
## .. `Bridge Highway Direction` = col_logical(),
## .. `Road Ramp` = col_logical(),
## .. `Bridge Highway Segment` = col_logical(),
## .. Latitude = col_double(),
## .. Longitude = col_double(),
## .. Location = col_character()
## .. )
# create the POSIXct dates
list <- c("2010","2011","2012","2013","2014","2015")
#creating the POSIXct dates
sorted_nyc$created_date <- mdy_hm(sorted_nyc$created.date)
#creating the year column
sorted_nyc$year <- year(sorted_nyc$created_date)
filter_nyc <- filter(sorted_nyc, year %in% list)
str(filter_nyc)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 12129 obs. of 17 variables:
## $ unique.key : num 29589040 29009121 30524378 30516788 30507929 ...
## $ created.date : chr "12/28/2014 23:52" "10/4/2014 21:03" "5/1/2015 20:01" "4/30/2015 12:04" ...
## $ closed.date : chr "1/12/2016 12:21" "7/25/2016 12:11" "5/18/2015 9:59" "5/18/2015 9:52" ...
## $ agency : chr "DOHMH" "DOHMH" "DOHMH" "DOHMH" ...
## $ complaint.type : chr "Asbestos" "Asbestos" "Asbestos" "Asbestos" ...
## $ descriptor : chr "N/A" "N/A" "N/A" "N/A" ...
## $ address.type : chr "ADDRESS" "ADDRESS" "ADDRESS" "ADDRESS" ...
## $ status : chr "Closed" "Closed" "Closed" "Closed" ...
## $ resolution.description : chr "The Department of Health and Mental Hygiene has contacted the customer and closed the Service Request. If the "| __truncated__ "The Department of Health and Mental Hygiene has reviewed your Service Request. A warning letter has been sent t"| __truncated__ "The Department of Health and Mental Hygiene has investigated the complaint and no violations were cited." "The Department of Health and Mental Hygiene has investigated the complaint and no violations were cited." ...
## $ borough : chr "BRONX" "BRONX" "BRONX" "BRONX" ...
## $ x.coordinate.(state.plane): num 1012103 1018405 1021630 1007998 1015562 ...
## $ y.coordinate.(state.plane): num 260029 241581 249957 241700 241262 ...
## $ park.borough : chr "BRONX" "BRONX" "BRONX" "BRONX" ...
## $ latitude : num 40.9 40.8 40.9 40.8 40.8 ...
## $ longitude : num -73.9 -73.9 -73.9 -73.9 -73.9 ...
## $ created_date : POSIXct, format: "2014-12-28 23:52:00" "2014-10-04 21:03:00" ...
## $ year : num 2014 2014 2015 2015 2015 ...
## - attr(*, "problems")=Classes 'tbl_df', 'tbl' and 'data.frame': 1 obs. of 5 variables:
## ..$ row : int 3617
## ..$ col : chr "Incident Zip"
## ..$ expected: chr "a double"
## ..$ actual : chr "N/A"
## ..$ file : chr "'Asbestos_ComplaintsNY.csv'"
## - attr(*, "spec")=
## .. cols(
## .. `Unique Key` = col_double(),
## .. `Created Date` = col_character(),
## .. `Closed Date` = col_character(),
## .. Agency = col_character(),
## .. `Agency Name` = col_character(),
## .. `Complaint Type` = col_character(),
## .. Descriptor = col_character(),
## .. `Location Type` = col_character(),
## .. `Incident Zip` = col_double(),
## .. `Incident Address` = col_character(),
## .. `Street Name` = col_character(),
## .. `Cross Street 1` = col_character(),
## .. `Cross Street 2` = col_character(),
## .. `Intersection Street 1` = col_character(),
## .. `Intersection Street 2` = col_character(),
## .. `Address Type` = col_character(),
## .. City = col_character(),
## .. Landmark = col_character(),
## .. `Facility Type` = col_character(),
## .. Status = col_character(),
## .. `Due Date` = col_character(),
## .. `Resolution Description` = col_character(),
## .. `Resolution Action Updated Date` = col_character(),
## .. `Community Board` = col_character(),
## .. Borough = col_character(),
## .. `X Coordinate (State Plane)` = col_double(),
## .. `Y Coordinate (State Plane)` = col_double(),
## .. `Park Facility Name` = col_character(),
## .. `Park Borough` = col_character(),
## .. `Vehicle Type` = col_logical(),
## .. `Taxi Company Borough` = col_logical(),
## .. `Taxi Pick Up Location` = col_logical(),
## .. `Bridge Highway Name` = col_logical(),
## .. `Bridge Highway Direction` = col_logical(),
## .. `Road Ramp` = col_logical(),
## .. `Bridge Highway Segment` = col_logical(),
## .. Latitude = col_double(),
## .. Longitude = col_double(),
## .. Location = col_character()
## .. )
Because the dataset has categorical variables, create frequency tables and barcharts to summarize my data.
# create frequency tables
borough_tab <- filter_nyc %>%
with(table(borough)) %>%
prop.table() %>%
addmargins()
borough_tab
## borough
## BRONX BROOKLYN MANHATTAN QUEENS STATEN ISLAND
## 0.1185588260 0.2822986231 0.3658999093 0.1975430786 0.0353697749
## Unspecified Sum
## 0.0003297881 1.0000000000
# create borough table
agency_tab <- filter_nyc %>%
with(table(agency)) %>%
prop.table() %>%
addmargins()
agency_tab
## agency
## DEP DOHMH Sum
## 0.7440844 0.2559156 1.0000000
# Create the percent crosstab
cross<- filter_nyc %>%
with(table(agency, borough)) %>%
prop.table()
cross*(100) %>%
round(2)
## borough
## agency BRONX BROOKLYN MANHATTAN QUEENS STATEN ISLAND
## DEP 6.97501855 20.55404403 29.58199357 14.66732624 2.59708138
## DOHMH 4.88086404 7.67581829 7.00799736 5.08698161 0.93989612
## borough
## agency Unspecified
## DEP 0.03297881
## DOHMH 0.00000000
Summarize these percents from the table in a barplot
DEP: Department of Environmental Protection DOHMHM: Department of Health and Mental Hygiene
barplot(prop.table(cross),xlab='Agency',ylab='Percentages', main="Percentage DEP or DOHMH by Borough",beside=T,col=c("blue","red"), legend=rownames(cross), args.legend = list(x = "topright"))
Complaints by Borough
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 3.6.2
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
complaints_visual <-ggplot(filter_nyc, aes(borough, fill=borough)) +
geom_bar() +
xlab("Borough") +
theme(axis.text.x = element_text(angle = 90)) +
ylab("Count") +
ggtitle("NYC Asbestos Complaints (2010-2015)")
complaints_visual
Place complaint incidents on the map based on latitude and longitude information using the filtered dataset
site_locations <- filter_nyc %>%
leaflet() %>%
addTiles() %>%
addMarkers(clusterOptions = markerClusterOptions(lng = ~longitude, lat = ~latitude, popup = ~unique.key, label = ~unique.key))
## Assuming "longitude" and "latitude" are longitude and latitude, respectively
## Warning in validateCoords(lng, lat, funcName): Data contains 63 rows with either
## missing or invalid lat/lon values and will be ignored
site_locations
Rather than importing this data from a file, use the acs and tigris packages to gain the census data and geospatial data to create a map for household income.
The first step is to create a shapefile containing the census tract. The county and state codes can be found utilizing the geo.lookup() and lookup_code() functions.
geo.lookup("NY")
## state state.name
## 1 36 New York
# Grab the spatial data (tigris)
counties <- c(5, 47, 61, 81, 85)
shapefile <- tracts(state = '36', county=counties, cb=TRUE)
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|==== | 6%
|
|===== | 7%
|
|====== | 8%
|
|======= | 9%
|
|======= | 10%
|
|======== | 11%
|
|========= | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============= | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|================ | 23%
|
|================= | 25%
|
|================== | 26%
|
|==================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================== | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================== | 49%
|
|==================================== | 51%
|
|===================================== | 53%
|
|======================================= | 55%
|
|======================================= | 56%
|
|========================================= | 59%
|
|========================================== | 60%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 69%
|
|================================================= | 70%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 78%
|
|======================================================= | 79%
|
|========================================================= | 81%
|
|========================================================== | 83%
|
|============================================================ | 85%
|
|============================================================= | 87%
|
|============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|=================================================================== | 95%
|
|==================================================================== | 96%
|
|==================================================================== | 97%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
plot(shapefile) #plot this shapefile
The first step to fetching the ACS data is to create a geographic set to grab tabular data. Use the state and county codes from above.
geo<-geo.make(state=36, county=c(5, 47, 61, 81, 85), tract="*")
Fetching the acs data is relatively simple, but requires a key which can be optained from this (link)[https://api.census.gov/data/key_signup.html]. It also requires an endyear and table of the ACS survey. Chose household income data over the 5-year span, ending in 2015 from the Census. This will match with the data available in the complaints dataset.
api.key.install(key="28291533ae4499be26a9471aedefe57fe26e8ed9")
mytable <- mytable <- acs.lookup(endyear=2015, table.number="B19013")
## Warning in acs.lookup(endyear = 2015, table.number = "B19013"): temporarily downloading and using archived XML variable lookup files;
## since this is *much* slower, recommend running
## acs.tables.install()
str(mytable)
## Formal class 'acs.lookup' [package "acs"] with 4 slots
## ..@ endyear: num 2015
## ..@ span : num 5
## ..@ args :List of 7
## .. ..$ endyear : num 2015
## .. ..$ span : num 5
## .. ..$ dataset : chr "acs"
## .. ..$ keyword : symbol
## .. ..$ table.name : symbol
## .. ..$ table.number : chr "B19013"
## .. ..$ case.sensitive: logi TRUE
## ..@ results:'data.frame': 1 obs. of 4 variables:
## .. ..$ variable.code: chr "B19013_001"
## .. ..$ table.number : chr "B19013."
## .. ..$ table.name : chr "B19013. Median Household Income in the Past 12 Months (in 2015 Inflation-Adjusted Dollars)"
## .. ..$ variable.name: chr "Median household income in the past 12 months (in 2015 Inflation-adjusted dollars)"
# Choose the variable of interest
results(mytable)$variable.name
## [1] "Median household income in the past 12 months (in 2015 Inflation-adjusted dollars)"
myvars <- mytable[1]
myspan <- 5
myendyear <- 2015
countylist2 <- as.numeric(counties)
mygeo <- geo.make(state=36, county=countylist2, tract="*")
api.key.install(key="c3b5bdf35c5d95902f2ab45e532daebf326fef66")
mydata <- acs.fetch(endyear=myendyear, span=myspan, geography=mygeo, variable=myvars)
## Warning in acs.fetch(endyear = endyear, span = span, geography =
## geography[[1]], : NAs introduced by coercion
## Warning in acs.fetch(endyear = endyear, span = span, geography =
## geography[[1]], : NAs introduced by coercion
## Warning in acs.fetch(endyear = endyear, span = span, geography =
## geography[[1]], : NAs introduced by coercion
# Clean the data
acsgeoid <- paste0(as.character(mydata@geography$state),'0',
as.character(mydata@geography$county),
as.character(mydata@geography$tract))
# Create a dataframe
mydatadf <- data.frame(acsgeoid, mydata@estimate)
colnames(mydatadf)=c("GEOID", "medianincome")
mydatadf2 <- filter(mydatadf, medianincome>0)
head(mydatadf2)
## GEOID medianincome
## 1 3605000200 72034
## 2 3605000400 74836
## 3 3605001600 32312
## 4 3605001900 37936
## 5 3605002000 18086
## 6 3605002300 14479
# Join the data
mydatamerged <- geo_join(shapefile, mydatadf2, "GEOID", "GEOID")
df <- mydatamerged
# Create the popup
mypopup <- paste0("GEOID: ", df$GEOID, "<br>", "Median Income: $", round(df$medianincome,0))
# Set the pallete
mypal <- colorNumeric(
palette = "YlGnBu",
domain = df$medianincome
)
mymap<-leaflet() %>%
addProviderTiles("CartoDB.Positron") %>%
addPolygons(data = df,
fillColor = ~mypal(medianincome),
color = "#b2aeae", # you need to use hex colors
fillOpacity = 0.7,
weight = 1,
smoothFactor = 0.2,
popup = mypopup) %>%
addLegend(pal = mypal,
values = df$medianincome,
position = "bottomright",
title = "Median Income",
labFormat = labelFormat(prefix = "$"))
mymap
map4 <- leaflet() %>%
addProviderTiles("CartoDB.Positron") %>%
addMarkers(data = filter_nyc,
clusterOptions = markerClusterOptions(lng = ~longitude, lat = ~latitude, popup = ~unique.key, label = ~unique.key, group="Complaints")) %>%
addPolygons(data = df,
fillColor = ~mypal(medianincome),
color = "#b2aeae", # you need to use hex colors
fillOpacity = 0.7,
weight = 1,
smoothFactor = 0.2,
popup = mypopup,
group ="Income") %>%
addLegend(pal = mypal,
values = df$medianincome,
position = "bottomright",
title = "Median Income",
labFormat = labelFormat(prefix = "$")) %>%
addLayersControl(overlayGroups = c("Complaints", "Income"), options = layersControlOptions(collapsed = FALSE))
## Assuming "longitude" and "latitude" are longitude and latitude, respectively
## Warning in validateCoords(lng, lat, funcName): Data contains 63 rows with either
## missing or invalid lat/lon values and will be ignored
map4