NYC Open Data Asbestos Complaints

Introduction

This project is adapted from the work of Lucy Murray, a past student in the Data Visualization class.

This dataset is from NYC OpenData, https://opendata.cityofnewyork.us/ , regarding asbestos complaints received by the Department of Environmental Protection (DEP) and the Department of Health and Mental Hygiene (DOHMH) from 2010 to present. This also follows the tutorial, “Manipulating and mapping US Census data in R using the acs, tigris and leaflet packages by ZevRoss as well as the”Census Mapping Tutorial tutorial by Laura Krull lkrull and Jeff Rosenblum.

Read in packages

library(tidyverse)

## -- Attaching packages ---------------------------------------------------- tidyverse 1.2.1 --

## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0

## -- Conflicts ------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(rgdal)

## Loading required package: sp

## rgdal: version: 1.4-7, (SVN revision 845)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
##  Path to GDAL shared files: C:/Users/rsaidi/Documents/R/win-library/3.6/rgdal/gdal
##  GDAL binary built with GEOS: TRUE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: C:/Users/rsaidi/Documents/R/win-library/3.6/rgdal/proj
##  Linking to sp version: 1.3-1

library(leaflet)
library(lubridate)

## 
## Attaching package: 'lubridate'

## The following object is masked from 'package:base':
## 
##     date

library(tigris)

## Warning: package 'tigris' was built under R version 3.6.2

## To enable 
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.

## 
## Attaching package: 'tigris'

## The following object is masked from 'package:graphics':
## 
##     plot

library(acs)

## Warning: package 'acs' was built under R version 3.6.2

## Loading required package: XML

## 
## Attaching package: 'acs'

## The following object is masked from 'package:dplyr':
## 
##     combine

## The following object is masked from 'package:base':
## 
##     apply

library(stringr)
library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(reshape2)

## 
## Attaching package: 'reshape2'

## The following object is masked from 'package:tidyr':
## 
##     smiths

New York City Asbestos Complaints Data

Read in the Data

# Set the working directory
setwd("C:/Users/rsaidi/Dropbox/Rachel/MontColl/Datasets/Datasets")

# Read in the data
nyc_complaints <- read_csv("Asbestos_ComplaintsNY.csv")

## Parsed with column specification:
## cols(
##   .default = col_character(),
##   `Unique Key` = col_double(),
##   `Incident Zip` = col_double(),
##   `X Coordinate (State Plane)` = col_double(),
##   `Y Coordinate (State Plane)` = col_double(),
##   `Vehicle Type` = col_logical(),
##   `Taxi Company Borough` = col_logical(),
##   `Taxi Pick Up Location` = col_logical(),
##   `Bridge Highway Name` = col_logical(),
##   `Bridge Highway Direction` = col_logical(),
##   `Road Ramp` = col_logical(),
##   `Bridge Highway Segment` = col_logical(),
##   Latitude = col_double(),
##   Longitude = col_double()
## )

## See spec(...) for full column specifications.

## Warning: 1 parsing failure.
##  row          col expected actual                        file
## 3617 Incident Zip a double    N/A 'Asbestos_ComplaintsNY.csv'

# Look at the structure of the dataset
str(nyc_complaints)

## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 20433 obs. of  39 variables:
##  $ Unique Key                    : num  45646394 45642978 45644436 45645315 45642189 ...
##  $ Created Date                  : chr  "2/18/2020 13:54" "2/18/2020 9:54" "2/18/2020 7:37" "2/18/2020 12:58" ...
##  $ Closed Date                   : chr  NA NA NA "2/18/2020 13:15" ...
##  $ Agency                        : chr  "DOHMH" "DEP" "DEP" "DOHMH" ...
##  $ Agency Name                   : chr  "Department of Health and Mental Hygiene" "Department of Environmental Protection" "Department of Environmental Protection" "Department of Health and Mental Hygiene" ...
##  $ Complaint Type                : chr  "Asbestos" "Asbestos" "Asbestos" "Asbestos" ...
##  $ Descriptor                    : chr  "N/A" "Asbestos Complaint (B1)" "Asbestos Complaint (B1)" "N/A" ...
##  $ Location Type                 : chr  "3+ Family Apartment Building" NA NA "1-2 Family Dwelling" ...
##  $ Incident Zip                  : num  10027 11233 10026 10016 10021 ...
##  $ Incident Address              : chr  "410 ST NICHOLAS AVENUE" "473 BAINBRIDGE STREET" "1893 7 AVENUE" "145 EAST   27 STREET" ...
##  $ Street Name                   : chr  "ST NICHOLAS AVENUE" "BAINBRIDGE STREET" "7 AVENUE" "EAST   27 STREET" ...
##  $ Cross Street 1                : chr  "WEST  130 STREET" "HOWARD AVE" "W 114 ST" "LEXINGTON AVENUE" ...
##  $ Cross Street 2                : chr  "WEST  133 STREET" "SARATOGA AVE" "W 115 ST" "BROADWAY ALLEY" ...
##  $ Intersection Street 1         : chr  "WEST  130 STREET" NA NA "LEXINGTON AVENUE" ...
##  $ Intersection Street 2         : chr  "WEST  133 STREET" NA NA "BROADWAY ALLEY" ...
##  $ Address Type                  : chr  NA "ADDRESS" "ADDRESS" "ADDRESS" ...
##  $ City                          : chr  "NEW YORK" "BROOKLYN" "NEW YORK" "NEW YORK" ...
##  $ Landmark                      : chr  "ST NICHOLAS AVENUE" NA NA "EAST   27 STREET" ...
##  $ Facility Type                 : chr  NA "N/A" "N/A" NA ...
##  $ Status                        : chr  "In Progress" "Open" "Open" "Closed" ...
##  $ Due Date                      : chr  NA NA NA NA ...
##  $ Resolution Description        : chr  NA NA NA NA ...
##  $ Resolution Action Updated Date: chr  NA NA NA "2/18/2020 13:15" ...
##  $ Community Board               : chr  "10 MANHATTAN" "03 BROOKLYN" "10 MANHATTAN" "06 MANHATTAN" ...
##  $ Borough                       : chr  "MANHATTAN" "BROOKLYN" "MANHATTAN" "MANHATTAN" ...
##  $ X Coordinate (State Plane)    : num  998223 1006952 997205 989069 995440 ...
##  $ Y Coordinate (State Plane)    : num  235925 187926 231569 209574 219561 ...
##  $ Park Facility Name            : chr  "Unspecified" "Unspecified" "Unspecified" "Unspecified" ...
##  $ Park Borough                  : chr  "MANHATTAN" "BROOKLYN" "MANHATTAN" "MANHATTAN" ...
##  $ Vehicle Type                  : logi  NA NA NA NA NA NA ...
##  $ Taxi Company Borough          : logi  NA NA NA NA NA NA ...
##  $ Taxi Pick Up Location         : logi  NA NA NA NA NA NA ...
##  $ Bridge Highway Name           : logi  NA NA NA NA NA NA ...
##  $ Bridge Highway Direction      : logi  NA NA NA NA NA NA ...
##  $ Road Ramp                     : logi  NA NA NA NA NA NA ...
##  $ Bridge Highway Segment        : logi  NA NA NA NA NA NA ...
##  $ Latitude                      : num  40.8 40.7 40.8 40.7 40.8 ...
##  $ Longitude                     : num  -73.9 -73.9 -74 -74 -74 ...
##  $ Location                      : chr  "(40.814224320719624, -73.94952055422276)" "(40.68246111285433, -73.91814784809289)" "(40.802269859152865, -73.95320662932198)" "(40.74190751693278, -73.98260957594246)" ...
##  - attr(*, "problems")=Classes 'tbl_df', 'tbl' and 'data.frame': 1 obs. of  5 variables:
##   ..$ row     : int 3617
##   ..$ col     : chr "Incident Zip"
##   ..$ expected: chr "a double"
##   ..$ actual  : chr "N/A"
##   ..$ file    : chr "'Asbestos_ComplaintsNY.csv'"
##  - attr(*, "spec")=
##   .. cols(
##   ..   `Unique Key` = col_double(),
##   ..   `Created Date` = col_character(),
##   ..   `Closed Date` = col_character(),
##   ..   Agency = col_character(),
##   ..   `Agency Name` = col_character(),
##   ..   `Complaint Type` = col_character(),
##   ..   Descriptor = col_character(),
##   ..   `Location Type` = col_character(),
##   ..   `Incident Zip` = col_double(),
##   ..   `Incident Address` = col_character(),
##   ..   `Street Name` = col_character(),
##   ..   `Cross Street 1` = col_character(),
##   ..   `Cross Street 2` = col_character(),
##   ..   `Intersection Street 1` = col_character(),
##   ..   `Intersection Street 2` = col_character(),
##   ..   `Address Type` = col_character(),
##   ..   City = col_character(),
##   ..   Landmark = col_character(),
##   ..   `Facility Type` = col_character(),
##   ..   Status = col_character(),
##   ..   `Due Date` = col_character(),
##   ..   `Resolution Description` = col_character(),
##   ..   `Resolution Action Updated Date` = col_character(),
##   ..   `Community Board` = col_character(),
##   ..   Borough = col_character(),
##   ..   `X Coordinate (State Plane)` = col_double(),
##   ..   `Y Coordinate (State Plane)` = col_double(),
##   ..   `Park Facility Name` = col_character(),
##   ..   `Park Borough` = col_character(),
##   ..   `Vehicle Type` = col_logical(),
##   ..   `Taxi Company Borough` = col_logical(),
##   ..   `Taxi Pick Up Location` = col_logical(),
##   ..   `Bridge Highway Name` = col_logical(),
##   ..   `Bridge Highway Direction` = col_logical(),
##   ..   `Road Ramp` = col_logical(),
##   ..   `Bridge Highway Segment` = col_logical(),
##   ..   Latitude = col_double(),
##   ..   Longitude = col_double(),
##   ..   Location = col_character()
##   .. )

head(nyc_complaints)

## # A tibble: 6 x 39
##   `Unique Key` `Created Date` `Closed Date` Agency `Agency Name`
##          <dbl> <chr>          <chr>         <chr>  <chr>        
## 1     45646394 2/18/2020 13:~ <NA>          DOHMH  Department o~
## 2     45642978 2/18/2020 9:54 <NA>          DEP    Department o~
## 3     45644436 2/18/2020 7:37 <NA>          DEP    Department o~
## 4     45645315 2/18/2020 12:~ 2/18/2020 13~ DOHMH  Department o~
## 5     45642189 2/18/2020 15:~ <NA>          DEP    Department o~
## 6     45639603 2/18/2020 10:~ <NA>          DOHMH  Department o~
## # ... with 34 more variables: `Complaint Type` <chr>, Descriptor <chr>,
## #   `Location Type` <chr>, `Incident Zip` <dbl>, `Incident Address` <chr>,
## #   `Street Name` <chr>, `Cross Street 1` <chr>, `Cross Street 2` <chr>,
## #   `Intersection Street 1` <chr>, `Intersection Street 2` <chr>, `Address
## #   Type` <chr>, City <chr>, Landmark <chr>, `Facility Type` <chr>,
## #   Status <chr>, `Due Date` <chr>, `Resolution Description` <chr>, `Resolution
## #   Action Updated Date` <chr>, `Community Board` <chr>, Borough <chr>, `X
## #   Coordinate (State Plane)` <dbl>, `Y Coordinate (State Plane)` <dbl>, `Park
## #   Facility Name` <chr>, `Park Borough` <chr>, `Vehicle Type` <lgl>, `Taxi
## #   Company Borough` <lgl>, `Taxi Pick Up Location` <lgl>, `Bridge Highway
## #   Name` <lgl>, `Bridge Highway Direction` <lgl>, `Road Ramp` <lgl>, `Bridge
## #   Highway Segment` <lgl>, Latitude <dbl>, Longitude <dbl>, Location <chr>

Clean the Complaints data

Remove spaces in the headers of the dataset and make all variables lower case

names(nyc_complaints) <- gsub(" ",".",names(nyc_complaints))
names(nyc_complaints) <- tolower(names(nyc_complaints))
head(nyc_complaints)

## # A tibble: 6 x 39
##   unique.key created.date closed.date agency agency.name complaint.type
##        <dbl> <chr>        <chr>       <chr>  <chr>       <chr>         
## 1   45646394 2/18/2020 1~ <NA>        DOHMH  Department~ Asbestos      
## 2   45642978 2/18/2020 9~ <NA>        DEP    Department~ Asbestos      
## 3   45644436 2/18/2020 7~ <NA>        DEP    Department~ Asbestos      
## 4   45645315 2/18/2020 1~ 2/18/2020 ~ DOHMH  Department~ Asbestos      
## 5   45642189 2/18/2020 1~ <NA>        DEP    Department~ Asbestos      
## 6   45639603 2/18/2020 1~ <NA>        DOHMH  Department~ Asbestos      
## # ... with 33 more variables: descriptor <chr>, location.type <chr>,
## #   incident.zip <dbl>, incident.address <chr>, street.name <chr>,
## #   cross.street.1 <chr>, cross.street.2 <chr>, intersection.street.1 <chr>,
## #   intersection.street.2 <chr>, address.type <chr>, city <chr>,
## #   landmark <chr>, facility.type <chr>, status <chr>, due.date <chr>,
## #   resolution.description <chr>, resolution.action.updated.date <chr>,
## #   community.board <chr>, borough <chr>, `x.coordinate.(state.plane)` <dbl>,
## #   `y.coordinate.(state.plane)` <dbl>, park.facility.name <chr>,
## #   park.borough <chr>, vehicle.type <lgl>, taxi.company.borough <lgl>,
## #   taxi.pick.up.location <lgl>, bridge.highway.name <lgl>,
## #   bridge.highway.direction <lgl>, road.ramp <lgl>,
## #   bridge.highway.segment <lgl>, latitude <dbl>, longitude <dbl>,
## #   location <chr>

Remove unnecessary columns from the data set, so it will be easier to work with. Only look at complaints from the years 2010-2015, as this will match the census data we will use. Therefore, we will create a year column. Since complaints were generally created and closed in the same years, we will use date created as the new “year” column.

# Remove unwanted columns from the nyc_complaints dataset

sorted_nyc <- select(nyc_complaints, -(agency.name), -(incident.zip), -(incident.address), -(street.name), -(cross.street.1), -(cross.street.2), -(intersection.street.1), -(intersection.street.2), -(city), -(landmark), (facility.type), -(due.date), -(resolution.action.updated.date), -(community.board), -(park.facility.name), -(vehicle.type), -(taxi.company.borough), -(taxi.pick.up.location), -(bridge.highway.name), -(bridge.highway.direction), -(road.ramp), -(bridge.highway.segment), -(location), -(facility.type), -(location.type))

str(sorted_nyc)

## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 20433 obs. of  15 variables:
##  $ unique.key                : num  45646394 45642978 45644436 45645315 45642189 ...
##  $ created.date              : chr  "2/18/2020 13:54" "2/18/2020 9:54" "2/18/2020 7:37" "2/18/2020 12:58" ...
##  $ closed.date               : chr  NA NA NA "2/18/2020 13:15" ...
##  $ agency                    : chr  "DOHMH" "DEP" "DEP" "DOHMH" ...
##  $ complaint.type            : chr  "Asbestos" "Asbestos" "Asbestos" "Asbestos" ...
##  $ descriptor                : chr  "N/A" "Asbestos Complaint (B1)" "Asbestos Complaint (B1)" "N/A" ...
##  $ address.type              : chr  NA "ADDRESS" "ADDRESS" "ADDRESS" ...
##  $ status                    : chr  "In Progress" "Open" "Open" "Closed" ...
##  $ resolution.description    : chr  NA NA NA NA ...
##  $ borough                   : chr  "MANHATTAN" "BROOKLYN" "MANHATTAN" "MANHATTAN" ...
##  $ x.coordinate.(state.plane): num  998223 1006952 997205 989069 995440 ...
##  $ y.coordinate.(state.plane): num  235925 187926 231569 209574 219561 ...
##  $ park.borough              : chr  "MANHATTAN" "BROOKLYN" "MANHATTAN" "MANHATTAN" ...
##  $ latitude                  : num  40.8 40.7 40.8 40.7 40.8 ...
##  $ longitude                 : num  -73.9 -73.9 -74 -74 -74 ...
##  - attr(*, "problems")=Classes 'tbl_df', 'tbl' and 'data.frame': 1 obs. of  5 variables:
##   ..$ row     : int 3617
##   ..$ col     : chr "Incident Zip"
##   ..$ expected: chr "a double"
##   ..$ actual  : chr "N/A"
##   ..$ file    : chr "'Asbestos_ComplaintsNY.csv'"
##  - attr(*, "spec")=
##   .. cols(
##   ..   `Unique Key` = col_double(),
##   ..   `Created Date` = col_character(),
##   ..   `Closed Date` = col_character(),
##   ..   Agency = col_character(),
##   ..   `Agency Name` = col_character(),
##   ..   `Complaint Type` = col_character(),
##   ..   Descriptor = col_character(),
##   ..   `Location Type` = col_character(),
##   ..   `Incident Zip` = col_double(),
##   ..   `Incident Address` = col_character(),
##   ..   `Street Name` = col_character(),
##   ..   `Cross Street 1` = col_character(),
##   ..   `Cross Street 2` = col_character(),
##   ..   `Intersection Street 1` = col_character(),
##   ..   `Intersection Street 2` = col_character(),
##   ..   `Address Type` = col_character(),
##   ..   City = col_character(),
##   ..   Landmark = col_character(),
##   ..   `Facility Type` = col_character(),
##   ..   Status = col_character(),
##   ..   `Due Date` = col_character(),
##   ..   `Resolution Description` = col_character(),
##   ..   `Resolution Action Updated Date` = col_character(),
##   ..   `Community Board` = col_character(),
##   ..   Borough = col_character(),
##   ..   `X Coordinate (State Plane)` = col_double(),
##   ..   `Y Coordinate (State Plane)` = col_double(),
##   ..   `Park Facility Name` = col_character(),
##   ..   `Park Borough` = col_character(),
##   ..   `Vehicle Type` = col_logical(),
##   ..   `Taxi Company Borough` = col_logical(),
##   ..   `Taxi Pick Up Location` = col_logical(),
##   ..   `Bridge Highway Name` = col_logical(),
##   ..   `Bridge Highway Direction` = col_logical(),
##   ..   `Road Ramp` = col_logical(),
##   ..   `Bridge Highway Segment` = col_logical(),
##   ..   Latitude = col_double(),
##   ..   Longitude = col_double(),
##   ..   Location = col_character()
##   .. )

# create the POSIXct dates

list <- c("2010","2011","2012","2013","2014","2015")

#creating the POSIXct dates
sorted_nyc$created_date <- mdy_hm(sorted_nyc$created.date)

#creating the year column
sorted_nyc$year <- year(sorted_nyc$created_date)

filter_nyc <- filter(sorted_nyc, year %in% list)
str(filter_nyc)

## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 12129 obs. of  17 variables:
##  $ unique.key                : num  29589040 29009121 30524378 30516788 30507929 ...
##  $ created.date              : chr  "12/28/2014 23:52" "10/4/2014 21:03" "5/1/2015 20:01" "4/30/2015 12:04" ...
##  $ closed.date               : chr  "1/12/2016 12:21" "7/25/2016 12:11" "5/18/2015 9:59" "5/18/2015 9:52" ...
##  $ agency                    : chr  "DOHMH" "DOHMH" "DOHMH" "DOHMH" ...
##  $ complaint.type            : chr  "Asbestos" "Asbestos" "Asbestos" "Asbestos" ...
##  $ descriptor                : chr  "N/A" "N/A" "N/A" "N/A" ...
##  $ address.type              : chr  "ADDRESS" "ADDRESS" "ADDRESS" "ADDRESS" ...
##  $ status                    : chr  "Closed" "Closed" "Closed" "Closed" ...
##  $ resolution.description    : chr  "The Department of Health and Mental Hygiene has contacted the customer and closed the Service Request.  If the "| __truncated__ "The Department of Health and Mental Hygiene has reviewed your Service Request. A warning letter has been sent t"| __truncated__ "The Department of Health and Mental Hygiene has investigated the complaint and no violations were cited." "The Department of Health and Mental Hygiene has investigated the complaint and no violations were cited." ...
##  $ borough                   : chr  "BRONX" "BRONX" "BRONX" "BRONX" ...
##  $ x.coordinate.(state.plane): num  1012103 1018405 1021630 1007998 1015562 ...
##  $ y.coordinate.(state.plane): num  260029 241581 249957 241700 241262 ...
##  $ park.borough              : chr  "BRONX" "BRONX" "BRONX" "BRONX" ...
##  $ latitude                  : num  40.9 40.8 40.9 40.8 40.8 ...
##  $ longitude                 : num  -73.9 -73.9 -73.9 -73.9 -73.9 ...
##  $ created_date              : POSIXct, format: "2014-12-28 23:52:00" "2014-10-04 21:03:00" ...
##  $ year                      : num  2014 2014 2015 2015 2015 ...
##  - attr(*, "problems")=Classes 'tbl_df', 'tbl' and 'data.frame': 1 obs. of  5 variables:
##   ..$ row     : int 3617
##   ..$ col     : chr "Incident Zip"
##   ..$ expected: chr "a double"
##   ..$ actual  : chr "N/A"
##   ..$ file    : chr "'Asbestos_ComplaintsNY.csv'"
##  - attr(*, "spec")=
##   .. cols(
##   ..   `Unique Key` = col_double(),
##   ..   `Created Date` = col_character(),
##   ..   `Closed Date` = col_character(),
##   ..   Agency = col_character(),
##   ..   `Agency Name` = col_character(),
##   ..   `Complaint Type` = col_character(),
##   ..   Descriptor = col_character(),
##   ..   `Location Type` = col_character(),
##   ..   `Incident Zip` = col_double(),
##   ..   `Incident Address` = col_character(),
##   ..   `Street Name` = col_character(),
##   ..   `Cross Street 1` = col_character(),
##   ..   `Cross Street 2` = col_character(),
##   ..   `Intersection Street 1` = col_character(),
##   ..   `Intersection Street 2` = col_character(),
##   ..   `Address Type` = col_character(),
##   ..   City = col_character(),
##   ..   Landmark = col_character(),
##   ..   `Facility Type` = col_character(),
##   ..   Status = col_character(),
##   ..   `Due Date` = col_character(),
##   ..   `Resolution Description` = col_character(),
##   ..   `Resolution Action Updated Date` = col_character(),
##   ..   `Community Board` = col_character(),
##   ..   Borough = col_character(),
##   ..   `X Coordinate (State Plane)` = col_double(),
##   ..   `Y Coordinate (State Plane)` = col_double(),
##   ..   `Park Facility Name` = col_character(),
##   ..   `Park Borough` = col_character(),
##   ..   `Vehicle Type` = col_logical(),
##   ..   `Taxi Company Borough` = col_logical(),
##   ..   `Taxi Pick Up Location` = col_logical(),
##   ..   `Bridge Highway Name` = col_logical(),
##   ..   `Bridge Highway Direction` = col_logical(),
##   ..   `Road Ramp` = col_logical(),
##   ..   `Bridge Highway Segment` = col_logical(),
##   ..   Latitude = col_double(),
##   ..   Longitude = col_double(),
##   ..   Location = col_character()
##   .. )

Categorical Variable Statistics

Because the dataset has categorical variables, create frequency tables and barcharts to summarize my data.

# create frequency tables
borough_tab <- filter_nyc %>% 
  with(table(borough)) %>% 
  prop.table() %>% 
  addmargins()
borough_tab

## borough
##         BRONX      BROOKLYN     MANHATTAN        QUEENS STATEN ISLAND 
##  0.1185588260  0.2822986231  0.3658999093  0.1975430786  0.0353697749 
##   Unspecified           Sum 
##  0.0003297881  1.0000000000

# create borough table
agency_tab <- filter_nyc %>% 
  with(table(agency)) %>% 
  prop.table() %>% 
  addmargins()
agency_tab

## agency
##       DEP     DOHMH       Sum 
## 0.7440844 0.2559156 1.0000000

# Create the percent crosstab
cross<- filter_nyc %>% 
  with(table(agency, borough)) %>% 
  prop.table()

cross*(100) %>%
  round(2)

##        borough
## agency        BRONX    BROOKLYN   MANHATTAN      QUEENS STATEN ISLAND
##   DEP    6.97501855 20.55404403 29.58199357 14.66732624    2.59708138
##   DOHMH  4.88086404  7.67581829  7.00799736  5.08698161    0.93989612
##        borough
## agency  Unspecified
##   DEP    0.03297881
##   DOHMH  0.00000000

Summarize these percents from the table in a barplot

DEP: Department of Environmental Protection DOHMHM: Department of Health and Mental Hygiene

barplot(prop.table(cross),xlab='Agency',ylab='Percentages', main="Percentage DEP or DOHMH by Borough",beside=T,col=c("blue","red"), legend=rownames(cross), args.legend = list(x = "topright"))

Complaints by Borough

library(ggpubr)

## Warning: package 'ggpubr' was built under R version 3.6.2

## Loading required package: magrittr

## 
## Attaching package: 'magrittr'

## The following object is masked from 'package:purrr':
## 
##     set_names

## The following object is masked from 'package:tidyr':
## 
##     extract

complaints_visual <-ggplot(filter_nyc, aes(borough, fill=borough)) +
 geom_bar() +
  xlab("Borough") +
  theme(axis.text.x = element_text(angle = 90)) +
  ylab("Count") +
  ggtitle("NYC Asbestos Complaints (2010-2015)")
complaints_visual

Place complaint incidents on the map based on latitude and longitude information using the filtered dataset

site_locations <- filter_nyc %>%
  leaflet() %>% 
  addTiles() %>% 
  addMarkers(clusterOptions = markerClusterOptions(lng = ~longitude, lat = ~latitude, popup = ~unique.key, label = ~unique.key))

## Assuming "longitude" and "latitude" are longitude and latitude, respectively

## Warning in validateCoords(lng, lat, funcName): Data contains 63 rows with either
## missing or invalid lat/lon values and will be ignored

site_locations

NYC Census Household Income from 2010-2015 Data

Rather than importing this data from a file, use the acs and tigris packages to gain the census data and geospatial data to create a map for household income.

Fetch the shapefile

The first step is to create a shapefile containing the census tract. The county and state codes can be found utilizing the geo.lookup() and lookup_code() functions.

geo.lookup("NY")

##   state state.name
## 1    36   New York

# Grab the spatial data (tigris)
counties <- c(5, 47, 61, 81, 85)
shapefile <- tracts(state = '36', county=counties, cb=TRUE)

## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |=======                                                               |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |=========                                                             |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |=================================================================     |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |====================================================================  |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================| 100%

plot(shapefile)  #plot this shapefile

Use American Community Survey (ACS) Data from the Census Bureau

The first step to fetching the ACS data is to create a geographic set to grab tabular data. Use the state and county codes from above.

geo<-geo.make(state=36, county=c(5, 47, 61, 81, 85), tract="*")

Fetching the acs data is relatively simple, but requires a key which can be optained from this (link)[https://api.census.gov/data/key_signup.html]. It also requires an endyear and table of the ACS survey. Chose household income data over the 5-year span, ending in 2015 from the Census. This will match with the data available in the complaints dataset.

api.key.install(key="28291533ae4499be26a9471aedefe57fe26e8ed9")
mytable <- mytable <- acs.lookup(endyear=2015, table.number="B19013")

## Warning in acs.lookup(endyear = 2015, table.number = "B19013"): temporarily downloading and using archived XML variable lookup files;
##   since this is *much* slower, recommend running
##   acs.tables.install()

str(mytable)

## Formal class 'acs.lookup' [package "acs"] with 4 slots
##   ..@ endyear: num 2015
##   ..@ span   : num 5
##   ..@ args   :List of 7
##   .. ..$ endyear       : num 2015
##   .. ..$ span          : num 5
##   .. ..$ dataset       : chr "acs"
##   .. ..$ keyword       : symbol 
##   .. ..$ table.name    : symbol 
##   .. ..$ table.number  : chr "B19013"
##   .. ..$ case.sensitive: logi TRUE
##   ..@ results:'data.frame':  1 obs. of  4 variables:
##   .. ..$ variable.code: chr "B19013_001"
##   .. ..$ table.number : chr "B19013."
##   .. ..$ table.name   : chr "B19013. Median Household Income in the Past 12 Months (in 2015 Inflation-Adjusted Dollars)"
##   .. ..$ variable.name: chr "Median household income in the past 12 months (in 2015 Inflation-adjusted dollars)"

# Choose the variable of interest
results(mytable)$variable.name

## [1] "Median household income in the past 12 months (in 2015 Inflation-adjusted dollars)"

Create values to fetch data

myvars <- mytable[1] 
myspan <- 5
myendyear <- 2015
countylist2 <- as.numeric(counties)
mygeo <- geo.make(state=36, county=countylist2, tract="*")

Join the tabulation and geospatial information, so that I can create a map.

api.key.install(key="c3b5bdf35c5d95902f2ab45e532daebf326fef66")
mydata <- acs.fetch(endyear=myendyear, span=myspan, geography=mygeo, variable=myvars)

## Warning in acs.fetch(endyear = endyear, span = span, geography =
## geography[[1]], : NAs introduced by coercion

## Warning in acs.fetch(endyear = endyear, span = span, geography =
## geography[[1]], : NAs introduced by coercion

## Warning in acs.fetch(endyear = endyear, span = span, geography =
## geography[[1]], : NAs introduced by coercion

# Clean the data
acsgeoid <- paste0(as.character(mydata@geography$state),'0',
                   as.character(mydata@geography$county),
                   as.character(mydata@geography$tract))


# Create a dataframe
mydatadf <- data.frame(acsgeoid, mydata@estimate)
colnames(mydatadf)=c("GEOID", "medianincome")
mydatadf2 <- filter(mydatadf, medianincome>0)
head(mydatadf2)

##        GEOID medianincome
## 1 3605000200        72034
## 2 3605000400        74836
## 3 3605001600        32312
## 4 3605001900        37936
## 5 3605002000        18086
## 6 3605002300        14479

Creating the NYC map for median household income up to $250,000

# Join the data
mydatamerged <- geo_join(shapefile, mydatadf2, "GEOID", "GEOID")
df <- mydatamerged

# Create the popup
mypopup <- paste0("GEOID: ", df$GEOID, "<br>", "Median Income: $", round(df$medianincome,0))

# Set the pallete
mypal <- colorNumeric(
  palette = "YlGnBu",
  domain = df$medianincome
)

Create the map

mymap<-leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(data = df, 
              fillColor = ~mypal(medianincome), 
              color = "#b2aeae", # you need to use hex colors
              fillOpacity = 0.7, 
              weight = 1, 
              smoothFactor = 0.2,
              popup = mypopup) %>%
  addLegend(pal = mypal, 
            values = df$medianincome, 
            position = "bottomright", 
            title = "Median Income",
            labFormat = labelFormat(prefix = "$"))
mymap

Combine Asbestos Complaints with Household Income from 2010 to 2015

map4 <- leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  addMarkers(data = filter_nyc,
             clusterOptions = markerClusterOptions(lng = ~longitude, lat = ~latitude, popup = ~unique.key, label = ~unique.key, group="Complaints")) %>%
  addPolygons(data = df, 
              fillColor = ~mypal(medianincome), 
              color = "#b2aeae", # you need to use hex colors
              fillOpacity = 0.7, 
              weight = 1, 
              smoothFactor = 0.2,
              popup = mypopup,
              group ="Income") %>%
  addLegend(pal = mypal, 
            values = df$medianincome, 
            position = "bottomright", 
            title = "Median Income",
            labFormat = labelFormat(prefix = "$")) %>%
  addLayersControl(overlayGroups = c("Complaints", "Income"), options = layersControlOptions(collapsed = FALSE))

## Assuming "longitude" and "latitude" are longitude and latitude, respectively

## Warning in validateCoords(lng, lat, funcName): Data contains 63 rows with either
## missing or invalid lat/lon values and will be ignored

map4