I decided to look at this dataset from NYC OpenData regarding asbestos complaints received by the Department of Environmental Protection (DEP) and the Department of Health and Mental Hygiene (DOHMH) from 2010 to the present. I also followed parts of the "Manipulating and mapping US Census data in R using the acs, tigris and leaflet packages" tutorial by Zev Ross, as well as the "Census Mapping Tutorial" by Laura Krull and Jeff Rosenblum.
I will be using functions from several different libraries. Each can be installed with install.packages() if it is not already on the device.
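For example, a minimal sketch for installing whichever of the packages used below are missing (the package list simply mirrors the library() calls that follow):
#install only the packages that are not already present
pkgs <- c("dplyr", "rgdal", "ggplot2", "leaflet", "lubridate", "tigris", "acs", "stringr", "plotly")
install.packages(setdiff(pkgs, rownames(installed.packages())))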
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.4-4, (SVN revision 833)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
## Path to GDAL shared files: C:/Users/murrayl/OneDrive - KCIC/Documents/R/win-library/3.6/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
## Path to PROJ.4 shared files: C:/Users/murrayl/OneDrive - KCIC/Documents/R/win-library/3.6/rgdal/proj
## Linking to sp version: 1.3-1
library(ggplot2)
library(leaflet)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(tigris)
## To enable
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
##
## Attaching package: 'tigris'
## The following object is masked from 'package:graphics':
##
## plot
library(acs)
## Loading required package: stringr
## Loading required package: XML
##
## Attaching package: 'acs'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:base':
##
## apply
library(stringr)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
#reading the data
nyc_complaints <- read.csv("Asbestos_Complaints.csv", stringsAsFactors = FALSE)
#observing nyc_complaints
str(nyc_complaints)
## 'data.frame': 19601 obs. of 39 variables:
## $ Unique.Key : int 43416920 43397586 43319486 43284428 43264741 43100492 42935089 42805214 42122223 43427094 ...
## $ Created.Date : chr "07/30/2019 10:14:00 AM" "07/27/2019 02:54:00 PM" "07/18/2019 05:04:00 PM" "07/15/2019 11:40:00 AM" ...
## $ Closed.Date : chr "07/30/2019 10:20:00 AM" "07/27/2019 03:01:00 PM" "07/18/2019 05:10:00 PM" "07/15/2019 12:30:00 PM" ...
## $ Agency : chr "DEP" "DEP" "DEP" "DEP" ...
## $ Agency.Name : chr "Department of Environmental Protection" "Department of Environmental Protection" "Department of Environmental Protection" "Department of Environmental Protection" ...
## $ Complaint.Type : chr "Asbestos" "Asbestos" "Asbestos" "Asbestos" ...
## $ Descriptor : chr "Asbestos Complaint (B1)" "Asbestos Complaint (B1)" "Asbestos Complaint (B1)" "Asbestos Complaint (B1)" ...
## $ Location.Type : chr "" "" "" "" ...
## $ Incident.Zip : chr "11411" "11217" "10006" "11692" ...
## $ Incident.Address : chr "114-28 212 STREET" "449 PACIFIC STREET" "" "328 BEACH 63 STREET" ...
## $ Street.Name : chr "212 STREET" "PACIFIC STREET" "" "BEACH 63 STREET" ...
## $ Cross.Street.1 : chr "COLFAX ST" "BOND ST" "LIBERTY ST" "DEAD END" ...
## $ Cross.Street.2 : chr "NASHVILLE BLVD" "NEVINS ST" "CHURCH ST" "BEACH CHANNEL DR" ...
## $ Intersection.Street.1 : chr "" "" "LIBERTY STREET" "" ...
## $ Intersection.Street.2 : chr "" "" "CHURCH STREET" "" ...
## $ Address.Type : chr "ADDRESS" "ADDRESS" "INTERSECTION" "ADDRESS" ...
## $ City : chr "Cambria Heights" "BROOKLYN" "NEW YORK" "Arverne" ...
## $ Landmark : chr "" "" "" "" ...
## $ Facility.Type : chr "N/A" "N/A" "N/A" "N/A" ...
## $ Status : chr "Closed" "Closed" "Closed" "Closed" ...
## $ Due.Date : chr "" "" "" "" ...
## $ Resolution.Description : chr "The Department of Environmental Protection determined that this complaint is a duplicate to another service request." "The Department of Environmental Protection determined that this complaint is a duplicate to another service request." "The Department of Environmental Protection spoke to the complainant via telephone and was able to resolve the c"| __truncated__ "The Department of Environmental Protection spoke to the complainant via telephone and was able to resolve the c"| __truncated__ ...
## $ Resolution.Action.Updated.Date: chr "07/30/2019 10:20:00 AM" "07/27/2019 03:01:00 PM" "07/18/2019 05:10:00 PM" "07/15/2019 12:30:00 PM" ...
## $ Community.Board : chr "13 QUEENS" "02 BROOKLYN" "01 MANHATTAN" "14 QUEENS" ...
## $ Borough : chr "QUEENS" "BROOKLYN" "MANHATTAN" "QUEENS" ...
## $ X.Coordinate..State.Plane. : int 1055548 988836 981014 1041953 1001675 1012468 987636 1008936 1031238 1001658 ...
## $ Y.Coordinate..State.Plane. : int 195199 189061 197835 155162 241493 211804 204662 185447 216246 213650 ...
## $ Park.Facility.Name : chr "Unspecified" "Unspecified" "Unspecified" "Unspecified" ...
## $ Park.Borough : chr "QUEENS" "BROOKLYN" "MANHATTAN" "QUEENS" ...
## $ Vehicle.Type : logi NA NA NA NA NA NA ...
## $ Taxi.Company.Borough : logi NA NA NA NA NA NA ...
## $ Taxi.Pick.Up.Location : logi NA NA NA NA NA NA ...
## $ Bridge.Highway.Name : logi NA NA NA NA NA NA ...
## $ Bridge.Highway.Direction : logi NA NA NA NA NA NA ...
## $ Road.Ramp : logi NA NA NA NA NA NA ...
## $ Bridge.Highway.Segment : logi NA NA NA NA NA NA ...
## $ Latitude : num 40.7 40.7 40.7 40.6 40.8 ...
## $ Longitude : num -73.7 -74 -74 -73.8 -73.9 ...
## $ Location : chr "(40.7021656966259, -73.74285846196767)" "(40.685604353374636, -73.98346438153149)" "(40.70968753652052, -74.01167217483717)" "(40.59237221211839, -73.79223212100901)" ...
head(nyc_complaints)
## Unique.Key Created.Date Closed.Date Agency
## 1 43416920 07/30/2019 10:14:00 AM 07/30/2019 10:20:00 AM DEP
## 2 43397586 07/27/2019 02:54:00 PM 07/27/2019 03:01:00 PM DEP
## 3 43319486 07/18/2019 05:04:00 PM 07/18/2019 05:10:00 PM DEP
## 4 43284428 07/15/2019 11:40:00 AM 07/15/2019 12:30:00 PM DEP
## 5 43264741 07/12/2019 05:45:00 PM 07/12/2019 06:00:00 PM DEP
## 6 43100492 06/26/2019 09:37:00 PM 06/27/2019 10:00:00 AM DEP
## Agency.Name Complaint.Type
## 1 Department of Environmental Protection Asbestos
## 2 Department of Environmental Protection Asbestos
## 3 Department of Environmental Protection Asbestos
## 4 Department of Environmental Protection Asbestos
## 5 Department of Environmental Protection Asbestos
## 6 Department of Environmental Protection Asbestos
## Descriptor Location.Type Incident.Zip Incident.Address
## 1 Asbestos Complaint (B1) 11411 114-28 212 STREET
## 2 Asbestos Complaint (B1) 11217 449 PACIFIC STREET
## 3 Asbestos Complaint (B1) 10006
## 4 Asbestos Complaint (B1) 11692 328 BEACH 63 STREET
## 5 Asbestos Complaint (B1) 10039 2937 8 AVENUE
## 6 Asbestos Complaint (B1) 11377 37-16 65 STREET
## Street.Name Cross.Street.1
## 1 212 STREET COLFAX ST
## 2 PACIFIC STREET BOND ST
## 3 LIBERTY ST
## 4 BEACH 63 STREET DEAD END
## 5 8 AVENUE MACOMBS DAM BRDG PEDESTRIAN PATH
## 6 65 STREET 37 AVE
## Cross.Street.2 Intersection.Street.1
## 1 NASHVILLE BLVD
## 2 NEVINS ST
## 3 CHURCH ST LIBERTY STREET
## 4 BEACH CHANNEL DR
## 5 HARLEM RIVER DRIVE SERVICE RD W
## 6 37 RD
## Intersection.Street.2 Address.Type City Landmark
## 1 ADDRESS Cambria Heights
## 2 ADDRESS BROOKLYN
## 3 CHURCH STREET INTERSECTION NEW YORK
## 4 ADDRESS Arverne
## 5 ADDRESS NEW YORK
## 6 ADDRESS Woodside
## Facility.Type Status Due.Date
## 1 N/A Closed
## 2 N/A Closed
## 3 N/A Closed
## 4 N/A Closed
## 5 N/A Closed
## 6 N/A Closed
## Resolution.Description
## 1 The Department of Environmental Protection determined that this complaint is a duplicate to another service request.
## 2 The Department of Environmental Protection determined that this complaint is a duplicate to another service request.
## 3 The Department of Environmental Protection spoke to the complainant via telephone and was able to resolve the complaint without inspection.
## 4 The Department of Environmental Protection spoke to the complainant via telephone and was able to resolve the complaint without inspection.
## 5 The Department of Environmental Protection spoke to the complainant via telephone and was able to resolve the complaint without inspection.
## 6 The Department of Environmental Protection (DEP) has investigated this complaint. This is an active asbestos abatement project that has been filed with DEP. The work is in compliance with all of DEP's rules and regulations and therefore no violation was issued.
## Resolution.Action.Updated.Date Community.Board Borough
## 1 07/30/2019 10:20:00 AM 13 QUEENS QUEENS
## 2 07/27/2019 03:01:00 PM 02 BROOKLYN BROOKLYN
## 3 07/18/2019 05:10:00 PM 01 MANHATTAN MANHATTAN
## 4 07/15/2019 12:30:00 PM 14 QUEENS QUEENS
## 5 07/12/2019 06:00:00 PM 10 MANHATTAN MANHATTAN
## 6 06/27/2019 10:00:00 AM 02 QUEENS QUEENS
## X.Coordinate..State.Plane. Y.Coordinate..State.Plane. Park.Facility.Name
## 1 1055548 195199 Unspecified
## 2 988836 189061 Unspecified
## 3 981014 197835 Unspecified
## 4 1041953 155162 Unspecified
## 5 1001675 241493 Unspecified
## 6 1012468 211804 Unspecified
## Park.Borough Vehicle.Type Taxi.Company.Borough Taxi.Pick.Up.Location
## 1 QUEENS NA NA NA
## 2 BROOKLYN NA NA NA
## 3 MANHATTAN NA NA NA
## 4 QUEENS NA NA NA
## 5 MANHATTAN NA NA NA
## 6 QUEENS NA NA NA
## Bridge.Highway.Name Bridge.Highway.Direction Road.Ramp
## 1 NA NA NA
## 2 NA NA NA
## 3 NA NA NA
## 4 NA NA NA
## 5 NA NA NA
## 6 NA NA NA
## Bridge.Highway.Segment Latitude Longitude
## 1 NA 40.70217 -73.74286
## 2 NA 40.68560 -73.98346
## 3 NA 40.70969 -74.01167
## 4 NA 40.59237 -73.79223
## 5 NA 40.82950 -73.93704
## 6 NA 40.74798 -73.89816
## Location
## 1 (40.7021656966259, -73.74285846196767)
## 2 (40.685604353374636, -73.98346438153149)
## 3 (40.70968753652052, -74.01167217483717)
## 4 (40.59237221211839, -73.79223212100901)
## 5 (40.82950076779289, -73.93703525936526)
## 6 (40.74798459930839, -73.89815981373029)
I decided to remove columns I do not need from the data set so that it will be easier to work with. I also decided to look only at complaints from the years 2010-2015, as this matches the census data I intend to pull. This required creating a year column; since complaints were generally created and closed in the same year, I derived the new "year" column from the created date.
#removing unwanted columns from the nyc_complaints dataset
sorted_nyc <- select(nyc_complaints, -Agency.Name, -Incident.Zip, -Incident.Address, -Street.Name, -Cross.Street.1, -Cross.Street.2, -Intersection.Street.1, -Intersection.Street.2, -City, -Landmark, -Facility.Type, -Due.Date, -Resolution.Action.Updated.Date, -Community.Board, -Park.Facility.Name, -Vehicle.Type, -Taxi.Company.Borough, -Taxi.Pick.Up.Location, -Bridge.Highway.Name, -Bridge.Highway.Direction, -Road.Ramp, -Bridge.Highway.Segment, -Location, -Location.Type)
#creating the POSIXct dates
sorted_nyc$created_date <- mdy_hms(sorted_nyc$Created.Date)
#creating the year column
sorted_nyc$year_created <- year(sorted_nyc$created_date)
#filtering to the years 2010-2015
years <- 2010:2015
filtered_nyc <- filter(sorted_nyc, year_created %in% years)
#checking to see changes
str(filtered_nyc)
## 'data.frame': 12128 obs. of 17 variables:
## $ Unique.Key : int 31634762 31090297 31088849 31081633 31075198 31067643 31067619 31020650 30946496 30939510 ...
## $ Created.Date : chr "09/28/2015 05:47:46 PM" "07/16/2015 09:28:00 AM" "07/16/2015 09:33:00 PM" "07/15/2015 10:32:00 AM" ...
## $ Closed.Date : chr "" "" "" "" ...
## $ Agency : chr "DOHMH" "DEP" "DEP" "DEP" ...
## $ Complaint.Type : chr "Asbestos" "Asbestos" "Asbestos" "Asbestos" ...
## $ Descriptor : chr "N/A" "Asbestos Complaint (B1)" "Asbestos Complaint (B1)" "Asbestos Complaint (B1)" ...
## $ Address.Type : chr "ADDRESS" "ADDRESS" "ADDRESS" "ADDRESS" ...
## $ Status : chr "Closed" "Started" "Started" "Started" ...
## $ Resolution.Description : chr "The Department of Health and Mental Hygiene will review your complaint to determine appropriate action. Compla"| __truncated__ "The Department of Environmental Protection has inspected your complaint and determined that further investigati"| __truncated__ "The Department of Environmental Protection has inspected your complaint and determined that further investigati"| __truncated__ "The Department of Environmental Protection has inspected your complaint and determined that further investigati"| __truncated__ ...
## $ Borough : chr "QUEENS" "BRONX" "BROOKLYN" "BROOKLYN" ...
## $ X.Coordinate..State.Plane.: int 1031529 1035502 993033 1000072 1002519 986594 987572 990338 1054844 999426 ...
## $ Y.Coordinate..State.Plane.: int 194802 240034 149494 187033 186860 201226 191296 216099 197888 232901 ...
## $ Park.Borough : chr "QUEENS" "BRONX" "BROOKLYN" "BROOKLYN" ...
## $ Latitude : num 40.7 40.8 40.6 40.7 40.7 ...
## $ Longitude : num -73.8 -73.8 -74 -73.9 -73.9 ...
## $ created_date : POSIXct, format: "2015-09-28 17:47:46" "2015-07-16 09:28:00" ...
## $ year_created : num 2015 2015 2015 2015 2015 ...
Because my data set is composed of categorical variables, I decided to use R to create frequency tables and bar charts to summarize my data.
#creating frequency tables
#table(nyc_complaints$Agency)
#table(nyc_complaints$Borough)
#creating borough table
borough_tab <- filtered_nyc %>% with(table(Borough)) %>% prop.table() %>% addmargins()
borough_tab
## Borough
## BRONX BROOKLYN MANHATTAN QUEENS STATEN ISLAND
## 0.1185686016 0.2823218997 0.3659300792 0.1974769129 0.0353726913
## Unspecified Sum
## 0.0003298153 1.0000000000
#creating agency table
agency_tab <- filtered_nyc %>% with(table(Agency)) %>% prop.table() %>% addmargins()
agency_tab
## Agency
## DEP DOHMH Sum
## 0.7441458 0.2558542 1.0000000
#creating the crosstab
cross<- filtered_nyc %>% with(table(Agency, Borough)) %>% prop.table() %>% addmargins()
cross
## Borough
## Agency BRONX BROOKLYN MANHATTAN QUEENS STATEN ISLAND
## DEP 0.0697559367 0.2055573879 0.2958443272 0.1466853562 0.0259729551
## DOHMH 0.0488126649 0.0767645119 0.0700857520 0.0507915567 0.0093997361
## Sum 0.1185686016 0.2823218997 0.3659300792 0.1974769129 0.0353726913
## Borough
## Agency Unspecified Sum
## DEP 0.0003298153 0.7441457784
## DOHMH 0.0000000000 0.2558542216
## Sum 0.0003298153 1.0000000000
#creating the percent cross table
cross2 <- filtered_nyc %>% with(table(Agency, Borough)) %>% prop.table()
round(cross2 * 100, 2)
## Borough
## Agency BRONX BROOKLYN MANHATTAN QUEENS STATEN ISLAND
## DEP 6.98 20.56 29.58 14.67 2.60
## DOHMH 4.88 7.68 7.01 5.08 0.94
## Borough
## Agency Unspecified
## DEP 0.03
## DOHMH 0.00
I can summarize this information in a quick barplot:
barplot(cross2 * 100, xlab='Borough', ylab='Percentage', main="Percentage DEP or DOHMH by Borough", beside=T, col=c("darkblue","lightcyan"), legend=rownames(cross2), args.legend = list(x = "topleft"))
complaints_visual <-ggplot(filtered_nyc, aes(Borough, fill=Borough)) +
geom_bar() +
xlab("Borough") +
ylab("Count") +
ggtitle("NYC Asbestos Complaints (2010-2015)")
complaints_visual
site_locations <- nyc_complaints %>%
leaflet() %>%
addTiles() %>%
addMarkers(lng = ~Longitude, lat = ~Latitude, popup = ~as.character(Unique.Key), label = ~as.character(Unique.Key), clusterOptions = markerClusterOptions())
## Warning in validateCoords(lng, lat, funcName): Data contains 285 rows with
## either missing or invalid lat/lon values and will be ignored
site_locations
Now let’s try it with the filtered data!
site_locations <- filtered_nyc %>%
leaflet() %>%
addTiles() %>%
addMarkers(lng = ~Longitude, lat = ~Latitude, popup = ~as.character(Unique.Key), label = ~as.character(Unique.Key), clusterOptions = markerClusterOptions())
## Warning in validateCoords(lng, lat, funcName): Data contains 63 rows with
## either missing or invalid lat/lon values and will be ignored
site_locations
Rather than importing this data from a file, I decided to use the acs and tigris packages to obtain the census data and geospatial data needed to create a map of household income.
The first step is to create a shapefile containing the census tracts. The state and county codes can be found using the geo.lookup() and lookup_code() functions.
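As a brief illustration, a sketch of those lookups for one of the five boroughs (the "NY"/"Bronx"/"New York" inputs are just example arguments; the resulting codes are hard-coded in the next chunk):
geo.lookup(state = "NY", county = "Bronx")    #acs: search census geographies by name
lookup_code(state = "New York", county = "Bronx")    #tigris: print the state and county FIPS codes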
# grab the spatial data (tigris)
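#the five NYC county FIPS codes used below: Bronx (005), Kings/Brooklyn (047), New York/Manhattan (061), Queens (081), Richmond/Staten Island (085)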
counties <- c(5, 47, 61, 81, 85)
shapefile <- tracts(state = '36', county=counties, cb=TRUE)
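Because tracts() downloads the TIGER/Line files on every run, the tigris start-up message above suggests enabling caching; a one-line sketch, to be set before the tracts() call:
options(tigris_use_cache = TRUE)    #cache downloaded shapefiles across sessions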
#visualizing what we just grabbed
plot(shapefile)
The next step in fetching the ACS data is to create a geographic set from which to pull tabular data. I used the state and county codes I found before.
geo<-geo.make(state=36, county=c(5, 47, 61, 81, 85), tract="*")
Fetching the ACS data is relatively simple, but it requires an API key, which can be obtained from https://api.census.gov/data/key_signup.html. It also requires the end year, span, and table of the ACS survey I am interested in pulling from. I chose household income data from the Census over the 5-year span ending in 2015, so that it matches the years available in the complaints dataset.
api.key.install(key = "YOUR_CENSUS_API_KEY")
mytable <- acs.lookup(endyear=2015, table.number="B19013")
## Warning in acs.lookup(endyear = 2015, table.number = "B19013"): temporarily downloading and using archived XML variable lookup files;
## since this is *much* slower, recommend running
## acs.tables.install()
str(mytable)
## Formal class 'acs.lookup' [package "acs"] with 4 slots
## ..@ endyear: num 2015
## ..@ span : num 5
## ..@ args :List of 7
## .. ..$ endyear : num 2015
## .. ..$ span : num 5
## .. ..$ dataset : chr "acs"
## .. ..$ keyword : symbol
## .. ..$ table.name : symbol
## .. ..$ table.number : chr "B19013"
## .. ..$ case.sensitive: logi TRUE
## ..@ results:'data.frame': 1 obs. of 4 variables:
## .. ..$ variable.code: chr "B19013_001"
## .. ..$ table.number : chr "B19013."
## .. ..$ table.name : chr "B19013. Median Household Income in the Past 12 Months (in 2015 Inflation-Adjusted Dollars)"
## .. ..$ variable.name: chr "Median household income in the past 12 months (in 2015 Inflation-adjusted dollars)"
Now, I will choose the column of interest.
results(mytable)$variable.name
## [1] "Median household income in the past 12 months (in 2015 Inflation-adjusted dollars)"
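The matching variable code can be pulled the same way (it also appears in the str() output above):
results(mytable)$variable.code
## [1] "B19013_001"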
I first tried to convert the table information into a dataframe directly from the lookup object:
income_df <- data.frame(paste0(str_pad(mytable@geography$state, 2, "left", pad="0"),
str_pad(mytable@geography$county, 3, "left", pad="0"),
str_pad(mytable@geography$tract, 6, "left", pad="0")),
mytable@estimate[,c("Household Income: Total:",
"Household Income: $200,000 or more")],
stringsAsFactors = FALSE)
## Error in stri_pad_left(string, width, pad = pad): no slot of name "geography" for this object of class "acs.lookup"
This fails because an acs.lookup object only describes the available variables; the geography and estimate slots exist only on the object returned by acs.fetch() (and the column names referenced here come from a different income table in the tutorial I followed). I will instead build the data frame after fetching the data below.
I will now create values to fetch my data.
myvars <- mytable[1]
myspan <- 5
myendyear <- 2015
countylist2 <- as.numeric(counties)
mygeo <- geo.make(state=36, county=countylist2, tract="*")
Now I will fetch the tabular data and join it with the geospatial information so that I can create a map.
api.key.install(key = "YOUR_CENSUS_API_KEY")
mydata <- acs.fetch(endyear=myendyear, span=myspan, geography=mygeo, variable=myvars)
## Warning in acs.fetch(endyear = endyear, span = span, geography =
## geography[[1]], : NAs introduced by coercion
## Warning in acs.fetch(endyear = endyear, span = span, geography =
## geography[[1]], : NAs introduced by coercion
## Warning in acs.fetch(endyear = endyear, span = span, geography =
## geography[[1]], : NAs introduced by coercion
#constructing a GEOID from the state, county, and tract codes so it can be matched to the shapefile
acsgeoid <- paste0(as.character(mydata@geography$state),'0',
as.character(mydata@geography$county),
as.character(mydata@geography$tract))
#creating a dataframe
mydatadf <- data.frame(acsgeoid, mydata@estimate)
colnames(mydatadf)=c("GEOID", "medianincome")
mydatadf2 <- filter(mydatadf, medianincome>0)
head(mydatadf2)
## GEOID medianincome
## 1 3605000200 72034
## 2 3605000400 74836
## 3 3605001600 32312
## 4 3605001900 37936
## 5 3605002000 18086
## 6 3605002300 14479
#joining the data
mydatamerged <- geo_join(shapefile, mydatadf2, "GEOID", "GEOID")
df <- mydatamerged
#creating the popup
mypopup <- paste0("GEOID: ", df$GEOID, "<br>", "Median Income: $", round(df$medianincome,0))
#setting the palette
mypal <- colorNumeric(
palette = "YlGnBu",
domain = df$medianincome
)
Creating the map:
mymap<-leaflet() %>%
addProviderTiles("CartoDB.Positron") %>%
addPolygons(data = df,
fillColor = ~mypal(medianincome),
color = "#b2aeae", # you need to use hex colors
fillOpacity = 0.7,
weight = 1,
smoothFactor = 0.2,
popup = mypopup) %>%
addLegend(pal = mypal,
values = df$medianincome,
position = "bottomright",
title = "Median Income",
labFormat = labelFormat(prefix = "$"))
mymap
Let's put it all together! For the final part of my project, I decided to combine both maps to create a single visualization of median household income alongside the locations of asbestos complaints from 2010 to 2015.
map4 <- leaflet() %>%
addProviderTiles("CartoDB.Positron") %>%
addMarkers(data = filtered_nyc,
lng = ~Longitude, lat = ~Latitude,
popup = ~as.character(Unique.Key), label = ~as.character(Unique.Key),
clusterOptions = markerClusterOptions(),
group = "Complaints") %>%
addPolygons(data = df,
fillColor = ~mypal(medianincome),
color = "#b2aeae", # you need to use hex colors
fillOpacity = 0.7,
weight = 1,
smoothFactor = 0.2,
popup = mypopup,
group ="Income") %>%
addLegend(pal = mypal,
values = df$medianincome,
position = "bottomright",
title = "Median Income",
labFormat = labelFormat(prefix = "$")) %>%
addLayersControl(overlayGroups = c("Complaints", "Income"), options = layersControlOptions(collapsed = FALSE))
## Warning in validateCoords(lng, lat, funcName): Data contains 63 rows with
## either missing or invalid lat/lon values and will be ignored
map4