1 Preliminaries

1.1 Loading packages

library(tidyverse)
library(lubridate)
library(sf)
library(tmap)
library(pdftools)
library(RSocrata)

1.2 Obtaining data from the Chicago Open Data portal

socrata.file <- "https://data.cityofchicago.org/resource/suj7-cg3j.csv"

vehicle.data <- read.socrata(socrata.file)

head(vehicle.data)

##   creation_date          status completion_date service_request_number
## 1    2011-01-01 Completed - Dup      2011-01-07            11-00002779
## 2    2011-01-01 Completed - Dup      2011-01-20            11-00003001
## 3    2011-01-01 Completed - Dup      2011-01-21            11-00003309
## 4    2011-01-01 Completed - Dup      2011-01-21            11-00003316
## 5    2011-01-01       Completed      2011-01-05            11-00001976
## 6    2011-01-01       Completed      2011-01-05            11-00002291
##       type_of_service_request
## 1 Abandoned Vehicle Complaint
## 2 Abandoned Vehicle Complaint
## 3 Abandoned Vehicle Complaint
## 4 Abandoned Vehicle Complaint
## 5 Abandoned Vehicle Complaint
## 6 Abandoned Vehicle Complaint
##                                         license_plate vehicle_make_model
## 1 REAR PLATE STARTS W/848 AND FRONT PLATE STARTS W/ K              Isuzu
## 2                                             9381880             Toyota
## 3                                          MI S CS860      Jeep/Cherokee
## 4                                           MI SCS860                   
## 5                                             H924236               Ford
## 6                         810 LYB    WISCONSIN PLATES            Mercury
##   vehicle_color current_activity most_recent_action
## 1           Red                                    
## 2        Silver                                    
## 3          Gold                                    
## 4          Gold                                    
## 5         White                                    
## 6         Green                                    
##   how_many_days_has_the_vehicle_been_reported_as_parked_       street_address
## 1                                                     24   5629 N KEDVALE AVE
## 2                                                     NA  2053 N KILBOURN AVE
## 3                                                     NA      736 W BUENA AVE
## 4                                                     NA      736 W BUENA AVE
## 5                                                     60  6059 S KOMENSKY AVE
## 6                                                     NA 4651 S WASHTENAW AVE
##   zip_code x_coordinate y_coordinate ward police_district community_area ssa
## 1    60646      1147717      1937054   39              17             13  NA
## 2    60639      1146056      1913269   31              25             20  NA
## 3    60613      1170576      1928214   46              23              3  NA
## 4    60613      1170576      1928214   46              23              3  NA
## 5    60629      1150408      1864110   13               8             65   3
## 6    60632      1159150      1873712   12               9             58  NA
##   latitude longitude                                     location
## 1 41.98368 -87.73197 POINT (41.983680361597564 -87.7319663736746)
## 2 41.91859 -87.73868 POINT (41.91858774162382 -87.73868431751842)
## 3 41.95861 -87.64888 POINT (41.95860696269331 -87.64887590959788)
## 4 41.95861 -87.64888 POINT (41.95860696269331 -87.64887590959788)
## 5 41.78237 -87.72394 POINT (41.78237428405976 -87.72394038021173)
## 6 41.80864 -87.69163 POINT (41.80863500843091 -87.69162625248853)
##   location_address location_city location_state location_zip
## 1             <NA>          <NA>           <NA>         <NA>
## 2             <NA>          <NA>           <NA>         <NA>
## 3             <NA>          <NA>           <NA>         <NA>
## 4             <NA>          <NA>           <NA>         <NA>
## 5             <NA>          <NA>           <NA>         <NA>
## 6             <NA>          <NA>           <NA>         <NA>

dim(vehicle.data)

## [1] 261486     26

class(vehicle.data$creation_date)

## [1] "POSIXct" "POSIXt"

1.3 Selecting Observations for a Given Time Period

1.3.1 Extracting observations for the desired time period

vehicle.sept16 <- vehicle.data %>% filter(year(creation_date) == 2016) %>%
                  filter(month(creation_date) == 9)
head(vehicle.sept16)

##   creation_date          status completion_date service_request_number
## 1    2016-09-01 Completed - Dup      2016-09-01            16-06192603
## 2    2016-09-01 Completed - Dup      2016-09-01            16-06192662
## 3    2016-09-01 Completed - Dup      2016-09-01            16-06193608
## 4    2016-09-01 Completed - Dup      2016-09-01            16-06194284
## 5    2016-09-01 Completed - Dup      2016-09-01            16-06194594
## 6    2016-09-01 Completed - Dup      2016-09-01            16-06197569
##       type_of_service_request license_plate vehicle_make_model vehicle_color
## 1 Abandoned Vehicle Complaint       UNKNOWN          Chevrolet         White
## 2 Abandoned Vehicle Complaint       UNKNOWN                            Green
## 3 Abandoned Vehicle Complaint        UKNOWN                             Gray
## 4 Abandoned Vehicle Complaint     NO PLATES               Ford          Blue
## 5 Abandoned Vehicle Complaint                                               
## 6 Abandoned Vehicle Complaint                                               
##   current_activity most_recent_action
## 1    FVI - Outcome  Create Work Order
## 2    FVI - Outcome  Create Work Order
## 3    FVI - Outcome  Create Work Order
## 4    FVI - Outcome  Create Work Order
## 5    FVI - Outcome  Create Work Order
## 6    FVI - Outcome  Create Work Order
##   how_many_days_has_the_vehicle_been_reported_as_parked_        street_address
## 1                                                     14        3710 W IOWA ST
## 2                                                     40   5240 S MAYFIELD AVE
## 3                                                      7     8000 S ALBANY AVE
## 4                                                     30  8654 W CATHERINE AVE
## 5                                                     NA 4315 N MONTICELLO AVE
## 6                                                     NA   2241 N MULLIGAN AVE
##   zip_code x_coordinate y_coordinate ward police_district community_area ssa
## 1    60651      1151452      1905748   27              11             23  NA
## 2    60638      1137921      1869254   14               8             56  NA
## 3    60652      1157102      1851405   18               8             70  NA
## 4    60656      1117638      1934535   41              16             76  NA
## 5    60618      1151283      1928434   35              17             16  NA
## 6    60639      1133710      1914324   36              25             19  NA
##   latitude longitude                                      location
## 1 41.89736 -87.71933  POINT (41.89736153676566 -87.71933325878982)
## 2 41.79688 -87.76990 POINT (41.796881421903066 -87.76989633815052)
## 3 41.74792 -87.70005  POINT (41.74792366108626 -87.70004701460941)
## 4 41.97694 -87.84349   POINT (41.97694235046974 -87.8434945723464)
## 5 41.95972 -87.71907  POINT (41.95972327912134 -87.71906810908936)
## 6 41.92154 -87.78402  POINT (41.92154133910697 -87.78401648793171)
##   location_address location_city location_state location_zip
## 1             <NA>          <NA>           <NA>         <NA>
## 2             <NA>          <NA>           <NA>         <NA>
## 3             <NA>          <NA>           <NA>         <NA>
## 4             <NA>          <NA>           <NA>         <NA>
## 5             <NA>          <NA>           <NA>         <NA>
## 6             <NA>          <NA>           <NA>         <NA>

dim(vehicle.sept16)

## [1] 2637   26

1.3.2 Selecting the variables for the final table

Checking name:

names(vehicle.sept16)

##  [1] "creation_date"                                         
##  [2] "status"                                                
##  [3] "completion_date"                                       
##  [4] "service_request_number"                                
##  [5] "type_of_service_request"                               
##  [6] "license_plate"                                         
##  [7] "vehicle_make_model"                                    
##  [8] "vehicle_color"                                         
##  [9] "current_activity"                                      
## [10] "most_recent_action"                                    
## [11] "how_many_days_has_the_vehicle_been_reported_as_parked_"
## [12] "street_address"                                        
## [13] "zip_code"                                              
## [14] "x_coordinate"                                          
## [15] "y_coordinate"                                          
## [16] "ward"                                                  
## [17] "police_district"                                       
## [18] "community_area"                                        
## [19] "ssa"                                                   
## [20] "latitude"                                              
## [21] "longitude"                                             
## [22] "location"                                              
## [23] "location_address"                                      
## [24] "location_city"                                         
## [25] "location_state"                                        
## [26] "location_zip"

Keep some columns for analysis: To keep things simple, we will only keep community_area, latitude and longitude, and turn them into comm, lat and lon. The new data set is vehicles.final. Note that to rename a variable, the new name is listed first, on the left hand side of the equal sign, and the old name is on the right hand side. We check the result with the head command.

vehicles.final <- vehicle.sept16 %>% select(comm = community_area,
                                            lat = latitude, lon = longitude)
head(vehicles.final)

##   comm      lat       lon
## 1   23 41.89736 -87.71933
## 2   56 41.79688 -87.76990
## 3   70 41.74792 -87.70005
## 4   76 41.97694 -87.84349
## 5   16 41.95972 -87.71907
## 6   19 41.92154 -87.78402

1.4 Creating a Point Layer

1.4.1 Creating a point layer from coordinates in a table - principle

Missing coordinates

vehicle.coord <- vehicles.final %>% filter(!(is.na(lat)))
dim(vehicle.coord)

## [1] 2635    3

As it turns out, the two rows we noticed above were the only two with missing coordinates (the number of rows went from 2,637 to 2,635).

1.4.2 Creating a spatial points objects

The sf package turns a non-spatial object like a data frame into a simple features spatial object by means of the st_as_sf function

vehicle.points <- st_as_sf(vehicle.coord, coords = c("lon", "lat"), crs = 4326, agr = "constant")

class(vehicle.points)

## [1] "sf"         "data.frame"

Quick plot

plot(vehicle.points)

We can also do a quick check of the projection information using the st_crs

st_crs(vehicle.points)

## Coordinate Reference System:
##   User input: EPSG:4326 
##   wkt:
## GEOGCRS["WGS 84",
##     ENSEMBLE["World Geodetic System 1984 ensemble",
##         MEMBER["World Geodetic System 1984 (Transit)"],
##         MEMBER["World Geodetic System 1984 (G730)"],
##         MEMBER["World Geodetic System 1984 (G873)"],
##         MEMBER["World Geodetic System 1984 (G1150)"],
##         MEMBER["World Geodetic System 1984 (G1674)"],
##         MEMBER["World Geodetic System 1984 (G1762)"],
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]],
##         ENSEMBLEACCURACY[2.0]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     USAGE[
##         SCOPE["Horizontal component of 3D system."],
##         AREA["World."],
##         BBOX[-90,-180,90,180]],
##     ID["EPSG",4326]]

1.4.3 Abandoned Vehicles by Community Area

Problem: As it turns out, some of the points have missing community area information, which is a critical element to compute the number of abandoned cars at that scale. Task: Here, we will exploit some of the GIS functionality in sf to carry out a spatial join. This boils down to: 1) identifying which points belong to each community area (a so-called point in polygon query) and 2) assigning the corresponding community area identifier to each point.

1.4.3.1 Community Area boundary file

comm.file <- "https://data.cityofchicago.org/resource/igwz-8jzy.geojson"
chicago.comm <- read_sf(comm.file)
class(chicago.comm)

## [1] "sf"         "tbl_df"     "tbl"        "data.frame"

We check the projection information using st_crs.

st_crs(chicago.comm)

## Coordinate Reference System:
##   User input: WGS 84 
##   wkt:
## GEOGCRS["WGS 84",
##     DATUM["World Geodetic System 1984",
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4326]]

plot(chicago.comm)

head(chicago.comm)

## Simple feature collection with 6 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -87.7069 ymin: 41.79448 xmax: -87.58001 ymax: 41.99076
## Geodetic CRS:  WGS 84
## # A tibble: 6 × 10
##   community  area  shape_area perimeter area_num_1 area_numbe comarea_id comarea
##   <chr>      <chr> <chr>      <chr>     <chr>      <chr>      <chr>      <chr>  
## 1 DOUGLAS    0     46004621.… 0         35         35         0          0      
## 2 OAKLAND    0     16913961.… 0         36         36         0          0      
## 3 FULLER PA… 0     19916704.… 0         37         37         0          0      
## 4 GRAND BOU… 0     48492503.… 0         38         38         0          0      
## 5 KENWOOD    0     29071741.… 0         39         39         0          0      
## 6 LINCOLN S… 0     71352328.… 0         4          4          0          0      
## # … with 2 more variables: shape_len <chr>, geometry <MULTIPOLYGON [°]>

str(chicago.comm)

## sf [77 × 10] (S3: sf/tbl_df/tbl/data.frame)
##  $ community : chr [1:77] "DOUGLAS" "OAKLAND" "FULLER PARK" "GRAND BOULEVARD" ...
##  $ area      : chr [1:77] "0" "0" "0" "0" ...
##  $ shape_area: chr [1:77] "46004621.1581" "16913961.0408" "19916704.8692" "48492503.1554" ...
##  $ perimeter : chr [1:77] "0" "0" "0" "0" ...
##  $ area_num_1: chr [1:77] "35" "36" "37" "38" ...
##  $ area_numbe: chr [1:77] "35" "36" "37" "38" ...
##  $ comarea_id: chr [1:77] "0" "0" "0" "0" ...
##  $ comarea   : chr [1:77] "0" "0" "0" "0" ...
##  $ shape_len : chr [1:77] "31027.0545098" "19565.5061533" "25339.0897503" "28196.8371573" ...
##  $ geometry  :sfc_MULTIPOLYGON of length 77; first list element: List of 1
##   ..$ :List of 1
##   .. ..$ : num [1:352, 1:2] -87.6 -87.6 -87.6 -87.6 -87.6 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA
##   ..- attr(*, "names")= chr [1:9] "community" "area" "shape_area" "perimeter" ...

1.4.3.2 Changing projections

Before moving on to the spatial join operation, we will convert both: 1) the community area boundaries and 2) the vehicle points

to the same projection, using the st_transform command.

We assign the UTM (Universal Tranverse Mercator) zone 16N, which the the proper one for Chicago, with an EPSG code of 32616. After the projection transformation, we check the result using st_crs

chicago.comm <- st_transform(chicago.comm, 32616)
st_crs(chicago.comm)

## Coordinate Reference System:
##   User input: EPSG:32616 
##   wkt:
## PROJCRS["WGS 84 / UTM zone 16N",
##     BASEGEOGCRS["WGS 84",
##         ENSEMBLE["World Geodetic System 1984 ensemble",
##             MEMBER["World Geodetic System 1984 (Transit)"],
##             MEMBER["World Geodetic System 1984 (G730)"],
##             MEMBER["World Geodetic System 1984 (G873)"],
##             MEMBER["World Geodetic System 1984 (G1150)"],
##             MEMBER["World Geodetic System 1984 (G1674)"],
##             MEMBER["World Geodetic System 1984 (G1762)"],
##             ELLIPSOID["WGS 84",6378137,298.257223563,
##                 LENGTHUNIT["metre",1]],
##             ENSEMBLEACCURACY[2.0]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4326]],
##     CONVERSION["UTM zone 16N",
##         METHOD["Transverse Mercator",
##             ID["EPSG",9807]],
##         PARAMETER["Latitude of natural origin",0,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8801]],
##         PARAMETER["Longitude of natural origin",-87,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8802]],
##         PARAMETER["Scale factor at natural origin",0.9996,
##             SCALEUNIT["unity",1],
##             ID["EPSG",8805]],
##         PARAMETER["False easting",500000,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8806]],
##         PARAMETER["False northing",0,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8807]]],
##     CS[Cartesian,2],
##         AXIS["(E)",east,
##             ORDER[1],
##             LENGTHUNIT["metre",1]],
##         AXIS["(N)",north,
##             ORDER[2],
##             LENGTHUNIT["metre",1]],
##     USAGE[
##         SCOPE["Engineering survey, topographic mapping."],
##         AREA["Between 90°W and 84°W, northern hemisphere between equator and 84°N, onshore and offshore. Belize. Canada - Manitoba; Nunavut; Ontario. Costa Rica. Cuba. Ecuador - Galapagos. El Salvador. Guatemala. Honduras. Mexico. Nicaragua. United States (USA)."],
##         BBOX[0,-90,84,-84]],
##     ID["EPSG",32616]]

vehicle.points <- st_transform(vehicle.points, 32616)
st_crs(vehicle.points)

## Coordinate Reference System:
##   User input: EPSG:32616 
##   wkt:
## PROJCRS["WGS 84 / UTM zone 16N",
##     BASEGEOGCRS["WGS 84",
##         ENSEMBLE["World Geodetic System 1984 ensemble",
##             MEMBER["World Geodetic System 1984 (Transit)"],
##             MEMBER["World Geodetic System 1984 (G730)"],
##             MEMBER["World Geodetic System 1984 (G873)"],
##             MEMBER["World Geodetic System 1984 (G1150)"],
##             MEMBER["World Geodetic System 1984 (G1674)"],
##             MEMBER["World Geodetic System 1984 (G1762)"],
##             ELLIPSOID["WGS 84",6378137,298.257223563,
##                 LENGTHUNIT["metre",1]],
##             ENSEMBLEACCURACY[2.0]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4326]],
##     CONVERSION["UTM zone 16N",
##         METHOD["Transverse Mercator",
##             ID["EPSG",9807]],
##         PARAMETER["Latitude of natural origin",0,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8801]],
##         PARAMETER["Longitude of natural origin",-87,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8802]],
##         PARAMETER["Scale factor at natural origin",0.9996,
##             SCALEUNIT["unity",1],
##             ID["EPSG",8805]],
##         PARAMETER["False easting",500000,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8806]],
##         PARAMETER["False northing",0,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8807]]],
##     CS[Cartesian,2],
##         AXIS["(E)",east,
##             ORDER[1],
##             LENGTHUNIT["metre",1]],
##         AXIS["(N)",north,
##             ORDER[2],
##             LENGTHUNIT["metre",1]],
##     USAGE[
##         SCOPE["Engineering survey, topographic mapping."],
##         AREA["Between 90°W and 84°W, northern hemisphere between equator and 84°N, onshore and offshore. Belize. Canada - Manitoba; Nunavut; Ontario. Costa Rica. Cuba. Ecuador - Galapagos. El Salvador. Guatemala. Honduras. Mexico. Nicaragua. United States (USA)."],
##         BBOX[0,-90,84,-84]],
##     ID["EPSG",32616]]

1.4.3.3 Spatial Join

comm.pts <- st_join(vehicle.points, chicago.comm["area_num_1"])
head(comm.pts)

## Simple feature collection with 6 features and 2 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 430118.3 ymin: 4622026 xmax: 441795.4 ymax: 4647560
## Projected CRS: WGS 84 / UTM zone 16N
##   comm area_num_1                 geometry
## 1   23         23 POINT (440330.7 4638631)
## 2   56         56 POINT (436036.4 4627511)
## 3   70         70 POINT (441795.4 4622026)
## 4   76         76 POINT (430118.3 4647560)
## 5   16         16 POINT (440410.8 4645554)
## 6   19         19 POINT (434989.7 4641362)

As we can see, the community area in comm matches the entry in area_num_1. However, there is one more issue to deal with. Upon closer examination, we find that the area_num_1 variable is not numeric using the is.numeric check.

is.numeric(comm.pts$area_num_1)

## [1] FALSE

comm.pts$area_num_1 <- as.integer(comm.pts$area_num_1)
is.integer(comm.pts$area_num_1)

## [1] TRUE

The same problem occurs in the chicago.comm data set, which can cause trouble later on when we will join it with other data. Therefore, we turn it into an integer as well.

chicago.comm$area_num_1 <- as.integer(chicago.comm$area_num_1)

1.4.3.4 Counts by community area

Task: We now need to count the number of points in each polygon. Solution: We proceed in two steps. 1) First, we illustrate how we can move back from the simple features spatial points object to a simple data frame by stripping the geometry column. This is accomplished by setting st_geometry to NULL. We check the class of the new object to make sure it is no longer a simple feature. 2) We next take advantage of the tidyverse count function to create a new data frame with the identifier of the community area and the number of points contained in each community area.

st_geometry(comm.pts) <- NULL
class(comm.pts)

## [1] "data.frame"

veh.cnts <- comm.pts %>% count(area_num_1)
head(veh.cnts)

##   area_num_1  n
## 1          1 67
## 2          2 89
## 3          3 21
## 4          4 32
## 5          5 18
## 6          6 19

Change the columm name:

veh.cnts <- veh.cnts %>% rename(comm = area_num_1, AGG.COUNT = n)
head(veh.cnts)

##   comm AGG.COUNT
## 1    1        67
## 2    2        89
## 3    3        21
## 4    4        32
## 5    5        18
## 6    6        19

1.4.4 Mapping the vehicle counts

At this point, we have: 1) a polygon layer with the community area boundaries and some identifiers (chicago.comm) 2) a data frame with the community identifier and the aggregate vehicle count (veh.cnts).

In order to map the vehicle counts by community area, we need to join the two tables using left_join command and use area_num_1 as the key for the first table (the community area boundaries), and comm as the key for the second table (the vehicle counts).

chicago.comm <- left_join(chicago.comm, veh.cnts, by = c("area_num_1" = "comm"))
head(chicago.comm)

## Simple feature collection with 6 features and 10 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 441440.4 ymin: 4627153 xmax: 451817.1 ymax: 4648971
## Projected CRS: WGS 84 / UTM zone 16N
## # A tibble: 6 × 11
##   community  area  shape_area perimeter area_num_1 area_numbe comarea_id comarea
##   <chr>      <chr> <chr>      <chr>          <int> <chr>      <chr>      <chr>  
## 1 DOUGLAS    0     46004621.… 0                 35 35         0          0      
## 2 OAKLAND    0     16913961.… 0                 36 36         0          0      
## 3 FULLER PA… 0     19916704.… 0                 37 37         0          0      
## 4 GRAND BOU… 0     48492503.… 0                 38 38         0          0      
## 5 KENWOOD    0     29071741.… 0                 39 39         0          0      
## 6 LINCOLN S… 0     71352328.… 0                  4 4          0          0      
## # … with 3 more variables: shape_len <chr>, geometry <MULTIPOLYGON [m]>,
## #   AGG.COUNT <int>

Basic choropleth map

tm_shape(chicago.comm) + 
  tm_polygons("AGG.COUNT")

However, this map can be highly misleading since it pertains to a so-called spatially extensive variable, such as a count. Even if every area had the same risk of having abandoned vehicles, larger community areas would have higher counts. In other words, since the count is directly related to the size of the area, it does not provide a proper indication of the risk.

Instead, we should map a spatially intensive variable, which is corrected for the size of the unit. For example, this can be achieved by expressing the variable as a density (counts per area), or as some other ratio, such as the counts per capita. In order to calculate this ratio, we first need to obtain the population for each community area.

1.4.5 Community Area Population Data

1.4.5.1 Extracting a pdf file

pdf.file <- "https://www.cityofchicago.org/content/dam/city/depts/zlup/Zoning_Main_Page/Publications/Census_2010_Community_Area_Profiles/Census_2010_and_2000_CA_Populations.pdf"

pop.dat <- pdf_text(pdf.file)

class(pop.dat)

## [1] "character"

length(pop.dat)

## [1] 2

1.4.5.2 Parsing the pdf file

Firstly, we start by initializing a vector (nnlist) with an empty character, and confirm that it is indeed initialized.

nnlist <- ""
nnlist

## [1] ""

Next, we create a list of strings, one for each line in the table, by using the strsplit operation. This splits the long string into a list of one string for each line, by using the return character as the separator (the value for the split argument).

The resulting list, ppage, contains a list of 44 elements, matching the contents of the first page of the pdf file.

ppage <- strsplit(pop.dat[[1]],split="\n")
ppage[[1]]

##  [1] "                              CITY OF CHICAGO"                                
##  [2] "                            CENSUS 2010 AND 2000"                             
##  [3] ""                                                                             
##  [4] "                                                Population"                   
##  [5] "Num        Community Area       2010        2,000     Difference   Percentage"
##  [6] " 1    Rogers Park                54,991     63,484      -8,493       -13.4%"  
##  [7] " 2    West Ridge                 71,942     73,199      -1,257        -1.7%"  
##  [8] " 3    Uptown                     56,362     63,551      -7,189       -11.3%"  
##  [9] " 4    Lincoln Square             39,493     44,574      -5,081       -11.4%"  
## [10] " 5    North Center               31,867     31,895        -28         -0.1%"  
## [11] " 6    Lake View                  94,368     94,817       -449         -0.5%"  
## [12] " 7    Lincoln Park               64,116     64,320       -204         -0.3%"  
## [13] " 8    Near North Side            80,484     72,811      7,673         10.5%"  
## [14] " 9    Edison Park                11,187     11,259        -72         -0.6%"  
## [15] " 10   Norwood Park               37,023     37,669       -646         -1.7%"  
## [16] " 11   Jefferson Park             25,448     25,859       -411         -1.6%"  
## [17] " 12   Forest Glen                18,508     18,165        343         1.9%"   
## [18] " 13   North Park                 17,931     18,514       -583         -3.1%"  
## [19] " 14   Albany Park                51,542     57,655      -6,113       -10.6%"  
## [20] " 15   Portage Park               64,124     65,340      -1,216        -1.9%"  
## [21] " 16   Irving Park                53,359     58,643      -5,284        -9.0%"  
## [22] " 17   Dunning                    41,932     42,164       -232         -0.6%"  
## [23] " 18   Montclare                  13,426     12,646        780         6.2%"   
## [24] " 19   Belmont Cragin             78,743     78,144        599         0.8%"   
## [25] " 20   Hermosa                    25,010     26,908      -1,898        -7.1%"  
## [26] " 21   Avondale                   39,262     43,083      -3,821        -8.9%"  
## [27] " 22   Logan Square               73,595     82,715      -9,120       -11.0%"  
## [28] " 23   Humboldt Park              56,323     65,836      -9,513       -14.4%"  
## [29] " 24   West Town                  81,432     87,435      -6,003        -6.9%"  
## [30] " 25   Austin                     98,514    117,527     -19,013       -16.2%"  
## [31] " 26   West Garfield Park         18,001     23,019      -5,018       -21.8%"  
## [32] " 27   East Garfield Park         20,567     20,881       -314         -1.5%"  
## [33] " 28   Near West Side             54,881     46,419      8,462         18.2%"  
## [34] " 29   North Lawndale             35,912     41,768      -5,856       -14.0%"  
## [35] " 30   South Lawndale             79,288     91,071     -11,783       -12.9%"  
## [36] " 31   Lower West Side            35,769     44,031      -8,262       -18.8%"  
## [37] " 32   Loop                       29,283     16,388      12,895        78.7%"  
## [38] " 33   Near South Side            21,390     9,509       11,881       124.9%"  
## [39] " 34   Armour Square              13,391     12,032      1,359         11.3%"  
## [40] " 35   Douglas                    18,238     26,470      -8,232       -31.1%"  
## [41] " 36   Oakland                     5,918     6,110        -192         -3.1%"  
## [42] " 37   Fuller Park                 2,876     3,420        -544        -15.9%"  
## [43] " 38   Grand Boulevard            21,929     28,006      -6,077       -21.7%"  
## [44] " 39   Kenwood                    17,841     18,363       -522         -2.8%"  
## [45] " 40   Washington Park            11,717     14,146      -2,429       -17.2%"

Each element is one long string, corresponding to a table row. We remove the first four lines (using the - operation on the list elements 1 through 4). These first rows appear on each page, so we are safe to repeat this procedure for the second page (string) as well.

nni <- ppage[[1]]
nni <- nni[-(1:5)]
nni

##  [1] " 1    Rogers Park                54,991     63,484      -8,493       -13.4%"
##  [2] " 2    West Ridge                 71,942     73,199      -1,257        -1.7%"
##  [3] " 3    Uptown                     56,362     63,551      -7,189       -11.3%"
##  [4] " 4    Lincoln Square             39,493     44,574      -5,081       -11.4%"
##  [5] " 5    North Center               31,867     31,895        -28         -0.1%"
##  [6] " 6    Lake View                  94,368     94,817       -449         -0.5%"
##  [7] " 7    Lincoln Park               64,116     64,320       -204         -0.3%"
##  [8] " 8    Near North Side            80,484     72,811      7,673         10.5%"
##  [9] " 9    Edison Park                11,187     11,259        -72         -0.6%"
## [10] " 10   Norwood Park               37,023     37,669       -646         -1.7%"
## [11] " 11   Jefferson Park             25,448     25,859       -411         -1.6%"
## [12] " 12   Forest Glen                18,508     18,165        343         1.9%" 
## [13] " 13   North Park                 17,931     18,514       -583         -3.1%"
## [14] " 14   Albany Park                51,542     57,655      -6,113       -10.6%"
## [15] " 15   Portage Park               64,124     65,340      -1,216        -1.9%"
## [16] " 16   Irving Park                53,359     58,643      -5,284        -9.0%"
## [17] " 17   Dunning                    41,932     42,164       -232         -0.6%"
## [18] " 18   Montclare                  13,426     12,646        780         6.2%" 
## [19] " 19   Belmont Cragin             78,743     78,144        599         0.8%" 
## [20] " 20   Hermosa                    25,010     26,908      -1,898        -7.1%"
## [21] " 21   Avondale                   39,262     43,083      -3,821        -8.9%"
## [22] " 22   Logan Square               73,595     82,715      -9,120       -11.0%"
## [23] " 23   Humboldt Park              56,323     65,836      -9,513       -14.4%"
## [24] " 24   West Town                  81,432     87,435      -6,003        -6.9%"
## [25] " 25   Austin                     98,514    117,527     -19,013       -16.2%"
## [26] " 26   West Garfield Park         18,001     23,019      -5,018       -21.8%"
## [27] " 27   East Garfield Park         20,567     20,881       -314         -1.5%"
## [28] " 28   Near West Side             54,881     46,419      8,462         18.2%"
## [29] " 29   North Lawndale             35,912     41,768      -5,856       -14.0%"
## [30] " 30   South Lawndale             79,288     91,071     -11,783       -12.9%"
## [31] " 31   Lower West Side            35,769     44,031      -8,262       -18.8%"
## [32] " 32   Loop                       29,283     16,388      12,895        78.7%"
## [33] " 33   Near South Side            21,390     9,509       11,881       124.9%"
## [34] " 34   Armour Square              13,391     12,032      1,359         11.3%"
## [35] " 35   Douglas                    18,238     26,470      -8,232       -31.1%"
## [36] " 36   Oakland                     5,918     6,110        -192         -3.1%"
## [37] " 37   Fuller Park                 2,876     3,420        -544        -15.9%"
## [38] " 38   Grand Boulevard            21,929     28,006      -6,077       -21.7%"
## [39] " 39   Kenwood                    17,841     18,363       -522         -2.8%"
## [40] " 40   Washington Park            11,717     14,146      -2,429       -17.2%"

NEXT: To streamline the resulting data structure for further operations, we turn it into a simple vector by means of unlist.

This then allows us to concatenate the result to the current nnlist vector (initially, this contains just a single element with an empty character, after the first step it contains the empty character and the first page).

nnu <- unlist(nni)
nnlist <- c(nnlist, nnu)
nnlist

##  [1] ""                                                                           
##  [2] " 1    Rogers Park                54,991     63,484      -8,493       -13.4%"
##  [3] " 2    West Ridge                 71,942     73,199      -1,257        -1.7%"
##  [4] " 3    Uptown                     56,362     63,551      -7,189       -11.3%"
##  [5] " 4    Lincoln Square             39,493     44,574      -5,081       -11.4%"
##  [6] " 5    North Center               31,867     31,895        -28         -0.1%"
##  [7] " 6    Lake View                  94,368     94,817       -449         -0.5%"
##  [8] " 7    Lincoln Park               64,116     64,320       -204         -0.3%"
##  [9] " 8    Near North Side            80,484     72,811      7,673         10.5%"
## [10] " 9    Edison Park                11,187     11,259        -72         -0.6%"
## [11] " 10   Norwood Park               37,023     37,669       -646         -1.7%"
## [12] " 11   Jefferson Park             25,448     25,859       -411         -1.6%"
## [13] " 12   Forest Glen                18,508     18,165        343         1.9%" 
## [14] " 13   North Park                 17,931     18,514       -583         -3.1%"
## [15] " 14   Albany Park                51,542     57,655      -6,113       -10.6%"
## [16] " 15   Portage Park               64,124     65,340      -1,216        -1.9%"
## [17] " 16   Irving Park                53,359     58,643      -5,284        -9.0%"
## [18] " 17   Dunning                    41,932     42,164       -232         -0.6%"
## [19] " 18   Montclare                  13,426     12,646        780         6.2%" 
## [20] " 19   Belmont Cragin             78,743     78,144        599         0.8%" 
## [21] " 20   Hermosa                    25,010     26,908      -1,898        -7.1%"
## [22] " 21   Avondale                   39,262     43,083      -3,821        -8.9%"
## [23] " 22   Logan Square               73,595     82,715      -9,120       -11.0%"
## [24] " 23   Humboldt Park              56,323     65,836      -9,513       -14.4%"
## [25] " 24   West Town                  81,432     87,435      -6,003        -6.9%"
## [26] " 25   Austin                     98,514    117,527     -19,013       -16.2%"
## [27] " 26   West Garfield Park         18,001     23,019      -5,018       -21.8%"
## [28] " 27   East Garfield Park         20,567     20,881       -314         -1.5%"
## [29] " 28   Near West Side             54,881     46,419      8,462         18.2%"
## [30] " 29   North Lawndale             35,912     41,768      -5,856       -14.0%"
## [31] " 30   South Lawndale             79,288     91,071     -11,783       -12.9%"
## [32] " 31   Lower West Side            35,769     44,031      -8,262       -18.8%"
## [33] " 32   Loop                       29,283     16,388      12,895        78.7%"
## [34] " 33   Near South Side            21,390     9,509       11,881       124.9%"
## [35] " 34   Armour Square              13,391     12,032      1,359         11.3%"
## [36] " 35   Douglas                    18,238     26,470      -8,232       -31.1%"
## [37] " 36   Oakland                     5,918     6,110        -192         -3.1%"
## [38] " 37   Fuller Park                 2,876     3,420        -544        -15.9%"
## [39] " 38   Grand Boulevard            21,929     28,006      -6,077       -21.7%"
## [40] " 39   Kenwood                    17,841     18,363       -522         -2.8%"
## [41] " 40   Washington Park            11,717     14,146      -2,429       -17.2%"

We now repeat this operation for pop.dat[[2]]. More efficiently, we implement it as a loop, replacing i in turn by 1 and 2. This yields:

nnlist <- ""
for (i in 1:2) {
  ppage <- strsplit(pop.dat[[i]],split="\n")
  nni <- ppage[[1]]
  nni <- nni[-(1:5)]
  nnu <- unlist(nni)
  nnlist <- c(nnlist,nnu)
}

nnlist

##  [1] ""                                                                               
##  [2] " 1    Rogers Park                54,991     63,484      -8,493       -13.4%"    
##  [3] " 2    West Ridge                 71,942     73,199      -1,257        -1.7%"    
##  [4] " 3    Uptown                     56,362     63,551      -7,189       -11.3%"    
##  [5] " 4    Lincoln Square             39,493     44,574      -5,081       -11.4%"    
##  [6] " 5    North Center               31,867     31,895        -28         -0.1%"    
##  [7] " 6    Lake View                  94,368     94,817       -449         -0.5%"    
##  [8] " 7    Lincoln Park               64,116     64,320       -204         -0.3%"    
##  [9] " 8    Near North Side            80,484     72,811      7,673         10.5%"    
## [10] " 9    Edison Park                11,187     11,259        -72         -0.6%"    
## [11] " 10   Norwood Park               37,023     37,669       -646         -1.7%"    
## [12] " 11   Jefferson Park             25,448     25,859       -411         -1.6%"    
## [13] " 12   Forest Glen                18,508     18,165        343         1.9%"     
## [14] " 13   North Park                 17,931     18,514       -583         -3.1%"    
## [15] " 14   Albany Park                51,542     57,655      -6,113       -10.6%"    
## [16] " 15   Portage Park               64,124     65,340      -1,216        -1.9%"    
## [17] " 16   Irving Park                53,359     58,643      -5,284        -9.0%"    
## [18] " 17   Dunning                    41,932     42,164       -232         -0.6%"    
## [19] " 18   Montclare                  13,426     12,646        780         6.2%"     
## [20] " 19   Belmont Cragin             78,743     78,144        599         0.8%"     
## [21] " 20   Hermosa                    25,010     26,908      -1,898        -7.1%"    
## [22] " 21   Avondale                   39,262     43,083      -3,821        -8.9%"    
## [23] " 22   Logan Square               73,595     82,715      -9,120       -11.0%"    
## [24] " 23   Humboldt Park              56,323     65,836      -9,513       -14.4%"    
## [25] " 24   West Town                  81,432     87,435      -6,003        -6.9%"    
## [26] " 25   Austin                     98,514    117,527     -19,013       -16.2%"    
## [27] " 26   West Garfield Park         18,001     23,019      -5,018       -21.8%"    
## [28] " 27   East Garfield Park         20,567     20,881       -314         -1.5%"    
## [29] " 28   Near West Side             54,881     46,419      8,462         18.2%"    
## [30] " 29   North Lawndale             35,912     41,768      -5,856       -14.0%"    
## [31] " 30   South Lawndale             79,288     91,071     -11,783       -12.9%"    
## [32] " 31   Lower West Side            35,769     44,031      -8,262       -18.8%"    
## [33] " 32   Loop                       29,283     16,388      12,895        78.7%"    
## [34] " 33   Near South Side            21,390     9,509       11,881       124.9%"    
## [35] " 34   Armour Square              13,391     12,032      1,359         11.3%"    
## [36] " 35   Douglas                    18,238     26,470      -8,232       -31.1%"    
## [37] " 36   Oakland                     5,918     6,110        -192         -3.1%"    
## [38] " 37   Fuller Park                 2,876     3,420        -544        -15.9%"    
## [39] " 38   Grand Boulevard            21,929     28,006      -6,077       -21.7%"    
## [40] " 39   Kenwood                    17,841     18,363       -522         -2.8%"    
## [41] " 40   Washington Park            11,717     14,146      -2,429       -17.2%"    
## [42] " 41   Hyde Park                      25,681     29,920       -4,239      -14.2%"
## [43] " 42   Woodlawn                       25,983     27,086       -1,103       -4.1%"
## [44] " 43   South Shore                    49,767     61,556      -11,789      -19.2%"
## [45] " 44   Chatham                        31,028     37,275       -6,247      -16.8%"
## [46] " 45   Avalon Park                    10,185     11,147        -962        -8.6%"
## [47] " 46   South Chicago                  31,198     38,596       -7,398      -19.2%"
## [48] " 47   Burnside                        2,916     3,294         -378       -11.5%"
## [49] " 48   Calumet Heights                13,812     15,974       -2,162      -13.5%"
## [50] " 49   Roseland                       44,619     52,723       -8,104      -15.4%"
## [51] " 50   Pullman                         7,325     8,921        -1,596      -17.9%"
## [52] " 51   South Deering                  15,109     16,990       -1,881      -11.1%"
## [53] " 52   East Side                      23,042     23,653        -611        -2.6%"
## [54] " 53   West Pullman                   29,651     36,649       -6,998      -19.1%"
## [55] " 54   Riverdale                       6,482     9,809        -3,327      -33.9%"
## [56] " 55   Hegewisch                       9,426     9,781         -355        -3.6%"
## [57] " 56   Garfield Ridge                 34,513     36,101       -1,588       -4.4%"
## [58] " 57   Archer Heights                 13,393     12,644        749         5.9%" 
## [59] " 58   Brighton Park                  45,368     44,912        456         1.0%" 
## [60] " 59   McKinley Park                  15,612     15,962        -350        -2.2%"
## [61] " 60   Bridgeport                     31,977     33,694       -1,717       -5.1%"
## [62] " 61   New City                       44,377     51,721       -7,344      -14.2%"
## [63] " 62   West Elsdon                    18,109     15,921       2,188       13.7%" 
## [64] " 63   Gage Park                      39,894     39,193        701         1.8%" 
## [65] " 64   Clearing                       23,139     22,331        808         3.6%" 
## [66] " 65   West Lawn                      33,355     29,235       4,120       14.1%" 
## [67] " 66   Chicago Lawn                   55,628     61,412       -5,784       -9.4%"
## [68] " 67   West Englewood                 35,505     45,282       -9,777      -21.6%"
## [69] " 68   Englewood                      30,654     40,222       -9,568      -23.8%"
## [70] " 69   Greater Grand Crossing         32,602     38,619       -6,017      -15.6%"
## [71] " 70   Ashburn                        41,081     39,584       1,497        3.8%" 
## [72] " 71   Auburn Gresham                 48,743     55,928       -7,185      -12.8%"
## [73] " 72   Beverly                        20,034     21,992       -1,958       -8.9%"
## [74] " 73   Washington Heights             26,493     29,843       -3,350      -11.2%"
## [75] " 74   Mount Greenwood                19,093     18,820        273         1.5%" 
## [76] " 75   Morgan Park                    22,544     25,226       -2,682      -10.6%"
## [77] " 76   O'Hare                         12,756     11,956        800         6.7%" 
## [78] " 77   Edgewater                      56,521     62,198       -5,677       -9.1%"
## [79] "      Total                       2,695,598   2,896,016    -200,418       -6.9%"

This is now a vector of 79 elements, each of which is a string. To clean things up, strip the first (empty) element, and the last element, which is nothing but the totals. We thus extract the elements from 2 to length - 1.

nnlist <- nnlist[2:(length(nnlist)-1)]

nnlist

##  [1] " 1    Rogers Park                54,991     63,484      -8,493       -13.4%"    
##  [2] " 2    West Ridge                 71,942     73,199      -1,257        -1.7%"    
##  [3] " 3    Uptown                     56,362     63,551      -7,189       -11.3%"    
##  [4] " 4    Lincoln Square             39,493     44,574      -5,081       -11.4%"    
##  [5] " 5    North Center               31,867     31,895        -28         -0.1%"    
##  [6] " 6    Lake View                  94,368     94,817       -449         -0.5%"    
##  [7] " 7    Lincoln Park               64,116     64,320       -204         -0.3%"    
##  [8] " 8    Near North Side            80,484     72,811      7,673         10.5%"    
##  [9] " 9    Edison Park                11,187     11,259        -72         -0.6%"    
## [10] " 10   Norwood Park               37,023     37,669       -646         -1.7%"    
## [11] " 11   Jefferson Park             25,448     25,859       -411         -1.6%"    
## [12] " 12   Forest Glen                18,508     18,165        343         1.9%"     
## [13] " 13   North Park                 17,931     18,514       -583         -3.1%"    
## [14] " 14   Albany Park                51,542     57,655      -6,113       -10.6%"    
## [15] " 15   Portage Park               64,124     65,340      -1,216        -1.9%"    
## [16] " 16   Irving Park                53,359     58,643      -5,284        -9.0%"    
## [17] " 17   Dunning                    41,932     42,164       -232         -0.6%"    
## [18] " 18   Montclare                  13,426     12,646        780         6.2%"     
## [19] " 19   Belmont Cragin             78,743     78,144        599         0.8%"     
## [20] " 20   Hermosa                    25,010     26,908      -1,898        -7.1%"    
## [21] " 21   Avondale                   39,262     43,083      -3,821        -8.9%"    
## [22] " 22   Logan Square               73,595     82,715      -9,120       -11.0%"    
## [23] " 23   Humboldt Park              56,323     65,836      -9,513       -14.4%"    
## [24] " 24   West Town                  81,432     87,435      -6,003        -6.9%"    
## [25] " 25   Austin                     98,514    117,527     -19,013       -16.2%"    
## [26] " 26   West Garfield Park         18,001     23,019      -5,018       -21.8%"    
## [27] " 27   East Garfield Park         20,567     20,881       -314         -1.5%"    
## [28] " 28   Near West Side             54,881     46,419      8,462         18.2%"    
## [29] " 29   North Lawndale             35,912     41,768      -5,856       -14.0%"    
## [30] " 30   South Lawndale             79,288     91,071     -11,783       -12.9%"    
## [31] " 31   Lower West Side            35,769     44,031      -8,262       -18.8%"    
## [32] " 32   Loop                       29,283     16,388      12,895        78.7%"    
## [33] " 33   Near South Side            21,390     9,509       11,881       124.9%"    
## [34] " 34   Armour Square              13,391     12,032      1,359         11.3%"    
## [35] " 35   Douglas                    18,238     26,470      -8,232       -31.1%"    
## [36] " 36   Oakland                     5,918     6,110        -192         -3.1%"    
## [37] " 37   Fuller Park                 2,876     3,420        -544        -15.9%"    
## [38] " 38   Grand Boulevard            21,929     28,006      -6,077       -21.7%"    
## [39] " 39   Kenwood                    17,841     18,363       -522         -2.8%"    
## [40] " 40   Washington Park            11,717     14,146      -2,429       -17.2%"    
## [41] " 41   Hyde Park                      25,681     29,920       -4,239      -14.2%"
## [42] " 42   Woodlawn                       25,983     27,086       -1,103       -4.1%"
## [43] " 43   South Shore                    49,767     61,556      -11,789      -19.2%"
## [44] " 44   Chatham                        31,028     37,275       -6,247      -16.8%"
## [45] " 45   Avalon Park                    10,185     11,147        -962        -8.6%"
## [46] " 46   South Chicago                  31,198     38,596       -7,398      -19.2%"
## [47] " 47   Burnside                        2,916     3,294         -378       -11.5%"
## [48] " 48   Calumet Heights                13,812     15,974       -2,162      -13.5%"
## [49] " 49   Roseland                       44,619     52,723       -8,104      -15.4%"
## [50] " 50   Pullman                         7,325     8,921        -1,596      -17.9%"
## [51] " 51   South Deering                  15,109     16,990       -1,881      -11.1%"
## [52] " 52   East Side                      23,042     23,653        -611        -2.6%"
## [53] " 53   West Pullman                   29,651     36,649       -6,998      -19.1%"
## [54] " 54   Riverdale                       6,482     9,809        -3,327      -33.9%"
## [55] " 55   Hegewisch                       9,426     9,781         -355        -3.6%"
## [56] " 56   Garfield Ridge                 34,513     36,101       -1,588       -4.4%"
## [57] " 57   Archer Heights                 13,393     12,644        749         5.9%" 
## [58] " 58   Brighton Park                  45,368     44,912        456         1.0%" 
## [59] " 59   McKinley Park                  15,612     15,962        -350        -2.2%"
## [60] " 60   Bridgeport                     31,977     33,694       -1,717       -5.1%"
## [61] " 61   New City                       44,377     51,721       -7,344      -14.2%"
## [62] " 62   West Elsdon                    18,109     15,921       2,188       13.7%" 
## [63] " 63   Gage Park                      39,894     39,193        701         1.8%" 
## [64] " 64   Clearing                       23,139     22,331        808         3.6%" 
## [65] " 65   West Lawn                      33,355     29,235       4,120       14.1%" 
## [66] " 66   Chicago Lawn                   55,628     61,412       -5,784       -9.4%"
## [67] " 67   West Englewood                 35,505     45,282       -9,777      -21.6%"
## [68] " 68   Englewood                      30,654     40,222       -9,568      -23.8%"
## [69] " 69   Greater Grand Crossing         32,602     38,619       -6,017      -15.6%"
## [70] " 70   Ashburn                        41,081     39,584       1,497        3.8%" 
## [71] " 71   Auburn Gresham                 48,743     55,928       -7,185      -12.8%"
## [72] " 72   Beverly                        20,034     21,992       -1,958       -8.9%"
## [73] " 73   Washington Heights             26,493     29,843       -3,350      -11.2%"
## [74] " 74   Mount Greenwood                19,093     18,820        273         1.5%" 
## [75] " 75   Morgan Park                    22,544     25,226       -2,682      -10.6%"
## [76] " 76   O'Hare                         12,756     11,956        800         6.7%" 
## [77] " 77   Edgewater                      56,521     62,198       -5,677       -9.1%"

1.4.5.3 Extracting the population values

We first initialize a vector of zeros to hold the population values. It is the preferred approach to initialize a vector first if one knows its size, rather than having it grow by appending rows or columns.

We use the vector command and specify the mode=“numeric” and give the length as the length of the list.

nnpop <- vector(mode = "numeric", length = length(nnlist))

We again will use a loop to process each element of the list (each line of the table) one by one. 1) We use the substr command to extract the characters between position 27 and 39 (these values were determined after taking a careful look at the structure of the table). 2) since the population values contain commas. We now do two things in one line of code. First, we use gsub to substitute the comma character by an empty “”. We turn the result into a numeric value by means of as.numeric. We then assign this number to position i of the vector. The resulting vector nnpop contains the population for each of the community areas.

for (i in (1:length(nnlist))) {
     popchar <- substr(nnlist[i],start=27,stop=43)
     popval <- as.numeric(gsub(",","",popchar))
     nnpop[i] <- popval
}

## Warning: NAs introduced by coercion

nnpop

##  [1] 54991 71942 56362 39493 31867 94368 64116 80484 11187 37023 25448 18508
## [13] 17931 51542 64124 53359 41932 13426 78743 25010 39262 73595 56323 81432
## [25] 98514 18001 20567 54881 35912 79288 35769 29283 21390 13391 18238  5918
## [37]  2876 21929 17841 11717 25681 25983 49767 31028 10185 31198  2916 13812
## [49] 44619  7325 15109 23042 29651  6482  9426 34513 13393 45368 15612 31977
## [61] 44377 18109 39894 23139 33355 55628 35505 30654    NA 41081 48743 20034
## [73] 26493 19093 22544 12756 56521

1.4.5.4 Creating a data frame with population values

nnid <- (1:length(nnlist))
nnid

##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
## [51] 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
## [76] 76 77

neighpop <- data.frame(as.integer(nnid),nnpop)
names(neighpop) <- c("NID","POP2010")
head(neighpop)

##   NID POP2010
## 1   1   54991
## 2   2   71942
## 3   3   56362
## 4   4   39493
## 5   5   31867
## 6   6   94368

Replace NA value with mean value

ind <- which(is.na(neighpop$POP2010))
neighpop$POP2010[ind] <- sapply(ind, function(i) with(neighpop, mean(c(POP2010[i-1], POP2010[i+1]))))

neighpop

##    NID POP2010
## 1    1 54991.0
## 2    2 71942.0
## 3    3 56362.0
## 4    4 39493.0
## 5    5 31867.0
## 6    6 94368.0
## 7    7 64116.0
## 8    8 80484.0
## 9    9 11187.0
## 10  10 37023.0
## 11  11 25448.0
## 12  12 18508.0
## 13  13 17931.0
## 14  14 51542.0
## 15  15 64124.0
## 16  16 53359.0
## 17  17 41932.0
## 18  18 13426.0
## 19  19 78743.0
## 20  20 25010.0
## 21  21 39262.0
## 22  22 73595.0
## 23  23 56323.0
## 24  24 81432.0
## 25  25 98514.0
## 26  26 18001.0
## 27  27 20567.0
## 28  28 54881.0
## 29  29 35912.0
## 30  30 79288.0
## 31  31 35769.0
## 32  32 29283.0
## 33  33 21390.0
## 34  34 13391.0
## 35  35 18238.0
## 36  36  5918.0
## 37  37  2876.0
## 38  38 21929.0
## 39  39 17841.0
## 40  40 11717.0
## 41  41 25681.0
## 42  42 25983.0
## 43  43 49767.0
## 44  44 31028.0
## 45  45 10185.0
## 46  46 31198.0
## 47  47  2916.0
## 48  48 13812.0
## 49  49 44619.0
## 50  50  7325.0
## 51  51 15109.0
## 52  52 23042.0
## 53  53 29651.0
## 54  54  6482.0
## 55  55  9426.0
## 56  56 34513.0
## 57  57 13393.0
## 58  58 45368.0
## 59  59 15612.0
## 60  60 31977.0
## 61  61 44377.0
## 62  62 18109.0
## 63  63 39894.0
## 64  64 23139.0
## 65  65 33355.0
## 66  66 55628.0
## 67  67 35505.0
## 68  68 30654.0
## 69  69 35867.5
## 70  70 41081.0
## 71  71 48743.0
## 72  72 20034.0
## 73  73 26493.0
## 74  74 19093.0
## 75  75 22544.0
## 76  76 12756.0
## 77  77 56521.0

str(neighpop)

## 'data.frame':    77 obs. of  2 variables:
##  $ NID    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ POP2010: num  54991 71942 56362 39493 31867 ...

1.4.5.5 Mapping Community Area Abandoned Vehicles Per Capita

Computing abandoned vehicles per capita

chicago.comm <- left_join(chicago.comm,neighpop, by = c("area_num_1" = "NID"))
head(chicago.comm)

## Simple feature collection with 6 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 441440.4 ymin: 4627153 xmax: 451817.1 ymax: 4648971
## Projected CRS: WGS 84 / UTM zone 16N
## # A tibble: 6 × 12
##   community  area  shape_area perimeter area_num_1 area_numbe comarea_id comarea
##   <chr>      <chr> <chr>      <chr>          <int> <chr>      <chr>      <chr>  
## 1 DOUGLAS    0     46004621.… 0                 35 35         0          0      
## 2 OAKLAND    0     16913961.… 0                 36 36         0          0      
## 3 FULLER PA… 0     19916704.… 0                 37 37         0          0      
## 4 GRAND BOU… 0     48492503.… 0                 38 38         0          0      
## 5 KENWOOD    0     29071741.… 0                 39 39         0          0      
## 6 LINCOLN S… 0     71352328.… 0                  4 4          0          0      
## # … with 4 more variables: shape_len <chr>, geometry <MULTIPOLYGON [m]>,
## #   AGG.COUNT <int>, POP2010 <dbl>

str(chicago.comm)

## sf [77 × 12] (S3: sf/tbl_df/tbl/data.frame)
##  $ community : chr [1:77] "DOUGLAS" "OAKLAND" "FULLER PARK" "GRAND BOULEVARD" ...
##  $ area      : chr [1:77] "0" "0" "0" "0" ...
##  $ shape_area: chr [1:77] "46004621.1581" "16913961.0408" "19916704.8692" "48492503.1554" ...
##  $ perimeter : chr [1:77] "0" "0" "0" "0" ...
##  $ area_num_1: int [1:77] 35 36 37 38 39 4 40 41 42 1 ...
##  $ area_numbe: chr [1:77] "35" "36" "37" "38" ...
##  $ comarea_id: chr [1:77] "0" "0" "0" "0" ...
##  $ comarea   : chr [1:77] "0" "0" "0" "0" ...
##  $ shape_len : chr [1:77] "31027.0545098" "19565.5061533" "25339.0897503" "28196.8371573" ...
##  $ geometry  :sfc_MULTIPOLYGON of length 77; first list element: List of 1
##   ..$ :List of 1
##   .. ..$ : num [1:352, 1:2] 449430 449429 449428 449427 449427 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  $ AGG.COUNT : int [1:77] 8 3 4 12 22 32 2 13 30 67 ...
##  $ POP2010   : num [1:77] 18238 5918 2876 21929 17841 ...
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:11] "community" "area" "shape_area" "perimeter" ...

chicago.comm <- chicago.comm %>% mutate(vehpcap = (AGG.COUNT / POP2010) * 1000) 
head(chicago.comm)

## Simple feature collection with 6 features and 12 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 441440.4 ymin: 4627153 xmax: 451817.1 ymax: 4648971
## Projected CRS: WGS 84 / UTM zone 16N
## # A tibble: 6 × 13
##   community  area  shape_area perimeter area_num_1 area_numbe comarea_id comarea
##   <chr>      <chr> <chr>      <chr>          <int> <chr>      <chr>      <chr>  
## 1 DOUGLAS    0     46004621.… 0                 35 35         0          0      
## 2 OAKLAND    0     16913961.… 0                 36 36         0          0      
## 3 FULLER PA… 0     19916704.… 0                 37 37         0          0      
## 4 GRAND BOU… 0     48492503.… 0                 38 38         0          0      
## 5 KENWOOD    0     29071741.… 0                 39 39         0          0      
## 6 LINCOLN S… 0     71352328.… 0                  4 4          0          0      
## # … with 5 more variables: shape_len <chr>, geometry <MULTIPOLYGON [m]>,
## #   AGG.COUNT <int>, POP2010 <dbl>, vehpcap <dbl>

1.4.5.6 Final choropleth map

tm_shape(chicago.comm) +
  tm_polygons("vehpcap")

2 Refernence

Spatial Data Handling

Spatial Data Handling

July 31, 2022