library(tidyverse)
library(lubridate)
library(sf)
library(tmap)
library(pdftools)
library(RSocrata)

# Socrata CSV endpoint on the City of Chicago data portal; the rows
# previewed below are abandoned-vehicle complaints.
socrata.file <- "https://data.cityofchicago.org/resource/suj7-cg3j.csv"

# Download the full table into a data frame (network access required).
vehicle.data <- read.socrata(socrata.file)

# Preview the first rows.
head(vehicle.data)
## creation_date status completion_date service_request_number
## 1 2011-01-01 Completed - Dup 2011-01-07 11-00002779
## 2 2011-01-01 Completed - Dup 2011-01-20 11-00003001
## 3 2011-01-01 Completed - Dup 2011-01-21 11-00003309
## 4 2011-01-01 Completed - Dup 2011-01-21 11-00003316
## 5 2011-01-01 Completed 2011-01-05 11-00001976
## 6 2011-01-01 Completed 2011-01-05 11-00002291
## type_of_service_request
## 1 Abandoned Vehicle Complaint
## 2 Abandoned Vehicle Complaint
## 3 Abandoned Vehicle Complaint
## 4 Abandoned Vehicle Complaint
## 5 Abandoned Vehicle Complaint
## 6 Abandoned Vehicle Complaint
## license_plate vehicle_make_model
## 1 REAR PLATE STARTS W/848 AND FRONT PLATE STARTS W/ K Isuzu
## 2 9381880 Toyota
## 3 MI S CS860 Jeep/Cherokee
## 4 MI SCS860
## 5 H924236 Ford
## 6 810 LYB WISCONSIN PLATES Mercury
## vehicle_color current_activity most_recent_action
## 1 Red
## 2 Silver
## 3 Gold
## 4 Gold
## 5 White
## 6 Green
## how_many_days_has_the_vehicle_been_reported_as_parked_ street_address
## 1 24 5629 N KEDVALE AVE
## 2 NA 2053 N KILBOURN AVE
## 3 NA 736 W BUENA AVE
## 4 NA 736 W BUENA AVE
## 5 60 6059 S KOMENSKY AVE
## 6 NA 4651 S WASHTENAW AVE
## zip_code x_coordinate y_coordinate ward police_district community_area ssa
## 1 60646 1147717 1937054 39 17 13 NA
## 2 60639 1146056 1913269 31 25 20 NA
## 3 60613 1170576 1928214 46 23 3 NA
## 4 60613 1170576 1928214 46 23 3 NA
## 5 60629 1150408 1864110 13 8 65 3
## 6 60632 1159150 1873712 12 9 58 NA
## latitude longitude location
## 1 41.98368 -87.73197 POINT (41.983680361597564 -87.7319663736746)
## 2 41.91859 -87.73868 POINT (41.91858774162382 -87.73868431751842)
## 3 41.95861 -87.64888 POINT (41.95860696269331 -87.64887590959788)
## 4 41.95861 -87.64888 POINT (41.95860696269331 -87.64887590959788)
## 5 41.78237 -87.72394 POINT (41.78237428405976 -87.72394038021173)
## 6 41.80864 -87.69163 POINT (41.80863500843091 -87.69162625248853)
## location_address location_city location_state location_zip
## 1 <NA> <NA> <NA> <NA>
## 2 <NA> <NA> <NA> <NA>
## 3 <NA> <NA> <NA> <NA>
## 4 <NA> <NA> <NA> <NA>
## 5 <NA> <NA> <NA> <NA>
## 6 <NA> <NA> <NA> <NA>
dim(vehicle.data)## [1] 261486 26
class(vehicle.data$creation_date)## [1] "POSIXct" "POSIXt"
# Keep only the requests created in September 2016. creation_date is
# POSIXct, so lubridate's year() and month() can be applied to it directly.
vehicle.sept16 <- filter(
  vehicle.data,
  year(creation_date) == 2016,
  month(creation_date) == 9
)
head(vehicle.sept16)## creation_date status completion_date service_request_number
## 1 2016-09-01 Completed - Dup 2016-09-01 16-06192603
## 2 2016-09-01 Completed - Dup 2016-09-01 16-06192662
## 3 2016-09-01 Completed - Dup 2016-09-01 16-06193608
## 4 2016-09-01 Completed - Dup 2016-09-01 16-06194284
## 5 2016-09-01 Completed - Dup 2016-09-01 16-06194594
## 6 2016-09-01 Completed - Dup 2016-09-01 16-06197569
## type_of_service_request license_plate vehicle_make_model vehicle_color
## 1 Abandoned Vehicle Complaint UNKNOWN Chevrolet White
## 2 Abandoned Vehicle Complaint UNKNOWN Green
## 3 Abandoned Vehicle Complaint UKNOWN Gray
## 4 Abandoned Vehicle Complaint NO PLATES Ford Blue
## 5 Abandoned Vehicle Complaint
## 6 Abandoned Vehicle Complaint
## current_activity most_recent_action
## 1 FVI - Outcome Create Work Order
## 2 FVI - Outcome Create Work Order
## 3 FVI - Outcome Create Work Order
## 4 FVI - Outcome Create Work Order
## 5 FVI - Outcome Create Work Order
## 6 FVI - Outcome Create Work Order
## how_many_days_has_the_vehicle_been_reported_as_parked_ street_address
## 1 14 3710 W IOWA ST
## 2 40 5240 S MAYFIELD AVE
## 3 7 8000 S ALBANY AVE
## 4 30 8654 W CATHERINE AVE
## 5 NA 4315 N MONTICELLO AVE
## 6 NA 2241 N MULLIGAN AVE
## zip_code x_coordinate y_coordinate ward police_district community_area ssa
## 1 60651 1151452 1905748 27 11 23 NA
## 2 60638 1137921 1869254 14 8 56 NA
## 3 60652 1157102 1851405 18 8 70 NA
## 4 60656 1117638 1934535 41 16 76 NA
## 5 60618 1151283 1928434 35 17 16 NA
## 6 60639 1133710 1914324 36 25 19 NA
## latitude longitude location
## 1 41.89736 -87.71933 POINT (41.89736153676566 -87.71933325878982)
## 2 41.79688 -87.76990 POINT (41.796881421903066 -87.76989633815052)
## 3 41.74792 -87.70005 POINT (41.74792366108626 -87.70004701460941)
## 4 41.97694 -87.84349 POINT (41.97694235046974 -87.8434945723464)
## 5 41.95972 -87.71907 POINT (41.95972327912134 -87.71906810908936)
## 6 41.92154 -87.78402 POINT (41.92154133910697 -87.78401648793171)
## location_address location_city location_state location_zip
## 1 <NA> <NA> <NA> <NA>
## 2 <NA> <NA> <NA> <NA>
## 3 <NA> <NA> <NA> <NA>
## 4 <NA> <NA> <NA> <NA>
## 5 <NA> <NA> <NA> <NA>
## 6 <NA> <NA> <NA> <NA>
dim(vehicle.sept16)## [1] 2637 26
Check the column names:
names(vehicle.sept16)## [1] "creation_date"
## [2] "status"
## [3] "completion_date"
## [4] "service_request_number"
## [5] "type_of_service_request"
## [6] "license_plate"
## [7] "vehicle_make_model"
## [8] "vehicle_color"
## [9] "current_activity"
## [10] "most_recent_action"
## [11] "how_many_days_has_the_vehicle_been_reported_as_parked_"
## [12] "street_address"
## [13] "zip_code"
## [14] "x_coordinate"
## [15] "y_coordinate"
## [16] "ward"
## [17] "police_district"
## [18] "community_area"
## [19] "ssa"
## [20] "latitude"
## [21] "longitude"
## [22] "location"
## [23] "location_address"
## [24] "location_city"
## [25] "location_state"
## [26] "location_zip"
Keep some columns for analysis: To keep things simple, we will only keep community_area, latitude and longitude, and turn them into comm, lat and lon. The new data set is vehicles.final. Note that to rename a variable, the new name is listed first, on the left hand side of the equal sign, and the old name is on the right hand side. We check the result with the head command.
# Reduce the table to the three fields used from here on, renaming them
# on the way (select() takes new_name = old_name).
vehicles.final <- select(
  vehicle.sept16,
  comm = community_area,
  lat = latitude,
  lon = longitude
)
head(vehicles.final)## comm lat lon
## 1 23 41.89736 -87.71933
## 2 56 41.79688 -87.76990
## 3 70 41.74792 -87.70005
## 4 76 41.97694 -87.84349
## 5 16 41.95972 -87.71907
## 6 19 41.92154 -87.78402
Missing coordinates
# Drop rows with a missing coordinate. Both columns are checked because
# st_as_sf() refuses NA values in either of the coordinate columns.
vehicle.coord <- vehicles.final %>%
  filter(!is.na(lat), !is.na(lon))
dim(vehicle.coord)## [1] 2635 3
As it turns out, the two rows we noticed above were the only two with missing coordinates (the number of rows went from 2,637 to 2,635).
The sf package turns a non-spatial object like a data frame into a simple features spatial object by means of the st_as_sf function
# Promote the plain data frame to an sf point layer. Coordinates are
# supplied as x = lon, y = lat in EPSG:4326; agr = "constant" sets the
# attribute-geometry relationship of the remaining column.
vehicle.points <- vehicle.coord %>%
  st_as_sf(coords = c("lon", "lat"), crs = 4326, agr = "constant")
class(vehicle.points)## [1] "sf" "data.frame"
Quick plot
plot(vehicle.points)
We can also do a quick check of the projection information using the st_crs function.
st_crs(vehicle.points)## Coordinate Reference System:
## User input: EPSG:4326
## wkt:
## GEOGCRS["WGS 84",
## ENSEMBLE["World Geodetic System 1984 ensemble",
## MEMBER["World Geodetic System 1984 (Transit)"],
## MEMBER["World Geodetic System 1984 (G730)"],
## MEMBER["World Geodetic System 1984 (G873)"],
## MEMBER["World Geodetic System 1984 (G1150)"],
## MEMBER["World Geodetic System 1984 (G1674)"],
## MEMBER["World Geodetic System 1984 (G1762)"],
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]],
## ENSEMBLEACCURACY[2.0]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["geodetic latitude (Lat)",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["geodetic longitude (Lon)",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## USAGE[
## SCOPE["Horizontal component of 3D system."],
## AREA["World."],
## BBOX[-90,-180,90,180]],
## ID["EPSG",4326]]
Problem: As it turns out, some of the points have missing community area information, which is a critical element to compute the number of abandoned cars at that scale. Task: Here, we will exploit some of the GIS functionality in sf to carry out a spatial join. This boils down to: 1) identifying which points belong to each community area (a so-called point in polygon query) and 2) assigning the corresponding community area identifier to each point.
# Community area boundaries, served as GeoJSON by the Chicago data portal.
comm.file <- "https://data.cityofchicago.org/resource/igwz-8jzy.geojson"

# Read the boundaries straight from the URL into an sf object.
chicago.comm <- comm.file %>% read_sf()
class(chicago.comm)## [1] "sf" "tbl_df" "tbl" "data.frame"
We check the projection information using st_crs.
st_crs(chicago.comm)## Coordinate Reference System:
## User input: WGS 84
## wkt:
## GEOGCRS["WGS 84",
## DATUM["World Geodetic System 1984",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["geodetic latitude (Lat)",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["geodetic longitude (Lon)",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4326]]
# Quick visual check of the polygons, then a look at the attribute table.
plot(chicago.comm)
head(chicago.comm)
## Simple feature collection with 6 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -87.7069 ymin: 41.79448 xmax: -87.58001 ymax: 41.99076
## Geodetic CRS: WGS 84
## # A tibble: 6 × 10
## community area shape_area perimeter area_num_1 area_numbe comarea_id comarea
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 DOUGLAS 0 46004621.… 0 35 35 0 0
## 2 OAKLAND 0 16913961.… 0 36 36 0 0
## 3 FULLER PA… 0 19916704.… 0 37 37 0 0
## 4 GRAND BOU… 0 48492503.… 0 38 38 0 0
## 5 KENWOOD 0 29071741.… 0 39 39 0 0
## 6 LINCOLN S… 0 71352328.… 0 4 4 0 0
## # … with 2 more variables: shape_len <chr>, geometry <MULTIPOLYGON [°]>
str(chicago.comm)## sf [77 × 10] (S3: sf/tbl_df/tbl/data.frame)
## $ community : chr [1:77] "DOUGLAS" "OAKLAND" "FULLER PARK" "GRAND BOULEVARD" ...
## $ area : chr [1:77] "0" "0" "0" "0" ...
## $ shape_area: chr [1:77] "46004621.1581" "16913961.0408" "19916704.8692" "48492503.1554" ...
## $ perimeter : chr [1:77] "0" "0" "0" "0" ...
## $ area_num_1: chr [1:77] "35" "36" "37" "38" ...
## $ area_numbe: chr [1:77] "35" "36" "37" "38" ...
## $ comarea_id: chr [1:77] "0" "0" "0" "0" ...
## $ comarea : chr [1:77] "0" "0" "0" "0" ...
## $ shape_len : chr [1:77] "31027.0545098" "19565.5061533" "25339.0897503" "28196.8371573" ...
## $ geometry :sfc_MULTIPOLYGON of length 77; first list element: List of 1
## ..$ :List of 1
## .. ..$ : num [1:352, 1:2] -87.6 -87.6 -87.6 -87.6 -87.6 ...
## ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA
## ..- attr(*, "names")= chr [1:9] "community" "area" "shape_area" "perimeter" ...
Before moving on to the spatial join operation, we will convert both: 1) the community area boundaries and 2) the vehicle points
to the same projection, using the st_transform command.
We assign the UTM (Universal Transverse Mercator) zone 16N, which is the proper one for Chicago, with an EPSG code of 32616. After the projection transformation, we check the result using st_crs.
# Reproject the community area polygons to EPSG:32616.
chicago.comm <- chicago.comm %>% st_transform(crs = 32616)
st_crs(chicago.comm)## Coordinate Reference System:
## User input: EPSG:32616
## wkt:
## PROJCRS["WGS 84 / UTM zone 16N",
## BASEGEOGCRS["WGS 84",
## ENSEMBLE["World Geodetic System 1984 ensemble",
## MEMBER["World Geodetic System 1984 (Transit)"],
## MEMBER["World Geodetic System 1984 (G730)"],
## MEMBER["World Geodetic System 1984 (G873)"],
## MEMBER["World Geodetic System 1984 (G1150)"],
## MEMBER["World Geodetic System 1984 (G1674)"],
## MEMBER["World Geodetic System 1984 (G1762)"],
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]],
## ENSEMBLEACCURACY[2.0]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4326]],
## CONVERSION["UTM zone 16N",
## METHOD["Transverse Mercator",
## ID["EPSG",9807]],
## PARAMETER["Latitude of natural origin",0,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",-87,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["Scale factor at natural origin",0.9996,
## SCALEUNIT["unity",1],
## ID["EPSG",8805]],
## PARAMETER["False easting",500000,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",0,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["(E)",east,
## ORDER[1],
## LENGTHUNIT["metre",1]],
## AXIS["(N)",north,
## ORDER[2],
## LENGTHUNIT["metre",1]],
## USAGE[
## SCOPE["Engineering survey, topographic mapping."],
## AREA["Between 90°W and 84°W, northern hemisphere between equator and 84°N, onshore and offshore. Belize. Canada - Manitoba; Nunavut; Ontario. Costa Rica. Cuba. Ecuador - Galapagos. El Salvador. Guatemala. Honduras. Mexico. Nicaragua. United States (USA)."],
## BBOX[0,-90,84,-84]],
## ID["EPSG",32616]]
# Reproject the vehicle points to EPSG:32616, the same CRS used for the
# community area polygons.
vehicle.points <- vehicle.points %>% st_transform(crs = 32616)
st_crs(vehicle.points)## Coordinate Reference System:
## User input: EPSG:32616
## wkt:
## PROJCRS["WGS 84 / UTM zone 16N",
## BASEGEOGCRS["WGS 84",
## ENSEMBLE["World Geodetic System 1984 ensemble",
## MEMBER["World Geodetic System 1984 (Transit)"],
## MEMBER["World Geodetic System 1984 (G730)"],
## MEMBER["World Geodetic System 1984 (G873)"],
## MEMBER["World Geodetic System 1984 (G1150)"],
## MEMBER["World Geodetic System 1984 (G1674)"],
## MEMBER["World Geodetic System 1984 (G1762)"],
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]],
## ENSEMBLEACCURACY[2.0]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4326]],
## CONVERSION["UTM zone 16N",
## METHOD["Transverse Mercator",
## ID["EPSG",9807]],
## PARAMETER["Latitude of natural origin",0,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",-87,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["Scale factor at natural origin",0.9996,
## SCALEUNIT["unity",1],
## ID["EPSG",8805]],
## PARAMETER["False easting",500000,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",0,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["(E)",east,
## ORDER[1],
## LENGTHUNIT["metre",1]],
## AXIS["(N)",north,
## ORDER[2],
## LENGTHUNIT["metre",1]],
## USAGE[
## SCOPE["Engineering survey, topographic mapping."],
## AREA["Between 90°W and 84°W, northern hemisphere between equator and 84°N, onshore and offshore. Belize. Canada - Manitoba; Nunavut; Ontario. Costa Rica. Cuba. Ecuador - Galapagos. El Salvador. Guatemala. Honduras. Mexico. Nicaragua. United States (USA)."],
## BBOX[0,-90,84,-84]],
## ID["EPSG",32616]]
# Point-in-polygon join: attach to each vehicle point the area_num_1 of
# the community area it falls in. Only that one attribute (plus the
# geometry) is taken from the polygon layer.
comm.pts <- vehicle.points %>%
  st_join(chicago.comm["area_num_1"])
head(comm.pts)## Simple feature collection with 6 features and 2 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 430118.3 ymin: 4622026 xmax: 441795.4 ymax: 4647560
## Projected CRS: WGS 84 / UTM zone 16N
## comm area_num_1 geometry
## 1 23 23 POINT (440330.7 4638631)
## 2 56 56 POINT (436036.4 4627511)
## 3 70 70 POINT (441795.4 4622026)
## 4 76 76 POINT (430118.3 4647560)
## 5 16 16 POINT (440410.8 4645554)
## 6 19 19 POINT (434989.7 4641362)
As we can see, the community area in comm matches the entry in area_num_1. However, there is one more issue to deal with. Upon closer examination, we find that the area_num_1 variable is not numeric using the is.numeric check.
is.numeric(comm.pts$area_num_1)## [1] FALSE
# The joined area_num_1 is not numeric (see the is.numeric check above);
# convert it to integer.
comm.pts$area_num_1 <- as.integer(comm.pts$area_num_1)
is.integer(comm.pts$area_num_1)## [1] TRUE
The same problem occurs in the chicago.comm data set, which can cause trouble later on when we will join it with other data. Therefore, we turn it into an integer as well.
chicago.comm$area_num_1 <- as.integer(chicago.comm$area_num_1)
Task: We now need to count the number of points in each polygon. Solution: We proceed in two steps. 1) First, we illustrate how we can move back from the simple features spatial points object to a simple data frame by stripping the geometry column. This is accomplished by setting st_geometry to NULL. We check the class of the new object to make sure it is no longer a simple feature. 2) We next take advantage of the tidyverse count function to create a new data frame with the identifier of the community area and the number of points contained in each community area.
# Strip the geometry column so the points become an ordinary data frame.
st_geometry(comm.pts) <- NULL
class(comm.pts)## [1] "data.frame"
# Tally the points per community area; count() names the tally column n.
veh.cnts <- count(comm.pts, area_num_1)
head(veh.cnts)## area_num_1 n
## 1 1 67
## 2 2 89
## 3 3 21
## 4 4 32
## 5 5 18
## 6 6 19
Change the column names:
# Rename the key to comm and the tally to AGG.COUNT
# (rename() takes new_name = old_name).
veh.cnts <- rename(veh.cnts, comm = area_num_1, AGG.COUNT = n)
head(veh.cnts)## comm AGG.COUNT
## 1 1 67
## 2 2 89
## 3 3 21
## 4 4 32
## 5 5 18
## 6 6 19
At this point, we have: 1) a polygon layer with the community area boundaries and some identifiers (chicago.comm) 2) a data frame with the community identifier and the aggregate vehicle count (veh.cnts).
In order to map the vehicle counts by community area, we need to join the two tables using the left_join command, with area_num_1 as the key for the first table (the community area boundaries) and comm as the key for the second table (the vehicle counts).
# Attach the per-area vehicle counts to the polygon layer. A left join
# keeps every community area; an area with no matching row in veh.cnts
# gets NA in AGG.COUNT.
chicago.comm <- chicago.comm %>%
  left_join(veh.cnts, by = c("area_num_1" = "comm"))
head(chicago.comm)## Simple feature collection with 6 features and 10 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 441440.4 ymin: 4627153 xmax: 451817.1 ymax: 4648971
## Projected CRS: WGS 84 / UTM zone 16N
## # A tibble: 6 × 11
## community area shape_area perimeter area_num_1 area_numbe comarea_id comarea
## <chr> <chr> <chr> <chr> <int> <chr> <chr> <chr>
## 1 DOUGLAS 0 46004621.… 0 35 35 0 0
## 2 OAKLAND 0 16913961.… 0 36 36 0 0
## 3 FULLER PA… 0 19916704.… 0 37 37 0 0
## 4 GRAND BOU… 0 48492503.… 0 38 38 0 0
## 5 KENWOOD 0 29071741.… 0 39 39 0 0
## 6 LINCOLN S… 0 71352328.… 0 4 4 0 0
## # … with 3 more variables: shape_len <chr>, geometry <MULTIPOLYGON [m]>,
## # AGG.COUNT <int>
Basic choropleth map
tm_shape(chicago.comm) +
tm_polygons("AGG.COUNT")
However, this map can be highly misleading since it pertains to a so-called spatially extensive variable, such as a count. Even if every area had the same risk of having abandoned vehicles, larger community areas would have higher counts. In other words, since the count is directly related to the size of the area, it does not provide a proper indication of the risk.
Instead, we should map a spatially intensive variable, which is corrected for the size of the unit. For example, this can be achieved by expressing the variable as a density (counts per area), or as some other ratio, such as the counts per capita. In order to calculate this ratio, we first need to obtain the population for each community area.
# 2010 and 2000 census populations by community area, published as a PDF.
pdf.file <- "https://www.cityofchicago.org/content/dam/city/depts/zlup/Zoning_Main_Page/Publications/Census_2010_Community_Area_Profiles/Census_2010_and_2000_CA_Populations.pdf"

# Extract the text: the result is a character vector with one long
# string per page (two pages for this document).
pop.dat <- pdf.file %>% pdf_text()
class(pop.dat)## [1] "character"
length(pop.dat)## [1] 2
Firstly, we start by initializing a vector (nnlist) with an empty character, and confirm that it is indeed initialized.
nnlist <- ""
nnlist## [1] ""
Next, we create a list of strings, one for each line in the table, by using the strsplit operation. This splits the long string into a list of one string for each line, by using the return character as the separator (the value for the split argument).
The resulting list, ppage, holds a single character vector of 45 strings (ppage[[1]]), matching the lines on the first page of the pdf file.
ppage <- strsplit(pop.dat[[1]],split="\n")
ppage[[1]]## [1] " CITY OF CHICAGO"
## [2] " CENSUS 2010 AND 2000"
## [3] ""
## [4] " Population"
## [5] "Num Community Area 2010 2,000 Difference Percentage"
## [6] " 1 Rogers Park 54,991 63,484 -8,493 -13.4%"
## [7] " 2 West Ridge 71,942 73,199 -1,257 -1.7%"
## [8] " 3 Uptown 56,362 63,551 -7,189 -11.3%"
## [9] " 4 Lincoln Square 39,493 44,574 -5,081 -11.4%"
## [10] " 5 North Center 31,867 31,895 -28 -0.1%"
## [11] " 6 Lake View 94,368 94,817 -449 -0.5%"
## [12] " 7 Lincoln Park 64,116 64,320 -204 -0.3%"
## [13] " 8 Near North Side 80,484 72,811 7,673 10.5%"
## [14] " 9 Edison Park 11,187 11,259 -72 -0.6%"
## [15] " 10 Norwood Park 37,023 37,669 -646 -1.7%"
## [16] " 11 Jefferson Park 25,448 25,859 -411 -1.6%"
## [17] " 12 Forest Glen 18,508 18,165 343 1.9%"
## [18] " 13 North Park 17,931 18,514 -583 -3.1%"
## [19] " 14 Albany Park 51,542 57,655 -6,113 -10.6%"
## [20] " 15 Portage Park 64,124 65,340 -1,216 -1.9%"
## [21] " 16 Irving Park 53,359 58,643 -5,284 -9.0%"
## [22] " 17 Dunning 41,932 42,164 -232 -0.6%"
## [23] " 18 Montclare 13,426 12,646 780 6.2%"
## [24] " 19 Belmont Cragin 78,743 78,144 599 0.8%"
## [25] " 20 Hermosa 25,010 26,908 -1,898 -7.1%"
## [26] " 21 Avondale 39,262 43,083 -3,821 -8.9%"
## [27] " 22 Logan Square 73,595 82,715 -9,120 -11.0%"
## [28] " 23 Humboldt Park 56,323 65,836 -9,513 -14.4%"
## [29] " 24 West Town 81,432 87,435 -6,003 -6.9%"
## [30] " 25 Austin 98,514 117,527 -19,013 -16.2%"
## [31] " 26 West Garfield Park 18,001 23,019 -5,018 -21.8%"
## [32] " 27 East Garfield Park 20,567 20,881 -314 -1.5%"
## [33] " 28 Near West Side 54,881 46,419 8,462 18.2%"
## [34] " 29 North Lawndale 35,912 41,768 -5,856 -14.0%"
## [35] " 30 South Lawndale 79,288 91,071 -11,783 -12.9%"
## [36] " 31 Lower West Side 35,769 44,031 -8,262 -18.8%"
## [37] " 32 Loop 29,283 16,388 12,895 78.7%"
## [38] " 33 Near South Side 21,390 9,509 11,881 124.9%"
## [39] " 34 Armour Square 13,391 12,032 1,359 11.3%"
## [40] " 35 Douglas 18,238 26,470 -8,232 -31.1%"
## [41] " 36 Oakland 5,918 6,110 -192 -3.1%"
## [42] " 37 Fuller Park 2,876 3,420 -544 -15.9%"
## [43] " 38 Grand Boulevard 21,929 28,006 -6,077 -21.7%"
## [44] " 39 Kenwood 17,841 18,363 -522 -2.8%"
## [45] " 40 Washington Park 11,717 14,146 -2,429 -17.2%"
Each element is one long string, corresponding to a table row. We remove the first five lines (using the - operation on the list elements 1 through 5): the four title lines and the column header. These first rows appear on each page, so we are safe to repeat this procedure for the second page (string) as well.
# Take the character vector of lines for this page and discard its first
# five entries (the title lines and the column header).
nni <- ppage[[1]]
nni <- nni[-seq_len(5)]
nni## [1] " 1 Rogers Park 54,991 63,484 -8,493 -13.4%"
## [2] " 2 West Ridge 71,942 73,199 -1,257 -1.7%"
## [3] " 3 Uptown 56,362 63,551 -7,189 -11.3%"
## [4] " 4 Lincoln Square 39,493 44,574 -5,081 -11.4%"
## [5] " 5 North Center 31,867 31,895 -28 -0.1%"
## [6] " 6 Lake View 94,368 94,817 -449 -0.5%"
## [7] " 7 Lincoln Park 64,116 64,320 -204 -0.3%"
## [8] " 8 Near North Side 80,484 72,811 7,673 10.5%"
## [9] " 9 Edison Park 11,187 11,259 -72 -0.6%"
## [10] " 10 Norwood Park 37,023 37,669 -646 -1.7%"
## [11] " 11 Jefferson Park 25,448 25,859 -411 -1.6%"
## [12] " 12 Forest Glen 18,508 18,165 343 1.9%"
## [13] " 13 North Park 17,931 18,514 -583 -3.1%"
## [14] " 14 Albany Park 51,542 57,655 -6,113 -10.6%"
## [15] " 15 Portage Park 64,124 65,340 -1,216 -1.9%"
## [16] " 16 Irving Park 53,359 58,643 -5,284 -9.0%"
## [17] " 17 Dunning 41,932 42,164 -232 -0.6%"
## [18] " 18 Montclare 13,426 12,646 780 6.2%"
## [19] " 19 Belmont Cragin 78,743 78,144 599 0.8%"
## [20] " 20 Hermosa 25,010 26,908 -1,898 -7.1%"
## [21] " 21 Avondale 39,262 43,083 -3,821 -8.9%"
## [22] " 22 Logan Square 73,595 82,715 -9,120 -11.0%"
## [23] " 23 Humboldt Park 56,323 65,836 -9,513 -14.4%"
## [24] " 24 West Town 81,432 87,435 -6,003 -6.9%"
## [25] " 25 Austin 98,514 117,527 -19,013 -16.2%"
## [26] " 26 West Garfield Park 18,001 23,019 -5,018 -21.8%"
## [27] " 27 East Garfield Park 20,567 20,881 -314 -1.5%"
## [28] " 28 Near West Side 54,881 46,419 8,462 18.2%"
## [29] " 29 North Lawndale 35,912 41,768 -5,856 -14.0%"
## [30] " 30 South Lawndale 79,288 91,071 -11,783 -12.9%"
## [31] " 31 Lower West Side 35,769 44,031 -8,262 -18.8%"
## [32] " 32 Loop 29,283 16,388 12,895 78.7%"
## [33] " 33 Near South Side 21,390 9,509 11,881 124.9%"
## [34] " 34 Armour Square 13,391 12,032 1,359 11.3%"
## [35] " 35 Douglas 18,238 26,470 -8,232 -31.1%"
## [36] " 36 Oakland 5,918 6,110 -192 -3.1%"
## [37] " 37 Fuller Park 2,876 3,420 -544 -15.9%"
## [38] " 38 Grand Boulevard 21,929 28,006 -6,077 -21.7%"
## [39] " 39 Kenwood 17,841 18,363 -522 -2.8%"
## [40] " 40 Washington Park 11,717 14,146 -2,429 -17.2%"
NEXT: To streamline the resulting data structure for further operations, we turn it into a simple vector by means of unlist.
This then allows us to concatenate the result to the current nnlist vector (initially, this contains just a single element with an empty character, after the first step it contains the empty character and the first page).
nnu <- unlist(nni)
nnlist <- c(nnlist, nnu)
nnlist ## [1] ""
## [2] " 1 Rogers Park 54,991 63,484 -8,493 -13.4%"
## [3] " 2 West Ridge 71,942 73,199 -1,257 -1.7%"
## [4] " 3 Uptown 56,362 63,551 -7,189 -11.3%"
## [5] " 4 Lincoln Square 39,493 44,574 -5,081 -11.4%"
## [6] " 5 North Center 31,867 31,895 -28 -0.1%"
## [7] " 6 Lake View 94,368 94,817 -449 -0.5%"
## [8] " 7 Lincoln Park 64,116 64,320 -204 -0.3%"
## [9] " 8 Near North Side 80,484 72,811 7,673 10.5%"
## [10] " 9 Edison Park 11,187 11,259 -72 -0.6%"
## [11] " 10 Norwood Park 37,023 37,669 -646 -1.7%"
## [12] " 11 Jefferson Park 25,448 25,859 -411 -1.6%"
## [13] " 12 Forest Glen 18,508 18,165 343 1.9%"
## [14] " 13 North Park 17,931 18,514 -583 -3.1%"
## [15] " 14 Albany Park 51,542 57,655 -6,113 -10.6%"
## [16] " 15 Portage Park 64,124 65,340 -1,216 -1.9%"
## [17] " 16 Irving Park 53,359 58,643 -5,284 -9.0%"
## [18] " 17 Dunning 41,932 42,164 -232 -0.6%"
## [19] " 18 Montclare 13,426 12,646 780 6.2%"
## [20] " 19 Belmont Cragin 78,743 78,144 599 0.8%"
## [21] " 20 Hermosa 25,010 26,908 -1,898 -7.1%"
## [22] " 21 Avondale 39,262 43,083 -3,821 -8.9%"
## [23] " 22 Logan Square 73,595 82,715 -9,120 -11.0%"
## [24] " 23 Humboldt Park 56,323 65,836 -9,513 -14.4%"
## [25] " 24 West Town 81,432 87,435 -6,003 -6.9%"
## [26] " 25 Austin 98,514 117,527 -19,013 -16.2%"
## [27] " 26 West Garfield Park 18,001 23,019 -5,018 -21.8%"
## [28] " 27 East Garfield Park 20,567 20,881 -314 -1.5%"
## [29] " 28 Near West Side 54,881 46,419 8,462 18.2%"
## [30] " 29 North Lawndale 35,912 41,768 -5,856 -14.0%"
## [31] " 30 South Lawndale 79,288 91,071 -11,783 -12.9%"
## [32] " 31 Lower West Side 35,769 44,031 -8,262 -18.8%"
## [33] " 32 Loop 29,283 16,388 12,895 78.7%"
## [34] " 33 Near South Side 21,390 9,509 11,881 124.9%"
## [35] " 34 Armour Square 13,391 12,032 1,359 11.3%"
## [36] " 35 Douglas 18,238 26,470 -8,232 -31.1%"
## [37] " 36 Oakland 5,918 6,110 -192 -3.1%"
## [38] " 37 Fuller Park 2,876 3,420 -544 -15.9%"
## [39] " 38 Grand Boulevard 21,929 28,006 -6,077 -21.7%"
## [40] " 39 Kenwood 17,841 18,363 -522 -2.8%"
## [41] " 40 Washington Park 11,717 14,146 -2,429 -17.2%"
We now repeat this operation for pop.dat[[2]]. More efficiently, we implement it as a loop, replacing i in turn by 1 and 2. This yields:
# Collect the table rows from every page of the PDF into one character
# vector. nnlist starts as a single empty string, which is stripped again
# after the loop.
nnlist <- ""
# Iterate over however many pages pdf_text() returned instead of a
# hard-coded 1:2.
for (i in seq_along(pop.dat)) {
  # Split page i into one string per printed line.
  ppage <- strsplit(pop.dat[[i]], split = "\n")
  nni <- ppage[[1]]
  # Drop the first five lines of the page (titles and column header).
  nni <- nni[-(1:5)]
  nnu <- unlist(nni)
  nnlist <- c(nnlist, nnu)
}
nnlist ## [1] ""
## [2] " 1 Rogers Park 54,991 63,484 -8,493 -13.4%"
## [3] " 2 West Ridge 71,942 73,199 -1,257 -1.7%"
## [4] " 3 Uptown 56,362 63,551 -7,189 -11.3%"
## [5] " 4 Lincoln Square 39,493 44,574 -5,081 -11.4%"
## [6] " 5 North Center 31,867 31,895 -28 -0.1%"
## [7] " 6 Lake View 94,368 94,817 -449 -0.5%"
## [8] " 7 Lincoln Park 64,116 64,320 -204 -0.3%"
## [9] " 8 Near North Side 80,484 72,811 7,673 10.5%"
## [10] " 9 Edison Park 11,187 11,259 -72 -0.6%"
## [11] " 10 Norwood Park 37,023 37,669 -646 -1.7%"
## [12] " 11 Jefferson Park 25,448 25,859 -411 -1.6%"
## [13] " 12 Forest Glen 18,508 18,165 343 1.9%"
## [14] " 13 North Park 17,931 18,514 -583 -3.1%"
## [15] " 14 Albany Park 51,542 57,655 -6,113 -10.6%"
## [16] " 15 Portage Park 64,124 65,340 -1,216 -1.9%"
## [17] " 16 Irving Park 53,359 58,643 -5,284 -9.0%"
## [18] " 17 Dunning 41,932 42,164 -232 -0.6%"
## [19] " 18 Montclare 13,426 12,646 780 6.2%"
## [20] " 19 Belmont Cragin 78,743 78,144 599 0.8%"
## [21] " 20 Hermosa 25,010 26,908 -1,898 -7.1%"
## [22] " 21 Avondale 39,262 43,083 -3,821 -8.9%"
## [23] " 22 Logan Square 73,595 82,715 -9,120 -11.0%"
## [24] " 23 Humboldt Park 56,323 65,836 -9,513 -14.4%"
## [25] " 24 West Town 81,432 87,435 -6,003 -6.9%"
## [26] " 25 Austin 98,514 117,527 -19,013 -16.2%"
## [27] " 26 West Garfield Park 18,001 23,019 -5,018 -21.8%"
## [28] " 27 East Garfield Park 20,567 20,881 -314 -1.5%"
## [29] " 28 Near West Side 54,881 46,419 8,462 18.2%"
## [30] " 29 North Lawndale 35,912 41,768 -5,856 -14.0%"
## [31] " 30 South Lawndale 79,288 91,071 -11,783 -12.9%"
## [32] " 31 Lower West Side 35,769 44,031 -8,262 -18.8%"
## [33] " 32 Loop 29,283 16,388 12,895 78.7%"
## [34] " 33 Near South Side 21,390 9,509 11,881 124.9%"
## [35] " 34 Armour Square 13,391 12,032 1,359 11.3%"
## [36] " 35 Douglas 18,238 26,470 -8,232 -31.1%"
## [37] " 36 Oakland 5,918 6,110 -192 -3.1%"
## [38] " 37 Fuller Park 2,876 3,420 -544 -15.9%"
## [39] " 38 Grand Boulevard 21,929 28,006 -6,077 -21.7%"
## [40] " 39 Kenwood 17,841 18,363 -522 -2.8%"
## [41] " 40 Washington Park 11,717 14,146 -2,429 -17.2%"
## [42] " 41 Hyde Park 25,681 29,920 -4,239 -14.2%"
## [43] " 42 Woodlawn 25,983 27,086 -1,103 -4.1%"
## [44] " 43 South Shore 49,767 61,556 -11,789 -19.2%"
## [45] " 44 Chatham 31,028 37,275 -6,247 -16.8%"
## [46] " 45 Avalon Park 10,185 11,147 -962 -8.6%"
## [47] " 46 South Chicago 31,198 38,596 -7,398 -19.2%"
## [48] " 47 Burnside 2,916 3,294 -378 -11.5%"
## [49] " 48 Calumet Heights 13,812 15,974 -2,162 -13.5%"
## [50] " 49 Roseland 44,619 52,723 -8,104 -15.4%"
## [51] " 50 Pullman 7,325 8,921 -1,596 -17.9%"
## [52] " 51 South Deering 15,109 16,990 -1,881 -11.1%"
## [53] " 52 East Side 23,042 23,653 -611 -2.6%"
## [54] " 53 West Pullman 29,651 36,649 -6,998 -19.1%"
## [55] " 54 Riverdale 6,482 9,809 -3,327 -33.9%"
## [56] " 55 Hegewisch 9,426 9,781 -355 -3.6%"
## [57] " 56 Garfield Ridge 34,513 36,101 -1,588 -4.4%"
## [58] " 57 Archer Heights 13,393 12,644 749 5.9%"
## [59] " 58 Brighton Park 45,368 44,912 456 1.0%"
## [60] " 59 McKinley Park 15,612 15,962 -350 -2.2%"
## [61] " 60 Bridgeport 31,977 33,694 -1,717 -5.1%"
## [62] " 61 New City 44,377 51,721 -7,344 -14.2%"
## [63] " 62 West Elsdon 18,109 15,921 2,188 13.7%"
## [64] " 63 Gage Park 39,894 39,193 701 1.8%"
## [65] " 64 Clearing 23,139 22,331 808 3.6%"
## [66] " 65 West Lawn 33,355 29,235 4,120 14.1%"
## [67] " 66 Chicago Lawn 55,628 61,412 -5,784 -9.4%"
## [68] " 67 West Englewood 35,505 45,282 -9,777 -21.6%"
## [69] " 68 Englewood 30,654 40,222 -9,568 -23.8%"
## [70] " 69 Greater Grand Crossing 32,602 38,619 -6,017 -15.6%"
## [71] " 70 Ashburn 41,081 39,584 1,497 3.8%"
## [72] " 71 Auburn Gresham 48,743 55,928 -7,185 -12.8%"
## [73] " 72 Beverly 20,034 21,992 -1,958 -8.9%"
## [74] " 73 Washington Heights 26,493 29,843 -3,350 -11.2%"
## [75] " 74 Mount Greenwood 19,093 18,820 273 1.5%"
## [76] " 75 Morgan Park 22,544 25,226 -2,682 -10.6%"
## [77] " 76 O'Hare 12,756 11,956 800 6.7%"
## [78] " 77 Edgewater 56,521 62,198 -5,677 -9.1%"
## [79] " Total 2,695,598 2,896,016 -200,418 -6.9%"
This is now a vector of 79 elements, each of which is a string. To clean things up, strip the first (empty) element, and the last element, which is nothing but the totals. We thus extract the elements from 2 to length - 1.
# Keep elements 2 through length - 1: this drops the leading empty string
# and the trailing "Total" row.
nnlist <- nnlist[seq(2, length(nnlist) - 1)]
nnlist## [1] " 1 Rogers Park 54,991 63,484 -8,493 -13.4%"
## [2] " 2 West Ridge 71,942 73,199 -1,257 -1.7%"
## [3] " 3 Uptown 56,362 63,551 -7,189 -11.3%"
## [4] " 4 Lincoln Square 39,493 44,574 -5,081 -11.4%"
## [5] " 5 North Center 31,867 31,895 -28 -0.1%"
## [6] " 6 Lake View 94,368 94,817 -449 -0.5%"
## [7] " 7 Lincoln Park 64,116 64,320 -204 -0.3%"
## [8] " 8 Near North Side 80,484 72,811 7,673 10.5%"
## [9] " 9 Edison Park 11,187 11,259 -72 -0.6%"
## [10] " 10 Norwood Park 37,023 37,669 -646 -1.7%"
## [11] " 11 Jefferson Park 25,448 25,859 -411 -1.6%"
## [12] " 12 Forest Glen 18,508 18,165 343 1.9%"
## [13] " 13 North Park 17,931 18,514 -583 -3.1%"
## [14] " 14 Albany Park 51,542 57,655 -6,113 -10.6%"
## [15] " 15 Portage Park 64,124 65,340 -1,216 -1.9%"
## [16] " 16 Irving Park 53,359 58,643 -5,284 -9.0%"
## [17] " 17 Dunning 41,932 42,164 -232 -0.6%"
## [18] " 18 Montclare 13,426 12,646 780 6.2%"
## [19] " 19 Belmont Cragin 78,743 78,144 599 0.8%"
## [20] " 20 Hermosa 25,010 26,908 -1,898 -7.1%"
## [21] " 21 Avondale 39,262 43,083 -3,821 -8.9%"
## [22] " 22 Logan Square 73,595 82,715 -9,120 -11.0%"
## [23] " 23 Humboldt Park 56,323 65,836 -9,513 -14.4%"
## [24] " 24 West Town 81,432 87,435 -6,003 -6.9%"
## [25] " 25 Austin 98,514 117,527 -19,013 -16.2%"
## [26] " 26 West Garfield Park 18,001 23,019 -5,018 -21.8%"
## [27] " 27 East Garfield Park 20,567 20,881 -314 -1.5%"
## [28] " 28 Near West Side 54,881 46,419 8,462 18.2%"
## [29] " 29 North Lawndale 35,912 41,768 -5,856 -14.0%"
## [30] " 30 South Lawndale 79,288 91,071 -11,783 -12.9%"
## [31] " 31 Lower West Side 35,769 44,031 -8,262 -18.8%"
## [32] " 32 Loop 29,283 16,388 12,895 78.7%"
## [33] " 33 Near South Side 21,390 9,509 11,881 124.9%"
## [34] " 34 Armour Square 13,391 12,032 1,359 11.3%"
## [35] " 35 Douglas 18,238 26,470 -8,232 -31.1%"
## [36] " 36 Oakland 5,918 6,110 -192 -3.1%"
## [37] " 37 Fuller Park 2,876 3,420 -544 -15.9%"
## [38] " 38 Grand Boulevard 21,929 28,006 -6,077 -21.7%"
## [39] " 39 Kenwood 17,841 18,363 -522 -2.8%"
## [40] " 40 Washington Park 11,717 14,146 -2,429 -17.2%"
## [41] " 41 Hyde Park 25,681 29,920 -4,239 -14.2%"
## [42] " 42 Woodlawn 25,983 27,086 -1,103 -4.1%"
## [43] " 43 South Shore 49,767 61,556 -11,789 -19.2%"
## [44] " 44 Chatham 31,028 37,275 -6,247 -16.8%"
## [45] " 45 Avalon Park 10,185 11,147 -962 -8.6%"
## [46] " 46 South Chicago 31,198 38,596 -7,398 -19.2%"
## [47] " 47 Burnside 2,916 3,294 -378 -11.5%"
## [48] " 48 Calumet Heights 13,812 15,974 -2,162 -13.5%"
## [49] " 49 Roseland 44,619 52,723 -8,104 -15.4%"
## [50] " 50 Pullman 7,325 8,921 -1,596 -17.9%"
## [51] " 51 South Deering 15,109 16,990 -1,881 -11.1%"
## [52] " 52 East Side 23,042 23,653 -611 -2.6%"
## [53] " 53 West Pullman 29,651 36,649 -6,998 -19.1%"
## [54] " 54 Riverdale 6,482 9,809 -3,327 -33.9%"
## [55] " 55 Hegewisch 9,426 9,781 -355 -3.6%"
## [56] " 56 Garfield Ridge 34,513 36,101 -1,588 -4.4%"
## [57] " 57 Archer Heights 13,393 12,644 749 5.9%"
## [58] " 58 Brighton Park 45,368 44,912 456 1.0%"
## [59] " 59 McKinley Park 15,612 15,962 -350 -2.2%"
## [60] " 60 Bridgeport 31,977 33,694 -1,717 -5.1%"
## [61] " 61 New City 44,377 51,721 -7,344 -14.2%"
## [62] " 62 West Elsdon 18,109 15,921 2,188 13.7%"
## [63] " 63 Gage Park 39,894 39,193 701 1.8%"
## [64] " 64 Clearing 23,139 22,331 808 3.6%"
## [65] " 65 West Lawn 33,355 29,235 4,120 14.1%"
## [66] " 66 Chicago Lawn 55,628 61,412 -5,784 -9.4%"
## [67] " 67 West Englewood 35,505 45,282 -9,777 -21.6%"
## [68] " 68 Englewood 30,654 40,222 -9,568 -23.8%"
## [69] " 69 Greater Grand Crossing 32,602 38,619 -6,017 -15.6%"
## [70] " 70 Ashburn 41,081 39,584 1,497 3.8%"
## [71] " 71 Auburn Gresham 48,743 55,928 -7,185 -12.8%"
## [72] " 72 Beverly 20,034 21,992 -1,958 -8.9%"
## [73] " 73 Washington Heights 26,493 29,843 -3,350 -11.2%"
## [74] " 74 Mount Greenwood 19,093 18,820 273 1.5%"
## [75] " 75 Morgan Park 22,544 25,226 -2,682 -10.6%"
## [76] " 76 O'Hare 12,756 11,956 800 6.7%"
## [77] " 77 Edgewater 56,521 62,198 -5,677 -9.1%"
We first initialize a vector of zeros to hold the population values. When the size of a vector is known in advance, it is preferable to initialize (pre-allocate) it first, rather than having it grow by appending elements one at a time.
We use the vector command, specifying mode = “numeric” and setting the length to the length of the list.
nnpop <- vector(mode = "numeric", length = length(nnlist))
We again use a loop to process each element of the list (each line of the table) one by one. 1) We use the substr command to extract the characters between positions 27 and 43 (these values were determined after taking a careful look at the structure of the table). 2) Since the population values contain commas, we do two things in one line of code: first, we use gsub to replace each comma with an empty string “”; then we turn the result into a numeric value by means of as.numeric. We then assign this number to position i of the vector. The resulting vector nnpop contains the population for each of the community areas.
# Fill the pre-allocated nnpop vector with one population value per table
# line. seq_along() is used instead of 1:length() so that an empty nnlist
# gives zero iterations rather than a loop over c(1, 0).
for (i in seq_along(nnlist)) {
  # Fixed-width slice (characters 27 to 43) taken from the current line.
  popchar <- substr(nnlist[i], start = 27, stop = 43)
  # Remove the thousands separators, then convert to a number. A slice that
  # is not purely numeric becomes NA and raises a coercion warning.
  # NOTE(review): the printed result has an NA at position 69, so this fixed
  # window apparently does not fit every line -- confirm the column offsets.
  popval <- as.numeric(gsub(",", "", popchar))
  nnpop[i] <- popval
}
## Warning: NAs introduced by coercion
nnpop## [1] 54991 71942 56362 39493 31867 94368 64116 80484 11187 37023 25448 18508
## [13] 17931 51542 64124 53359 41932 13426 78743 25010 39262 73595 56323 81432
## [25] 98514 18001 20567 54881 35912 79288 35769 29283 21390 13391 18238 5918
## [37] 2876 21929 17841 11717 25681 25983 49767 31028 10185 31198 2916 13812
## [49] 44619 7325 15109 23042 29651 6482 9426 34513 13393 45368 15612 31977
## [61] 44377 18109 39894 23139 33355 55628 35505 30654 NA 41081 48743 20034
## [73] 26493 19093 22544 12756 56521
# Sequential integer identifiers 1..n, one per entry of nnlist. seq_along()
# yields an empty vector for empty input, whereas 1:length(x) would give
# c(1, 0).
nnid <- seq_along(nnlist)
nnid## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
## [51] 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
## [76] 76 77
# Two-column lookup table: integer identifier (NID) and the population value
# extracted for it (POP2010). The columns are named at construction time.
neighpop <- data.frame(
  NID = as.integer(nnid),
  POP2010 = nnpop
)
head(neighpop)## NID POP2010
## 1 1 54991
## 2 2 71942
## 3 3 56362
## 4 4 39493
## 5 5 31867
## 6 6 94368
Replace the NA value with the mean of the values in the rows immediately before and after it
# Impute each missing POP2010 entry with the mean of the entries in the rows
# directly before and after it.
# vapply() replaces sapply() so the replacement is always a numeric vector;
# sapply() returns list() when `ind` is empty, which is not a valid numeric
# replacement.
# NOTE(review): the source table printed above lists 32,602 for area 69,
# while this imputation yields 35867.5 -- consider fixing the extraction
# step instead of imputing.
ind <- which(is.na(neighpop$POP2010))
neighpop$POP2010[ind] <- vapply(
  ind,
  function(i) mean(c(neighpop$POP2010[i - 1], neighpop$POP2010[i + 1])),
  numeric(1)
)
neighpop## NID POP2010
## 1 1 54991.0
## 2 2 71942.0
## 3 3 56362.0
## 4 4 39493.0
## 5 5 31867.0
## 6 6 94368.0
## 7 7 64116.0
## 8 8 80484.0
## 9 9 11187.0
## 10 10 37023.0
## 11 11 25448.0
## 12 12 18508.0
## 13 13 17931.0
## 14 14 51542.0
## 15 15 64124.0
## 16 16 53359.0
## 17 17 41932.0
## 18 18 13426.0
## 19 19 78743.0
## 20 20 25010.0
## 21 21 39262.0
## 22 22 73595.0
## 23 23 56323.0
## 24 24 81432.0
## 25 25 98514.0
## 26 26 18001.0
## 27 27 20567.0
## 28 28 54881.0
## 29 29 35912.0
## 30 30 79288.0
## 31 31 35769.0
## 32 32 29283.0
## 33 33 21390.0
## 34 34 13391.0
## 35 35 18238.0
## 36 36 5918.0
## 37 37 2876.0
## 38 38 21929.0
## 39 39 17841.0
## 40 40 11717.0
## 41 41 25681.0
## 42 42 25983.0
## 43 43 49767.0
## 44 44 31028.0
## 45 45 10185.0
## 46 46 31198.0
## 47 47 2916.0
## 48 48 13812.0
## 49 49 44619.0
## 50 50 7325.0
## 51 51 15109.0
## 52 52 23042.0
## 53 53 29651.0
## 54 54 6482.0
## 55 55 9426.0
## 56 56 34513.0
## 57 57 13393.0
## 58 58 45368.0
## 59 59 15612.0
## 60 60 31977.0
## 61 61 44377.0
## 62 62 18109.0
## 63 63 39894.0
## 64 64 23139.0
## 65 65 33355.0
## 66 66 55628.0
## 67 67 35505.0
## 68 68 30654.0
## 69 69 35867.5
## 70 70 41081.0
## 71 71 48743.0
## 72 72 20034.0
## 73 73 26493.0
## 74 74 19093.0
## 75 75 22544.0
## 76 76 12756.0
## 77 77 56521.0
str(neighpop)## 'data.frame': 77 obs. of 2 variables:
## $ NID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ POP2010: num 54991 71942 56362 39493 31867 ...
Computing abandoned vehicles per capita
# Attach the POP2010 column to the community-area layer by matching its
# area_num_1 column against NID in neighpop; every row of chicago.comm is kept.
chicago.comm <- chicago.comm %>%
  left_join(neighpop, by = c("area_num_1" = "NID"))
head(chicago.comm)## Simple feature collection with 6 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 441440.4 ymin: 4627153 xmax: 451817.1 ymax: 4648971
## Projected CRS: WGS 84 / UTM zone 16N
## # A tibble: 6 × 12
## community area shape_area perimeter area_num_1 area_numbe comarea_id comarea
## <chr> <chr> <chr> <chr> <int> <chr> <chr> <chr>
## 1 DOUGLAS 0 46004621.… 0 35 35 0 0
## 2 OAKLAND 0 16913961.… 0 36 36 0 0
## 3 FULLER PA… 0 19916704.… 0 37 37 0 0
## 4 GRAND BOU… 0 48492503.… 0 38 38 0 0
## 5 KENWOOD 0 29071741.… 0 39 39 0 0
## 6 LINCOLN S… 0 71352328.… 0 4 4 0 0
## # … with 4 more variables: shape_len <chr>, geometry <MULTIPOLYGON [m]>,
## # AGG.COUNT <int>, POP2010 <dbl>
str(chicago.comm)## sf [77 × 12] (S3: sf/tbl_df/tbl/data.frame)
## $ community : chr [1:77] "DOUGLAS" "OAKLAND" "FULLER PARK" "GRAND BOULEVARD" ...
## $ area : chr [1:77] "0" "0" "0" "0" ...
## $ shape_area: chr [1:77] "46004621.1581" "16913961.0408" "19916704.8692" "48492503.1554" ...
## $ perimeter : chr [1:77] "0" "0" "0" "0" ...
## $ area_num_1: int [1:77] 35 36 37 38 39 4 40 41 42 1 ...
## $ area_numbe: chr [1:77] "35" "36" "37" "38" ...
## $ comarea_id: chr [1:77] "0" "0" "0" "0" ...
## $ comarea : chr [1:77] "0" "0" "0" "0" ...
## $ shape_len : chr [1:77] "31027.0545098" "19565.5061533" "25339.0897503" "28196.8371573" ...
## $ geometry :sfc_MULTIPOLYGON of length 77; first list element: List of 1
## ..$ :List of 1
## .. ..$ : num [1:352, 1:2] 449430 449429 449428 449427 449427 ...
## ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
## $ AGG.COUNT : int [1:77] 8 3 4 12 22 32 2 13 30 67 ...
## $ POP2010 : num [1:77] 18238 5918 2876 21929 17841 ...
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:11] "community" "area" "shape_area" "perimeter" ...
# Add vehpcap: AGG.COUNT divided by POP2010, scaled by 1000 (i.e. the count
# per 1,000 of population).
chicago.comm <- mutate(
  chicago.comm,
  vehpcap = 1000 * (AGG.COUNT / POP2010)
)
head(chicago.comm)## Simple feature collection with 6 features and 12 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 441440.4 ymin: 4627153 xmax: 451817.1 ymax: 4648971
## Projected CRS: WGS 84 / UTM zone 16N
## # A tibble: 6 × 13
## community area shape_area perimeter area_num_1 area_numbe comarea_id comarea
## <chr> <chr> <chr> <chr> <int> <chr> <chr> <chr>
## 1 DOUGLAS 0 46004621.… 0 35 35 0 0
## 2 OAKLAND 0 16913961.… 0 36 36 0 0
## 3 FULLER PA… 0 19916704.… 0 37 37 0 0
## 4 GRAND BOU… 0 48492503.… 0 38 38 0 0
## 5 KENWOOD 0 29071741.… 0 39 39 0 0
## 6 LINCOLN S… 0 71352328.… 0 4 4 0 0
## # … with 5 more variables: shape_len <chr>, geometry <MULTIPOLYGON [m]>,
## # AGG.COUNT <int>, POP2010 <dbl>, vehpcap <dbl>
# Map the community-area polygons with tmap, passing the vehpcap column name
# to tm_polygons() so the polygons are drawn according to that variable.
tm_shape(chicago.comm) +
tm_polygons("vehpcap")