Introduction

This is an example of reading spatial and non-spatial data using R. Furthermore, it illustrates how to summarize individual attribute values as well as aggregated attribute values. It also shows how to join atributes from an excel file to spatial objects stored in a shapefile.

Load objects of interest

As with any project, the starting point is to load the data we’ll be using. In this case we can download all the datasets from https://goo.gl/dcn9gu and put them in a given directory.

# load the packages we'll be using 
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.1-10, (SVN revision 622)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 1.11.4, released 2016/01/25
##  Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.3/Resources/library/rgdal/gdal
##  Loaded PROJ.4 runtime: Rel. 4.9.1, 04 March 2015, [PJ_VERSION: 491]
##  Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.3/Resources/library/rgdal/proj
##  Linking to sp version: 1.2-3
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#library(ggmap)
library(RColorBrewer)


# set working directory:
setwd("/Users/laura/documents/unal/ame/datos/K24901/C2")
# get data
# make sure you have downloaded them
#
# list files:
list.files(path=".")
##  [1] "agricul_ILL_stats.dbf"                    
##  [2] "agricul_ILL_stats.prj"                    
##  [3] "agricul_ILL_stats.sbn"                    
##  [4] "agricul_ILL_stats.sbx"                    
##  [5] "agricul_ILL_stats.shp"                    
##  [6] "agricul_ILL_stats.shp.xml"                
##  [7] "agricul_ILL_stats.shx"                    
##  [8] "Agricultural_Exported_GISdataset.dbf"     
##  [9] "central.dbf"                              
## [10] "central.prj"                              
## [11] "central.sbn"                              
## [12] "central.sbx"                              
## [13] "central.shp"                              
## [14] "central.shx"                              
## [15] "east.dbf"                                 
## [16] "east.prj"                                 
## [17] "east.sbn"                                 
## [18] "east.sbx"                                 
## [19] "east.shp"                                 
## [20] "east.shx"                                 
## [21] "EastSoutheas.dbf"                         
## [22] "EastSoutheas.prj"                         
## [23] "EastSoutheas.sbn"                         
## [24] "EastSoutheas.sbx"                         
## [25] "EastSoutheas.shp"                         
## [26] "EastSoutheas.shx"                         
## [27] "Illinios_census_county.dbf"               
## [28] "Illinios_census_county.prj"               
## [29] "Illinios_census_county.sbn"               
## [30] "Illinios_census_county.sbx"               
## [31] "Illinios_census_county.shp"               
## [32] "Illinios_census_county.shp.xml"           
## [33] "Illinios_census_county.shx"               
## [34] "Lab2.mxd"                                 
## [35] "llinios_cnty_agricultural_statistics.xls" 
## [36] "llinios_cnty_agricultural_statistics.xlsx"
## [37] "Northeast.dbf"                            
## [38] "Northeast.prj"                            
## [39] "Northeast.sbn"                            
## [40] "Northeast.sbx"                            
## [41] "Northeast.shp"                            
## [42] "Northeast.shx"                            
## [43] "Northwest.dbf"                            
## [44] "Northwest.prj"                            
## [45] "Northwest.sbn"                            
## [46] "Northwest.sbx"                            
## [47] "Northwest.shp"                            
## [48] "Northwest.shx"                            
## [49] "randompoints2.dbf"                        
## [50] "randompoints2.prj"                        
## [51] "randompoints2.shp"                        
## [52] "randompoints2.shp.xml"                    
## [53] "randompoints2.shx"                        
## [54] "Southeast.dbf"                            
## [55] "Southeast.prj"                            
## [56] "Southeast.sbn"                            
## [57] "Southeast.sbx"                            
## [58] "Southeast.shp"                            
## [59] "Southeast.shx"                            
## [60] "Southwest.dbf"                            
## [61] "Southwest.prj"                            
## [62] "Southwest.sbn"                            
## [63] "Southwest.sbx"                            
## [64] "Southwest.shp"                            
## [65] "Southwest.shx"                            
## [66] "West.dbf"                                 
## [67] "West.prj"                                 
## [68] "West.sbn"                                 
## [69] "West.sbx"                                 
## [70] "West.shp"                                 
## [71] "West.shx"                                 
## [72] "WestSouthwest.dbf"                        
## [73] "WestSouthwest.prj"                        
## [74] "WestSouthwest.sbn"                        
## [75] "WestSouthwest.sbx"                        
## [76] "WestSouthwest.shp"                        
## [77] "WestSouthwest.shx"
# note misspelled names !!!
# select data:
# let's choose a shapefile (spatial data) and a excel file (non spatial)
# 
# read spatial data: 
x <- readOGR('Illinios_census_county.shp', layer='Illinios_census_county')
## OGR data source with driver: ESRI Shapefile 
## Source: "Illinios_census_county.shp", layer: "Illinios_census_county"
## with 102 features
## It has 50 fields
plot(x, main="Illinois Census County")

# 
#install.packages("XLConnect")
#install.packages("openxlsx")
#read 
library("openxlsx")
excel.file <- "llinios_cnty_agricultural_statistics.xlsx"
A <- read.xlsx(excel.file)
#head(A)
#summary(A)
#unique(A$GroupArea)

View data structure

Before you do anything else, it is important to understand the structure of your data and that of any objects derived from it.

class(A)          # "data.frame"
## [1] "data.frame"
sapply(A, class)  # show classes of all columns
##       CNTY_FIPS     COUNTY_NAME           Group       GroupArea 
##     "character"     "character"       "numeric"     "character" 
##  CornAllPurpose        CornAcre       CornYield  CornProduction 
##       "numeric"       "numeric"       "numeric"       "numeric" 
##   SoyAllPurpose         SoyAcre        SoyYield   SoyProduction 
##       "numeric"       "numeric"       "numeric"       "numeric" 
## WheatAllPurpose       WheatAcre      WheatYield WheatProduction 
##       "numeric"       "numeric"       "numeric"       "numeric"
typeof(A)         # "list"
## [1] "list"
names(A)          # show list components
##  [1] "CNTY_FIPS"       "COUNTY_NAME"     "Group"          
##  [4] "GroupArea"       "CornAllPurpose"  "CornAcre"       
##  [7] "CornYield"       "CornProduction"  "SoyAllPurpose"  
## [10] "SoyAcre"         "SoyYield"        "SoyProduction"  
## [13] "WheatAllPurpose" "WheatAcre"       "WheatYield"     
## [16] "WheatProduction"
dim(A)            # dimensions of object, if any
## [1] 102  16
head(A)           # extract first few (default 6) parts
##   CNTY_FIPS COUNTY_NAME Group     GroupArea CornAllPurpose CornAcre
## 1       001       Adams    30          West         142000   129900
## 2       003   Alexander    80     Southwest           8600     8000
## 3       005        Bond    60 WestSouthwest          55000    50900
## 4       007       Boone    20     Northeast          80000    79300
## 5       009       Brown    30          West          54000    53400
## 6       011      Bureau    10     Northwest         299000   297500
##   CornYield CornProduction SoyAllPurpose SoyAcre SoyYield SoyProduction
## 1       171       22212900        110000  101700       41       4169700
## 2       144        1152000         31000   29800       40       1192000
## 3       164        8347600         90000   89000       45       4005000
## 4       169       13401700         38000   37800       42       1587600
## 5       152        8116800         33000   32100       37       1187700
## 6       199       59202500        116000  115000       49       5635000
##   WheatAllPurpose WheatAcre WheatYield WheatProduction
## 1           22000     21600         62         1339200
## 2            8400      6200         47          291400
## 3           33400     32900         64         2105600
## 4               0         0          0               0
## 5               0         0          0               0
## 6            4400      4100         74          303400
tail(A, 1)        # extract last row
##     CNTY_FIPS COUNTY_NAME Group GroupArea CornAllPurpose CornAcre
## 102       203    Woodford    40   Central         163000   162800
##     CornYield CornProduction SoyAllPurpose SoyAcre SoyYield SoyProduction
## 102       206       33536800        101000  100600       54       5432400
##     WheatAllPurpose WheatAcre WheatYield WheatProduction
## 102               0         0          0               0
head(1:10, -1)    # extract everything except the last element
## [1] 1 2 3 4 5 6 7 8 9

Basic data summaries

Use a loop for generate and summarise attribute values:

for (i in 1:dim(A)[[2]])
{
a <- names(A)[[i]]
b <- A[[i]]
if (is.numeric(b) == TRUE)
  { 
  cat("Atributo No", i)
  print(a)
  print(summary(b))           # gives min, max, mean, median, 1st & 3rd quartiles
  print("")
  #min(b); max(b)       # }
  #range(b)             # } self-explanatory
  #mean(b); median(b)   # }
  #sd(b); mad(b)        # standard deviation, median absolute deviation
  }
}
## Atributo No 3[1] "Group"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10.00   30.00   60.00   51.67   70.00   90.00 
## [1] ""
## Atributo No 5[1] "CornAllPurpose"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3000   58000   98000  118700  165200  370000 
## [1] ""
## Atributo No 6[1] "CornAcre"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2950   53680   94450  116700  162600  368000 
## [1] ""
## Atributo No 7[1] "CornYield"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   116.0   155.0   170.0   170.2   186.5   207.0 
## [1] ""
## Atributo No 8[1] "CornProduction"
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##   331000  8145000 16880000 20890000 30970000 69920000 
## [1] ""
## Atributo No 9[1] "SoyAllPurpose"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2000   51000   91500   90220  116000  252000 
## [1] ""
## Atributo No 10[1] "SoyAcre"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1900   49620   90250   89220  115000  250500 
## [1] ""
## Atributo No 11[1] "SoyYield"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   27.00   42.00   46.00   45.73   49.00   56.00 
## [1] ""
## Atributo No 12[1] "SoyProduction"
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##    62700  2378000  3958000  4193000  5428000 12320000 
## [1] ""
## Atributo No 13[1] "WheatAllPurpose"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0    4550   10210   15320   75000 
## [1] ""
## Atributo No 14[1] "WheatAcre"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0    4400   10070   14620   74200 
## [1] ""
## Atributo No 15[1] "WheatYield"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00   60.00   44.15   65.00   81.00 
## [1] ""
## Atributo No 16[1] "WheatProduction"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0  298000  637500  900800 4749000 
## [1] ""

Summarize aggregated attribute values

In the excel file, the GroupArea attribute represent different spatial regions. This attribute value will be used for aggregating attribute values.

Note that CornYield, SoyYield, and WheatYield values can be aggregated by averaging row values:

aggregate(A[, c(7,11,15)], list(A$GroupArea), mean)
##         Group.1 CornYield SoyYield WheatYield
## 1       Central  193.8182 50.90909   31.54545
## 2          East  176.2857 47.85714   40.42857
## 3 EastSoutheast  161.4667 45.93333   54.73333
## 4     Northeast  172.1818 42.90909   37.63636
## 5     Northwest  182.5833 46.66667   45.66667
## 6     Southeast  145.7500 39.50000   47.50000
## 7     Southwest  152.3333 43.16667   54.08333
## 8          West  189.1111 48.77778   20.88889
## 9 WestSouthwest  170.0769 47.46154   52.53846

CornProduction, SoyProduction, and WheatProduction can be aggregated by summation of row values:

aggregate(A[, c(8,12, 16)], list(A$GroupArea), mean)
##         Group.1 CornProduction SoyProduction WheatProduction
## 1       Central       31090291       4928500        137681.8
## 2          East       42985314       8740214        355542.9
## 3 EastSoutheast       15928027       5059200        895120.0
## 4     Northeast       19466400       2735873        207554.5
## 5     Northwest       31217692       3342658        178825.0
## 6     Southeast        5950667       2807717        948183.3
## 7     Southwest        6019233       3189708       1601666.7
## 8          West       24961278       4316656        298444.4
## 9 WestSouthwest       22472862       4260969        760430.8

Take a moment to compare summaries with values obtained by Oyana & Margai (2016). Are these values similar to values reported in such textbook?

Now, think about how could you summarize acreage and test your procedure.

Explore spatial objects

Let’s summarize our spatial objects:

### summarize spatial objects
summary(x)
## Object of class SpatialPolygonsDataFrame
## Coordinates:
##         min       max
## x -91.51628 -87.50791
## y  36.98682  42.50936
## Is projected: FALSE 
## proj4string :
## [+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0]
## Data attributes:
##     ObjectID           NAME       STATE_NAME  STATE_FIPS   CNTY_FIPS 
##  Min.   : 545   Adams    : 1   Illinois:102   17:102     001    : 1  
##  1st Qu.: 890   Alexander: 1                             003    : 1  
##  Median :1139   Bond     : 1                             005    : 1  
##  Mean   :1349   Boone    : 1                             007    : 1  
##  3rd Qu.:1448   Brown    : 1                             009    : 1  
##  Max.   :3098   Bureau   : 1                             011    : 1  
##                 (Other)  :96                             (Other):96  
##       FIPS       POP2000          POP00_SQMI        POP2010       
##  17001  : 1   Min.   :   4413   Min.   :  11.8   Min.   :   4125  
##  17003  : 1   1st Qu.:  15151   1st Qu.:  31.8   1st Qu.:  14875  
##  17005  : 1   Median :  27176   Median :  48.6   Median :  28014  
##  17007  : 1   Mean   : 121758   Mean   : 152.3   Mean   : 128331  
##  17009  : 1   3rd Qu.:  54207   3rd Qu.:  89.6   3rd Qu.:  55939  
##  17011  : 1   Max.   :5376741   Max.   :3289.5   Max.   :5346234  
##  (Other):96                                                       
##    POP10_SQMI          WHITE             BLACK          
##  Min.   :  11.00   Min.   :   4117   Min.   :      2.0  
##  1st Qu.:  30.48   1st Qu.:  14590   1st Qu.:     50.0  
##  Median :  48.10   Median :  26222   Median :    491.5  
##  Mean   : 163.41   Mean   :  89465   Mean   :  18400.7  
##  3rd Qu.:  90.80   3rd Qu.:  49900   3rd Qu.:   2051.0  
##  Max.   :3270.90   Max.   :3025760   Max.   :1405361.0  
##                                                         
##     AMERI_ES            ASIAN             HAWN_PI           OTHER         
##  Min.   :    2.00   Min.   :     4.0   Min.   :   0.0   Min.   :     2.0  
##  1st Qu.:   29.25   1st Qu.:    33.5   1st Qu.:   2.0   1st Qu.:    33.0  
##  Median :   53.00   Median :    73.5   Median :   5.0   Median :   120.5  
##  Mean   :  303.98   Mean   :  4153.0   Mean   :  45.2   Mean   :  7085.4  
##  3rd Qu.:  109.00   3rd Qu.:   327.5   3rd Qu.:  15.5   3rd Qu.:   568.8  
##  Max.   :15496.00   Max.   :260170.0   Max.   :2561.0   Max.   :531170.0  
##                                                                           
##    MULT_RACE            HISPANIC             MALES        
##  Min.   :    11.00   Min.   :     10.0   Min.   :   2235  
##  1st Qu.:    95.25   1st Qu.:    108.5   1st Qu.:   7335  
##  Median :   179.50   Median :    348.0   Median :  13224  
##  Mean   :  2304.08   Mean   :  15002.6   Mean   :  59611  
##  3rd Qu.:   648.50   3rd Qu.:   1461.0   3rd Qu.:  26666  
##  Max.   :136223.00   Max.   :1071740.0   Max.   :2603532  
##                                                           
##     FEMALES          AGE_UNDER5          AGE_5_17         AGE_18_21       
##  Min.   :   2178   Min.   :   211.0   Min.   :    718   Min.   :   230.0  
##  1st Qu.:   7726   1st Qu.:   914.8   1st Qu.:   2750   1st Qu.:   717.2  
##  Median :  13952   Median :  1495.0   Median :   4631   Median :  1424.5  
##  Mean   :  62147   Mean   :  8593.6   Mean   :  23224   Mean   :  6931.3  
##  3rd Qu.:  27720   3rd Qu.:  3205.2   3rd Qu.:   9745   3rd Qu.:  3079.8  
##  Max.   :2773209   Max.   :388201.0   Max.   :1009618   Max.   :290208.0  
##                                                                           
##    AGE_22_29        AGE_30_39        AGE_40_49        AGE_50_64     
##  Min.   :   349   Min.   :   535   Min.   :   611   Min.   :   877  
##  1st Qu.:  1286   1st Qu.:  1988   1st Qu.:  2238   1st Qu.:  2401  
##  Median :  2356   Median :  3376   Median :  3790   Median :  4328  
##  Mean   : 13683   Mean   : 18792   Mean   : 18243   Mean   : 17584  
##  3rd Qu.:  5297   3rd Qu.:  7224   3rd Qu.:  8055   3rd Qu.:  8046  
##  Max.   :685132   Max.   :851391   Max.   :769856   Max.   :752070  
##                                                                     
##    AGE_65_UP         MED_AGE        MED_AGE_M       MED_AGE_F    
##  Min.   :   782   Min.   :27.50   Min.   :26.10   Min.   :29.20  
##  1st Qu.:  2613   1st Qu.:37.05   1st Qu.:35.52   1st Qu.:38.60  
##  Median :  4270   Median :38.10   Median :36.80   Median :39.80  
##  Mean   : 14706   Mean   :37.69   Mean   :36.27   Mean   :39.21  
##  3rd Qu.:  8262   3rd Qu.:39.58   3rd Qu.:37.90   3rd Qu.:41.17  
##  Max.   :630265   Max.   :42.10   Max.   :40.70   Max.   :43.90  
##                                                                  
##    HOUSEHOLDS        AVE_HH_SZ       HSEHLD_1_M       HSEHLD_1_F    
##  Min.   :   1769   Min.   :2.210   Min.   :   221   Min.   :   272  
##  1st Qu.:   5925   1st Qu.:2.400   1st Qu.:   605   1st Qu.:   919  
##  Median :  10634   Median :2.450   Median :  1080   Median :  1479  
##  Mean   :  45017   Mean   :2.481   Mean   :  5159   Mean   :  6898  
##  3rd Qu.:  20796   3rd Qu.:2.520   3rd Qu.:  2388   3rd Qu.:  3429  
##  Max.   :1974181   Max.   :2.970   Max.   :249752   Max.   :330249  
##                                                                     
##    MARHH_CHD        MARHH_NO_C       MHH_CHILD         FHH_CHILD     
##  Min.   :   372   Min.   :   651   Min.   :   29.0   Min.   :    54  
##  1st Qu.:  1396   1st Qu.:  2026   1st Qu.:  117.8   1st Qu.:   304  
##  Median :  2281   Median :  3418   Median :  199.5   Median :   505  
##  Mean   : 10918   Mean   : 12160   Mean   :  833.5   Mean   :  3098  
##  3rd Qu.:  4915   3rd Qu.:  6607   3rd Qu.:  445.8   3rd Qu.:  1251  
##  Max.   :415813   Max.   :452431   Max.   :35262.0   Max.   :158874  
##                                                                      
##     FAMILIES         AVE_FAM_SZ      HSE_UNITS           VACANT        
##  Min.   :   1220   Min.   :2.810   Min.   :   2351   Min.   :   200.0  
##  1st Qu.:   4245   1st Qu.:2.930   1st Qu.:   6468   1st Qu.:   576.8  
##  Median :   7162   Median :2.970   Median :  11938   Median :   926.0  
##  Mean   :  30446   Mean   :2.998   Mean   :  47898   Mean   :  2880.8  
##  3rd Qu.:  14274   3rd Qu.:3.030   3rd Qu.:  22504   3rd Qu.:  1718.0  
##  Max.   :1269592   Max.   :3.430   Max.   :2096121   Max.   :121940.0  
##                                                                        
##    OWNER_OCC         RENTER_OCC       NO_FARMS07       AVG_SIZE07   
##  Min.   :   1455   Min.   :   314   Min.   :  73.0   Min.   : 45.0  
##  1st Qu.:   4770   1st Qu.:  1340   1st Qu.: 497.8   1st Qu.:268.5  
##  Median :   7862   Median :  2148   Median : 707.5   Median :350.0  
##  Mean   :  30283   Mean   : 14734   Mean   : 753.5   Mean   :350.6  
##  3rd Qu.:  15642   3rd Qu.:  5786   3rd Qu.:1011.8   3rd Qu.:409.8  
##  Max.   :1142677   Max.   :831504   Max.   :1622.0   Max.   :885.0  
##                                                                     
##    CROP_ACR07       AVG_SALE07          SQMI       
##  Min.   :  6388   Min.   : 15.59   Min.   : 172.2  
##  1st Qu.:152719   1st Qu.:106.90   1st Qu.: 389.0  
##  Median :207478   Median :175.22   Median : 521.7  
##  Mean   :232428   Mean   :169.65   Mean   : 567.8  
##  3rd Qu.:287258   3rd Qu.:221.31   3rd Qu.: 706.4  
##  Max.   :647350   Max.   :388.64   Max.   :1634.5  
## 
nrow(x)
## [1] 102

The summary() funcion outputs some very useful information: the bounding box of the layer, its coordinate reference system (CRS) and even summaries of the attributes associated with each spatial object. The nrow() function tell us how many polygons are represented within the layer.

To gain a better understanding of the structure of the Illinois layer, we can use the str function (but only on the first polygon, to avoid an extrememly long output):

str(x[1,])
## Formal class 'SpatialPolygonsDataFrame' [package "sp"] with 5 slots
##   ..@ data       :'data.frame':  1 obs. of  50 variables:
##   .. ..$ ObjectID  : int 545
##   .. ..$ NAME      : Factor w/ 102 levels "Adams","Alexander",..: 43
##   .. ..$ STATE_NAME: Factor w/ 1 level "Illinois": 1
##   .. ..$ STATE_FIPS: Factor w/ 1 level "17": 1
##   .. ..$ CNTY_FIPS : Factor w/ 102 levels "001","003","005",..: 43
##   .. ..$ FIPS      : Factor w/ 102 levels "17001","17003",..: 43
##   .. ..$ POP2000   : int 22289
##   .. ..$ POP00_SQMI: num 36
##   .. ..$ POP2010   : int 22046
##   .. ..$ POP10_SQMI: num 35.6
##   .. ..$ WHITE     : int 21991
##   .. ..$ BLACK     : int 44
##   .. ..$ AMERI_ES  : int 23
##   .. ..$ ASIAN     : int 36
##   .. ..$ HAWN_PI   : int 1
##   .. ..$ OTHER     : int 75
##   .. ..$ MULT_RACE : int 119
##   .. ..$ HISPANIC  : int 342
##   .. ..$ MALES     : int 11175
##   .. ..$ FEMALES   : int 11114
##   .. ..$ AGE_UNDER5: int 1246
##   .. ..$ AGE_5_17  : int 3916
##   .. ..$ AGE_18_21 : int 900
##   .. ..$ AGE_22_29 : int 1724
##   .. ..$ AGE_30_39 : int 2811
##   .. ..$ AGE_40_49 : int 3378
##   .. ..$ AGE_50_64 : int 4316
##   .. ..$ AGE_65_UP : int 3998
##   .. ..$ MED_AGE   : num 41.6
##   .. ..$ MED_AGE_M : num 40.2
##   .. ..$ MED_AGE_F : num 43.1
##   .. ..$ HOUSEHOLDS: int 9218
##   .. ..$ AVE_HH_SZ : num 2.4
##   .. ..$ HSEHLD_1_M: int 1118
##   .. ..$ HSEHLD_1_F: int 1421
##   .. ..$ MARHH_CHD : int 2013
##   .. ..$ MARHH_NO_C: int 3355
##   .. ..$ MHH_CHILD : int 154
##   .. ..$ FHH_CHILD : int 354
##   .. ..$ FAMILIES  : int 6287
##   .. ..$ AVE_FAM_SZ: num 2.92
##   .. ..$ HSE_UNITS : int 12003
##   .. ..$ VACANT    : int 2785
##   .. ..$ OWNER_OCC : int 7129
##   .. ..$ RENTER_OCC: int 2089
##   .. ..$ NO_FARMS07: num 1016
##   .. ..$ AVG_SIZE07: num 277
##   .. ..$ CROP_ACR07: num 196027
##   .. ..$ AVG_SALE07: num 134
##   .. ..$ SQMI      : num 619
##   ..@ polygons   :List of 1
##   .. ..$ :Formal class 'Polygons' [package "sp"] with 5 slots
##   .. .. .. ..@ Polygons :List of 1
##   .. .. .. .. ..$ :Formal class 'Polygon' [package "sp"] with 5 slots
##   .. .. .. .. .. .. ..@ labpt  : num [1:2] -90.2 42.4
##   .. .. .. .. .. .. ..@ area   : num 0.174
##   .. .. .. .. .. .. ..@ hole   : logi FALSE
##   .. .. .. .. .. .. ..@ ringDir: int 1
##   .. .. .. .. .. .. ..@ coords : num [1:16, 1:2] -90.3 -90.4 -90.4 -90.4 -90.4 ...
##   .. .. .. ..@ plotOrder: int 1
##   .. .. .. ..@ labpt    : num [1:2] -90.2 42.4
##   .. .. .. ..@ ID       : chr "0"
##   .. .. .. ..@ area     : num 0.174
##   ..@ plotOrder  : int 1
##   ..@ bbox       : num [1:2, 1:2] -90.7 42.2 -89.9 42.5
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:2] "x" "y"
##   .. .. ..$ : chr [1:2] "min" "max"
##   ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot
##   .. .. ..@ projargs: chr "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"

This shows us that spatial objects are stored as SpatialPolygonsDataFrame. This structure is both complex as useful, as Robin Lovelace states, allowing R to store the full range of information needed to describe almost any polygon-based dataset. The @ symbol in the structure represents slots which are specific to the S4 object class and contain specific pieces of information within the spatial layer. The basic slots within such spatial object are:

@data, which contains the the attribute data for the zones \ @polygons, the geographic data associated with each polygon (this confusingly contains the @Polygons slot: each polygon feature can contain multiple Polygons, e.g. if an administrative zone is non-contiguous) \ @plotOrder is simply the order in which the polygons are plotted \ @bbox is a slot associated with all spatial objects, representing its spatial extent \ @proj4string the CRS associated with the object \

Critically for exploring the attributes of london is the data slot. We can look at and modify the attributes of the subdivisions of london easily using the @ notation:

head(x@data)
##   ObjectID        NAME STATE_NAME STATE_FIPS CNTY_FIPS  FIPS POP2000
## 0      545  Jo Daviess   Illinois         17       085 17085   22289
## 1      621 Rock Island   Illinois         17       161 17161  149374
## 2      634        Will   Illinois         17       197 17197  502266
## 3      636     Kendall   Illinois         17       093 17093   54544
## 4      653    La Salle   Illinois         17       099 17099  111509
## 5      666      Bureau   Illinois         17       011 17011   35503
##   POP00_SQMI POP2010 POP10_SQMI  WHITE BLACK AMERI_ES ASIAN HAWN_PI OTHER
## 0       36.0   22046       35.6  21991    44       23    36       1    75
## 1      330.8  147257      326.2 127742 11260      410  1524      45  5612
## 2      591.4  712697      839.2 411027 52509     1038 11125     162 18219
## 3      169.2  114126      354.0  50658   718      105   480      12  1842
## 4       97.1  113252       98.7 105896  1723      191   598      26  1908
## 5       40.7   34812       39.9  34365   116       61   182      10   455
##   MULT_RACE HISPANIC  MALES FEMALES AGE_UNDER5 AGE_5_17 AGE_18_21
## 0       119      342  11175   11114       1246     3916       900
## 1      2781    12791  72545   76829       9486    26038      9357
## 2      8186    43768 250832  251434      42028   108683     24449
## 3       729     4086  27092   27452       4362    11721      2501
## 4      1167     5791  55167   56342       7033    21019      5468
## 5       314     1732  17244   18259       2094     6691      1626
##   AGE_22_29 AGE_30_39 AGE_40_49 AGE_50_64 AGE_65_UP MED_AGE MED_AGE_M
## 0      1724      2811      3378      4316      3998    41.6      40.2
## 1     14702     19963     22802     24462     22564    37.8      36.4
## 2     49264     88597     80019     67616     41610    33.3      32.5
## 3      5120      9432      8745      8028      4635    34.1      33.4
## 4      9793     16000     16942     16962     18292    38.1      36.8
## 5      2850      4731      5343      5869      6299    39.6      37.9
##   MED_AGE_F HOUSEHOLDS AVE_HH_SZ HSEHLD_1_M HSEHLD_1_F MARHH_CHD
## 0      43.1       9218      2.40       1118       1421      2013
## 1      39.2      60712      2.38       7629      10699     11786
## 2      34.1     167542      2.94      13686      16198     59174
## 3      34.8      18798      2.89       1346       1731      6674
## 4      39.4      43417      2.49       4994       6914     10438
## 5      41.1      14182      2.47       1512       2314      3379
##   MARHH_NO_C MHH_CHILD FHH_CHILD FAMILIES AVE_FAM_SZ HSE_UNITS VACANT
## 0       3355       154       354     6287       2.92     12003   2785
## 1      18040      1249      4540    39162       2.97     64489   3777
## 2      49456      3067      9243   130972       3.36    175524   7982
## 3       6264       320       836    14969       3.27     19519    721
## 4      13728       922      2398    29840       3.04     46438   3021
## 5       4863       302       678     9890       2.99     15331   1149
##   OWNER_OCC RENTER_OCC NO_FARMS07 AVG_SIZE07 CROP_ACR07 AVG_SALE07   SQMI
## 0      7129       2089       1016        277     196027     134.50  618.7
## 1     42303      18409        700        255     148749     136.15  451.5
## 2    139311      28231        877        252     208874     145.49  849.3
## 3     15810       2988        424        394     160527     244.16  322.4
## 4     32584      10833       1622        397     614381     202.83 1148.0
## 5     10775       3407       1189        402     439887     255.14  873.3

Plot spatial objects

Thus we can treat the S4 spatial data classes as if they were regular data frames in some contexts, which is extremely useful for concise code. To plot the yield of corn crop on each county, the following code works:

#install.packages("RColorBrewer")
library(RColorBrewer)
cols <- brewer.pal(n = 4, name = "Greys")
lcols <- cut(x$POP2010,
  breaks = quantile(x$POP2010),
  labels = cols)
plot(x, col = as.character(lcols))

Joining attribute values

Now, how about joining additional variables to the spatial object? To join information to the existing variables, the join functions from dplyr can be a good choice. The following code loads a non-geographical dataset and joins an additional variable to x@data:

x@data <- left_join(x@data, A)
## Joining, by = "CNTY_FIPS"
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## character vector and factor, coercing into character vector
head(x@data) # the new data has been added
##   ObjectID        NAME STATE_NAME STATE_FIPS CNTY_FIPS  FIPS POP2000
## 1      545  Jo Daviess   Illinois         17       085 17085   22289
## 2      621 Rock Island   Illinois         17       161 17161  149374
## 3      634        Will   Illinois         17       197 17197  502266
## 4      636     Kendall   Illinois         17       093 17093   54544
## 5      653    La Salle   Illinois         17       099 17099  111509
## 6      666      Bureau   Illinois         17       011 17011   35503
##   POP00_SQMI POP2010 POP10_SQMI  WHITE BLACK AMERI_ES ASIAN HAWN_PI OTHER
## 1       36.0   22046       35.6  21991    44       23    36       1    75
## 2      330.8  147257      326.2 127742 11260      410  1524      45  5612
## 3      591.4  712697      839.2 411027 52509     1038 11125     162 18219
## 4      169.2  114126      354.0  50658   718      105   480      12  1842
## 5       97.1  113252       98.7 105896  1723      191   598      26  1908
## 6       40.7   34812       39.9  34365   116       61   182      10   455
##   MULT_RACE HISPANIC  MALES FEMALES AGE_UNDER5 AGE_5_17 AGE_18_21
## 1       119      342  11175   11114       1246     3916       900
## 2      2781    12791  72545   76829       9486    26038      9357
## 3      8186    43768 250832  251434      42028   108683     24449
## 4       729     4086  27092   27452       4362    11721      2501
## 5      1167     5791  55167   56342       7033    21019      5468
## 6       314     1732  17244   18259       2094     6691      1626
##   AGE_22_29 AGE_30_39 AGE_40_49 AGE_50_64 AGE_65_UP MED_AGE MED_AGE_M
## 1      1724      2811      3378      4316      3998    41.6      40.2
## 2     14702     19963     22802     24462     22564    37.8      36.4
## 3     49264     88597     80019     67616     41610    33.3      32.5
## 4      5120      9432      8745      8028      4635    34.1      33.4
## 5      9793     16000     16942     16962     18292    38.1      36.8
## 6      2850      4731      5343      5869      6299    39.6      37.9
##   MED_AGE_F HOUSEHOLDS AVE_HH_SZ HSEHLD_1_M HSEHLD_1_F MARHH_CHD
## 1      43.1       9218      2.40       1118       1421      2013
## 2      39.2      60712      2.38       7629      10699     11786
## 3      34.1     167542      2.94      13686      16198     59174
## 4      34.8      18798      2.89       1346       1731      6674
## 5      39.4      43417      2.49       4994       6914     10438
## 6      41.1      14182      2.47       1512       2314      3379
##   MARHH_NO_C MHH_CHILD FHH_CHILD FAMILIES AVE_FAM_SZ HSE_UNITS VACANT
## 1       3355       154       354     6287       2.92     12003   2785
## 2      18040      1249      4540    39162       2.97     64489   3777
## 3      49456      3067      9243   130972       3.36    175524   7982
## 4       6264       320       836    14969       3.27     19519    721
## 5      13728       922      2398    29840       3.04     46438   3021
## 6       4863       302       678     9890       2.99     15331   1149
##   OWNER_OCC RENTER_OCC NO_FARMS07 AVG_SIZE07 CROP_ACR07 AVG_SALE07   SQMI
## 1      7129       2089       1016        277     196027     134.50  618.7
## 2     42303      18409        700        255     148749     136.15  451.5
## 3    139311      28231        877        252     208874     145.49  849.3
## 4     15810       2988        424        394     160527     244.16  322.4
## 5     32584      10833       1622        397     614381     202.83 1148.0
## 6     10775       3407       1189        402     439887     255.14  873.3
##   COUNTY_NAME Group GroupArea CornAllPurpose CornAcre CornYield
## 1  Jo Daviess    10 Northwest          97000    92800       163
## 2 Rock Island    10 Northwest          67000    66400       183
## 3        Will    20 Northeast          99000    98300       177
## 4     Kendall    20 Northeast          90000    89500       176
## 5     LaSalle    20 Northeast         336000   334000       192
## 6      Bureau    10 Northwest         299000   297500       199
##   CornProduction SoyAllPurpose SoyAcre SoyYield SoyProduction
## 1       15126400         37000   36500       43       1569500
## 2       12151200         43000   42800       48       2054400
## 3       17399100         97000   96300       46       4429800
## 4       15752000         50000   49700       47       2335900
## 5       64128000          8000    7900       34        268600
## 6       59202500        116000  115000       49       5635000
##   WheatAllPurpose WheatAcre WheatYield WheatProduction
## 1               0         0          0               0
## 2            1300      1100         59           64900
## 3           13200     12600         69          869400
## 4               0         0          0               0
## 5            1600      1500         63           94500
## 6            4400      4100         74          303400

Plotting nice maps

In order to plot the average corn yield across Illinois we can use ggplot2:

#install.packages("ggplot2")
#install.packages("maptools")
#install.packages("rgeos")
#install.packages("gpclib")
library(maptools)
## Checking rgeos availability: TRUE
library(ggplot2)
library(gpclib)
## General Polygon Clipper Library for R (version 1.5-5)
##  Type 'class ? gpc.poly' for help
gpclibPermit() 
## Warning in gpclibPermit(): support for gpclib will be withdrawn from
## maptools at the next major release
## [1] TRUE
lf <- fortify(x, region = "CNTY_FIPS")
lf <- rename(lf, CNTY_FIPS = id)
lf <- left_join(lf, x@data)
## Joining, by = "CNTY_FIPS"
ggplot(lf) + geom_polygon(aes(long, lat, group = group, fill = CornYield))

Do not forget to replicate this exercise in your own computer.

REFERENCES

Lovelace, R., 2014. Basic mapping and attribute joins in R. Available at http://robinlovelace.net

Oyana T.J. & Margai, F.M., 2016. Spatial Analysis – Statistics, Visualization, and Computational Methods. CRC Press. 294 pp.