Key Concepts covered in the lecture include:
1. P.P. Terminology (point, event, window)
2. Poisson Point Process
3. Purpose of Point Pattern Analysis
4. Types of distribution (random, uniform, clustered)
5. Kernel estimation and types of kernels
6. Quadrat-based PPA (a short simulation sketch illustrating this and the Poisson process follows this list)
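As a quick illustration of the lecture concepts (not part of the exercise data), here is a minimal sketch, assuming spatstat is installed, that simulates a homogeneous Poisson point process in a window and summarises it with quadrat counts:
library(spatstat)
sim.win <- owin(c(0, 1), c(0, 1))               #the "window" (an owin object)
sim.pp <- rpoispp(lambda = 100, win = sim.win)  #Poisson point process, expected 100 points per unit area
plot(sim.pp)                                    #a completely random (CSR) point pattern
quadratcount(sim.pp, nx = 3, ny = 3)            #quadrat-based summary: counts per cell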
The in-class exercise explores the WikiLeaks Iraq Data and performs a point pattern analysis.
Part 1: Organize the data
# Load all required libraries
library(spatstat)
## Loading required package: mgcv
## This is mgcv 1.7-22. For overview type 'help("mgcv-package")'.
## Loading required package: deldir
## deldir 0.0-21
## spatstat 1.30-0 Type 'help(spatstat)' for an overview of spatstat
## 'latest.news()' for news on latest version 'licence.polygons()' for
## licence information on polygon calculations
library(sp)
library(rgdal)
## rgdal: version: 0.8.1, (SVN revision 415) Geospatial Data Abstraction
## Library extensions to R successfully loaded Loaded GDAL runtime: GDAL
## 1.9.2, released 2012/10/08 Path to GDAL shared files: C:/Program
## Files/R/R-2.15.2/library/rgdal/gdal GDAL does not use iconv for recoding
## strings. Loaded PROJ.4 runtime: Rel. 4.7.1, 23 September 2009,
## [PJ_VERSION: 470] Path to PROJ.4 shared files: C:/Program
## Files/R/R-2.15.2/library/rgdal/proj
library(maptools)
## Loading required package: foreign
## Loading required package: grid
## Loading required package: lattice
## Checking rgeos availability: TRUE
library(ggmap)
## Loading required package: ggplot2
deaths <- read.csv("E:/Quant/inClassExercises/InClassExerciseData/Deaths only.csv") #Read in the dataset
# Clean up the Type field. sub() replaces the first match of a pattern in
# each element of a character vector with the replacement text.
deaths$Type <- sub(deaths$Type, pattern = "CRIMINAL EVENT", replacement = "Criminal Event")
deaths$Type <- sub(deaths$Type, pattern = "criminal event", replacement = "Criminal Event")
deaths$Type <- sub(deaths$Type, pattern = "EXPLOSIVE HAZARD", replacement = "Explosive Hazard")
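# Side note (a sketch, not required for the exercise): the two CRIMINAL
# EVENT calls above could be collapsed into one case-insensitive
# substitution, assuming no other spellings occur in the data.
deaths$Type <- sub("criminal event", "Criminal Event", deaths$Type, ignore.case = TRUE)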
# Re-factor so the old (now unused) levels are dropped:
deaths$Type <- factor(deaths$Type)
levels(deaths$Type)
## [1] "Criminal Event" "Enemy Action" "Explosive Hazard"
## [4] "Friendly Action" "Friendly Fire" "Non-Combat Event"
## [7] "Other" "Suspicious Incident" "Threat Report"
deaths.marks.simple <- deaths[, 4]
deaths.marks.full <- deaths[, c(4, 7, 9:18)]
Part 2: Create a Map
deaths.spp <- SpatialPointsDataFrame(coords = deaths[, 19:20], proj4string = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"),
    data = deaths.marks.full) #Creates a spatial points data frame. Note the spaces between the elements of the proj4 string; without them CRS() fails.
# Use GGMAP to create a map
map <- get_map(location = "Iraq", maptype = "hybrid", zoom = 6) #defines the area of interest and type of map.
ggmap(map, extent = "panel") #This plots the google map.
# Put our data on the map
ggmap(map, extent = "panel") + geom_point(data = deaths, aes(x = Longitude,
    y = Latitude), alpha = 0.1, color = "red") + ggtitle("WikiLeaks Iraq Data \n Incidents involving Death(s)") #plots points on top of the google map panel. Inside aes(), refer to columns by name rather than with deaths$.
# Scale the symbology by the number of deaths: (COOL!)
ggmap(map, extent = "panel") + geom_point(data = deaths, aes(x = Longitude,
    y = Latitude, size = Total.deaths), alpha = 0.3, color = "red") +
    ggtitle("WikiLeaks Iraq Data \n Incidents involving Death(s)")
# This is a lot of points... map only a subset of the data...
ggmap(map, extent = "panel") + geom_point(data = deaths[deaths$Enemy.detained >
0, ], aes(x = Longitude, y = Latitude, size = Enemy.detained), alpha = 0.5,
color = "green") + ggtitle("WikiLeaks Iraq Data \n Incidents involving Dententions(s)")
Part 3: Exploratory Spatial Analysis
deaths.spp.rp <- spTransform(deaths.spp, CRS("+init=epsg:3839")) #Reproject from lat/long into a projected CRS whose units are metres
# attach these values to the CSV...
deaths.spp.rp$Easting <- coordinates(deaths.spp.rp)[, 1]
deaths.spp.rp$Northing <- coordinates(deaths.spp.rp)[, 2]
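# Quick sanity check (a sketch, nothing downstream depends on it): print
# the CRS and the first few coordinates to confirm the units are now
# metres rather than degrees.
proj4string(deaths.spp.rp)
head(coordinates(deaths.spp.rp))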
# looking at the entire country can be difficult... Let's refocus our
# study area to Baghdad...
deaths.df.rp.bag <- deaths.spp.rp@data[deaths.spp.rp@data$Region == "MND-BAGHDAD",
] #Note, this is not a spatial data frame.
plot(x = deaths.df.rp.bag$Easting, y = deaths.df.rp.bag$Northing) #plot the points. The raw plot is hard to read because a few outlying points stretch the axes.
# Remove the outliers in the north...
deaths.df.rp.bag <- deaths.df.rp.bag[deaths.df.rp.bag$Northing < 4960000 & deaths.df.rp.bag$Easting >
9960000, ] #Select all the data within a given bounds...
plot(x = deaths.df.rp.bag$Easting, y = deaths.df.rp.bag$Northing)
# Select a subset of this yet...
deaths.df.rp.bag <- deaths.df.rp.bag[(deaths.df.rp.bag$Northing < 4950000 &
deaths.df.rp.bag$Northing > 4940000) & (deaths.df.rp.bag$Easting > 1e+07 &
deaths.df.rp.bag$Easting < 10010000), ]
plot(x = deaths.df.rp.bag$Easting, y = deaths.df.rp.bag$Northing)
Some useful info:
Today we will use three types of spatstat objects:
ppp: a set of points located in space. Points can have attributes called “marks”, or the ppp object can be “unmarked”, in which case it stores only locations. Marks are used to differentiate types of points or to store a continuous variable describing each point. For example, if the points were trees the marks might record “species” and “dbh”.
owin: a “window” which defines the spatial extent of the ppp object. All ppp objects must have an owin.
im: a pixel image, like a raster layer in a GIS. Often derived from a ppp object.
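A toy sketch (made-up coordinates, not the exercise data) showing how the three object types relate:
toy.win <- owin(xrange = c(0, 100), yrange = c(0, 100))  #owin: the window
toy.ppp <- ppp(x = runif(50, 0, 100), y = runif(50, 0, 100), window = toy.win,
    marks = factor(sample(c("A", "B"), 50, replace = TRUE)))  #a marked ppp
toy.im <- density(unmark(toy.ppp), sigma = 10)  #im: a pixel image derived from the ppp
plot(toy.im); plot(unmark(toy.ppp), add = TRUE)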
deaths.chull <- convexhull.xy(x = deaths.df.rp.bag$Easting, y = deaths.df.rp.bag$Northing) #Define the Convex hull of the area of interest...
# Note: this chunk is set not to run, because locator() needs interactive
# input (clicking on the plot) and would fail otherwise. It lets us define
# a search window by hand...
plot(x = deaths.df.rp.bag$Easting, y = deaths.df.rp.bag$Northing)
bdry <- locator() #This is the manual selection process
deaths.win <- owin(poly = bdry)
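# Non-interactive alternative (a sketch): instead of digitising a polygon
# with locator(), build a rectangular owin straight from the data extent.
deaths.win.bbox <- owin(xrange = range(deaths.df.rp.bag$Easting),
    yrange = range(deaths.df.rp.bag$Northing))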
# Create a ppp object
deaths.ppp <- ppp(x = deaths.df.rp.bag$Easting, y = deaths.df.rp.bag$Northing,
    window = deaths.chull, marks = deaths.df.rp.bag[, c(1, 3:12)]) #Safe to ignore the warning about duplicated points
## Warning: data contain duplicated points
unitname(deaths.ppp) <- c("meter", "meters") #Record the unit of length (singular, plural) so summaries and plots are labelled in metres
# id.ppp <- id.ppp[id.win]#Or, we can use the one that was manually
# generated!
###################################### Plot the ppp object:
p1 <- plot.ppp(deaths.ppp) #Using the convex hull as the window
legend(max(coords(deaths.ppp)$x) + 1000, mean(coords(deaths.ppp)$y), pch = p1,
    legend = names(p1), cex = 0.5) #Oy... that's a lot of detail in the legend...
# That plot was very difficult to interpret... create a density map using
# a kernel:
plot(density(deaths.ppp, sigma = 1000)) #using a gaussian kernel
# Or make a contour map:
contour(density(deaths.ppp, 1000), axes = F) #The saddle is much more visible now.
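# The bandwidth sigma = 1000 was picked by eye. A data-driven alternative
# (a sketch, assuming this spatstat version provides bw.diggle()) is
# Diggle's cross-validated bandwidth:
sigma.cv <- bw.diggle(unmark(deaths.ppp))
plot(density(unmark(deaths.ppp), sigma = sigma.cv))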
###################################### Next, we can look at 3D plots...
layout(matrix(c(1, 1, 1, 2, 3, 4), 2, 3, byrow = TRUE))
persp(density(deaths.ppp, 1000))
## [,1] [,2] [,3] [,4]
## [1,] 2.016e-04 0.000e+00 0.000e+00 0.000e+00
## [2,] 0.000e+00 5.219e-05 -1.948e-04 1.948e-04
## [3,] 0.000e+00 1.866e+04 5.000e+03 -5.000e+03
## [4,] -2.018e+03 -2.588e+02 9.602e+02 -9.592e+02
# Adjust the angle the data is viewed at:
persp(density(deaths.ppp, 1000), theta = 150, phi = 15)
## [,1] [,2] [,3] [,4]
## [1,] -1.746e-04 -2.610e-05 9.739e-05 -9.739e-05
## [2,] 1.008e-04 -4.520e-05 1.687e-04 -1.687e-04
## [3,] -5.915e-13 1.866e+04 5.000e+03 -5.000e+03
## [4,] 1.249e+03 4.839e+02 -1.811e+03 1.812e+03
persp(density(deaths.ppp, 1000), theta = 120, phi = 15)
## [,1] [,2] [,3] [,4]
## [1,] -1.008e-04 -4.520e-05 1.687e-04 -1.687e-04
## [2,] 1.746e-04 -2.610e-05 9.739e-05 -9.739e-05
## [3,] -1.024e-12 1.866e+04 5.000e+03 -5.000e+03
## [4,] 1.452e+02 5.805e+02 -2.172e+03 2.173e+03
persp(density(deaths.ppp, 1000), theta = 100, phi = 15)
## [,1] [,2] [,3] [,4]
## [1,] -3.502e-05 -5.140e-05 1.918e-04 -1.918e-04
## [2,] 1.986e-04 -9.063e-06 3.382e-05 -3.382e-05
## [3,] -1.165e-12 1.866e+04 5.000e+03 -5.000e+03
## [4,] -6.317e+02 5.583e+02 -2.089e+03 2.090e+03
###################################### Perhaps something useful would be to
###################################### organize that original legend...
###################################### Plot the data one event type at a time...
plot(split(deaths.ppp)) #Dot density maps
plot(density(split(deaths.ppp))) #Kernel maps
contour(density(split(deaths.ppp)), axes = F) #contour maps
plot(deaths.ppp, which.marks = "Total.deaths", maxsize = 3000, main = "Total Deaths") #proportional circles
## 0 50 100 150 200
## 0.0 828.7 1657.5 2486.2 3314.9
Phew, that was a lot of data visualization…
Part 4: Exploratory Point Pattern Analysis
# Clean up after visualization so we can do some analysis:
deaths.ppp.simp <- unmark(deaths.ppp) #unmark() strips the marks, leaving only the point locations
deathsList <- marks(deaths.ppp) #extract the marks (here a data frame) so they can be re-attached later
deaths.ppp.simp.death <- deaths.ppp.simp %mark% deathsList #%mark% is spatstat's operator for attaching marks to a point pattern (equivalent to marks(X) <- value)
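# Equivalent sketch: %mark% is just the operator form of assigning marks,
# so the same object could be built with marks<-.
deaths.ppp.simp.death2 <- deaths.ppp.simp
marks(deaths.ppp.simp.death2) <- deathsList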
# Create a kernel density map of deaths:
# deaths.death.im <- density(deaths.ppp.simp.death, sigma = 1000,
#     weights = marks(deaths.ppp.simp.death), kernel = 'rectangular',
#     eps = 100)
# The call above fails with an error about the weights not being a
# consistent length: 'weights' must be one numeric value per point, but
# marks() returns the whole marks data frame. Drop the weights for now:
deaths.death.im <- density(deaths.ppp.simp.death, sigma = 1000, kernel = "rectangular",
    eps = 100) #kernel map with a pixel resolution of 100 metres (map units)
plot(deaths.death.im, main = "Weighted Density Map of \nWikiLeaks Deaths in Baghdad")
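# If a genuinely weighted surface is wanted (a sketch): 'weights' needs one
# numeric value per point, so pass a single mark column such as Total.deaths
# (column name taken from the marks used earlier) rather than the whole
# marks data frame.
deaths.death.w.im <- density(deaths.ppp.simp.death, sigma = 1000,
    weights = marks(deaths.ppp.simp.death)$Total.deaths, kernel = "rectangular",
    eps = 100)
plot(deaths.death.w.im)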
# Change the color scheme... We can use colors() to see all the colors
# available in R. The results are somewhat overwhelming...
grRd <- colorRampPalette(c("grey60", "red"))
co <- colourmap(grRd(5), range = c(1e-06, 7e-05)) #Define legend stretch for colors
plot(deaths.death.im, main = "Weighted Density Map of \nWikiLeaks Deaths in Baghdad",
col = co)
# Classify the data:
death.map.breaks <- quantile(deaths.death.im, probs = (0:3)/3) #classify into 3 classes using quantile
death.map.cuts <- cut(deaths.death.im, breaks = death.map.breaks, labels = c("Low DD",
"Medium DD", "High DD"))
death.map.quartile <- tess(image = death.map.cuts) #tess() turns the classified image into a tessellation (one tile per class)
plot(death.map.quartile)
# Count the number of points falling in each category (tile)...
qCount.im <- quadratcount(X = deaths.ppp, tess = death.map.quartile)
## Warning: Tessellation does not contain all the points of X
plot(qCount.im)
# Overlay a coarser grid over the data:
qCount <- quadratcount(X = deaths.ppp, nx = 4, ny = 4)
plot(qCount) #The legend doesn't make sense any more...
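# This ties back to quadrat-based PPA from the lecture: a chi-squared test
# of complete spatial randomness (CSR) on the same 4 x 4 grid (a sketch).
quadrat.test(unmark(deaths.ppp), nx = 4, ny = 4)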
plot(rhohat(unmark(deaths.ppp), deaths.death.im)) #rhohat() estimates point intensity as a function of a covariate
# Here the covariate is the death-density image, so the plot shows how dense
# the incident points are at each value of the death density map.
############################ Split up the data again.
deaths.ppp.types <- split(deaths.ppp, which.marks = "Type")
deaths.ppp.types <- unmark(deaths.ppp.types)
# Look at criminal events:
deaths.ppp.types$"Criminal Event" #printing the object gives a summary of the pattern
## planar point pattern: 1816 points
## window: polygonal boundary
## enclosing rectangle: [10000045, 10009992] x [4940001, 4949997] meters
nnc <- nnclean(X = deaths.ppp.types$"Criminal Event", k = 20) #nearest-neighbour cleaning: uses the k = 20 nearest-neighbour distances to split the pattern into a clustered component and background noise
## Iteration 1 logLik = -361092.732623139 p = 0.6355
## Iteration 2 logLik = -359146.366272647 p = 0.5596
## Iteration 3 logLik = -358203.24296497 p = 0.4912
## Iteration 4 logLik = -358005.479109628 p = 0.4498
## Estimated parameters:
## p [cluster] = 0.44985
## lambda [cluster] = 7.2878e-05
## lambda [noise] = 1.25e-05
plot(split(nnc))
Oy, I have no idea how this is going to be condensed into my R Journal…