Key Concepts covered in the lecture include:
1. P.P. Terminology (point, event, window)
2. Poisson Point Process
3. Purpose of Point Pattern Analysis
4. Types of distribution (random, uniform, clustered)
5. Kernel estimation and types of kernels
6. Quadrat-based PPA (a short simulation sketch illustrating this and the Poisson process follows this list)
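As a quick illustration of the lecture concepts (not part of the exercise data), here is a minimal sketch, assuming spatstat is installed, that simulates a homogeneous Poisson point process in a window and summarises it with quadrat counts:
library(spatstat)
sim.win <- owin(c(0, 1), c(0, 1))               #the "window" (an owin object)
sim.pp <- rpoispp(lambda = 100, win = sim.win)  #Poisson point process, expected 100 points per unit area
plot(sim.pp)                                    #a completely random (CSR) point pattern
quadratcount(sim.pp, nx = 3, ny = 3)            #quadrat-based summary: counts per cell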
The in-class exercise explores the WikiLeaks Iraq Data and performs a point pattern analysis.
Part 1: Organize the data
# Load all required libraries
library(spatstat)
## Loading required package: mgcv
## This is mgcv 1.7-22. For overview type 'help("mgcv-package")'.
## Loading required package: deldir
## deldir 0.0-21
## spatstat 1.30-0 Type 'help(spatstat)' for an overview of spatstat
## 'latest.news()' for news on latest version 'licence.polygons()' for
## licence information on polygon calculations
library(sp)
library(rgdal)
## rgdal: version: 0.8.1, (SVN revision 415) Geospatial Data Abstraction
## Library extensions to R successfully loaded Loaded GDAL runtime: GDAL
## 1.9.2, released 2012/10/08 Path to GDAL shared files: C:/Program
## Files/R/R-2.15.2/library/rgdal/gdal GDAL does not use iconv for recoding
## strings. Loaded PROJ.4 runtime: Rel. 4.7.1, 23 September 2009,
## [PJ_VERSION: 470] Path to PROJ.4 shared files: C:/Program
## Files/R/R-2.15.2/library/rgdal/proj
library(maptools)
## Loading required package: foreign
## Loading required package: grid
## Loading required package: lattice
## Checking rgeos availability: TRUE
library(ggmap)
## Loading required package: ggplot2
deaths <- read.csv("E:/Quant/inClassExercises/InClassExerciseData/Deaths only.csv") #Read in the dataset
# Clean up the Type field. sub() replaces the first match of a pattern in
# each element of a character vector with the replacement text.
deaths$Type <- sub(deaths$Type, pattern = "CRIMINAL EVENT", replacement = "Criminal Event")
deaths$Type <- sub(deaths$Type, pattern = "criminal event", replacement = "Criminal Event")
deaths$Type <- sub(deaths$Type, pattern = "EXPLOSIVE HAZARD", replacement = "Explosive Hazard")
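# Side note (a sketch, not required for the exercise): the two CRIMINAL
# EVENT calls above could be collapsed into one case-insensitive
# substitution, assuming no other spellings occur in the data.
deaths$Type <- sub("criminal event", "Criminal Event", deaths$Type, ignore.case = TRUE)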
# Re-factor so the old (now unused) levels are dropped:
deaths$Type <- factor(deaths$Type)
levels(deaths$Type)
## [1] "Criminal Event" "Enemy Action" "Explosive Hazard"
## [4] "Friendly Action" "Friendly Fire" "Non-Combat Event"
## [7] "Other" "Suspicious Incident" "Threat Report"
deaths.marks.simple <- deaths[, 4]
deaths.marks.full <- deaths[, c(4, 7, 9:18)]
Part 2: Create a Map
deaths.spp <- SpatialPointsDataFrame(coords = deaths[, 19:20], proj4string = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"),
    data = deaths.marks.full) #Creates a spatial points data frame. Note the spaces between the elements of the proj4 string; without them CRS() fails.
# Use GGMAP to create a map
map <- get_map(location = "Iraq", maptype = "hybrid", zoom = 6) #defines the area of interest and type of map.
ggmap(map, extent = "panel") #This plots the google map.
# Put our data on the map
ggmap(map, extent = "panel") + geom_point(data = deaths, aes(x = Longitude,
    y = Latitude), alpha = 0.1, color = "red") + ggtitle("WikiLeaks Iraq Data \n Incidents involving Death(s)") #plots points on top of the google map panel. Inside aes(), refer to columns by name rather than with deaths$.
# Scale the symbology by the number of deaths: (COOL!)
ggmap(map, extent = "panel") + geom_point(data = deaths, aes(x = Longitude,
    y = Latitude, size = Total.deaths), alpha = 0.3, color = "red") +
    ggtitle("WikiLeaks Iraq Data \n Incidents involving Death(s)")
# This is a lot of points... map only a subset of the data...
ggmap(map, extent = "panel") + geom_point(data = deaths[deaths$Enemy.detained >
0, ], aes(x = Longitude, y = Latitude, size = Enemy.detained), alpha = 0.5,
color = "green") + ggtitle("WikiLeaks Iraq Data \n Incidents involving Dententions(s)")
Part 3: Exploratory Spatial Analysis
deaths.spp.rp <- spTransform(deaths.spp, CRS("+init=epsg:3839")) #Reproject from lat/long into a projected CRS whose units are metres
# attach these values to the CSV...
deaths.spp.rp$Easting <- coordinates(deaths.spp.rp)[, 1]
deaths.spp.rp$Northing <- coordinates(deaths.spp.rp)[, 2]
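# Quick sanity check (a sketch, nothing downstream depends on it): print
# the CRS and the first few coordinates to confirm the units are now
# metres rather than degrees.
proj4string(deaths.spp.rp)
head(coordinates(deaths.spp.rp))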
# looking at the entire country can be difficult... Let's refocus our
# study area to Baghdad...
deaths.df.rp.bag <- deaths.spp.rp@data[deaths.spp.rp@data$Region == "MND-BAGHDAD",
] #Note, this is not a spatial data frame.
plot(x = deaths.df.rp.bag$Easting, y = deaths.df.rp.bag$Northing) #plot the points. The raw plot is hard to read because a few outlying points stretch the axes.
# Remove the outliers in the north...
deaths.df.rp.bag <- deaths.df.rp.bag[deaths.df.rp.bag$Northing < 4960000 & deaths.df.rp.bag$Easting >
9960000, ] #Select all the data within a given bounds...
plot(x = deaths.df.rp.bag$Easting, y = deaths.df.rp.bag$Northing)
# Select a subset of this yet...
deaths.df.rp.bag <- deaths.df.rp.bag[(deaths.df.rp.bag$Northing < 4950000 &
deaths.df.rp.bag$Northing > 4940000) & (deaths.df.rp.bag$Easting > 1e+07 &
deaths.df.rp.bag$Easting < 10010000), ]
plot(x = deaths.df.rp.bag$Easting, y = deaths.df.rp.bag$Northing)
Some useful info:
Today we will use three types of spatstat objects:
ppp: a set of points located in space. Points can have attributes called “marks”, or the ppp object can be “unmarked”, in which case it stores only locations. Marks are used to differentiate types of points or to store a continuous variable describing each point. For example, if the points were trees the marks might record “species” and “dbh”.
owin: a “window” which defines the spatial extent of the ppp object. All ppp objects must have an owin.
im: a pixel image, like a raster layer in a GIS. Often derived from a ppp object.
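A toy sketch (made-up coordinates, not the exercise data) showing how the three object types relate:
toy.win <- owin(xrange = c(0, 100), yrange = c(0, 100))  #owin: the window
toy.ppp <- ppp(x = runif(50, 0, 100), y = runif(50, 0, 100), window = toy.win,
    marks = factor(sample(c("A", "B"), 50, replace = TRUE)))  #a marked ppp
toy.im <- density(unmark(toy.ppp), sigma = 10)  #im: a pixel image derived from the ppp
plot(toy.im); plot(unmark(toy.ppp), add = TRUE)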
deaths.chull <- convexhull.xy(x = deaths.df.rp.bag$Easting, y = deaths.df.rp.bag$Northing) #Define the Convex hull of the area of interest...
# Note: this chunk is set not to run, because locator() needs interactive
# input (clicking on the plot) and would fail otherwise. It lets us define
# a search window by hand...
plot(x = deaths.df.rp.bag$Easting, y = deaths.df.rp.bag$Northing)
bdry <- locator() #This is the manual selection process
deaths.win <- owin(poly = bdry)
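# Non-interactive alternative (a sketch): instead of digitising a polygon
# with locator(), build a rectangular owin straight from the data extent.
deaths.win.bbox <- owin(xrange = range(deaths.df.rp.bag$Easting),
    yrange = range(deaths.df.rp.bag$Northing))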
# Create a ppp object
deaths.ppp <- ppp(x = deaths.df.rp.bag$Easting, y = deaths.df.rp.bag$Northing,
    window = deaths.chull, marks = deaths.df.rp.bag[, c(1, 3:12)]) #Safe to ignore the warning about duplicated points
## Warning: data contain duplicated points
unitname(deaths.ppp) <- c("meter", "meters") #Record the unit of length (singular, plural) so summaries and plots are labelled in metres
# id.ppp <- id.ppp[id.win]#Or, we can use the one that was manually
# generated!
###################################### Plot the ppp object:
p1 <- plot.ppp(deaths.ppp) #Using the convex hull as the window
legend(max(coords(deaths.ppp)$x) + 1000, mean(coords(deaths.ppp)$y), pch = p1,
    legend = names(p1), cex = 0.5) #Oy... that's a lot of detail in the legend...
# That plot was very difficult to interpret... create a density map using
# a kernel:
plot(density(deaths.ppp, sigma = 1000)) #using a gaussian kernel
# Or make a contour map:
contour(density(deaths.ppp, 1000), axes = F) #The saddle is much more visible now.
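# The bandwidth sigma = 1000 was picked by eye. A data-driven alternative
# (a sketch, assuming this spatstat version provides bw.diggle()) is
# Diggle's cross-validated bandwidth:
sigma.cv <- bw.diggle(unmark(deaths.ppp))
plot(density(unmark(deaths.ppp), sigma = sigma.cv))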
###################################### Next, we can look at 3D plots...
layout(matrix(c(1, 1, 1, 2, 3, 4), 2, 3, byrow = TRUE))
persp(density(deaths.ppp, 1000))
## [,1] [,2] [,3] [,4]
## [1,] 2.016e-04 0.000e+00 0.000e+00 0.000e+00
## [2,] 0.000e+00 5.219e-05 -1.948e-04 1.948e-04
## [3,] 0.000e+00 1.866e+04 5.000e+03 -5.000e+03
## [4,] -2.018e+03 -2.588e+02 9.602e+02 -9.592e+02
# Adjust the angle the data is viewed at:
persp(density(deaths.ppp, 1000), theta = 150, phi = 15)
## [,1] [,2] [,3] [,4]
## [1,] -1.746e-04 -2.610e-05 9.739e-05 -9.739e-05
## [2,] 1.008e-04 -4.520e-05 1.687e-04 -1.687e-04
## [3,] -5.915e-13 1.866e+04 5.000e+03 -5.000e+03
## [4,] 1.249e+03 4.839e+02 -1.811e+03 1.812e+03
persp(density(deaths.ppp, 1000), theta = 120, phi = 15)
## [,1] [,2] [,3] [,4]
## [1,] -1.008e-04 -4.520e-05 1.687e-04 -1.687e-04
## [2,] 1.746e-04 -2.610e-05 9.739e-05 -9.739e-05
## [3,] -1.024e-12 1.866e+04 5.000e+03 -5.000e+03
## [4,] 1.452e+02 5.805e+02 -2.172e+03 2.173e+03
persp(density(deaths.ppp, 1000), theta = 100, phi = 15)
## [,1] [,2] [,3] [,4]
## [1,] -3.502e-05 -5.140e-05 1.918e-04 -1.918e-04
## [2,] 1.986e-04 -9.063e-06 3.382e-05 -3.382e-05
## [3,] -1.165e-12 1.866e+04 5.000e+03 -5.000e+03
## [4,] -6.317e+02 5.583e+02 -2.089e+03 2.090e+03
###################################### Perhaps something useful would be to
###################################### organize that original legend...
###################################### Plot the data one event type at a time...
plot(split(deaths.ppp)) #Dot density maps
plot(density(split(deaths.ppp))) #Kernel maps
contour(density(split(deaths.ppp)), axes = F) #contour maps
plot(deaths.ppp, which.marks = "Total.deaths", maxsize = 3000, main = "Total Deaths") #proportional circles
## 0 50 100 150 200
## 0.0 828.7 1657.5 2486.2 3314.9
Phew, that was a lot of data visualization…
Part 4: Exploratory Point Pattern Analysis
# Clean up after visualization so we can do some analysis:
deaths.ppp.simp <- unmark(deaths.ppp) #unmark() strips the marks, leaving only the point locations
deathsList <- marks(deaths.ppp) #extract the marks (here a data frame) so they can be re-attached later
deaths.ppp.simp.death <- deaths.ppp.simp %mark% deathsList #%mark% is spatstat's operator for attaching marks to a point pattern (equivalent to marks(X) <- value)
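# Equivalent sketch: %mark% is just the operator form of assigning marks,
# so the same object could be built with marks<-.
deaths.ppp.simp.death2 <- deaths.ppp.simp
marks(deaths.ppp.simp.death2) <- deathsList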
# Create a kernel density map of deaths:
# deaths.death.im <- density(deaths.ppp.simp.death, sigma = 1000,
#     weights = marks(deaths.ppp.simp.death), kernel = 'rectangular',
#     eps = 100)
# The call above fails with an error about the weights not being a
# consistent length: 'weights' must be one numeric value per point, but
# marks() returns the whole marks data frame. Drop the weights for now:
deaths.death.im <- density(deaths.ppp.simp.death, sigma = 1000, kernel = "rectangular",
    eps = 100) #kernel map with a pixel resolution of 100 metres (map units)
plot(deaths.death.im, main = "Weighted Density Map of \nWikiLeaks Deaths in Baghdad")
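# If a genuinely weighted surface is wanted (a sketch): 'weights' needs one
# numeric value per point, so pass a single mark column such as Total.deaths
# (column name taken from the marks used earlier) rather than the whole
# marks data frame.
deaths.death.w.im <- density(deaths.ppp.simp.death, sigma = 1000,
    weights = marks(deaths.ppp.simp.death)$Total.deaths, kernel = "rectangular",
    eps = 100)
plot(deaths.death.w.im)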
# Change the color scheme... We can use colors() to see all the colors
# available in R. The results are somewhat overwhelming...
grRd <- colorRampPalette(c("grey60", "red"))
co <- colourmap(grRd(5), range = c(1e-06, 7e-05)) #Define legend stretch for colors
plot(deaths.death.im, main = "Weighted Density Map of \nWikiLeaks Deaths in Baghdad",
col = co)
# Classify the data:
death.map.breaks <- quantile(deaths.death.im, probs = (0:3)/3) #classify into 3 classes using quantile
death.map.cuts <- cut(deaths.death.im, breaks = death.map.breaks, labels = c("Low DD",
"Medium DD", "High DD"))
death.map.quartile <- tess(image = death.map.cuts) #tess() turns the classified image into a tessellation (one tile per class)
plot(death.map.quartile)
# Count the number of points falling in each category (tile)...
qCount.im <- quadratcount(X = deaths.ppp, tess = death.map.quartile)
## Warning: Tessellation does not contain all the points of X
plot(qCount.im)
# Overlay a coarser grid over the data:
qCount <- quadratcount(X = deaths.ppp, nx = 4, ny = 4)
plot(qCount) #The legend doesn't make sense any more...
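# This ties back to quadrat-based PPA from the lecture: a chi-squared test
# of complete spatial randomness (CSR) on the same 4 x 4 grid (a sketch).
quadrat.test(unmark(deaths.ppp), nx = 4, ny = 4)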
plot(rhohat(unmark(deaths.ppp), deaths.death.im)) #rhohat() estimates point intensity as a function of a covariate
# Here the covariate is the death-density image, so the plot shows how dense
# the incident points are at each value of the death density map.
############################ Split up the data again.
deaths.ppp.types <- split(deaths.ppp, which.marks = "Type")
deaths.ppp.types <- unmark(deaths.ppp.types)
# Look at criminal events:
deaths.ppp.types$"Criminal Event" #printing the object gives a summary of the pattern
## planar point pattern: 1816 points
## window: polygonal boundary
## enclosing rectangle: [10000045, 10009992] x [4940001, 4949997] meters
nnc <- nnclean(X = deaths.ppp.types$"Criminal Event", k = 20) #nearest-neighbour cleaning: uses the k = 20 nearest-neighbour distances to split the pattern into a clustered component and background noise
## Iteration 1 logLik = -361092.732623139 p = 0.6355
## Iteration 2 logLik = -359146.366272647 p = 0.5596
## Iteration 3 logLik = -358203.24296497 p = 0.4912
## Iteration 4 logLik = -358005.479109628 p = 0.4498
## Estimated parameters:
## p [cluster] = 0.44985
## lambda [cluster] = 7.2878e-05
## lambda [noise] = 1.25e-05
plot(split(nnc))
Oy, I have no idea how this is going to be condensed into my R Journal…