Exercise 10 - Point Pattern Analysis

Load Data & Packages and Doctor Data

library(spatstat)

## Loading required package: mgcv

## This is mgcv 1.7-22. For overview type 'help("mgcv-package")'.

## Loading required package: deldir

## deldir 0.0-21

## spatstat 1.31-1 Type 'help(spatstat)' for an overview of spatstat
## 'latest.news()' for news on latest version 'licence.polygons()' for
## licence information on polygon calculations

library(sp)
library(rgdal)

## rgdal: version: 0.8-5, (SVN revision 449) Geospatial Data Abstraction
## Library extensions to R successfully loaded Loaded GDAL runtime: GDAL
## 1.9.0, released 2011/12/29 Path to GDAL shared files:
## /Library/Frameworks/R.framework/Versions/2.15/Resources/library/rgdal/gdal
## Loaded PROJ.4 runtime: Rel. 4.8.0, 6 March 2012, [PJ_VERSION: 480] Path to
## PROJ.4 shared files:
## /Library/Frameworks/R.framework/Versions/2.15/Resources/library/rgdal/proj

library(maptools)

## Loading required package: foreign

## Loading required package: grid

## Loading required package: lattice

## Checking rgeos availability: FALSE Note: when rgeos is not available,
## polygon geometry computations in maptools depend on gpclib, which has a
## restricted licence. It is disabled by default; to enable gpclib, type
## gpclibPermit()

library(ggmap)

## Loading required package: ggplot2

library(mapproj)

## Warning: package 'mapproj' was built under R version 2.15.3

## Loading required package: maps


id <- read.csv("/Users/telekineticturtle/Desktop/Colorado 13/Quant Methods/Data/Deathsonly.csv", 
    header = T)
summary(id)

##                                 Report.key             Date.and.time  
##  00023432-98D1-7C41-A16F6C4DC998257A :    1   13/06/2006 00:00:   53  
##  0004720F-CDC0-4AB7-A02E-9B2DD86EFDE7:    1   19/07/2006 00:00:   50  
##  0004A198-CDFA-6C7A-316C5E0B1B99315A :    1   26/07/2006 00:00:   48  
##  000560C8-BC1A-7033-558315CE07936A6A :    1   29/06/2006 00:00:   45  
##  0007F13F-423D-4561-5458204F2B748859 :    1   16/07/2006 00:00:   44  
##  00084758-0A44-4D3F-971C-7675E2A227A4:    1   23/07/2006 00:00:   44  
##  (Other)                             :52029   (Other)         :51751  
##       YEAR                     Type                     Category    
##  2006   :11913   Criminal Event  :24353   Murder            :24044  
##  2007   :10432   Enemy Action    :11678   IED Explosion     : 9101  
##  2008   : 3429   Explosive Hazard: 9876   Direct Fire       : 7982  
##  2005   : 2741   Friendly Action : 4631   Attack            : 2343  
##  2004   : 1925   Non-Combat Event: 1107   Small Unit Actions: 1419  
##  2009   : 1208   Friendly Fire   :  184   Indirect Fire     : 1349  
##  (Other):20387   (Other)         :  206   (Other)           : 5797  
##                                                                                           Title      
##  UNK                                                                                         : 1521  
##                                                                                              : 1341  
##  (CRIMINAL EVENT) MURDER RPT BY NOT PROVIDED IVO  (ROUTE UNKNOWN): 1 CIV KIA                 :  304  
##  (CRIMINAL EVENT) MURDER RPT   BAGHDAD OPS/NCC : 1 CIV KIA                                   :  255  
##  (CRIMINAL EVENT) MURDER RPT   BAGHDAD POLICE OPS/NCC : 1 CIV KIA                            :  197  
##  (CRIMINAL EVENT) MURDER RPT BY NOT PROVIDED IVO BAGHDAD (ZONE 37) (ROUTE UNKNOWN): 1 CIV KIA:  171  
##  (Other)                                                                                     :48246  
##          Region        Attack.on     Coalition.forces.wounded
##  MND-BAGHDAD:23782   ENEMY  :45967   Min.   :  0.00          
##  MND-N      :15944   FRIEND : 4815   1st Qu.:  0.00          
##  MND-C      : 4678   NEUTRAL: 1253   Median :  0.00          
##  MNF-W      : 4472                   Mean   :  0.15          
##  MND-SE     : 2668                   3rd Qu.:  0.00          
##  MND-S      :  378                   Max.   :118.00          
##  (Other)    :  113                                           
##  Coalition.forces.killed Iraq.forces.wounded Iraq.forces.killed
##  Min.   : 0.000          Min.   : 0.00       Min.   : 0.00     
##  1st Qu.: 0.000          1st Qu.: 0.00       1st Qu.: 0.00     
##  Median : 0.000          Median : 0.00       Median : 0.00     
##  Mean   : 0.072          Mean   : 0.34       Mean   : 0.29     
##  3rd Qu.: 0.000          3rd Qu.: 0.00       3rd Qu.: 0.00     
##  Max.   :31.000          Max.   :97.00       Max.   :48.00     
##                                                                
##   Civilian.wia    Civilian.kia     Enemy.wia        Enemy.kia     
##  Min.   :  0.0   Min.   :  0.0   Min.   :  0.00   Min.   :  0.00  
##  1st Qu.:  0.0   1st Qu.:  0.0   1st Qu.:  0.00   1st Qu.:  0.00  
##  Median :  0.0   Median :  1.0   Median :  0.00   Median :  0.00  
##  Mean   :  1.2   Mean   :  1.3   Mean   :  0.07   Mean   :  0.46  
##  3rd Qu.:  0.0   3rd Qu.:  1.0   3rd Qu.:  0.00   3rd Qu.:  0.00  
##  Max.   :641.0   Max.   :436.0   Max.   :194.00   Max.   :262.00  
##                                                                   
##  Enemy.detained   Total.deaths      Latitude      Longitude   
##  Min.   :  0.0   Min.   :  1.0   Min.   :29.1   Min.   :38.9  
##  1st Qu.:  0.0   1st Qu.:  1.0   1st Qu.:33.3   1st Qu.:44.1  
##  Median :  0.0   Median :  1.0   Median :33.4   Median :44.4  
##  Mean   :  0.3   Mean   :  2.1   Mean   :33.7   Mean   :44.3  
##  3rd Qu.:  0.0   3rd Qu.:  2.0   3rd Qu.:33.9   3rd Qu.:44.5  
##  Max.   :560.0   Max.   :443.0   Max.   :37.9   Max.   :48.8  
##

names(id)

##  [1] "Report.key"               "Date.and.time"           
##  [3] "YEAR"                     "Type"                    
##  [5] "Category"                 "Title"                   
##  [7] "Region"                   "Attack.on"               
##  [9] "Coalition.forces.wounded" "Coalition.forces.killed" 
## [11] "Iraq.forces.wounded"      "Iraq.forces.killed"      
## [13] "Civilian.wia"             "Civilian.kia"            
## [15] "Enemy.wia"                "Enemy.kia"               
## [17] "Enemy.detained"           "Total.deaths"            
## [19] "Latitude"                 "Longitude"

summary(id$Type)

##      criminal event      Criminal Event      CRIMINAL EVENT 
##                   1               24353                   3 
##        Enemy Action    Explosive Hazard    EXPLOSIVE HAZARD 
##               11678                9876                   1 
##     Friendly Action       Friendly Fire    Non-Combat Event 
##                4631                 184                1107 
##               Other Suspicious Incident       Threat Report 
##                 146                  32                  23


# Clean-up Type data:
id$Type <- sub(id$Type, pattern = "CRIMINAL EVENT", replacement = "Criminal Event")
id$Type <- sub(id$Type, pattern = "criminal event", replacement = "Criminal Event")
id$Type <- sub(id$Type, pattern = "EXPLOSIVE HAZARD", replacement = "Explosive Hazard")
id$Type <- as.factor(id$Type)  # Converts Type back to a factor
levels(id$Type)

## [1] "Criminal Event"      "Enemy Action"        "Explosive Hazard"   
## [4] "Friendly Action"     "Friendly Fire"       "Non-Combat Event"   
## [7] "Other"               "Suspicious Incident" "Threat Report"

summary(id$Type)

##      Criminal Event        Enemy Action    Explosive Hazard 
##               24357               11678                9877 
##     Friendly Action       Friendly Fire    Non-Combat Event 
##                4631                 184                1107 
##               Other Suspicious Incident       Threat Report 
##                 146                  32                  23


# Create Marks: SS: The spatstat library is for the analysis and
# manipulation of spatial points. In a spatstat point file the attributes
# of each location are stored in “marks”. Each point on the map can have
# zero, one, or many associated marks. A file with too many marks is
# cumbersome, select a subset of attributes to serve as potential marks:
id.marks.simple <- id[, 4]
id.marks.full <- id[, c(4, 7, 9:18)]

# Create a spatial points data frame:
id.spp <- SpatialPointsDataFrame(coords = id[, 19:20], proj4string = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"), 
    data = id.marks.full)

Exploratory Maps

# Use ggmap to access Google and obtain a satellite image of any location
# location = 'Country' or 'City, STATE' or long/lat pair (long first)
# maptype = 'hybrid' (Google), 'terrain', 'watercolor','toner' (Stamen)
# zoom = 0 (world) to 21 (building); 10 = city (default)
map <- get_map(location = "Iraq", maptype = "hybrid", zoom = 6)  #get map
ggmap(map, extent = "panel")

plot of chunk unnamed-chunk-2


# Add WikiLeaks data to the map geom_point() adds the points to the map;
# include: data = DATASET, aes(x = LONG , y= LAT) for spatial coords,
# color = 'color', alpha = opacity (0 to 1) Include size = VARIABLE to
# make the size dependent on an attribute variable ggtitle('Title') adds a
# title
ggmap(map, extent = "panel") + geom_point(data = id, aes(x = id$Longitude, y = id$Latitude, 
    size = Total.deaths), alpha = 0.1, color = "blue") + ggtitle("WikiLeaks Iraq Data \n Incidents involving Death(s)")

plot of chunk unnamed-chunk-2


# Make a map with a subset by using bracket notation:
ggmap(map, extent = "panel") + geom_point(data = id[id$Enemy.detained > 0, ], 
    aes(x = Longitude, y = Latitude, size = Enemy.detained), alpha = 0.5, color = "green") + 
    ggtitle("WikiLeaks Iraq Data \n Incidents involving Dententions(s)")

plot of chunk unnamed-chunk-2

Exploratory Spatial Analysis

## New Coordinates: Convert to a different coordinate system. So that
## coordinates are in meters not degrees
id.spp.rp <- spTransform(id.spp, CRS("+init=epsg:3839"))
# Save the new coordinates x-values and y-values:
id.spp.rp$Easting <- coordinates(id.spp.rp)[, 1]
id.spp.rp$Northing <- coordinates(id.spp.rp)[, 2]

## Remove all events outside of Baghdad and convert back to a table
id.df.rp.bag <- id.spp.rp@data[id.spp.rp@data$Region == "MND-BAGHDAD", ]
# SS: Notice my use of @data I have pulled out a subset of the map and
# saved it as a non-spatial data.frame.  The spatial information is
# preserved in the columns Easting and Northing:
plot(x = id.df.rp.bag$Easting, y = id.df.rp.bag$Northing, ylab = "Northing", 
    xlab = "Easting", main = "Locations of Incidents Involving Death(s) \n Baghdad, Iraq")

plot of chunk unnamed-chunk-3

# Remove a few outliers:
id.df.rp.bag <- id.df.rp.bag[id.df.rp.bag$Northing < 4960000 & id.df.rp.bag$Easting > 
    9960000, ]
plot(x = id.df.rp.bag$Easting, y = id.df.rp.bag$Northing, ylab = "Northing", 
    xlab = "Easting", main = "Locations of Incidents Involving Death(s) \n Baghdad, Iraq", 
    cex = 0.5)

plot of chunk unnamed-chunk-3

# Focus on the area of greatest density:
id.df.rp.bag <- id.df.rp.bag[(id.df.rp.bag$Northing < 4950000 & id.df.rp.bag$Northing > 
    4940000) & (id.df.rp.bag$Easting > 1e+07 & id.df.rp.bag$Easting < 10010000), 
    ]
plot(x = id.df.rp.bag$Easting, y = id.df.rp.bag$Northing, ylab = "Northing", 
    xlab = "Easting", main = "Locations of Incidents Involving Death(s) \n Central Baghdad, Iraq", 
    cex = 0.5)

plot of chunk unnamed-chunk-3

Using SpatStat:

# 1) Define the Window: SS: Use convexhull.xy() which traces the outermost
# points of a scatter plot. Give the function the x and y coordinates of
# the points, and it returns an owin object – which is necessary for
# making a point pattern object.
id.chull <- convexhull.xy(x = id.df.rp.bag$Easting, y = id.df.rp.bag$Northing)
## Manually define a window by markig boundary points (interactive); save
## points to an object and then use owin(poly= BOUNDARY) to convert
## boundary object into an owin object.  plot(x = id.df.rp.bag$Easting, y
## = id.df.rp.bag$Northing) bdry <- locator() id.win <- owin(poly = bdry)

# 2) Create a ppp object using x and y points from before, window just
# defined, and marks from a subset of attributes
id.ppp <- ppp(x = id.df.rp.bag$Easting, y = id.df.rp.bag$Northing, window = id.chull, 
    marks = id.df.rp.bag[, c(1, 3:12)])

## Warning: data contain duplicated points

unitname(id.ppp) <- c("meter", "meters")  ##Assign names to units
# To change the window of an existing ppp object use
# id.ppp[newWINDOWobject] id.ppp <- id.ppp[id.win]

Visualization in Spatstat

# 1) Visualize ppp object with simple plot of each event:
p1 <- plot.ppp(id.ppp)  # By default, it will plot the first mark that you assigned
legend(max(coords(id.ppp))[1] + 1000, mean(coords(id.ppp))[2], pch = p1, legend = names(p1), 
    cex = 0.5)

## Warning: mean(<data.frame>) is deprecated.  Use colMeans() or sapply(*,
## mean) instead.

plot of chunk unnamed-chunk-5


# 2) Visualize ppp object by the density of points using a gaussian
# (normal) kernel with a 1000m standard deviation (sigma=).
plot(density(id.ppp, sigma = 1000))

plot of chunk unnamed-chunk-5

# This is just a density of counts.  SS: The density plot places a kernel
# over the center of each pixel and counts up the number of points per
# unit area. The points further away from the center of each pixel count
# less toward the density of the pixel than points that are close to the
# center. This is the standard kernel density estimator that is used in
# most GIS programs.  The density command creates a pixel image or im
# object. These objects allow us to do cool stuff when combined with
# points.

# 3) Visualize ppp object by contour of point density:
contour(density(id.ppp, 1000), axes = F)

plot of chunk unnamed-chunk-5

## NULL


# 4) Visualize ppp object using 3-D Perspective (a surface):
layout(matrix(c(1, 1, 1, 2, 3, 4), 2, 3, byrow = TRUE))
persp(density(id.ppp, 1000))  # theta and phi are agles defining viewing direction.

##            [,1]       [,2]       [,3]       [,4]
## [1,]  2.018e-04  0.000e+00  0.000e+00  0.000e+00
## [2,]  0.000e+00  5.223e-05 -1.949e-04  1.949e-04
## [3,]  0.000e+00  1.834e+04  4.915e+03 -4.915e+03
## [4,] -2.019e+03 -2.590e+02  9.611e+02 -9.601e+02

persp(density(id.ppp, 1000), theta = 150, phi = 15)  #rotate by adjusting theta

##            [,1]       [,2]       [,3]       [,4]
## [1,] -1.748e-04 -2.612e-05  9.747e-05 -9.747e-05
## [2,]  1.009e-04 -4.524e-05  1.688e-04 -1.688e-04
## [3,] -5.814e-13  1.834e+04  4.915e+03 -4.915e+03
## [4,]  1.250e+03  4.843e+02 -1.813e+03  1.814e+03

persp(density(id.ppp, 1000), theta = 120, phi = 15)  #rotate by adjusting theta

##            [,1]       [,2]       [,3]       [,4]
## [1,] -1.009e-04 -4.524e-05  1.688e-04 -1.688e-04
## [2,]  1.748e-04 -2.612e-05  9.747e-05 -9.747e-05
## [3,] -1.007e-12  1.834e+04  4.915e+03 -4.915e+03
## [4,]  1.453e+02  5.810e+02 -2.174e+03  2.175e+03

persp(density(id.ppp, 1000), theta = 100, phi = 15)  #rotate by adjusting theta

plot of chunk unnamed-chunk-5

##            [,1]       [,2]       [,3]       [,4]
## [1,] -3.505e-05 -5.144e-05  1.920e-04 -1.920e-04
## [2,]  1.988e-04 -9.070e-06  3.385e-05 -3.385e-05
## [3,] -1.145e-12  1.834e+04  4.915e+03 -4.915e+03
## [4,] -6.322e+02  5.588e+02 -2.091e+03  2.092e+03


dev.off()  #clearlayout

## null device 
##           1


# 8) Visualize by each type of event (again, the split takes the first
# mark by default) for each of the three visualization types:
plot(split(id.ppp))
plot(density(split(id.ppp)))
contour(density(split(id.ppp)), axes = F)

Visualizations based on a particular mark:

# which.marks= selects which mark to use; by default, the value of the
# mark will determine the size of each event.
plot(id.ppp, which.marks = "Total.deaths", maxsize = 3000, main = "Total Deaths")

plot of chunk unnamed-chunk-6

##      0     50    100    150    200 
##    0.0  828.7 1657.5 2486.2 3314.9

# SS: It takes some fiddling with maxsize to get a decent plot, by default
# all the circles are tiny.

Exploratory analysis of points

# 1) Simplify dataset by removing all mark data from the points and then
# marking them with deaths.
id.ppp.simp <- unmark(id.ppp)  #remove all data from the points
deaths <- marks(id.ppp)["Total.deaths"]  #create a list of # of deaths
# Notice how deaths has the same number of rows as id.df.rp.bag -- which
# is where the locations came from.  This is required for ppp.
id.ppp.simp.death <- id.ppp.simp %mark% deaths  #make a point pattern (ppp) with a single variable

# 2) Density Plot of total deaths weight each point by the total deaths
# using the “weight” option, specify the type of kernel (using kernel),
# and the size of the pixels (using eps):
id.death.im <- density(id.ppp.simp.death, sigma = 1000, weights = marks(id.ppp.simp.death), 
    kernel = "rectangular", eps = 100)  #eps sets pixel size in map units
plot(id.death.im, main = "Weighted Density Map of \nWikiLeaks Deaths in Baghdad")

plot of chunk unnamed-chunk-7

# SS: To change the colors we first have to define a function called a
# color ramp. The color ramp will return a specified number of colors
# between the two user selected colors. To see a list of possible colors
# type colors().
grRd <- colorRampPalette(c("grey60", "red"))  # Define color endmembers
summary(id.death.im)

## real-valued pixel image
## 100 x 100 pixel array (ny, nx)
## enclosing rectangle: [10000016.4855109, 10009990.1679714] x [4940002.54008002, 4949990.54520946] meters
## dimensions of each pixel: 99.7 x 99.9 meters 
## Image is defined on a subset of the rectangular grid
## Subset area =  97156646.9435216 square meters 
## Pixel values (inside window):
##  range = [2.82569785312866e-05,0.000211536747919665]
##  integral = 7942.44037063595
##  mean = 8.17488110232231e-05

co <- colourmap(grRd(7), range = c(2.82e-06, 0.000212))  #range from summary(id.death.im)
plot(id.death.im, main = "Weighted Density Map of \nWikiLeaks Deaths in Baghdad", 
    col = co)

plot of chunk unnamed-chunk-7


# 3) Tesselations SS: The death density image can be used to divide the
# study region into areas of high, medium, and low “death density”. The
# function tess is used to create a special type of spatstat object called
# a tessellation. Tessellations can be used to group points.
death.map.breaks <- quantile(id.death.im, probs = (0:3)/3)  # Define density cut points -- in this case 3 groups are defined (so two break points b/wn max and min).
death.map.cuts <- cut(id.death.im, breaks = death.map.breaks, labels = c("Low DD", 
    "Medium DD", "High DD"))  # Cut the image using breaks
death.map.quantile <- tess(image = death.map.cuts)  # Use the tess() function to group points.
plot(death.map.quantile, main = "Death Density in Baghdad")  # Plot the groups.

plot of chunk unnamed-chunk-7


# Count the number of events in each death density group using
# quadratcount(X = FULLppp, tess= TESS_object)
qCount.im <- quadratcount(X = id.ppp, tess = death.map.quantile)

## Warning: Tessellation does not contain all the points of X

plot(qCount.im, main = "Density Density and Event Count \n Baghdad, Iraq")

plot of chunk unnamed-chunk-7

# This is the same exact plot as before, only now it has counts in each
# grouping.

# We could also divide the window up into a n by n grid and count the
# number of events in each grid cell:
qCount <- quadratcount(X = id.ppp, nx = 4, ny = 4)  #returns the count of points in each quadrat
plot(qCount, main = "Event Count for Incidents Involving Death \n Baghdad, Iraq")  # The colors seem arbitrary here

plot of chunk unnamed-chunk-7


# 4) Intensity (events/unit area) relation to density of death image: We
# can also use the density map to look at how the intensity of events
# (number of points per unit area) relates to the density of death image:
plot(rhohat(unmark(id.ppp), id.death.im), main = "probability of death event v. death density")

plot of chunk unnamed-chunk-7


# 5) SS: We could also split the data frame up into a series of point
# patterns, one for each type of event:
id.ppp.types <- split(id.ppp, which.marks = "Type")
id.ppp.types <- unmark(id.ppp.types)
# SS: The object id.ppp.types now holds a bunch of unmarked point
# patterns, each type of event is in its own ppp object. For example, we
# could look at only the criminal events:
id.ppp.types$"Criminal Event"

##  planar point pattern: 1802 points 
## window: polygonal boundary
## enclosing rectangle: [10000016, 10009990] x [4940003, 4949991] meters

# SS: There are so many criminal events that its difficult to make heads
# or tails of the map. Let's try to filter the significant clusters (high
# density areas) from the background rate (noise). We'll do this using an
# approach developed by Byers and Raferty (1998) called Nearest Neighbor
# Cleaning. We'll discuss the theory in class, the basic idea is to
# estimate the parameters of two point generating processes (one for noise
# and one for signal). Once the parameters of each process are estimated
# points are assigned to one of the two processes.
nnc <- nnclean(X = id.ppp.types$"Criminal Event", k = 20)

## Iteration 1  logLik = -361421.031345153  p = 0.7109 
## Iteration 2  logLik = -358561.899117613  p = 0.6307 
## Iteration 3  logLik = -356876.844782523  p = 0.5624 
## Iteration 4  logLik = -356003.613549312  p = 0.4991 
## Iteration 5  logLik = -355766.598100418  p = 0.4594 
## Estimated parameters:
## p [cluster] = 0.45944 
## lambda [cluster] = 7.1591e-05 
## lambda [noise]   = 1.1937e-05

plot(split(nnc))

plot of chunk unnamed-chunk-7

# SS: The map above shows intense clusters of criminal activity against
# the background “noise.” In the next exercise we'll do hypothesis tests
# to understand the processes that generated the observed point patterns.