library(spatstat)
## Loading required package: mgcv
## This is mgcv 1.7-22. For overview type 'help("mgcv-package")'.
## Loading required package: deldir
## deldir 0.0-21
## spatstat 1.31-1 Type 'help(spatstat)' for an overview of spatstat
## 'latest.news()' for news on latest version 'licence.polygons()' for
## licence information on polygon calculations
library(sp)
library(rgdal)
## rgdal: version: 0.8-5, (SVN revision 449) Geospatial Data Abstraction
## Library extensions to R successfully loaded Loaded GDAL runtime: GDAL
## 1.9.0, released 2011/12/29 Path to GDAL shared files:
## /Library/Frameworks/R.framework/Versions/2.15/Resources/library/rgdal/gdal
## Loaded PROJ.4 runtime: Rel. 4.8.0, 6 March 2012, [PJ_VERSION: 480] Path to
## PROJ.4 shared files:
## /Library/Frameworks/R.framework/Versions/2.15/Resources/library/rgdal/proj
library(maptools)
## Loading required package: foreign
## Loading required package: grid
## Loading required package: lattice
## Checking rgeos availability: FALSE Note: when rgeos is not available,
## polygon geometry computations in maptools depend on gpclib, which has a
## restricted licence. It is disabled by default; to enable gpclib, type
## gpclibPermit()
library(ggmap)
## Loading required package: ggplot2
library(mapproj)
## Warning: package 'mapproj' was built under R version 2.15.3
## Loading required package: maps
id <- read.csv("/Users/telekineticturtle/Desktop/Colorado 13/Quant Methods/Data/Deathsonly.csv",
header = T)
summary(id)
## Report.key Date.and.time
## 00023432-98D1-7C41-A16F6C4DC998257A : 1 13/06/2006 00:00: 53
## 0004720F-CDC0-4AB7-A02E-9B2DD86EFDE7: 1 19/07/2006 00:00: 50
## 0004A198-CDFA-6C7A-316C5E0B1B99315A : 1 26/07/2006 00:00: 48
## 000560C8-BC1A-7033-558315CE07936A6A : 1 29/06/2006 00:00: 45
## 0007F13F-423D-4561-5458204F2B748859 : 1 16/07/2006 00:00: 44
## 00084758-0A44-4D3F-971C-7675E2A227A4: 1 23/07/2006 00:00: 44
## (Other) :52029 (Other) :51751
## YEAR Type Category
## 2006 :11913 Criminal Event :24353 Murder :24044
## 2007 :10432 Enemy Action :11678 IED Explosion : 9101
## 2008 : 3429 Explosive Hazard: 9876 Direct Fire : 7982
## 2005 : 2741 Friendly Action : 4631 Attack : 2343
## 2004 : 1925 Non-Combat Event: 1107 Small Unit Actions: 1419
## 2009 : 1208 Friendly Fire : 184 Indirect Fire : 1349
## (Other):20387 (Other) : 206 (Other) : 5797
## Title
## UNK : 1521
## : 1341
## (CRIMINAL EVENT) MURDER RPT BY NOT PROVIDED IVO (ROUTE UNKNOWN): 1 CIV KIA : 304
## (CRIMINAL EVENT) MURDER RPT BAGHDAD OPS/NCC : 1 CIV KIA : 255
## (CRIMINAL EVENT) MURDER RPT BAGHDAD POLICE OPS/NCC : 1 CIV KIA : 197
## (CRIMINAL EVENT) MURDER RPT BY NOT PROVIDED IVO BAGHDAD (ZONE 37) (ROUTE UNKNOWN): 1 CIV KIA: 171
## (Other) :48246
## Region Attack.on Coalition.forces.wounded
## MND-BAGHDAD:23782 ENEMY :45967 Min. : 0.00
## MND-N :15944 FRIEND : 4815 1st Qu.: 0.00
## MND-C : 4678 NEUTRAL: 1253 Median : 0.00
## MNF-W : 4472 Mean : 0.15
## MND-SE : 2668 3rd Qu.: 0.00
## MND-S : 378 Max. :118.00
## (Other) : 113
## Coalition.forces.killed Iraq.forces.wounded Iraq.forces.killed
## Min. : 0.000 Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.000 1st Qu.: 0.00 1st Qu.: 0.00
## Median : 0.000 Median : 0.00 Median : 0.00
## Mean : 0.072 Mean : 0.34 Mean : 0.29
## 3rd Qu.: 0.000 3rd Qu.: 0.00 3rd Qu.: 0.00
## Max. :31.000 Max. :97.00 Max. :48.00
##
## Civilian.wia Civilian.kia Enemy.wia Enemy.kia
## Min. : 0.0 Min. : 0.0 Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 0.00 1st Qu.: 0.00
## Median : 0.0 Median : 1.0 Median : 0.00 Median : 0.00
## Mean : 1.2 Mean : 1.3 Mean : 0.07 Mean : 0.46
## 3rd Qu.: 0.0 3rd Qu.: 1.0 3rd Qu.: 0.00 3rd Qu.: 0.00
## Max. :641.0 Max. :436.0 Max. :194.00 Max. :262.00
##
## Enemy.detained Total.deaths Latitude Longitude
## Min. : 0.0 Min. : 1.0 Min. :29.1 Min. :38.9
## 1st Qu.: 0.0 1st Qu.: 1.0 1st Qu.:33.3 1st Qu.:44.1
## Median : 0.0 Median : 1.0 Median :33.4 Median :44.4
## Mean : 0.3 Mean : 2.1 Mean :33.7 Mean :44.3
## 3rd Qu.: 0.0 3rd Qu.: 2.0 3rd Qu.:33.9 3rd Qu.:44.5
## Max. :560.0 Max. :443.0 Max. :37.9 Max. :48.8
##
names(id)
## [1] "Report.key" "Date.and.time"
## [3] "YEAR" "Type"
## [5] "Category" "Title"
## [7] "Region" "Attack.on"
## [9] "Coalition.forces.wounded" "Coalition.forces.killed"
## [11] "Iraq.forces.wounded" "Iraq.forces.killed"
## [13] "Civilian.wia" "Civilian.kia"
## [15] "Enemy.wia" "Enemy.kia"
## [17] "Enemy.detained" "Total.deaths"
## [19] "Latitude" "Longitude"
summary(id$Type)
## criminal event Criminal Event CRIMINAL EVENT
## 1 24353 3
## Enemy Action Explosive Hazard EXPLOSIVE HAZARD
## 11678 9876 1
## Friendly Action Friendly Fire Non-Combat Event
## 4631 184 1107
## Other Suspicious Incident Threat Report
## 146 32 23
# Clean-up Type data:
id$Type <- sub(id$Type, pattern = "CRIMINAL EVENT", replacement = "Criminal Event")
id$Type <- sub(id$Type, pattern = "criminal event", replacement = "Criminal Event")
id$Type <- sub(id$Type, pattern = "EXPLOSIVE HAZARD", replacement = "Explosive Hazard")
id$Type <- as.factor(id$Type) # Converts Type back to a factor
levels(id$Type)
## [1] "Criminal Event" "Enemy Action" "Explosive Hazard"
## [4] "Friendly Action" "Friendly Fire" "Non-Combat Event"
## [7] "Other" "Suspicious Incident" "Threat Report"
summary(id$Type)
## Criminal Event Enemy Action Explosive Hazard
## 24357 11678 9877
## Friendly Action Friendly Fire Non-Combat Event
## 4631 184 1107
## Other Suspicious Incident Threat Report
## 146 32 23
# Create Marks: SS: The spatstat library is for the analysis and
# manipulation of spatial points. In a spatstat point file the attributes
# of each location are stored in “marks”. Each point on the map can have
# zero, one, or many associated marks. A file with too many marks is
# cumbersome, select a subset of attributes to serve as potential marks:
id.marks.simple <- id[, 4]
id.marks.full <- id[, c(4, 7, 9:18)]
# Create a spatial points data frame:
id.spp <- SpatialPointsDataFrame(coords = id[, 19:20], proj4string = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"),
data = id.marks.full)
# Use ggmap to access Google and obtain a satellite image of any location
# location = 'Country' or 'City, STATE' or long/lat pair (long first)
# maptype = 'hybrid' (Google), 'terrain', 'watercolor','toner' (Stamen)
# zoom = 0 (world) to 21 (building); 10 = city (default)
map <- get_map(location = "Iraq", maptype = "hybrid", zoom = 6) #get map
ggmap(map, extent = "panel")
# Add WikiLeaks data to the map geom_point() adds the points to the map;
# include: data = DATASET, aes(x = LONG , y= LAT) for spatial coords,
# color = 'color', alpha = opacity (0 to 1) Include size = VARIABLE to
# make the size dependent on an attribute variable ggtitle('Title') adds a
# title
ggmap(map, extent = "panel") + geom_point(data = id, aes(x = id$Longitude, y = id$Latitude,
size = Total.deaths), alpha = 0.1, color = "blue") + ggtitle("WikiLeaks Iraq Data \n Incidents involving Death(s)")
# Make a map with a subset by using bracket notation:
ggmap(map, extent = "panel") + geom_point(data = id[id$Enemy.detained > 0, ],
aes(x = Longitude, y = Latitude, size = Enemy.detained), alpha = 0.5, color = "green") +
ggtitle("WikiLeaks Iraq Data \n Incidents involving Dententions(s)")
## New Coordinates: Convert to a different coordinate system. So that
## coordinates are in meters not degrees
id.spp.rp <- spTransform(id.spp, CRS("+init=epsg:3839"))
# Save the new coordinates x-values and y-values:
id.spp.rp$Easting <- coordinates(id.spp.rp)[, 1]
id.spp.rp$Northing <- coordinates(id.spp.rp)[, 2]
## Remove all events outside of Baghdad and convert back to a table
id.df.rp.bag <- id.spp.rp@data[id.spp.rp@data$Region == "MND-BAGHDAD", ]
# SS: Notice my use of @data I have pulled out a subset of the map and
# saved it as a non-spatial data.frame. The spatial information is
# preserved in the columns Easting and Northing:
plot(x = id.df.rp.bag$Easting, y = id.df.rp.bag$Northing, ylab = "Northing",
xlab = "Easting", main = "Locations of Incidents Involving Death(s) \n Baghdad, Iraq")
# Remove a few outliers:
id.df.rp.bag <- id.df.rp.bag[id.df.rp.bag$Northing < 4960000 & id.df.rp.bag$Easting >
9960000, ]
plot(x = id.df.rp.bag$Easting, y = id.df.rp.bag$Northing, ylab = "Northing",
xlab = "Easting", main = "Locations of Incidents Involving Death(s) \n Baghdad, Iraq",
cex = 0.5)
# Focus on the area of greatest density:
id.df.rp.bag <- id.df.rp.bag[(id.df.rp.bag$Northing < 4950000 & id.df.rp.bag$Northing >
4940000) & (id.df.rp.bag$Easting > 1e+07 & id.df.rp.bag$Easting < 10010000),
]
plot(x = id.df.rp.bag$Easting, y = id.df.rp.bag$Northing, ylab = "Northing",
xlab = "Easting", main = "Locations of Incidents Involving Death(s) \n Central Baghdad, Iraq",
cex = 0.5)
# 1) Define the Window: SS: Use convexhull.xy() which traces the outermost
# points of a scatter plot. Give the function the x and y coordinates of
# the points, and it returns an owin object – which is necessary for
# making a point pattern object.
id.chull <- convexhull.xy(x = id.df.rp.bag$Easting, y = id.df.rp.bag$Northing)
## Manually define a window by markig boundary points (interactive); save
## points to an object and then use owin(poly= BOUNDARY) to convert
## boundary object into an owin object. plot(x = id.df.rp.bag$Easting, y
## = id.df.rp.bag$Northing) bdry <- locator() id.win <- owin(poly = bdry)
# 2) Create a ppp object using x and y points from before, window just
# defined, and marks from a subset of attributes
id.ppp <- ppp(x = id.df.rp.bag$Easting, y = id.df.rp.bag$Northing, window = id.chull,
marks = id.df.rp.bag[, c(1, 3:12)])
## Warning: data contain duplicated points
unitname(id.ppp) <- c("meter", "meters") ##Assign names to units
# To change the window of an existing ppp object use
# id.ppp[newWINDOWobject] id.ppp <- id.ppp[id.win]
# 1) Visualize ppp object with simple plot of each event:
p1 <- plot.ppp(id.ppp) # By default, it will plot the first mark that you assigned
legend(max(coords(id.ppp))[1] + 1000, mean(coords(id.ppp))[2], pch = p1, legend = names(p1),
cex = 0.5)
## Warning: mean(<data.frame>) is deprecated. Use colMeans() or sapply(*,
## mean) instead.
# 2) Visualize ppp object by the density of points using a gaussian
# (normal) kernel with a 1000m standard deviation (sigma=).
plot(density(id.ppp, sigma = 1000))
# This is just a density of counts. SS: The density plot places a kernel
# over the center of each pixel and counts up the number of points per
# unit area. The points further away from the center of each pixel count
# less toward the density of the pixel than points that are close to the
# center. This is the standard kernel density estimator that is used in
# most GIS programs. The density command creates a pixel image or im
# object. These objects allow us to do cool stuff when combined with
# points.
# 3) Visualize ppp object by contour of point density:
contour(density(id.ppp, 1000), axes = F)
## NULL
# 4) Visualize ppp object using 3-D Perspective (a surface):
layout(matrix(c(1, 1, 1, 2, 3, 4), 2, 3, byrow = TRUE))
persp(density(id.ppp, 1000)) # theta and phi are agles defining viewing direction.
## [,1] [,2] [,3] [,4]
## [1,] 2.018e-04 0.000e+00 0.000e+00 0.000e+00
## [2,] 0.000e+00 5.223e-05 -1.949e-04 1.949e-04
## [3,] 0.000e+00 1.834e+04 4.915e+03 -4.915e+03
## [4,] -2.019e+03 -2.590e+02 9.611e+02 -9.601e+02
persp(density(id.ppp, 1000), theta = 150, phi = 15) #rotate by adjusting theta
## [,1] [,2] [,3] [,4]
## [1,] -1.748e-04 -2.612e-05 9.747e-05 -9.747e-05
## [2,] 1.009e-04 -4.524e-05 1.688e-04 -1.688e-04
## [3,] -5.814e-13 1.834e+04 4.915e+03 -4.915e+03
## [4,] 1.250e+03 4.843e+02 -1.813e+03 1.814e+03
persp(density(id.ppp, 1000), theta = 120, phi = 15) #rotate by adjusting theta
## [,1] [,2] [,3] [,4]
## [1,] -1.009e-04 -4.524e-05 1.688e-04 -1.688e-04
## [2,] 1.748e-04 -2.612e-05 9.747e-05 -9.747e-05
## [3,] -1.007e-12 1.834e+04 4.915e+03 -4.915e+03
## [4,] 1.453e+02 5.810e+02 -2.174e+03 2.175e+03
persp(density(id.ppp, 1000), theta = 100, phi = 15) #rotate by adjusting theta
## [,1] [,2] [,3] [,4]
## [1,] -3.505e-05 -5.144e-05 1.920e-04 -1.920e-04
## [2,] 1.988e-04 -9.070e-06 3.385e-05 -3.385e-05
## [3,] -1.145e-12 1.834e+04 4.915e+03 -4.915e+03
## [4,] -6.322e+02 5.588e+02 -2.091e+03 2.092e+03
dev.off() #clearlayout
## null device
## 1
# 8) Visualize by each type of event (again, the split takes the first
# mark by default) for each of the three visualization types:
plot(split(id.ppp))
plot(density(split(id.ppp)))
contour(density(split(id.ppp)), axes = F)
# which.marks= selects which mark to use; by default, the value of the
# mark will determine the size of each event.
plot(id.ppp, which.marks = "Total.deaths", maxsize = 3000, main = "Total Deaths")
## 0 50 100 150 200
## 0.0 828.7 1657.5 2486.2 3314.9
# SS: It takes some fiddling with maxsize to get a decent plot, by default
# all the circles are tiny.
# 1) Simplify dataset by removing all mark data from the points and then
# marking them with deaths.
id.ppp.simp <- unmark(id.ppp) #remove all data from the points
deaths <- marks(id.ppp)["Total.deaths"] #create a list of # of deaths
# Notice how deaths has the same number of rows as id.df.rp.bag -- which
# is where the locations came from. This is required for ppp.
id.ppp.simp.death <- id.ppp.simp %mark% deaths #make a point pattern (ppp) with a single variable
# 2) Density Plot of total deaths weight each point by the total deaths
# using the “weight” option, specify the type of kernel (using kernel),
# and the size of the pixels (using eps):
id.death.im <- density(id.ppp.simp.death, sigma = 1000, weights = marks(id.ppp.simp.death),
kernel = "rectangular", eps = 100) #eps sets pixel size in map units
plot(id.death.im, main = "Weighted Density Map of \nWikiLeaks Deaths in Baghdad")
# SS: To change the colors we first have to define a function called a
# color ramp. The color ramp will return a specified number of colors
# between the two user selected colors. To see a list of possible colors
# type colors().
grRd <- colorRampPalette(c("grey60", "red")) # Define color endmembers
summary(id.death.im)
## real-valued pixel image
## 100 x 100 pixel array (ny, nx)
## enclosing rectangle: [10000016.4855109, 10009990.1679714] x [4940002.54008002, 4949990.54520946] meters
## dimensions of each pixel: 99.7 x 99.9 meters
## Image is defined on a subset of the rectangular grid
## Subset area = 97156646.9435216 square meters
## Pixel values (inside window):
## range = [2.82569785312866e-05,0.000211536747919665]
## integral = 7942.44037063595
## mean = 8.17488110232231e-05
co <- colourmap(grRd(7), range = c(2.82e-06, 0.000212)) #range from summary(id.death.im)
plot(id.death.im, main = "Weighted Density Map of \nWikiLeaks Deaths in Baghdad",
col = co)
# 3) Tesselations SS: The death density image can be used to divide the
# study region into areas of high, medium, and low “death density”. The
# function tess is used to create a special type of spatstat object called
# a tessellation. Tessellations can be used to group points.
death.map.breaks <- quantile(id.death.im, probs = (0:3)/3) # Define density cut points -- in this case 3 groups are defined (so two break points b/wn max and min).
death.map.cuts <- cut(id.death.im, breaks = death.map.breaks, labels = c("Low DD",
"Medium DD", "High DD")) # Cut the image using breaks
death.map.quantile <- tess(image = death.map.cuts) # Use the tess() function to group points.
plot(death.map.quantile, main = "Death Density in Baghdad") # Plot the groups.
# Count the number of events in each death density group using
# quadratcount(X = FULLppp, tess= TESS_object)
qCount.im <- quadratcount(X = id.ppp, tess = death.map.quantile)
## Warning: Tessellation does not contain all the points of X
plot(qCount.im, main = "Density Density and Event Count \n Baghdad, Iraq")
# This is the same exact plot as before, only now it has counts in each
# grouping.
# We could also divide the window up into a n by n grid and count the
# number of events in each grid cell:
qCount <- quadratcount(X = id.ppp, nx = 4, ny = 4) #returns the count of points in each quadrat
plot(qCount, main = "Event Count for Incidents Involving Death \n Baghdad, Iraq") # The colors seem arbitrary here
# 4) Intensity (events/unit area) relation to density of death image: We
# can also use the density map to look at how the intensity of events
# (number of points per unit area) relates to the density of death image:
plot(rhohat(unmark(id.ppp), id.death.im), main = "probability of death event v. death density")
# 5) SS: We could also split the data frame up into a series of point
# patterns, one for each type of event:
id.ppp.types <- split(id.ppp, which.marks = "Type")
id.ppp.types <- unmark(id.ppp.types)
# SS: The object id.ppp.types now holds a bunch of unmarked point
# patterns, each type of event is in its own ppp object. For example, we
# could look at only the criminal events:
id.ppp.types$"Criminal Event"
## planar point pattern: 1802 points
## window: polygonal boundary
## enclosing rectangle: [10000016, 10009990] x [4940003, 4949991] meters
# SS: There are so many criminal events that its difficult to make heads
# or tails of the map. Let's try to filter the significant clusters (high
# density areas) from the background rate (noise). We'll do this using an
# approach developed by Byers and Raferty (1998) called Nearest Neighbor
# Cleaning. We'll discuss the theory in class, the basic idea is to
# estimate the parameters of two point generating processes (one for noise
# and one for signal). Once the parameters of each process are estimated
# points are assigned to one of the two processes.
nnc <- nnclean(X = id.ppp.types$"Criminal Event", k = 20)
## Iteration 1 logLik = -361421.031345153 p = 0.7109
## Iteration 2 logLik = -358561.899117613 p = 0.6307
## Iteration 3 logLik = -356876.844782523 p = 0.5624
## Iteration 4 logLik = -356003.613549312 p = 0.4991
## Iteration 5 logLik = -355766.598100418 p = 0.4594
## Estimated parameters:
## p [cluster] = 0.45944
## lambda [cluster] = 7.1591e-05
## lambda [noise] = 1.1937e-05
plot(split(nnc))
# SS: The map above shows intense clusters of criminal activity against
# the background “noise.” In the next exercise we'll do hypothesis tests
# to understand the processes that generated the observed point patterns.