Andy Lyons
Stanford University
Vector data classes
SpatialPoints, SpatialPointsDataFrame
SpatialLines, SpatialLinesDataFrame
SpatialPolygons, SpatialPolygonsDataFrame
To see all spatial data classes:
require(sp)
getClass('Spatial')
Raster classes
SpatialPixels
SpatialGrid
Three options:
require(sp)
xy <- matrix(data=runif(n=100), ncol=2)
head(xy)
[,1] [,2]
[1,] 0.7185 0.2928
[2,] 0.4791 0.2193
[3,] 0.8507 0.5771
[4,] 0.8482 0.6412
[5,] 0.2603 0.5139
[6,] 0.4422 0.2934
xySp <- SpatialPoints(xy)
summary(xySp)
Object of class SpatialPoints
Coordinates:
min max
coords.x1 0.01442 0.9882
coords.x2 0.01046 0.9992
Is projected: NA
proj4string : [NA]
Number of points: 50
plot(xySp)
xy_offset <- xy + 0.01
xySp_offset <- SpatialPoints(xy_offset)
xySp_with_offsets <- rbind(xySp, xySp_offset)
plot(xySp_with_offsets)
## See which datasets come bundled with the maps package
## data(package="maps")
## Load the data frame called 'canada.cities'
require(maps)
data(canada.cities)
head(canada.cities)
name country.etc pop lat long capital
1 Abbotsford BC BC 157795 49.06 -122.30 0
2 Acton ON ON 8308 43.63 -80.03 0
3 Acton Vale QC QC 5153 45.63 -72.57 0
4 Airdrie AB AB 25863 51.30 -114.02 0
5 Aklavik NT NT 643 68.22 -135.00 0
6 Albanel QC QC 1090 48.87 -72.42 0
## Get just the cities just for Ontario
citiesOnt <- canada.cities[canada.cities$country.etc == "ON",]
## Promote this data frame to a SpatialPointsDataFrame
## by stating which columns contain the x and y coordinates
coordinates(citiesOnt) <- ~long + lat
class(citiesOnt)
[1] "SpatialPointsDataFrame"
attr(,"package")
[1] "sp"
plot(citiesOnt)
nrow(citiesOnt@data)
[1] 258
head(citiesOnt@data)
name country.etc pop capital
2 Acton ON ON 8308 0
10 Alexandria ON ON 3604 0
12 Alliston ON ON 10353 0
14 Almonte ON ON 4984 0
17 Amherstburg ON ON 11605 0
18 Amigo Beach ON ON 1145 0
head(citiesOnt@data)
name country.etc pop capital
2 Acton ON ON 8308 0
10 Alexandria ON ON 3604 0
12 Alliston ON ON 10353 0
14 Almonte ON ON 4984 0
17 Amherstburg ON ON 11605 0
18 Amigo Beach ON ON 1145 0
Fortunately we can get read them with the coordinates() function
head(coordinates(citiesOnt))
long lat
2 -80.03 43.63
10 -74.63 45.30
12 -79.87 44.15
14 -76.20 45.22
17 -83.10 42.10
18 -79.34 44.77
selectForStudy <- sample(c(FALSE,TRUE), size=nrow(citiesOnt@data), replace=TRUE)
summary(selectForStudy)
Mode FALSE TRUE NA's
logical 141 117 0
citiesOnt@data[["samp"]] <- selectForStudy
head(citiesOnt@data)
name country.etc pop capital samp
2 Acton ON ON 8308 0 TRUE
10 Alexandria ON ON 3604 0 TRUE
12 Alliston ON ON 10353 0 FALSE
14 Almonte ON ON 4984 0 FALSE
17 Amherstburg ON ON 11605 0 FALSE
18 Amigo Beach ON ON 1145 0 TRUE
summary(citiesOnt@data$pop)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1080 2090 4010 41300 9720 4670000
bigOnt <- citiesOnt@data$pop > 100000
table(bigOnt)
bigOnt
FALSE TRUE
245 13
## Plot the cities
plot(citiesOnt, axes=T, asp=1, pch=16)
## Highlight big cities
plot(citiesOnt[bigOnt, ], pch=1, col="red", cex=3, add=TRUE)
ogrDrivers()package rgdal has a large set of import and export filters for both vector and raster GIS data
require(rgdal)
head(ogrDrivers())
name write
1 AeronavFAA FALSE
2 ARCGEN FALSE
3 AVCBin FALSE
4 AVCE00 FALSE
5 BNA TRUE
6 CSV TRUE
Let's get one of the sample shapefiles that comes with the rgdal package
vectorDir <- system.file("vectors", package="rgdal")
vectorDir
[1] "C:/Users/Andy/Documents/R/win-library/3.0/rgdal/vectors"
list.files(vectorDir, pattern="*.shp")
[1] "cities.shp" "kiritimati_primary_roads.shp"
[3] "scot_BNG.shp" "trin_inca_pl03.shp"
orgInfo() returns metadata about GIS data saved on disk. Note how it also reads the projection info
ogrInfo(dsn=vectorDir, layer="scot_BNG")
Source: "C:/Users/Andy/Documents/R/win-library/3.0/rgdal/vectors", layer: "scot_BNG"
Driver: ESRI Shapefile number of rows 56
Feature type: wkbPolygon with 2 dimensions
Extent: (7095 529495) - (468285 1218342)
CRS: +proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +datum=OSGB36 +units=m +no_defs
LDID: 0
Number of fields: 13
name type length typeName
1 SP_ID 4 5 String
2 NAME 4 13 String
3 ID_x 2 19 Real
4 COUNT 2 19 Real
5 SMR 2 19 Real
6 LONG 2 19 Real
7 LAT 2 19 Real
8 PY 2 19 Real
9 EXP_ 2 19 Real
10 AFF 2 19 Real
11 X_COOR 2 19 Real
12 Y_COOR 2 19 Real
13 ID_y 2 19 Real
readOGR() is the function you use to import a vector file.
scotland <- readOGR(dsn=vectorDir, layer="scot_BNG")
OGR data source with driver: ESRI Shapefile
Source: "C:/Users/Andy/Documents/R/win-library/3.0/rgdal/vectors", layer: "scot_BNG"
with 56 features and 13 fields
Feature type: wkbPolygon with 2 dimensions
class(scotland)
[1] "SpatialPolygonsDataFrame"
attr(,"package")
[1] "sp"
plot(scotland, axes=T, asp=1, col=palette())
Inspect attached data frame (attribute table).
head(scotland@data)
SP_ID NAME ID_x COUNT SMR LONG LAT PY EXP_ AFF X_COOR
0 0 Sutherland 12 5 279.3 58.06 4.64 37521 1.8 16 247757
1 1 Nairn 13 3 277.8 57.47 3.98 29374 1.1 10 289592
2 2 Inverness 19 9 162.7 57.24 4.73 162867 5.5 7 249685
3 3 Banff-Buchan 2 39 450.3 57.56 2.36 231337 8.7 16 385776
4 4 Bedenoch 17 2 186.9 57.06 4.09 27075 1.1 10 280492
5 5 Kincardine 16 9 197.8 57.00 3.00 111665 4.6 16 351351
Y_COOR ID_y
0 924954 12
1 842305 13
2 825855 19
3 852378 2
4 801036 17
5 792162 16
The raster package has a function getData() to grab both vector and raster data from online repositories.
require(raster)
args(getData)
function (name = "GADM", download = TRUE, path = "", ...)
NULL
peruRegions <- getData('GADM', country='PER', level=1)
class(peruRegions)
[1] "SpatialPolygonsDataFrame"
attr(,"package")
[1] "sp"
plot(peruRegions, axes=T, asp=1)
First, let's create some fake GPS points in Peru:
ctr <- c(-77, -8)
x <- runif(n=20, min=ctr[1] - 1, max=ctr[1] + 1)
y <- runif(n=20, min=ctr[2] - 1, max=ctr[2] + 1)
mygps_df <- data.frame(long=x, lat=y)
head(mygps_df)
long lat
1 -76.72 -8.597
2 -77.98 -8.257
3 -77.91 -7.927
4 -77.34 -7.801
5 -76.52 -7.033
6 -77.54 -8.299
mygps_spdf <- SpatialPoints(mygps_df)
mygps_spdf
class : SpatialPoints
features : 20
extent : -77.98, -76.07, -8.977, -7.033 (xmin, xmax, ymin, ymax)
coord. ref. : NA
projection(mygps_spdf) <- CRS("+proj=longlat +ellps=WGS84")
plot(peruRegions, axes=T, asp=1)
plot(mygps_spdf, pch=16, col="red", add=TRUE)
Note: we are not projecting coordinates here, only entering known projection info.
require(rgdal)
mygps_utm <- spTransform(mygps_spdf, CRS("+proj=utm +south +zone=18 +ellps=WGS84"))
summary(mygps_utm)
Object of class SpatialPoints
Coordinates:
min max
long 171893 382039
lat 9007005 9222332
Is projected: TRUE
proj4string : [+proj=utm +south +zone=18 +ellps=WGS84]
Number of points: 20
peruRegions_utm <- spTransform(peruRegions, CRS("+proj=utm +south +zone=18 +ellps=WGS84"))
plot(peruRegions_utm, axes=TRUE)
plot(mygps_utm, add=TRUE, col="red", pch=16)
require(rgeos)
inver <- scotland[scotland@data$NAME == "Inverness", ]
plot(inver, axes=TRUE, asp=1)
inver_buf <- gBuffer(inver, width=2000)
plot(inver_buf, border="blue", lty=2, lwd=2, add=TRUE)
delTri <- gDelaunayTriangulation(mygps_utm)
plot(delTri, axes=TRUE)
plot(mygps_utm, col="red", pch=16, cex=2, add=TRUE)
Writable ogrDrivers
x <- ogrDrivers()
as.character(x[x$write==TRUE,"name"])
[1] "BNA" "CSV" "DGN" "DXF"
[5] "ESRI Shapefile" "Geoconcept" "GeoJSON" "GeoRSS"
[9] "GML" "GMT" "GPSBabel" "GPSTrackMaker"
[13] "GPX" "KML" "MapInfo File" "Memory"
[17] "MSSQLSpatial" "ODBC" "ODS" "PCIDSK"
[21] "PDF" "PGDump" "S57" "TIGER"
[25] "XLSX"
projection(citiesOnt) <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84")
writeOGR(citiesOnt[citiesOnt[["samp"]], ], dsn="OntarioCities.kml", layer="Ontario Cities", driver="KML", dataset_options=c("NameField=name"))
Sources
require(raster)
RasterLayer - a single layer rasterRasterStack - multi-layer rasterRasterBrick - multi-layer raster (from a single file on disk, faster)Note that Raster* objects can live in memory or disk (see vignette for details)
myrast <- raster(extent(c(0,20,0,20)), ncol=10, nrow=10)
myrast[] <- 1:100
myrast
class : RasterLayer
dimensions : 10, 10, 100 (nrow, ncol, ncell)
resolution : 2, 2 (x, y)
extent : 0, 20, 0, 20 (xmin, xmax, ymin, ymax)
coord. ref. : NA
data source : in memory
names : layer
values : 1, 100 (min, max)
plot(myrast)
myrastSquared <- -1 * myrast^2
plot(myrastSquared)
rgdalDir <- system.file("pictures", package="rgdal")
rgdalDir
[1] "C:/Users/Andy/Documents/R/win-library/3.0/rgdal/pictures"
chicago.tif <- file.path(rgdalDir, "SP27GTIF.TIF")
file.exists(chicago.tif)
[1] TRUE
GDALinfo(chicago.tif)
rows 929
columns 699
bands 1
lower left origin.x 681480
lower left origin.y 1882579
res.x 32.8
res.y 32.8
ysign -1
oblique.x 0
oblique.y 0
driver GTiff
projection +proj=tmerc +lat_0=36.66666666666666 +lon_0=-88.33333333333333
+k=0.999975 +x_0=152400.3048006096 +y_0=0 +datum=NAD27
+units=us-ft +no_defs
file C:/Users/Andy/Documents/R/win-library/3.0/rgdal/pictures/SP27GTIF.TIF
apparent band summary:
GDType hasNoDataValue NoDataValue blockSize1 blockSize2
1 Byte FALSE 0 11 699
apparent band statistics:
Bmin Bmax Bmean Bsd
1 0 255 NA NA
Metadata:
AREA_OR_POINT=Area
TIFFTAG_RESOLUTIONUNIT=1 (unitless)
TIFFTAG_XRESOLUTION=72
TIFFTAG_YRESOLUTION=72
chicago.sgdf <- readGDAL(chicago.tif)
C:/Users/Andy/Documents/R/win-library/3.0/rgdal/pictures/SP27GTIF.TIF has GDAL driver GTiff
and has 929 rows and 699 columns
image(chicago.sgdf, col=gray(0:255/255))
demPeru <- getData('SRTM', lon=-77, lat=-8, download=TRUE)
demPeru
class : RasterLayer
dimensions : 6000, 6000, 3.6e+07 (nrow, ncol, ncell)
resolution : 0.0008333, 0.0008333 (x, y)
extent : -80, -75, -10, -5 (xmin, xmax, ymin, ymax)
coord. ref. : +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0
data source : C:\GitHub\rgeospatialdata\srtm_21_14.TIF
names : srtm_21_14
values : -32768, 32767 (min, max)
my_ext <- extent(-78,-76,-9,-7)
demCentralPeru <- crop(demPeru, my_ext)
rm(demPeru) ## we don't need this big raster anymore so lets get the memory back
plot(demCentralPeru)
peruSlope <- terrain(demCentralPeru, opt="slope")
peruAspect <- terrain(demCentralPeru, opt="aspect")
peruHill <- hillShade(peruSlope, peruAspect, 40,270)
plot(peruHill, col=grey(0:100/100), legend=FALSE)
hist(getValues(peruSlope))
## Create a blank copy of peruSlope
steepSlope <- raster(peruSlope)
## Fill in cells where slope > 0.5
steepSlope[peruSlope > 0.5] <- 1
plot(demCentralPeru)
plot(steepSlope, col="red", add=TRUE, legend=FALSE)
demCentralPeruUTM <- projectRaster(demCentralPeru, crs=CRS("+proj=utm +south +zone=18 +ellps=WGS84"))
demCentralPeruUTM
class : RasterLayer
dimensions : 2421, 2421, 5861241 (nrow, ncol, ncell)
resolution : 91.9, 92.2 (x, y)
extent : 168039, 390529, 9003378, 9226595 (xmin, xmax, ymin, ymax)
coord. ref. : +proj=utm +south +zone=18 +ellps=WGS84
data source : in memory
names : srtm_21_14
values : 223.7, 6161 (min, max)
demCentralPeru1km <- aggregate(demCentralPeruUTM, fact=11, fun=mean)
demCentralPeru1km
class : RasterLayer
dimensions : 221, 221, 48841 (nrow, ncol, ncell)
resolution : 1011, 1014 (x, y)
extent : 168039, 391448, 9002456, 9226595 (xmin, xmax, ymin, ymax)
coord. ref. : +proj=utm +south +zone=18 +ellps=WGS84
data source : in memory
names : srtm_21_14
values : 228.3, 5678 (min, max)
plot(demCentralPeruUTM, xlim=c(250000,300000), ylim=c(9100000,9150000))
plot(demCentralPeru1km, xlim=c(250000,300000), ylim=c(9100000,9150000))
plot(demCentralPeru)
plot(mygps_spdf, pch=16, col="red", add=TRUE)
extract(demCentralPeru, mygps_spdf)
[1] 2569 3428 3387 3144 240 3794 3296 1092 4214 3780 3482 3613 3576 4499
[15] 2214 3163 251 1127 2635 1705
samplePtsRast <- rasterize(mygps_spdf, demCentralPeru, field=1)
dist2SamplePtsRast <- distance(samplePtsRast)
plot(dist2SamplePtsRast)
plot(mygps_spdf, col="red", pch=16, add=TRUE)
require(ggmap)
bboxPeru <- c(-78,-9,-76,-7)
ggMapPeru <- get_map(location = bboxPeru, color = "color", source = "google", maptype = "satellite")
ggmap(ggMapPeru, extent = "device", ylab = "Latitude", xlab = "Longitude") + geom_point(aes(x = long, y = lat, colour = "red"), data = as.data.frame(coordinates(mygps_spdf))) + theme(legend.position="none")
require(ggmap)
loc <- geocode('Euclid and Hearst, Berkeley, CA')
loc
lon lat
1 -122.3 37.88
ihouse <- geocode('International House, UC Berkeley', output="more")
ihouse
lon lat type loctype
1 -122.3 37.87 bus_station approximate
address
1 international house: piedmont avenue @ bancroft way, university of california, berkeley, berkeley, ca 94720, usa
north south east west postal_code country
1 37.87 37.87 -122.3 -122.3 94720 united states
administrative_area_level_2 administrative_area_level_1 locality street
1 alameda county california berkeley <NA>
streetNo point_of_interest query
1 NA <NA> International House, UC Berkeley
qmap(location=c(loc$lon, loc$lat), zoom = 18, maptype = "hybrid") + geom_point(aes(x=lon, y=lat, size=15, colour="red"), data=loc) + theme(legend.position="none")
Overview of Spatial Packages
Tutorials
GIS
R
GIS
R