Testing speed of in DB and out DB queries

Files avaliable here.

https://dl.dropboxusercontent.com/u/2703650/Courses/geostats/geostats.zip

Set working directory

setwd("/home/duncan/Dropbox/Public/Courses/geostats")
rm(list = ls())
library(RODBC)
library(rgdal)
## Loading required package: sp
## rgdal: version: 0.8-10, (SVN revision 478) Geospatial Data Abstraction
## Library extensions to R successfully loaded Loaded GDAL runtime: GDAL
## 1.9.2, released 2012/10/08 Path to GDAL shared files: /usr/share/gdal/1.9
## Loaded PROJ.4 runtime: Rel. 4.8.0, 6 March 2012, [PJ_VERSION: 480] Path to
## PROJ.4 shared files: (autodetected)

Creating a new data base

# system('dropdb geostats') If already exists
system("createdb geostats")

Create ODBC connection (need to edit odb.ini first of)

sudo gedit /etc/odbc.ini

The example database is called “geostats”. Paste the following text into this file and save it.

[geostats]
Driver = /usr/lib/i386-linux-gnu/odbc/psqlodbcw.so
Database = geostats
Servername = localhost
Username = postgres
Password = postgres
Protocol = 8.2.5
ReadOnly = 0
con <- odbcConnect("geostats")
odbcQuery(con, "CREATE EXTENSION POSTGIS")
## [1] 1
odbcQuery(con, "CREATE EXTENSION PLR")
## [1] 1

Add a points shapefile

com <- "shp2pgsql -s 4326 -I shapefiles/oaks.shp oaks| psql -d geostats;"
system(com)

Load layers out db (elev_out) and in db (elev_in)

command <- "raster2pgsql -s 4326 -d  -M -R /home/duncan/geostats/elevation.tif -F -t 100x100 elev_out|psql -d geostats"
system(command)
command <- "raster2pgsql -s 4326 -d  -M /home/duncan/geostats/elevation.tif -F -t 100x100 elev_in|psql -d geostats"
system(command)

Timed points overlay In DB

query <- "select o.gid,genus,species,st_value(rast,geom) from elev_in, oaks o where st_intersects(geom,rast)"
system.time(d <- sqlQuery(con, query))
##    user  system elapsed 
##   0.184   0.012  27.860

Timed points overlay Out DB

query <- "select o.gid,genus,species,st_value(rast,geom) from elev_out, oaks o where st_intersects(geom,rast)"
system.time(d <- sqlQuery(con, query))
##    user  system elapsed 
##   0.140   0.000   4.041

Add a PLR function

query <- "CREATE OR REPLACE FUNCTION range_elev (float[]) RETURNS float AS '\nx<-arg1\nx<-as.numeric(as.character(x))\nx<-na.omit(x)\nmax(x)-min(x)'\nLANGUAGE 'plr' STRICT;"
odbcQuery(con, query)
## [1] 1

More complex query involving buffer around points

query <- "select gid,genus,species,range_elev((st_dumpvalues(st_union(st_clip(rast,geom)))).valarray) from (select gid,species,genus, st_buffer(geom::geography,2000)::geometry geom from oaks limit 100) o, elev_out  where st_intersects(rast,geom) group by gid,genus,species;"
system.time(d <- sqlQuery(con, query))
##    user  system elapsed 
##   0.004   0.000   0.415

In DB version

query <- "select gid,genus,species,range_elev((st_dumpvalues(st_union(st_clip(rast,geom)))).valarray) from (select gid,species,genus, st_buffer(geom::geography,2000)::geometry geom from oaks limit 100) o, elev_in  where st_intersects(rast,geom) group by gid,genus,species;"
system.time(d <- sqlQuery(con, query))
##    user  system elapsed 
##   0.004   0.000   5.621
odbcClose(con)
system("dropdb geostats")