In this example I will use QGIS geoprocessing scripts through the RQGIS library in R. I will use data from the 2005 Peru Demographic and Health Survey, where I construct Voronoi polygons from the primary sampling unit locations and generate estimates of educational attainment for women.

library(sf)

## Linking to GEOS 3.6.1, GDAL 2.2.3, proj.4 4.9.3

library(RQGIS) #you need Qgis 2.18 installed, not 3!

## Loading required package: reticulate

library(mapview)

peru_dhs_points<-st_read("P:/LAPIVpaper/PEGE5BFL", "PEGE52FL")

## Reading layer `PEGE52FL' from data source `P:\LAPIVpaper\PEGE5BFL' using driver `ESRI Shapefile'
## Simple feature collection with 1414 features and 20 fields
## geometry type:  POINT
## dimension:      XY
## bbox:           xmin: -81.30633 ymin: -18.22461 xmax: 0 ymax: 0
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs

peru_dhs_points<-st_transform(peru_dhs_points, crs=24892)
#project the data into a meter based system
mapview(peru_dhs_points["DHSCLUST"])

Set up QGIS environment

This lets R find where your QGIS binaries are located. set_env() should work without you specifying the path

my_env<-set_env()

## Trying to find QGIS in C:/OSGEO4~1.

Buffer analysis

First we find the name of the algorithm for point buffering

find_algorithms(search_term = "buffer", qgis_env = my_env)

##  [1] "Fixed distance buffer-------------------------------->qgis:fixeddistancebuffer"                                                                                               
##  [2] "Variable distance buffer----------------------------->qgis:variabledistancebuffer"                                                                                            
##  [3] "r.buffer - Creates a raster map layer showing buffer zones surrounding cells that contain non-NULL category values.--->grass7:r.buffer"                                       
##  [4] "r.buffer.lowmem - Creates a raster map layer showing buffer zones surrounding cells that contain non-NULL category values (low-memory alternative).--->grass7:r.buffer.lowmem"
##  [5] "v.buffer.column - Creates a buffer around features of given type.--->grass7:v.buffer.column"                                                                                  
##  [6] "v.buffer.distance - Creates a buffer around features of given type.--->grass7:v.buffer.distance"                                                                              
##  [7] "Fixed distance buffer-------------------------------->saga:shapesbufferfixeddistance"                                                                                         
##  [8] "Raster buffer---------------------------------------->saga:gridbuffer"                                                                                                        
##  [9] "Raster proximity buffer------------------------------>saga:gridproximitybuffer"                                                                                               
## [10] "Threshold raster buffer------------------------------>saga:thresholdbuffer"                                                                                                   
## [11] "Variable distance buffer----------------------------->saga:shapesbufferattributedistance"                                                                                     
## [12] "Buffer vectors--------------------------------------->gdalogr:buffervectors"                                                                                                  
## [13] "Single sided buffers (and offset lines) for lines---->gdalogr:singlesidedbuffersandoffsetlinesforlines"                                                                       
## [14] "r.buffer - Creates a raster map layer showing buffer zones surrounding cells that contain non-NULL category values.--->grass:r.buffer"                                        
## [15] "v.buffer.column - Creates a buffer around features of given type.--->grass:v.buffer.column"                                                                                   
## [16] "v.buffer.distance - Creates a buffer around features of given type.--->grass:v.buffer.distance"

For this, we’ll use the qgis:fixeddistancebuffer function, but we need to see what the arguments to the function are:

get_usage(alg="qgis:fixeddistancebuffer", qgis_env = my_env, intern = F)

## ALGORITHM: Fixed distance buffer
##  INPUT <ParameterVector>
##  DISTANCE <ParameterNumber>
##  SEGMENTS <ParameterNumber>
##  DISSOLVE <ParameterBoolean>
##  OUTPUT <OutputVector>

so we have 5 arguments, when we use the function, we need to specify all of these:

params <- get_args_man(alg = "qgis:fixeddistancebuffer", qgis_env = my_env)
params

## $INPUT
## [1] "None"
## 
## $DISTANCE
## [1] "10.0"
## 
## $SEGMENTS
## [1] "5"
## 
## $DISSOLVE
## [1] "False"
## 
## $OUTPUT
## [1] "None"

Here I do a 5km buffer around each PSU location.

wd<-"C:/Users/ozd504/Google Drive/classes/dem7093/GIS_class_2018/data"
params$INPUT <-peru_dhs_points
params$DISTANCE<-5000 #5km around each point
params$OUTPUT<-file.path(wd, "peru_psu_buffer_5k.shp") # path to the output shapefile

now we have our parameters defined, we run the script:

peru_buff <- run_qgis(alg = "qgis:fixeddistancebuffer",
                    params = params,
                    load_output = TRUE,
                    qgis_env = my_env)

## $OUTPUT
## [1] "C:/Users/ozd504/Google Drive/classes/dem7093/GIS_class_2018/data/peru_psu_buffer_5k.shp"

mapview(peru_buff["DHSCLUST"], legend=T,map.types="OpenStreetMap")

5km buffers done! Now, If I only had some other point data, I could do some point in polygon operations!

Vonoroi polygons

find_algorithms(search_term = "Voronoi")

## [1] "Voronoi polygons------------------------------------->qgis:voronoipolygons"                             
## [2] "v.voronoi - Creates a Voronoi diagram from an input vector layer containing points.--->grass7:v.voronoi"
## [3] "v.voronoi - Creates a Voronoi diagram from an input vector layer containing points.--->grass:v.voronoi"

get_usage(alg="qgis:voronoipolygons", qgis_env = my_env, intern = F)

## ALGORITHM: Voronoi polygons
##  INPUT <ParameterVector>
##  BUFFER <ParameterNumber>
##  OUTPUT <OutputVector>

so we have 3 arguments, when we use the function,:

params <- get_args_man(alg = "qgis:voronoipolygons", qgis_env = my_env)
params

## $INPUT
## [1] "None"
## 
## $BUFFER
## [1] "0.0"
## 
## $OUTPUT
## [1] "None"

wd<-"C:/Users/ozd504/Google Drive/classes/dem7093/GIS_class_2018/data"
params$INPUT <-peru_dhs_points
params$OUTPUT<-file.path(wd, "peru_psu_von_poly.shp") # path to the output shapefile

now we have our parameters defined, we run the script:

peru_von <- run_qgis(alg = "qgis:voronoipolygons",
                    params = params,
                    load_output = TRUE,
                    qgis_env = my_env)

## $OUTPUT
## [1] "C:/Users/ozd504/Google Drive/classes/dem7093/GIS_class_2018/data/peru_psu_von_poly.shp"

mapview(peru_von["DHSCLUST"],map.types="OpenStreetMap")

The polygons obviously have issues, so we can clip them to the Peruvian border:

find_algorithms(search_term = "clip")

##  [1] "Clip------------------------------------------------->qgis:clip"                    
##  [2] "Clip Data-------------------------------------------->lidartools:clipdata"          
##  [3] "Poly Clip Data--------------------------------------->lidartools:polyclipdata"      
##  [4] "lasclip---------------------------------------------->lidartools:lasclip"           
##  [5] "Clip points with polygons---------------------------->saga:clippointswithpolygons"  
##  [6] "Clip raster with polygon----------------------------->saga:clipgridwithpolygon"     
##  [7] "Polygon clipping------------------------------------->saga:polygonclipping"         
##  [8] "Clip raster by extent-------------------------------->gdalogr:cliprasterbyextent"   
##  [9] "Clip raster by mask layer---------------------------->gdalogr:cliprasterbymasklayer"
## [10] "Clip vectors by extent------------------------------->gdalogr:clipvectorsbyextent"  
## [11] "Clip vectors by polygon------------------------------>gdalogr:clipvectorsbypolygon"

get_usage(alg="qgis:clip", qgis_env = my_env, intern = F)

## ALGORITHM: Clip
##  INPUT <ParameterVector>
##  OVERLAY <ParameterVector>
##  OUTPUT <OutputVector>

so we have 3 arguments, when we use the function,:

params <- get_args_man(alg = "qgis:clip", qgis_env = my_env)
params

## $INPUT
## [1] "None"
## 
## $OVERLAY
## [1] "None"
## 
## $OUTPUT
## [1] "None"

peru_border<-st_read("P:/LAPIVpaper/PER_adm", "PER_adm0")

## Reading layer `PER_adm0' from data source `P:\LAPIVpaper\PER_adm' using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 70 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -81.3307 ymin: -18.3518 xmax: -68.65311 ymax: -0.03747
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs

peru_border<-st_transform(peru_border,crs=24892)

wd<-"C:/Users/ozd504/Google Drive/classes/dem7093/GIS_class_2018/data"
params$INPUT <-peru_von
params$OVERLAY<-peru_border
params$OUTPUT<-file.path(wd, "peru_psu_von_poly_clip.shp") # path to the output shapefile

now we have our parameters defined, we run the script:

peru_von_clip <- run_qgis(alg = "qgis:clip",
                    params = params,
                    load_output = TRUE,
                    qgis_env = my_env)

## $OUTPUT
## [1] "C:/Users/ozd504/Google Drive/classes/dem7093/GIS_class_2018/data/peru_psu_von_poly_clip.shp"

mapview(peru_von_clip["DHSCLUST"],map.types="OpenStreetMap")

Map some estimates:

If we have the 2005 DHS data, we could generate estimates of some quality, say proportion of women with a secondary education or more:

library(haven)
library(survey)

## Loading required package: grid

## Loading required package: Matrix

## Loading required package: survival

## 
## Attaching package: 'survey'

## The following object is masked from 'package:graphics':
## 
##     dotchart

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

peru_data<-read_dta("P:/LAPIVpaper/DHS_IPVdata/PEIR51FL.DTA")

peru_data<-peru_data%>%
  mutate(eduprim=ifelse(v106==1,1,0),
  edusecplus=ifelse(v106!=9&v106>=2,1,0),
  pwt=v005/1000000)%>%
  select(v000, v021, v022,v024,pwt, eduprim, edusecplus )

head(peru_data)

## # A tibble: 6 x 7
##   v000   v021  v022 v024        pwt eduprim edusecplus
##   <chr> <dbl> <dbl> <dbl+lbl> <dbl>   <dbl>      <dbl>
## 1 PE5       5     1 1         0.437       0          1
## 2 PE5       5     1 1         0.437       0          1
## 3 PE5       5     1 1         0.437       0          1
## 4 PE5       5     1 1         0.437       0          1
## 5 PE5       5     1 1         0.437       0          1
## 6 PE5       5     1 1         0.437       0          1

library(lme4)
fit<-glmer(edusecplus~1+(1|v022/v021),data=peru_data, family = binomial, weights=pwt)

## Warning in eval(family$initialize, rho): non-integer #successes in a
## binomial glm!

peru_data$edu_ests<-fitted(fit)
place_ests<-aggregate(edu_ests~v021, data=peru_data, FUN=mean)
head(place_ests)

##   v021  edu_ests
## 1    1 0.7627587
## 2    2 0.7356660
## 3    3 0.8597187
## 4    4 0.8915284
## 5    5 0.7499289
## 6    6 0.6600142

Merge the estimates to the polygons

peru_ests<-left_join(peru_von_clip, place_ests, by=c("DHSCLUST"= "v021"))


mapview(peru_ests["edu_ests"], legend=T,map.types="OpenStreetMap")

DEM 7093 - Using R for Point analysis Part 1

Corey S. Sparks, Ph.D. - University of Texas at San Antonio

June 12, 2018

Set up QGIS environment

Buffer analysis

Vonoroi polygons

Map some estimates: