In this example I will use QGIS geoprocessing scripts through the RQGIS library in R. I will use data from the 2005 Peru Demographic and Health Survey, where I construct Voronoi polygons from the primary sampling unit locations and generate estimates of educational attainment for women.
library(sf)
## Linking to GEOS 3.6.1, GDAL 2.2.3, proj.4 4.9.3
library(RQGIS) #you need Qgis 2.18 installed, not 3!
## Loading required package: reticulate
library(mapview)
peru_dhs_points<-st_read("P:/LAPIVpaper/PEGE5BFL", "PEGE52FL")
## Reading layer `PEGE52FL' from data source `P:\LAPIVpaper\PEGE5BFL' using driver `ESRI Shapefile'
## Simple feature collection with 1414 features and 20 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: -81.30633 ymin: -18.22461 xmax: 0 ymax: 0
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
peru_dhs_points<-st_transform(peru_dhs_points, crs=24892)
#project the data into a meter based system
mapview(peru_dhs_points["DHSCLUST"])
This lets R find where your QGIS binaries are located. set_env() should work without you specifying the path
my_env<-set_env()
## Trying to find QGIS in C:/OSGEO4~1.
First we find the name of the algorithm for point buffering
find_algorithms(search_term = "buffer", qgis_env = my_env)
## [1] "Fixed distance buffer-------------------------------->qgis:fixeddistancebuffer"
## [2] "Variable distance buffer----------------------------->qgis:variabledistancebuffer"
## [3] "r.buffer - Creates a raster map layer showing buffer zones surrounding cells that contain non-NULL category values.--->grass7:r.buffer"
## [4] "r.buffer.lowmem - Creates a raster map layer showing buffer zones surrounding cells that contain non-NULL category values (low-memory alternative).--->grass7:r.buffer.lowmem"
## [5] "v.buffer.column - Creates a buffer around features of given type.--->grass7:v.buffer.column"
## [6] "v.buffer.distance - Creates a buffer around features of given type.--->grass7:v.buffer.distance"
## [7] "Fixed distance buffer-------------------------------->saga:shapesbufferfixeddistance"
## [8] "Raster buffer---------------------------------------->saga:gridbuffer"
## [9] "Raster proximity buffer------------------------------>saga:gridproximitybuffer"
## [10] "Threshold raster buffer------------------------------>saga:thresholdbuffer"
## [11] "Variable distance buffer----------------------------->saga:shapesbufferattributedistance"
## [12] "Buffer vectors--------------------------------------->gdalogr:buffervectors"
## [13] "Single sided buffers (and offset lines) for lines---->gdalogr:singlesidedbuffersandoffsetlinesforlines"
## [14] "r.buffer - Creates a raster map layer showing buffer zones surrounding cells that contain non-NULL category values.--->grass:r.buffer"
## [15] "v.buffer.column - Creates a buffer around features of given type.--->grass:v.buffer.column"
## [16] "v.buffer.distance - Creates a buffer around features of given type.--->grass:v.buffer.distance"
For this, we’ll use the qgis:fixeddistancebuffer function, but we need to see what the arguments to the function are:
get_usage(alg="qgis:fixeddistancebuffer", qgis_env = my_env, intern = F)
## ALGORITHM: Fixed distance buffer
## INPUT <ParameterVector>
## DISTANCE <ParameterNumber>
## SEGMENTS <ParameterNumber>
## DISSOLVE <ParameterBoolean>
## OUTPUT <OutputVector>
so we have 5 arguments, when we use the function, we need to specify all of these:
params <- get_args_man(alg = "qgis:fixeddistancebuffer", qgis_env = my_env)
params
## $INPUT
## [1] "None"
##
## $DISTANCE
## [1] "10.0"
##
## $SEGMENTS
## [1] "5"
##
## $DISSOLVE
## [1] "False"
##
## $OUTPUT
## [1] "None"
Here I do a 5km buffer around each PSU location.
wd<-"C:/Users/ozd504/Google Drive/classes/dem7093/GIS_class_2018/data"
params$INPUT <-peru_dhs_points
params$DISTANCE<-5000 #5km around each point
params$OUTPUT<-file.path(wd, "peru_psu_buffer_5k.shp") # path to the output shapefile
now we have our parameters defined, we run the script:
peru_buff <- run_qgis(alg = "qgis:fixeddistancebuffer",
params = params,
load_output = TRUE,
qgis_env = my_env)
## $OUTPUT
## [1] "C:/Users/ozd504/Google Drive/classes/dem7093/GIS_class_2018/data/peru_psu_buffer_5k.shp"
mapview(peru_buff["DHSCLUST"], legend=T,map.types="OpenStreetMap")
5km buffers done! Now, If I only had some other point data, I could do some point in polygon operations!
find_algorithms(search_term = "Voronoi")
## [1] "Voronoi polygons------------------------------------->qgis:voronoipolygons"
## [2] "v.voronoi - Creates a Voronoi diagram from an input vector layer containing points.--->grass7:v.voronoi"
## [3] "v.voronoi - Creates a Voronoi diagram from an input vector layer containing points.--->grass:v.voronoi"
get_usage(alg="qgis:voronoipolygons", qgis_env = my_env, intern = F)
## ALGORITHM: Voronoi polygons
## INPUT <ParameterVector>
## BUFFER <ParameterNumber>
## OUTPUT <OutputVector>
so we have 3 arguments, when we use the function,:
params <- get_args_man(alg = "qgis:voronoipolygons", qgis_env = my_env)
params
## $INPUT
## [1] "None"
##
## $BUFFER
## [1] "0.0"
##
## $OUTPUT
## [1] "None"
wd<-"C:/Users/ozd504/Google Drive/classes/dem7093/GIS_class_2018/data"
params$INPUT <-peru_dhs_points
params$OUTPUT<-file.path(wd, "peru_psu_von_poly.shp") # path to the output shapefile
now we have our parameters defined, we run the script:
peru_von <- run_qgis(alg = "qgis:voronoipolygons",
params = params,
load_output = TRUE,
qgis_env = my_env)
## $OUTPUT
## [1] "C:/Users/ozd504/Google Drive/classes/dem7093/GIS_class_2018/data/peru_psu_von_poly.shp"
mapview(peru_von["DHSCLUST"],map.types="OpenStreetMap")
The polygons obviously have issues, so we can clip them to the Peruvian border:
find_algorithms(search_term = "clip")
## [1] "Clip------------------------------------------------->qgis:clip"
## [2] "Clip Data-------------------------------------------->lidartools:clipdata"
## [3] "Poly Clip Data--------------------------------------->lidartools:polyclipdata"
## [4] "lasclip---------------------------------------------->lidartools:lasclip"
## [5] "Clip points with polygons---------------------------->saga:clippointswithpolygons"
## [6] "Clip raster with polygon----------------------------->saga:clipgridwithpolygon"
## [7] "Polygon clipping------------------------------------->saga:polygonclipping"
## [8] "Clip raster by extent-------------------------------->gdalogr:cliprasterbyextent"
## [9] "Clip raster by mask layer---------------------------->gdalogr:cliprasterbymasklayer"
## [10] "Clip vectors by extent------------------------------->gdalogr:clipvectorsbyextent"
## [11] "Clip vectors by polygon------------------------------>gdalogr:clipvectorsbypolygon"
get_usage(alg="qgis:clip", qgis_env = my_env, intern = F)
## ALGORITHM: Clip
## INPUT <ParameterVector>
## OVERLAY <ParameterVector>
## OUTPUT <OutputVector>
so we have 3 arguments, when we use the function,:
params <- get_args_man(alg = "qgis:clip", qgis_env = my_env)
params
## $INPUT
## [1] "None"
##
## $OVERLAY
## [1] "None"
##
## $OUTPUT
## [1] "None"
peru_border<-st_read("P:/LAPIVpaper/PER_adm", "PER_adm0")
## Reading layer `PER_adm0' from data source `P:\LAPIVpaper\PER_adm' using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 70 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -81.3307 ymin: -18.3518 xmax: -68.65311 ymax: -0.03747
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
peru_border<-st_transform(peru_border,crs=24892)
wd<-"C:/Users/ozd504/Google Drive/classes/dem7093/GIS_class_2018/data"
params$INPUT <-peru_von
params$OVERLAY<-peru_border
params$OUTPUT<-file.path(wd, "peru_psu_von_poly_clip.shp") # path to the output shapefile
now we have our parameters defined, we run the script:
peru_von_clip <- run_qgis(alg = "qgis:clip",
params = params,
load_output = TRUE,
qgis_env = my_env)
## $OUTPUT
## [1] "C:/Users/ozd504/Google Drive/classes/dem7093/GIS_class_2018/data/peru_psu_von_poly_clip.shp"
mapview(peru_von_clip["DHSCLUST"],map.types="OpenStreetMap")
If we have the 2005 DHS data, we could generate estimates of some quality, say proportion of women with a secondary education or more:
library(haven)
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
peru_data<-read_dta("P:/LAPIVpaper/DHS_IPVdata/PEIR51FL.DTA")
peru_data<-peru_data%>%
mutate(eduprim=ifelse(v106==1,1,0),
edusecplus=ifelse(v106!=9&v106>=2,1,0),
pwt=v005/1000000)%>%
select(v000, v021, v022,v024,pwt, eduprim, edusecplus )
head(peru_data)
## # A tibble: 6 x 7
## v000 v021 v022 v024 pwt eduprim edusecplus
## <chr> <dbl> <dbl> <dbl+lbl> <dbl> <dbl> <dbl>
## 1 PE5 5 1 1 0.437 0 1
## 2 PE5 5 1 1 0.437 0 1
## 3 PE5 5 1 1 0.437 0 1
## 4 PE5 5 1 1 0.437 0 1
## 5 PE5 5 1 1 0.437 0 1
## 6 PE5 5 1 1 0.437 0 1
library(lme4)
fit<-glmer(edusecplus~1+(1|v022/v021),data=peru_data, family = binomial, weights=pwt)
## Warning in eval(family$initialize, rho): non-integer #successes in a
## binomial glm!
peru_data$edu_ests<-fitted(fit)
place_ests<-aggregate(edu_ests~v021, data=peru_data, FUN=mean)
head(place_ests)
## v021 edu_ests
## 1 1 0.7627587
## 2 2 0.7356660
## 3 3 0.8597187
## 4 4 0.8915284
## 5 5 0.7499289
## 6 6 0.6600142
Merge the estimates to the polygons
peru_ests<-left_join(peru_von_clip, place_ests, by=c("DHSCLUST"= "v021"))
mapview(peru_ests["edu_ests"], legend=T,map.types="OpenStreetMap")