# Chunk defaults: merge code and its output into one block and prefix
# printed output with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
# Data wrangling
library(dplyr)
library(fs)
library(ruODK)

# Visualisation
library(skimr)
library(knitr)
library(DT)
library(leaflet)

# Dissemination
library(readr)
# library(ckanr)
# library(googledrive)

# Spatial
# library(stringr)
# library(rgeos)
# library(sf)

# Configure the ruODK session defaults: the target ODK Central form (given as
# its OData service URL), credentials, server version, time zone and verbosity.
ruODK::ru_setup(
  svc = "https://sandbox.central.getodk.org/v1/projects/14/forms/2.svc",
  # The form is deployed to the ruODK test server, hence the test credentials.
  un = ruODK::get_test_un(),
  pw = ruODK::get_test_pw(),
  odkc_version = ruODK::get_test_odkc_version(),
  tz = "Australia/Perth",
  verbose = TRUE
)
## <ruODK settings>
##   Default ODK Central Project ID: 14 
##   Default ODK Central Form ID: 2 
##   Default ODK Central URL: https://sandbox.central.getodk.org 
##   Default ODK Central Username: Florian.Mayer@dbca.wa.gov.au 
##   Default ODK Central Password: run ruODK::get_default_pw() to show 
##   Default Time Zone: Australia/Perth 
##   Default ODK Central Version: 0.8 
##   Default HTTP GET retries: 3 
##   Verbose messages: TRUE 
##   Test ODK Central Project ID: 14 
##   Test ODK Central Form ID: build_Flora-Quadrat-0-4_1564384341 
##   Test ODK Central Form ID (ZIP tests): build_Spotlighting-0-6_1558333698 
##   Test ODK Central Form ID (Attachment tests): build_Flora-Quadrat-0-1_1558330379 
##   Test ODK Central Form ID (Parsing tests): build_Turtle-Track-or-Nest-1-0_1569907666 
##   Test ODK Central Form ID (WKT tests): build_Locations_1589344221 
##   Test ODK Central URL: https://sandbox.central.getodk.org 
##   Test ODK Central Username: Florian.Mayer@dbca.wa.gov.au 
##   Test ODK Central Password: run ruODK::get_test_pw() to show 
##   Test ODK Central Version: 0.8
# Local directory handed to the download step as `local_dir` for attachments.
loc <- fs::path("media")
fs::dir_create(path = loc)

Download data

# Fetch the OData service document of the configured form and show it as a
# table (columns: name, kind, url).
ft <- ruODK::odata_service_get()
knitr::kable(ft)
name kind url
Submissions EntitySet Submissions
# Option 1: via OData
# Download and parse the first table listed in the service document, storing
# attachments under `loc` and requesting geo fields as WKT.
data <- ruODK::odata_submission_get(
  table = ft$url[1],
  local_dir = loc,
  wkt = TRUE
)
#> ℹ Downloading submissions...
#> ✔ Downloaded submissions.
#> ℹ Reading form schema...
#> ℹ Form schema v0.8
#> ℹ Parsing submissions...
#> ℹ Not unnesting geo fields: value_polygon
#> ℹ Unnesting: value
#> ℹ Unnesting column "value"
#> ℹ Unnesting more list cols: value___system, value_meta
#> ℹ Not unnesting geo fields: value_polygon
#> ℹ Unnesting: value___system, value_meta
#> ℹ Unnesting column "value___system"
#> ℹ Unnesting column "value_meta"
#> ℹ Found date/times: date_time.
#> ℹ Found attachments: img1, img2, img3.
#> ℹ Downloading attachments...
#> ℹ Using local directory "media".
#> ◉ File already downloaded, keeping "media/1602832928380.jpg".
#> ◉ File already downloaded, keeping "media/1602832860083.jpg".
#> ◉ File already downloaded, keeping "media/1602832758438.jpg".
#> ◉ File already downloaded, keeping "media/1602831771224.jpg".
#> ◉ File already downloaded, keeping "media/1602814904105.jpg".
#> ℹ Using local directory "media".
#> ◉ File already downloaded, keeping "media/1602832932604.jpg".
#> ◉ File already downloaded, keeping "media/1602832865261.jpg".
#> ◉ File already downloaded, keeping "media/1602832763881.jpg".
#> ◉ File already downloaded, keeping "media/1602831777474.jpg".
#> ◉ File already downloaded, keeping "media/1602814917361.jpg".
#> ℹ Using local directory "media".
#> ◯ Filename is NA, skipping download.
#> ◯ Filename is NA, skipping download.
#> ◯ Filename is NA, skipping download.
#> ◯ Filename is NA, skipping download.
#> ◉ File already downloaded, keeping "media/1602814929185.jpg".
#> ℹ Found geopoints: .
#> ℹ Found geotraces: .
#> ℹ Found geoshapes: polygon.
#> ℹ Parsing polygon...
#> ✔ Returning parsed submissions.

# Option 2: via ZIP export
# Download the export archive, unpack it (into the current working directory,
# the default for unzip()), and read the first extracted file as CSV.
# NOTE(review): assumes the first extracted file is the main submissions
# table - confirm if the archive ever contains several CSVs.
data_csv_zip <- ruODK::submission_export()
data_csv_extracted <- utils::unzip(zipfile = data_csv_zip)
data_csv <- readr::read_csv(file = data_csv_extracted[[1]])
#> 
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#>   .default = col_character(),
#>   SubmissionDate = col_datetime(format = ""),
#>   Introduction = col_logical(),
#>   DateTime = col_datetime(format = ""),
#>   Point_ID = col_logical(),
#>   Reach_point = col_logical(),
#>   Reach_No = col_logical(),
#>   Reach_No_img = col_logical(),
#>   Landcover = col_logical(),
#>   Mixed_crop = col_logical(),
#>   Info_polygon = col_logical(),
#>   Info_photo = col_logical(),
#>   img4 = col_logical(),
#>   irri_type = col_logical(),
#>   irri_other = col_logical(),
#>   SubmitterID = col_double(),
#>   AttachmentsPresent = col_double(),
#>   AttachmentsExpected = col_double()
#> )
#> ℹ Use `spec()` for the full column specifications.
#> Warning: 5 parsing failures.
#> row col   expected     actual      file
#>   1  -- 34 columns 33 columns './2.csv'
#>   2  -- 34 columns 33 columns './2.csv'
#>   3  -- 34 columns 33 columns './2.csv'
#>   4  -- 34 columns 33 columns './2.csv'
#>   5  -- 34 columns 33 columns './2.csv'

# Option 3: via REST
# List the form's submissions, then fetch the submissions for the listed
# instance IDs.
sl <- ruODK::submission_list()
sub_raw <- sl$instance_id %>% ruODK::submission_get()

Analyse and visualise data

First impressions of the data.

# Per-column summary of the parsed OData submissions, grouped by column type.
# NOTE(review): kept as a direct call on purpose - the printed summary shows
# the name of the object passed in ("data"), which a piped call would change.
skimr::skim(data)
Data summary
Name data
Number of rows 5
Number of columns 25
_______________________
Column type frequency:
character 19
numeric 5
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
id 0 1.0 41 41 0 5 0
name 0 1.0 5 7 0 4 0
point_type 0 1.0 13 13 0 1 0
polygon 0 1.0 201 1435 0 5 0
img1 0 1.0 23 23 0 5 0
img2 0 1.0 23 23 0 5 0
system_submission_date 0 1.0 24 24 0 5 0
system_submitter_id 0 1.0 4 4 0 1 0
system_submitter_name 0 1.0 5 5 0 1 0
meta_instance_id 0 1.0 41 41 0 5 0
crop_cover 4 0.2 3 3 0 1 0
cropland 4 0.2 25 25 0 1 0
farmer_planting 4 0.2 9 9 0 1 0
farmer_harvest 4 0.2 13 13 0 1 0
gps_check 4 0.2 2 2 0 1 0
img3 0 1.0 2 23 0 2 0
irri_or_rain 4 0.2 9 9 0 1 0
comments 4 0.2 20 20 0 1 0
odata_context 0 1.0 83 83 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
polygon_longitude 0 1 27.71 49.29 5.67 5.67 5.67 5.67 115.88 ▇▁▁▁▂
polygon_latitude 0 1 35.19 37.56 -32.00 51.98 51.98 51.98 51.98 ▂▁▁▁▇
polygon_altitude 0 1 39.27 35.94 0.00 0.00 62.35 64.79 69.22 ▅▁▁▁▇
system_attachments_present 0 1 2.20 0.45 2.00 2.00 2.00 2.00 3.00 ▇▁▁▁▂
system_attachments_expected 0 1 2.20 0.45 2.00 2.00 2.00 2.00 3.00 ▇▁▁▁▂

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
date_time 0 1 2020-10-16 10:16:00 2020-10-16 15:21:00 2020-10-16 15:18:00 5
# Compact column-wise overview: one line per column with its type and the
# first few values.
data %>% dplyr::glimpse()
#> Rows: 5
#> Columns: 25
#> $ id                          <chr> "uuid:a0b64a48-8f40-4c57-b314-04d940876bd…
#> $ name                        <chr> "Ananias", "David", "Ebilton", "Gilbino",…
#> $ date_time                   <dttm> 2020-10-16 15:21:00, 2020-10-16 15:19:00…
#> $ point_type                  <chr> "opportunistic", "opportunistic", "opport…
#> $ polygon                     <chr> "POLYGON ((5.6726565 51.9808498 69.224182…
#> $ polygon_longitude           <dbl> 5.672657, 5.672944, 5.672645, 5.665054, 1…
#> $ polygon_latitude            <dbl> 51.98085, 51.98054, 51.97994, 51.98189, -…
#> $ polygon_altitude            <dbl> 69.22418, 62.34576, 64.79395, 0.00000, 0.…
#> $ img1                        <chr> "media/1602832928380.jpg", "media/1602832…
#> $ img2                        <chr> "media/1602832932604.jpg", "media/1602832…
#> $ system_submission_date      <chr> "2020-10-16T07:31:01.858Z", "2020-10-16T0…
#> $ system_submitter_id         <chr> "1330", "1330", "1330", "1330", "1330"
#> $ system_submitter_name       <chr> "timon", "timon", "timon", "timon", "timo…
#> $ system_attachments_present  <int> 2, 2, 2, 2, 3
#> $ system_attachments_expected <int> 2, 2, 2, 2, 3
#> $ meta_instance_id            <chr> "uuid:a0b64a48-8f40-4c57-b314-04d940876bd…
#> $ crop_cover                  <chr> NA, NA, NA, NA, "lot"
#> $ cropland                    <chr> NA, NA, NA, NA, "harvested cucumber beans…
#> $ farmer_planting             <chr> NA, NA, NA, NA, "July 2020"
#> $ farmer_harvest              <chr> NA, NA, NA, NA, "December 2020"
#> $ gps_check                   <chr> NA, NA, NA, NA, "OK"
#> $ img3                        <chr> "NA", "NA", "NA", "NA", "media/1602814929…
#> $ irri_or_rain                <chr> NA, NA, NA, NA, "Irrigated"
#> $ comments                    <chr> NA, NA, NA, NA, "This is a test form."
#> $ odata_context               <chr> "https://sandbox.central.getodk.org/v1/pr…
# Interactive table of the first rows of the parsed submissions.
data %>%
  head() %>%
  DT::datatable()

Bug: incomplete polygons

The bug is reproduced if any of the Polygon values contains exactly one point.

We’ll show the Polygon field from all three access options: OData, ZIP, REST.

# Are all polygons valid?
# OData (odata_submission_get): one polygon value per submission
DT::datatable(dplyr::select(data, id, name, polygon))

# ZIP export: the corresponding columns under their CSV names
DT::datatable(dplyr::select(data_csv, KEY, Name, Polygon))

# REST (submission_get): the raw Polygon value of the first submission only
sub_raw[[1]]$Polygon
#> [[1]]
#> [1] "51.9808498 5.6726565 69.22418212890625 7.772;51.9811274 5.673009 62.74053955078125 7.504;51.981263 5.6733339 64.10791015625 7.718;51.9812353 5.6733591 65.70452880859375 8.04;51.9808498 5.6726565 69.22418212890625 7.772;"

Maps

# Turn the WKT `polygon` column into an sf geometry column.
# ODK records geo coordinates as WGS84 longitude/latitude, so the CRS is
# declared explicitly (EPSG:4326). Without it the geometries carry no
# coordinate reference system and are not georeferenced for mapping.
geo_sf_poly <- data %>% sf::st_as_sf(wkt = "polygon", crs = 4326)

# All polygons
mapview::mapview(geo_sf_poly, col.regions = sf::sf.colors(10), popup = NULL)

# The first point of each polygon
leaflet::leaflet(data = data) %>%
  leaflet::addTiles() %>%
  leaflet::addMarkers(
    lng = ~polygon_longitude,
    lat = ~polygon_latitude,
    label = ~name,
    popup = ~name
  )