# Chunk defaults: collapse each chunk's code and output into a single block
# and prefix printed output with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
# Data wrangling
library(dplyr)
library(fs)
library(ruODK)
# Visualisation
library(skimr)
library(knitr)
library(DT)
library(leaflet)
# Dissemination
library(readr)
# library(ckanr)
# library(googledrive)
# Spatial
# library(stringr)
# library(rgeos)
# library(sf)
# Configure ruODK defaults from an ODK Central form's OData service URL,
# using the ruODK test credentials; also pick the time zone and verbosity.
ruODK::ru_setup(
  svc = "https://sandbox.central.getodk.org/v1/projects/14/forms/2.svc",
  un = ruODK::get_test_un(), # form deployed to ruODK test server
  pw = ruODK::get_test_pw(),
  odkc_version = ruODK::get_test_odkc_version(),
  tz = "Australia/Perth",
  verbose = TRUE
)
## <ruODK settings>
## Default ODK Central Project ID: 14
## Default ODK Central Form ID: 2
## Default ODK Central URL: https://sandbox.central.getodk.org
## Default ODK Central Username: Florian.Mayer@dbca.wa.gov.au
## Default ODK Central Password: run ruODK::get_default_pw() to show
## Default Time Zone: Australia/Perth
## Default ODK Central Version: 0.8
## Default HTTP GET retries: 3
## Verbose messages: TRUE
## Test ODK Central Project ID: 14
## Test ODK Central Form ID: build_Flora-Quadrat-0-4_1564384341
## Test ODK Central Form ID (ZIP tests): build_Spotlighting-0-6_1558333698
## Test ODK Central Form ID (Attachment tests): build_Flora-Quadrat-0-1_1558330379
## Test ODK Central Form ID (Parsing tests): build_Turtle-Track-or-Nest-1-0_1569907666
## Test ODK Central Form ID (WKT tests): build_Locations_1589344221
## Test ODK Central URL: https://sandbox.central.getodk.org
## Test ODK Central Username: Florian.Mayer@dbca.wa.gov.au
## Test ODK Central Password: run ruODK::get_test_pw() to show
## Test ODK Central Version: 0.8
# Local folder passed to the submission download below as `local_dir`.
loc <- fs::path("media")
fs::dir_create(loc)

# Fetch the OData service document and render it as a table.
ft <- ruODK::odata_service_get()
knitr::kable(ft)
| name | kind | url |
|---|---|---|
| Submissions | EntitySet | Submissions |
# Option 1: via OData
# Request the first table listed in the service document, with geo fields
# returned as WKT and attachments stored under `loc`.
data <- ruODK::odata_submission_get(
  table = ft$url[1],
  wkt = TRUE,
  local_dir = loc
)
#> ℹ Downloading submissions...
#> ✔ Downloaded submissions.
#> ℹ Reading form schema...
#> ℹ Form schema v0.8
#> ℹ Parsing submissions...
#> ℹ Not unnesting geo fields: value_polygon
#> ℹ Unnesting: value
#> ℹ Unnesting column "value"
#> ℹ Unnesting more list cols: value___system, value_meta
#> ℹ Not unnesting geo fields: value_polygon
#> ℹ Unnesting: value___system, value_meta
#> ℹ Unnesting column "value___system"
#> ℹ Unnesting column "value_meta"
#> ℹ Found date/times: date_time.
#> ℹ Found attachments: img1, img2, img3.
#> ℹ Downloading attachments...
#> ℹ Using local directory "media".
#> ◉ File already downloaded, keeping "media/1602832928380.jpg".
#> ◉ File already downloaded, keeping "media/1602832860083.jpg".
#> ◉ File already downloaded, keeping "media/1602832758438.jpg".
#> ◉ File already downloaded, keeping "media/1602831771224.jpg".
#> ◉ File already downloaded, keeping "media/1602814904105.jpg".
#> ℹ Using local directory "media".
#> ◉ File already downloaded, keeping "media/1602832932604.jpg".
#> ◉ File already downloaded, keeping "media/1602832865261.jpg".
#> ◉ File already downloaded, keeping "media/1602832763881.jpg".
#> ◉ File already downloaded, keeping "media/1602831777474.jpg".
#> ◉ File already downloaded, keeping "media/1602814917361.jpg".
#> ℹ Using local directory "media".
#> ◯ Filename is NA, skipping download.
#> ◯ Filename is NA, skipping download.
#> ◯ Filename is NA, skipping download.
#> ◯ Filename is NA, skipping download.
#> ◉ File already downloaded, keeping "media/1602814929185.jpg".
#> ℹ Found geopoints: .
#> ℹ Found geotraces: .
#> ℹ Found geoshapes: polygon.
#> ℹ Parsing polygon...
#> ✔ Returning parsed submissions.
# Option 2: via ZIP export
# Download the export archive, unpack it into the working directory, and
# read the first extracted file as CSV.
data_csv_zip <- ruODK::submission_export()
#> ✔ Overwriting previous download: "/home/florianm/projects/2.zip"
data_csv_extracted <- utils::unzip(zipfile = data_csv_zip)
data_csv <- readr::read_csv(file = data_csv_extracted[[1]])
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> .default = col_character(),
#> SubmissionDate = col_datetime(format = ""),
#> Introduction = col_logical(),
#> DateTime = col_datetime(format = ""),
#> Point_ID = col_logical(),
#> Reach_point = col_logical(),
#> Reach_No = col_logical(),
#> Reach_No_img = col_logical(),
#> Landcover = col_logical(),
#> Mixed_crop = col_logical(),
#> Info_polygon = col_logical(),
#> Info_photo = col_logical(),
#> img4 = col_logical(),
#> irri_type = col_logical(),
#> irri_other = col_logical(),
#> SubmitterID = col_double(),
#> AttachmentsPresent = col_double(),
#> AttachmentsExpected = col_double()
#> )
#> ℹ Use `spec()` for the full column specifications.
#> Warning: 5 parsing failures.
#> row col expected actual file
#> 1 -- 34 columns 33 columns './2.csv'
#> 2 -- 34 columns 33 columns './2.csv'
#> 3 -- 34 columns 33 columns './2.csv'
#> 4 -- 34 columns 33 columns './2.csv'
#> 5 -- 34 columns 33 columns './2.csv'
# Option 3: via REST
# List the form's submissions, then request them by their instance IDs.
sl <- ruODK::submission_list()
#> ℹ https://sandbox.central.getodk.org/v1/projects/14/forms/2/submissions
sub_raw <- sl$instance_id %>%
  ruODK::submission_get()
First impressions of the data.
# Summary statistics for every column of `data`, reported per variable type.
skimr::skim(data)
| Name | data |
| Number of rows | 5 |
| Number of columns | 25 |
| _______________________ | |
| Column type frequency: | |
| character | 19 |
| numeric | 5 |
| POSIXct | 1 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| id | 0 | 1.0 | 41 | 41 | 0 | 5 | 0 |
| name | 0 | 1.0 | 5 | 7 | 0 | 4 | 0 |
| point_type | 0 | 1.0 | 13 | 13 | 0 | 1 | 0 |
| polygon | 0 | 1.0 | 201 | 1435 | 0 | 5 | 0 |
| img1 | 0 | 1.0 | 23 | 23 | 0 | 5 | 0 |
| img2 | 0 | 1.0 | 23 | 23 | 0 | 5 | 0 |
| system_submission_date | 0 | 1.0 | 24 | 24 | 0 | 5 | 0 |
| system_submitter_id | 0 | 1.0 | 4 | 4 | 0 | 1 | 0 |
| system_submitter_name | 0 | 1.0 | 5 | 5 | 0 | 1 | 0 |
| meta_instance_id | 0 | 1.0 | 41 | 41 | 0 | 5 | 0 |
| crop_cover | 4 | 0.2 | 3 | 3 | 0 | 1 | 0 |
| cropland | 4 | 0.2 | 25 | 25 | 0 | 1 | 0 |
| farmer_planting | 4 | 0.2 | 9 | 9 | 0 | 1 | 0 |
| farmer_harvest | 4 | 0.2 | 13 | 13 | 0 | 1 | 0 |
| gps_check | 4 | 0.2 | 2 | 2 | 0 | 1 | 0 |
| img3 | 0 | 1.0 | 2 | 23 | 0 | 2 | 0 |
| irri_or_rain | 4 | 0.2 | 9 | 9 | 0 | 1 | 0 |
| comments | 4 | 0.2 | 20 | 20 | 0 | 1 | 0 |
| odata_context | 0 | 1.0 | 83 | 83 | 0 | 1 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| polygon_longitude | 0 | 1 | 27.71 | 49.29 | 5.67 | 5.67 | 5.67 | 5.67 | 115.88 | ▇▁▁▁▂ |
| polygon_latitude | 0 | 1 | 35.19 | 37.56 | -32.00 | 51.98 | 51.98 | 51.98 | 51.98 | ▂▁▁▁▇ |
| polygon_altitude | 0 | 1 | 39.27 | 35.94 | 0.00 | 0.00 | 62.35 | 64.79 | 69.22 | ▅▁▁▁▇ |
| system_attachments_present | 0 | 1 | 2.20 | 0.45 | 2.00 | 2.00 | 2.00 | 2.00 | 3.00 | ▇▁▁▁▂ |
| system_attachments_expected | 0 | 1 | 2.20 | 0.45 | 2.00 | 2.00 | 2.00 | 2.00 | 3.00 | ▇▁▁▁▂ |
Variable type: POSIXct
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| date_time | 0 | 1 | 2020-10-16 10:16:00 | 2020-10-16 15:21:00 | 2020-10-16 15:18:00 | 5 |
# Compact overview of `data`: one line per column with its type and the
# first few values.
data %>%
  dplyr::glimpse()
#> Rows: 5
#> Columns: 25
#> $ id <chr> "uuid:a0b64a48-8f40-4c57-b314-04d940876bd…
#> $ name <chr> "Ananias", "David", "Ebilton", "Gilbino",…
#> $ date_time <dttm> 2020-10-16 15:21:00, 2020-10-16 15:19:00…
#> $ point_type <chr> "opportunistic", "opportunistic", "opport…
#> $ polygon <chr> "POLYGON ((5.6726565 51.9808498 69.224182…
#> $ polygon_longitude <dbl> 5.672657, 5.672944, 5.672645, 5.665054, 1…
#> $ polygon_latitude <dbl> 51.98085, 51.98054, 51.97994, 51.98189, -…
#> $ polygon_altitude <dbl> 69.22418, 62.34576, 64.79395, 0.00000, 0.…
#> $ img1 <chr> "media/1602832928380.jpg", "media/1602832…
#> $ img2 <chr> "media/1602832932604.jpg", "media/1602832…
#> $ system_submission_date <chr> "2020-10-16T07:31:01.858Z", "2020-10-16T0…
#> $ system_submitter_id <chr> "1330", "1330", "1330", "1330", "1330"
#> $ system_submitter_name <chr> "timon", "timon", "timon", "timon", "timo…
#> $ system_attachments_present <int> 2, 2, 2, 2, 3
#> $ system_attachments_expected <int> 2, 2, 2, 2, 3
#> $ meta_instance_id <chr> "uuid:a0b64a48-8f40-4c57-b314-04d940876bd…
#> $ crop_cover <chr> NA, NA, NA, NA, "lot"
#> $ cropland <chr> NA, NA, NA, NA, "harvested cucumber beans…
#> $ farmer_planting <chr> NA, NA, NA, NA, "July 2020"
#> $ farmer_harvest <chr> NA, NA, NA, NA, "December 2020"
#> $ gps_check <chr> NA, NA, NA, NA, "OK"
#> $ img3 <chr> "NA", "NA", "NA", "NA", "media/1602814929…
#> $ irri_or_rain <chr> NA, NA, NA, NA, "Irrigated"
#> $ comments <chr> NA, NA, NA, NA, "This is a test form."
#> $ odata_context <chr> "https://sandbox.central.getodk.org/v1/pr…
# Interactive table of the leading rows of `data`.
data %>%
  head() %>%
  DT::datatable()
The bug is reproduced if any of the Polygon values contains exactly one point.
We’ll show the Polygon field from all three access options: OData, ZIP, REST.
# Are all polygons valid?
# Polygon column as parsed by odata_submission_get
DT::datatable(dplyr::select(data, id, name, polygon))
# Polygon column as read from the ZIP export
DT::datatable(dplyr::select(data_csv, KEY, Name, Polygon))
# Raw Polygon value of the first element returned by REST submission_get
sub_raw[[1]]$Polygon
#> [[1]]
#> [1] "51.9808498 5.6726565 69.22418212890625 7.772;51.9811274 5.673009 62.74053955078125 7.504;51.981263 5.6733339 64.10791015625 7.718;51.9812353 5.6733591 65.70452880859375 8.04;51.9808498 5.6726565 69.22418212890625 7.772;"
# Turn the WKT column "polygon" into an sf geometry column.
# The coordinates are longitude/latitude in degrees (see polygon_longitude /
# polygon_latitude), assumed to be WGS84, so the CRS is declared as EPSG:4326.
# Without a CRS the layer carries no projection information, and the map
# viewer cannot place it on a background map.
geo_sf_poly <- data %>%
  sf::st_as_sf(wkt = "polygon", crs = 4326)
# All polygons
mapview::mapview(geo_sf_poly, col.regions = sf::sf.colors(10), popup = NULL)
# The first point of each polygon
# One marker per row of `data`, labelled and popped up with the `name` column.
leaflet::leaflet(data = data) %>%
  leaflet::addTiles() %>%
  leaflet::addMarkers(
    lng = ~polygon_longitude,
    lat = ~polygon_latitude,
    label = ~name,
    popup = ~name
  )