Abstract
DB schema structure is prepared in DBSchema and saved as image and as SQL script. The procedure reads input data from GoogleSheets, completes the information, produces tables ready for the validating DB, reads the SQL script creating the SQLite DB and appends the tables.
Repo: https://gitlab.com/NuoroForestrySchool/Forest-Management-Tools_pinaster-and-radiata-pine_Arzana.git
Data collected measuring the felled sample trees has been registered in spreadsheets that replicate tally forms. The procedure transferd the measurements to a relational dababase. Data are read from the sheets, completed, verified and reshaped into tables corresponding to DB tables. The (empty) DB is generated and then populated.
The schema of the DB has been designed using DBSchema. The program produces a graphical representation of the schema and the SQL script file required to initialize the DB (create_DB_SQL)
Data tally procedures and forms are presented in this document: PROTOCOLLO DI RILIEVO CANALI ESCA_V2.pdf Data have been acquired through GoogleSheet tables that reproduced forms used in the forest.
library(plyr)
library(dplyr)
library(tidyverse)
library(magrittr)
library(googlesheets)
# getwd()
source("ReadDataFromGSheet.R")
Worksheets feed constructed with public visibility
Accessing worksheet titled 'Intestazione'.
Downloading: 1.2 kB
Downloading: 1.2 kB
Downloading: 1.7 kB
Downloading: 1.7 kB
Downloading: 1.7 kB
Downloading: 1.7 kB
Downloading: 1.7 kB
Downloading: 1.7 kB
Downloading: 1.7 kB
Downloading: 1.7 kB
Parsed with column specification:
cols(
complesso = [31mcol_character()[39m,
data = [31mcol_character()[39m,
rilevatori = [31mcol_character()[39m,
id_gradone = [31mcol_character()[39m,
progr_gradone = [31mcol_character()[39m,
id_fusto_campione = [32mcol_double()[39m,
specie = [31mcol_character()[39m,
d_130 = [32mcol_double()[39m,
h_ipso = [32mcol_double()[39m,
lung_atterrato = [32mcol_double()[39m,
peso_ramaglia = [32mcol_double()[39m,
fascine = [31mcol_character()[39m
)
Accessing worksheet titled 'Scheda_2'.
Downloading: 1.1 kB
Downloading: 1.1 kB
Downloading: 1.6 kB
Downloading: 1.6 kB
Downloading: 1.6 kB
Downloading: 1.6 kB
Downloading: 1.6 kB
Downloading: 1.6 kB
Downloading: 1.6 kB
Downloading: 1.6 kB
Parsed with column specification:
cols(
id_fusto_campione = [32mcol_double()[39m,
progressivo_foglio = [32mcol_double()[39m,
id_asta_secondaria = [31mcol_character()[39m,
dist_da_biforcazione = [32mcol_double()[39m,
diam_sezione = [32mcol_double()[39m,
id_asta_liv_inf = [31mcol_character()[39m
)
Accessing worksheet titled 'Rotelle'.
Downloading: 610 B
Downloading: 610 B
Downloading: 620 B
Downloading: 620 B
Downloading: 620 B
Downloading: 620 B
Downloading: 620 B
Downloading: 620 B
Parsed with column specification:
cols(
id_fusto_campione = [32mcol_double()[39m,
peso_rotelle = [32mcol_double()[39m,
dist_rot_id0 = [32mcol_double()[39m,
dist_rot_id1 = [32mcol_double()[39m,
dist_rot_id2 = [32mcol_double()[39m,
dist_rot_id3 = [32mcol_double()[39m,
dist_rot_id4 = [32mcol_double()[39m,
dist_rot_id5 = [32mcol_double()[39m,
dist_rot_id6 = [32mcol_double()[39m
)
Accessing worksheet titled 'Scheda_1'.
Downloading: 1.3 kB
Downloading: 1.3 kB
Downloading: 2.7 kB
Downloading: 2.7 kB
Downloading: 4 kB
Downloading: 4 kB
Downloading: 4.1 kB
Downloading: 4.1 kB
Downloading: 5.4 kB
Downloading: 5.4 kB
Downloading: 5.9 kB
Downloading: 5.9 kB
Downloading: 7.2 kB
Downloading: 7.2 kB
Downloading: 8.5 kB
Downloading: 8.5 kB
Downloading: 9.2 kB
Downloading: 9.2 kB
Downloading: 10 kB
Downloading: 10 kB
Downloading: 12 kB
Downloading: 12 kB
Downloading: 12 kB
Downloading: 12 kB
Downloading: 14 kB
Downloading: 14 kB
Downloading: 15 kB
Downloading: 15 kB
Downloading: 15 kB
Downloading: 15 kB
Downloading: 17 kB
Downloading: 17 kB
Downloading: 18 kB
Downloading: 18 kB
Downloading: 19 kB
Downloading: 19 kB
Downloading: 20 kB
Downloading: 20 kB
Downloading: 21 kB
Downloading: 21 kB
Downloading: 23 kB
Downloading: 23 kB
Downloading: 23 kB
Downloading: 23 kB
Downloading: 24 kB
Downloading: 24 kB
Downloading: 26 kB
Downloading: 26 kB
Downloading: 27 kB
Downloading: 27 kB
Downloading: 27 kB
Downloading: 27 kB
Downloading: 27 kB
Downloading: 27 kB
Downloading: 27 kB
Downloading: 27 kB
Downloading: 27 kB
Downloading: 27 kB [1] READ **** File name: FustiCampione_Arzana2018_InserimentoDati - updated: 2019-01-28 10:41:46
# Completing missing date
## Data mancante in scheda attribuita per logica
Intestazione <- Intestazione %>%
mutate(data = replace(data, id_fusto_campione == 57, "25/07/18"))
# Linear interpolation of data missing in diam_sez in Scheda_2
## Interpolation function
linterp <- function(h, h1, d1, h2, d2) d1 - (h-h1)*(d1-d2)/(h2-h1)
## Sort Scheda_2 for linear interpolation of NA in diam_sezione
Scheda_2 <- arrange(Scheda_2, id_fusto_campione, id_asta_secondaria, dist_da_biforcazione)
## Interpolation
Scheda_2 <- Scheda_2 %>%
mutate(d1 = lag(diam_sezione),
d2 = lead(diam_sezione),
h1 = lag(dist_da_biforcazione),
h2 = lead(dist_da_biforcazione),
diam_sez_interp = linterp(dist_da_biforcazione, h1, d1, h2, d2),
diam_sezione =
ifelse(is.na(diam_sezione), diam_sez_interp, diam_sezione)) %>%
select(-d1, -d2, -h1, -h2, -diam_sez_interp)
# Table "Campagne"
Campagne <- tribble(
~ID_Campagna, ~ComplessoForestale, ~SitoDiOsservazione,
1, "Perdas-Monte Idolo", "Canali Esca")
# Table "Rilievi"
Rilievi <- Intestazione %>%
group_by(data) %>%
summarise() %>%
ungroup() %>%
mutate(dataRilievo = as.Date(data, format="%d/%m/%y"))
## In order to count the number of "Rilievi" piping interrupted
Rilievi <- Rilievi %>%
mutate(ID_Campagna = Campagne[1,]$ID_Campagna, ID_Rilievo = paste0(Campagne$ID_Campagna, "_", 1:nrow(Rilievi))) %>%
select(ID_Rilievo, ID_Campagna, dataRilievo)
# Table "Rilevatori"
Rilevatori <- tribble(
~ID_Rilevatore, ~Cognome, ~Nome, ~Affiliazione,
"MRUMTT", "Mura", "Matteo", "NFS",
"PCCMTT", "Piccolo", "Matteo", "NFS",
"LAIPRI", "Lai", "Piero", "Forestas",
"MLSTTV", "Melis", "Ottavio", "Forestas",
"MCLMRA", "Muceli", "Mauro", "NFS",
"BBICRS", "Ibba", "Cristian", "CFVA",
"MRNMRA", "Marongiu", "Mauro", "Forestas")
# Table "Squadre"
Squadre <- Intestazione %>%
select(data, rilevatori) %>%
mutate(data = as.Date(data, format="%d/%m/%y")) %>%
unique %>%
separate(rilevatori, c("Rilevatore1", "Rilevatore2", "Rilevatore3", "Rilevatore4", "Rilevatore5", "Rilevatore6"),
remove = T, sep = ", ", fill = "right") %>%
gather(starts_with("Rilevatore"), key = "id_ril", value = "Cognome") %>%
select(-id_ril) %>%
mutate(Cognome = replace(Cognome, Cognome == "Mereu", NA)) %>%
filter(!is.na(Cognome)) %>%
unique %>%
right_join(select(Rilevatori, ID_Rilevatore, Cognome),.) %>%
select(-Cognome) %>%
right_join(select(Rilievi, ID_Rilievo, dataRilievo), .,
by = c("dataRilievo" = "data")) %>%
select(-dataRilievo)
Joining, by = "Cognome"
# Table "Specie" - ID_Specie from EPPO (https://data.eppo.int/)
Specie <- tribble(
~ID_Specie, ~nomeScientifico, ~nomeItaliano, ~species,
"PIURA", "Pinus radiata, D.Don", "pino di Monterey", "Monterey pine",
"PIUPL", "Pinus pinaster, Aiton", "pino marittimo", "maritime pine")
# Table "FustiCampioneEtAdF"
FustiCampioneEtAdF <- Intestazione %>%
full_join(select(Rotelle_gs, id_fusto_campione, peso_rotelle)) %>%
rename(
ID_fustoCampione = id_fusto_campione,
ID_gradone = id_gradone,
Prog_gradone = progr_gradone,
pesoFrescoRamaglia = peso_ramaglia,
pesoRotelle = peso_rotelle,
lunghezzaAbbattuto = lung_atterrato
) %>%
mutate(dataRilievo = as.Date(data, format="%d/%m/%y")) %>%
full_join(select(Rilievi, dataRilievo, ID_Rilievo)) %>%
add_column(KeyID = paste0(.$ID_Rilievo, "_", .$ID_fustoCampione), .before = 1) %>%
select(-c(complesso, data, rilevatori, fascine)) %>%
mutate(ID_specie = map_chr(specie, ~switch(., R = "PIURA", M = "PIUPL", -1))) %>%
select(-c(specie, dataRilievo)) %>%
select(KeyID, ID_Rilievo, ID_fustoCampione, ID_gradone, Prog_gradone,
ID_specie, d_130, h_ipso, lunghezzaAbbattuto, pesoFrescoRamaglia, pesoRotelle)
Joining, by = "id_fusto_campione"
Joining, by = "dataRilievo"
# Table "ProfiliFustiPrincipali"
ProfiliFustiPrincipali <- Scheda_1 %>%
filter(!is.na(diam_sezione)) %>%
rename(distSuolo = distanza_suolo, d_sez = diam_sezione) %>%
right_join(select(FustiCampioneEtAdF, KeyID, ID_fustoCampione), .,
by = c("ID_fustoCampione" = "id_fusto_campione")) %>%
arrange(KeyID, distSuolo) %>%
ddply(.(KeyID), mutate, ID_sez = seq_along(KeyID)) %>% # Da verificare se usare il mutate
select(KeyID, distSuolo, d_sez) %>%
distinct(KeyID, distSuolo, .keep_all = T)
# Table "ProfiliFustiSecondari"
ProfiliFustiSecondari <- Scheda_2 %>%
select(id_fusto_campione, progressivo_foglio, id_asta_secondaria, dist_da_biforcazione, diam_sezione) %>%
right_join(select(FustiCampioneEtAdF, KeyID, ID_fustoCampione), .,
by = c("ID_fustoCampione" = "id_fusto_campione")) %>%
select(KeyID, id_asta_secondaria, dist_da_biforcazione, diam_sezione) %>%
rename(ID_asta = id_asta_secondaria,
distanzaBiforcazione = dist_da_biforcazione,
diametro = diam_sezione)
# Table "Palchi"
Palchi <- filter(Scheda_1, is.na(diam_sezione)) %>%
select(stato_palco, numero_rami, diam_ramo_grosso, distanza_suolo, id_fusto_campione, id_asta_secondaria) %>%
rename(stato = stato_palco,
numeroRami = numero_rami,
distSuolo = distanza_suolo,
diamRamoGrosso = diam_ramo_grosso,
ID_fustoCampione = id_fusto_campione,
ID_asta = id_asta_secondaria) %>%
inner_join(select(FustiCampioneEtAdF, ID_fustoCampione, KeyID),.) %>%
ddply(.(KeyID), mutate, ID_palco = seq_along(KeyID)) %>% # Da verificare se usare il mutate
select(-ID_fustoCampione)
Joining, by = "ID_fustoCampione"
# Table "Rotelle"
Rotelle <- Rotelle_gs %>%
select(-peso_rotelle) %>%
inner_join(select(FustiCampioneEtAdF, KeyID, ID_fustoCampione),., by = c(ID_fustoCampione = "id_fusto_campione"))
if (nrow(Rotelle) != nrow(Rotelle_gs)) stop("si perdono rotelle")
Rotelle <- Rotelle %>%
select(-ID_fustoCampione) %>%
gather(starts_with("dist_rot_id"), key = "rotella_num", value = "distSuolo") %>%
mutate(cls_h = str_sub(rotella_num, 12)) %>%
select(-rotella_num) %>%
arrange(KeyID, distSuolo)
Rotelle <- Rotelle %>%
select(KeyID, cls_h, distSuolo)
# Table "ClassiAltezza"
ClassiAltezza <- tribble(
~cls_h, ~classe_altezza,
0, "base",
1, "50cm",
2, "dbh",
3, "1/4h_circa",
4, "1/2h",
5, "2/3h",
6, "5/6h")
## Check for trees without any indication in "stato"
Palchi %>%
filter(is.na(stato)) %>%
select(KeyID) %>%
unique %>%
nrow
[1] 57
## Selection of the first row of each tree where "stato" is NA, and set it as "X"
Palchi <- Palchi %>%
group_by(KeyID) %>%
mutate(stato = replace(stato, min(distSuolo) & is.na(stato), "X")) %>%
fill(stato)
Palchi <- Palchi %>%
group_by(KeyID) %>%
fill(stato)
## Selection of trees without stato indication (set to NA)
noStato <- Palchi %>%
select(KeyID) %>%
unique() %>%
setdiff(filter(Palchi, stato != "X") %>% select(KeyID) %>% unique) %>%
add_column(newStato = "not_avail")
Palchi <- Palchi %>%
full_join(noStato) %>%
mutate(stato = replace(stato, newStato == "not_avail", NA)) %>%
select(-newStato) %>%
select(KeyID, ID_palco, stato, numeroRami, diamRamoGrosso, distSuolo, ID_asta)
Joining, by = "KeyID"
library(DBI)
library(RSQLite)
dbFileName <- "2018Arzana_FustiCampione.sqlite"
if (file.exists(dbFileName)) cat("ERROR: SQLite file exists and has not been overwritten!\n To produce a new SQLite file delete the old one \n") else
{
syCommand <- paste("sqlite3", dbFileName, "<", "DB_Schema_Arzana2018.sql")
# Create SQLite DB file using the given schema
system(syCommand)
tablesList <- c("Specie",
"Campagne",
"Rilevatori",
"Rilievi",
"Squadre",
"FustiCampioneEtAdF",
"ProfiliFustiPrincipali",
"Palchi",
"ProfiliFustiSecondari",
"ClassiAltezza",
"Rotelle")
dbAdF <- dbConnect(RSQLite::SQLite(), dbFileName)
for (tab in tablesList) {
print(paste("writing table", tab))
RSQLite::dbWriteTable(dbAdF, tab, get(tab), append = T)
# Populate the (empty) DB tables (Order of tables has to respect external keys)
Sys.sleep(1)
# dbDisconnect(dbAdF) # !!! WHY DISCONNECT !!!
}
}
ERROR: SQLite file exists and has not been overwritten!
To produce a new SQLite file delete the old one