Aplicação de IDW (inverse distance weighting) com R




Script Desenvolvido

Aplicação de IDW para escolha da melhor potência.
Os dados utilizados foram obtidos utilizando web scraping a partir do API inmtetr.

Biblioteca Utilizada



if (!require("pacman")) install.packages("pacman")

pacman::p_load(dplyr, tidyr, tibble, stringr, gstat, sp, spm, raster,
               rasterVis, ggplot2, spdplyr, Metrics, viridis,
               gridExtra, grid, caret, sf, rgdal, mapproj, geosphere,
               spdep, cowplot, plyr,
               DT, plotly, shiny)
## package 'spdep' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\BRENNER BIASI\AppData\Local\Temp\RtmpKA7uNS\downloaded_packages
# if(!require("devtools")) install.packages("devtools") ; library(devtools) # Necessário para o pacote inmetr
if(!require("inmetr")) devtools::install_github('lhmet/inmetr') ; library(inmetr)

#  O pacote DT foi utilizado exclusivamente para criação de tabelas dinâmicas,
# facilitando a demonstração de alguns dados utilizados.

#  Os pacotes plotly e shiny são utilizados apenas para promover uma análise mais interativa
# dos visitantes com alguns plots.

Dataset

dataset <- dataset # Banco de dados

head(dataset, 5)
##       Data_1    Group.1 Dia Hora  LI73002  LI73003   LIC7309  LIC7307
## 1 2011-01-01 01/01/2011  NA   NA 31.23821 27.08799 10.560590 18.55438
## 2 2011-01-02 02/01/2011  NA   NA 27.38272 29.53459  9.742799 25.35912
## 3 2011-01-03 03/01/2011  NA   NA 22.70694 34.24997 19.677336 22.84778
## 4 2011-01-04 04/01/2011  NA   NA 25.61978 41.52428 39.178556 26.50447
## 5 2011-01-05 05/01/2011  NA   NA 29.52663 45.92729 65.202267 23.53899
##    LIC7310  LIC7306  LIC7315  LI73017  LI73016                Data
## 1 36.60356 63.59569 53.25386 52.45145 38.91401 2011-01-01 11:59:30
## 2 36.10735 65.15974 61.28047 51.83789 35.23613 2011-01-02 11:59:30
## 3 57.36837 63.20326 59.59342 49.65450 42.12386 2011-01-03 11:59:30
## 4 75.08241 73.28996 71.88722 67.27992 38.36544 2011-01-04 11:59:30
## 5 74.28950 62.11171 77.35249 63.57197 40.99165 2011-01-05 11:59:30
##   Quan_chu Chuva Ind_chu
## 1        0   Não       0
## 2        0   Não       0
## 3        0   Não       0
## 4        0   Não       0
## 5        0   Não       0


O objeto met_data deve ser gerado para pleno desenvolvimento do script. Este objeto foi gerado para não expor informacoes pessoais quanto ao login no BDMEP/INMET. O código do objeto met_data foi gerado com dput().


* Fica a sugestão de utilzar a opção Hide no chunck abaixo, de modo a tornar mais agradável a visualização do script.


{
met_data <- structure(list(date = structure(c(1293883200, 1293969600, 1294056000, 1294142400, 
                                              1294228800, 1294315200, 1294401600, 1294488000, 
                                              1294574400, 1294660800, 1294747200, 1294833600, 
                                              1294920000, 1295006400, 1295092800, 1295179200, 
                                              1295265600, 1295352000, 1295438400, 1295524800, 
                                              1295611200, 1295697600, 1295784000, 1295870400, 
                                              1295956800, 1296043200, 1296129600, 1296216000, 
                                              1296302400, 1296388800, 1296475200, 1296561600, 
                                              1296648000, 1296734400, 1296820800, 1296907200, 
                                              1296993600, 1297080000, 1297166400, 1297252800, 
                                              1297339200, 1297425600, 1297512000, 1297598400, 
                                              1297684800, 1297771200, 1297857600, 1297944000, 
                                              1298030400, 1298116800, 1298203200, 1298289600, 
                                              1298376000, 1298462400, 1298548800, 1298635200, 
                                              1298721600, 1298808000, 1298894400, 1298980800, 
                                              1299067200, 1299153600, 1299240000, 1299326400, 
                                              1299412800, 1299499200, 1299585600, 1299672000, 
                                              1299758400, 1299844800, 1299931200, 1300017600, 
                                              1300104000, 1300190400, 1300276800, 1300363200, 
                                              1300449600, 1300536000, 1300622400, 1300708800, 
                                              1300795200, 1300881600, 1300968000, 1301054400, 
                                              1301140800, 1301227200, 1301313600, 1301400000, 
                                              1301486400, 1301572800, 1301659200, 1301745600, 
                                              1301832000, 1301918400, 1302004800, 1302091200, 
                                              1302177600, 1302264000, 1302350400, 1302436800, 
                                              1302523200, 1302609600, 1302696000, 1302782400, 
                                              1302868800, 1302955200, 1303041600, 1303128000, 
                                              1303214400, 1303300800, 1303387200, 1303473600, 
                                              1303560000, 1303646400, 1303732800, 1303819200, 
                                              1303905600, 1303992000, 1304078400, 1304164800, 
                                              1304251200, 1304337600, 1304424000, 1304510400, 
                                              1304596800, 1304683200, 1304769600, 1304856000, 
                                              1304942400, 1305028800, 1305115200, 1305201600, 
                                              1305288000, 1305374400, 1305460800, 1305547200, 
                                              1305633600, 1305720000, 1305806400, 1305892800, 
                                              1305979200, 1306065600, 1306152000, 1306238400, 
                                              1306324800, 1306411200, 1306497600, 1306584000, 
                                              1306670400, 1306756800, 1306843200, 1306929600, 
                                              1307016000, 1307102400, 1307188800, 1307275200, 
                                              1307361600, 1307448000, 1307534400, 1307620800, 
                                              1307707200, 1307793600, 1307880000, 1307966400, 
                                              1308052800, 1308139200, 1308225600, 1308312000, 
                                              1308398400, 1308484800, 1308571200, 1308657600, 
                                              1308744000, 1308830400, 1308916800, 1309003200, 
                                              1309089600, 1309176000, 1309262400, 1309348800, 
                                              1309435200, 1309521600, 1309608000, 1309694400, 
                                              1309780800, 1309867200, 1309953600, 1310040000, 
                                              1310126400, 1310212800, 1310299200, 1310385600, 
                                              1310472000, 1310558400, 1310644800, 1310731200, 
                                              1310817600, 1310904000, 1310990400, 1311076800, 
                                              1311163200, 1311249600, 1311336000, 1311422400, 
                                              1311508800, 1311595200, 1311681600, 1311768000, 
                                              1311854400, 1311940800, 1312027200, 1312113600,
                                              1312200000, 1312286400, 1312372800, 1312459200, 
                                              1312545600, 1312632000, 1312718400, 1312804800, 
                                              1312891200, 1312977600, 1313064000, 1313150400, 
                                              1313236800, 1313323200, 1313409600, 1313496000, 
                                              1313582400, 1313668800, 1313755200, 1313841600,
                                              1313928000, 1314014400, 1314100800, 1314187200,
                                              1314273600, 1314360000, 1314446400, 1314532800, 
                                              1314619200, 1314705600, 1314792000, 1314878400, 
                                              1314964800, 1315051200, 1315137600, 1315224000,
                                              1315310400, 1315396800, 1315483200, 1315569600, 
                                              1315656000, 1315742400, 1315828800, 1315915200, 
                                              1316001600), 
                                            class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
                           `Feira de Santana` = c(0, 0, 0, 0, 34.3, 0, 0, 3.8, 5.2, 0, 0, 24,
                                                  0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3.7, 1.3, 0, 
                                                  0, 2, 0, 0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0.8, 14.3, 0, 0, 
                                                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
                                                  0, 0, 0, 0, 0, 2.9, 0, 1.6, 100, 0, 0, 0, 0,
                                                  0, 22, 8.3, 14, 0, 0, 1.2, 0, 0, 0, 0, 0, 0, 
                                                  0, 0, 0, 0, 10, 0, 11, 0, 3.2, 3.1, 0.4, 0,
                                                  0, 26.4, 2.5, 3.5, 0, 2.5, 2, 5.6, 0, 0, 0,
                                                  0.4, 0, 43.5, 3.2, 2.3, 0, 0.2, 0, 0, 0.6, 
                                                  0, 0, 1.1, 0, 0, 0.4, 2.5, 5.9, 0, 0, 0, 0, 
                                                  5, 4.2, 3.4, 1.2, 0.3, 7.1, 0, 0, 0.1, 0, 0, 
                                                  0, 0.8, 0, 1.7, 5.9, 1.2, 0, 8.8, 2.8, 0, 0, 
                                                  0, 0, 0, 11.7, 5.4, 12.5, 4.3, 4.6, 0.4, 2.2, 
                                                  0.5, 0, 0, 0, 0.3, 0, 0, 0, 0, 0, 2.7, 2.3,
                                                  0.1, 0.1, 0.1, 0, 0, 0, 0, 0, 5.7, 0, 0, 0, 
                                                  0, 0, 0, 0, 0, 4, 1.2, 0, 0, 0, 0.7, 2.7, 0, 
                                                  0.7, 2.2, 0.5, 0, 1.2, 4.4, 0, 0, 0, 0, 5, 0.9,
                                                  0, 0, 0, 0, 0, 1.2, 1.2, 0, 0.2, 3.2, 0, 2.6, 
                                                  0, 0, 0, 1.8, 4.7, 0.2, 0, 0, 0, 0, 0, 0, 0, 0,
                                                  0, 6.2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                           `Cruz das Almas` = c(0, 0, 0, 0, 15.4, 1.2, 0, 13.1, 3.4, 0, 0, 9.3,
                                                0.1, 0, 0, 0, 0, 0, 0, 0.4, 2, 4, 4.6, 5.8, 9.4,
                                                0.6, 0, 0.4, 4.7, 0.2, 0.1, 0, 0, 2.8, 0, 0, 0,
                                                0, 1.2, 15.9, 0, 1.7, 0, 0, 0, 0.1, 0, 6.5, 8.1,
                                                14.6, 1.4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
                                                0, 0, 0, 0, 0, 0, 0, 0, 2, 5.8, 0, 0.2, 0, 0, 0,
                                                0, 0, 0, 28.6, 20, 1.3, 0.2, 11.4, 13, 0, 0, 0, 
                                                0, 0, 0, 0, 0, 0, 0, 16.9, 3.4, 20.2, 0, 17.8, 
                                                3.6, 5, 3.3, 3.4, 28.5, 5.9, 15.6, 6.6, 51, 24.2,
                                                11.3, 0.2, 0.1, 0.2, 0.8, 3.5, 67.8, 8.4, 10.4,
                                                0.8, 2, 0.7, 7, 10.6, 19.5, 0, 5.8, 0, 14, 3.8, 
                                                0.2, 0, 1.4, 0.4, 0, 10.9, 11.2, 3.8, 6, 9.1, 4.3, 
                                                11.1, 0, 0.2, 0, 0.1, 0.1, 0, 1.6, 1.3, 3.5, 2, 
                                                11.2, 0, 1, 8.2, 0, 0, 0, 0.2, 0, 38.3, 30.4, 
                                                19.8, 0, 11.4, 1, 3.4, 0.5, 0, 0, 0, 0.2, 2.2, 
                                                0.3, 0, 0.5, 0, 23.6, 1.1, 4.5, 2.6, 2.1, 0,
                                                0, 0.5, 0, 0.1, 7, 0, 0, 0, 0, 0, 0, 0, 0.1, 
                                                8.6, 0, 0.3, 0, 0, 6.8, 3.6, 2, 4.2, 11.2, 2.2, 
                                                2.8, 4.6, 0, 0, 0, 0, 0, 22.4, 1.6, 0, 0, 4.6, 
                                                0, 0, 6.1, 0.5, 0, 7.6, 5.7, 1.5, 11.8, 0, 0, 
                                                0, 5.1, 1.2, 0, 8.9, 5, 0.4, 5.3, 5, 0, 0, 0, 
                                                10.6, 8.2, 0, 0.1, 0, 0.1, 0, 0, 0, 0, 0, 0), 
                           Salvador = c(0, 0, 0, 0, 0, 0, 6.6, 12.4, 1.1, 0, 0, 31.6, 4, 6, 0, 
                                        0, 16.2, 0, 6.3, 1.5, 16.9, 0.1, 0.9, 1.3, 10.8, 1.5, 
                                        39.7, 4, 7.6,1.8, 0, 0, 0.1, 0, 0, 0, 0, 0, 0.6, 0, 1.4, 
                                        0, 0, 0.1, 0.5, 0.7, 0, 0.4, 5.8, 1.4, 0, 0, 0.8, 0, 0,
                                        0, 0, 34.2, 0.2, 0, 0, 12.7, 0, 1.1, 0, 0, 0, 5, 0, 0,
                                        0, 0, 6.8, 0, 0, 0, 0, 79, 0.3, 14.7, 2.4, 37.3, 10.3, 
                                        8, 0.4, 10.4, 12.1, 0, 0, 0.2, 0, 0, 0, 0, 0.2, 0, 0,
                                        13.2, 8.2, 22.2, 1, 20, 1.4, 4.4, 18.5, 3.3, 4.6, 2.2,
                                        7.1, 5.6, 42.8, 7.9, 4.2, 8.5, 0.2, 1.7, 1.2, 3.8, 97.6,
                                        51.2, 32.7, 0.6, 9.1, 1, 8.4, 3.2, 29.6, 0, 1.4, 1, 8, 
                                        15.7, 11.9, 13.8, 3.6, 21.3, 0, 22.5, 82.4, 17, 2.2, 1.8, 
                                        7.5, 4.2, 0, 0.7, 0.5, 0, 1, 1, 2.1, 0, 5.8, 15.4, 0.1,
                                        0, 15.6, 4, 0, 0, 0, 0, 26.6, 32.2, 22.1, 20.3, 1.3, 
                                        31.2, 10.6, 51, 1.6, 0, 1.8, 5.6, 6.4, 1.4, 0, 0, 0, 0,
                                        24.7, 0.2, 6.9, 0.7, 1, 0, 0, 0, 0, 0.2, 9.2, 0, 1.1, 
                                        0, 0.3, 0, 0, 0, 0, 10, 0, 1, 0, 0, 0.3, 2.6, 0, 3.1,
                                        5.9, 6.3, 3.3, 5.5, 0, 0, 0, 0, 0, 11, 3, 0, 4.2, 0.2, 
                                        0, 0, 20.8, 0, 0, 9.7, 0, 5.8, 6.6, 2.7, 3.1, 0.8, 9.1, 
                                        2, 3.8, 1.3, 5.8, 1, 0, 0.4, 0, 0, 0, 1.6, 0.8, 0, 0, 
                                        0.1, 0, 0, 6.1, 0, 0, 0, 0), 
                           Alagoinhas = c(0, 0, 0, 0, 1.1, 3.4, 0, 3.1, 21.9, 3.7, 0, 121.7, 0,
                                          0, 0, 0, 0, 0, 0, 0, 8.6, 2.3, 6.9, 0.4, 2.3, 0.3, 
                                          3.2, 1.4, 0, 0, 0, 0, 0, 0, 0.2, 0, 0, 0, 0, 0, 2.3,
                                          0.2, 0, 0, 0, 0.7, 0.5, 5.1, 2.4, 2.4, 1.6, 0, 0, 0,
                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                          1.9, 0, 0, 0, 0.3, 0, 0, 1.2, 4.2, 5.7, 9, 0, 0.6, 
                                          1.9, 3.2, 35.3, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 12.2, 
                                          7.2, 5.5, 0, 3.6, 0.5, 1, 3.4, 1.1, 23.7, 4.2, 3.7, 
                                          0.2, 6.9, 8.1, 5.1, 0.8, 3.2, 0, 5, 7.8, 67.9, 9.6, 
                                          11.3, 3, 6.8, 0.7, 3.3, 2.5, 0.2, 1, 0, 0, 4.2, 2.5, 
                                          3.1, 5.9, 0, 0, 1.7, 0, 10.2, 6.3, 1.3, 2.6, 5.3, 
                                          37.2, 6, 0.7, 0, 0, 0, 0, 0, 0, 4.8, 4.5, 13.8, 0, 
                                          13.4, 0.9, 0, 0, 0, 0, 0, 27.9, 9.5, 11, 13.2, 10.7,
                                          2.3, 0, 1.4, 0, 0, 0, 3.1, 0.2, 0, 0, 0, 0, 6.9, 0.4,
                                          2.3, 0.6, 0.4, 0, 0, 0, 0, 0, 4.4, 0, 0.2, 1.4, 0.5, 
                                          0, 0, 0, 0, 9.5, 0.4, 1.6, 0, 0, 6.3, 4.8, 5.1, 0, 
                                          1.7, 6.1, 0.2, 12.1, 5.6, 0, 0, 0, 0, 24.8, 2.7, 0, 
                                          1.6, 0, 0, 0, 3.2, 4.9, 0, 1.6, 1.8, 3, 0.7, 0, 0.5, 
                                          0, 4.8, 5.8, 4.6, 0.9, 0, 0, 0, 0, 0, 0, 0, 0, 2.5, 
                                          0.7, 0, 0, 0, 0, 0.3, 0, 0, 0, 0.4)), 
                      class = c("tbl_df", "tbl", "data.frame"), 
                      row.names = c(NA, -257L))
}

Manipulação Inicial de Dados

dataset <- dataset %>% 
  dplyr::select(Data_1, Quan_chu) %>% 
  dplyr::mutate(Data_1 = as.Date(Data_1))

Data_real <- data.frame(Data_1 = seq(as.Date('2011-01-01'),
                                     as.Date('2011-09-14'), by = 1))

Data_real <- Data_real %>% 
  dplyr::full_join(dataset, by = "Data_1") %>% 
  dplyr::rename("Dia"      = Data_1,
                "mm_chuva" = Quan_chu)




Nesta parte do script, caso seja desejado utilizar o BDMEP para realizar um novo web scraping deve-se prestar atenção ao uso suprimido da função bdmep_import do pacote inmetr. No chunck abaixo que está localizado o passo no qual deveria ser criado o objeto met_data.
Para a modelagem aqui desenvolvida as estações escolinhas para suporte a implementação dos métodos de interpolação espacial, são: Alagoinhas (Alag), Cruz das Almas (Cruz), Feira de Santana (FSA) e Salvador (SSA).
A escolha dessas estações ocorreu em função da boa localização geo-espacial em relação ao município de São Francisco do Conde (SFC).
Para mais detalhes quanto ao emprego do web scraping aqui utilizado, sugere-se o acesso a página do API inmetr.

# Buscando Dados Faltantes no BDMEP/INMET

# Estações de interesse
stations <- c("Alagoinhas", "Cruz das Almas", "Feira de Santana", "Salvador")
stations_rows <- pmatch(stations, inmetr::bdmep_meta$name)     # Busca a estação
stns_codes    <- inmetr::bdmep_meta[stations_rows, "id"]       # Retorna o id

# Datas deinteresse
start_date <- "01/01/2011"
end_date   <- "14/09/2011"

#  Para o emprego efetivo do script, deve-se remover os comentarios (#) dos codigos a seguir.
# O codigo foi comentado em funcao da opcao de ocutacao de informacoes pessoais referente a login e senha
# de acesso ao BDMEP via pacote inmetr.

# data_inmet <- inmetr::bdmep_import(id = stns_codes,
#                                    sdate = start_date, 
#                                    edate = end_date, 
#                                    email = "seu email entre aspas",   # Preencher
#                                    passwd  = "sua senha entre aspas", # Preencher
#                                    verbose = TRUE)

# met_data <- data_inmet %>% 
#   dplyr::select(date, prec, id) %>% 
#   dplyr::mutate(id  = as.factor(id))

# levels(met_data$id)

# levels(met_data$id) <- c("Feira de Santana",
#                          "Cruz das Almas",
#                          "Salvador",
#                          "Alagoinhas")
# 
# met_data <- met_data %>% 
#   na.omit() %>% 
#   tidyr::spread(id, prec)

# Filtrando datas para os valores faltantes
filling <- met_data %>% 
  dplyr::mutate(Dia = as.Date(date)) %>% 
  dplyr::full_join(Data_real %>% 
                     dplyr::filter(is.na(mm_chuva)) %>% 
                     dplyr::mutate(mm_chuva = "xx"), 
                   by = "Dia") %>% 
  dplyr::select(-date) %>% 
  replace(is.na(.), 0) %>% 
  dplyr::filter(mm_chuva == "xx") %>% 
  dplyr::select(-mm_chuva)
Mapa de Localização
# Localização - *incompleto!
# By: Tarssio Barreto
# https://github.com/barretokra

shape <- rgdal::readOGR("C:/Users/BRENNER BIASI/Desktop/Mestrado/SER UFF/Estados_do_Brasil/Brasil.shp") %>% 
  dplyr::filter(ESTADOS == "Bahia") %>% 
  ggplot2::fortify() %>% 
  dplyr::mutate(long = round(long, 2)) %>% 
  dplyr::mutate(lat  = round(lat, 2))
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\BRENNER BIASI\Desktop\Mestrado\SER UFF\Estados_do_Brasil\Brasil.shp", layer: "Brasil"
## with 27 features
## It has 4 fields
# Localização das estações - Coordenadas em graus
# http://www.inmet.gov.br/portal/index.php?r=estacoes/estacoesConvencionais
df_map <- data.frame(lat  = c(-12.196111, -12.666667, 
                              -13.005278, -12.148333, -12.627778),
                     long = c(-38.967222, -39.083333, 
                              -38.505833, -38.425556, -38.68),
                     Cidade = c("FSA", "Cruz", "SSA", "ALAG", "SFC")) %>% 
  dplyr::mutate(group = 1)

p1 <- ggplot2::ggplot(df_map) +
  geom_polygon(data = shape, aes(x = long, 
                                 y = lat, 
                                 group = group), 
               fill = "white", 
               col = "black") +
  geom_point(aes(x = long, 
                 y = lat), 
             size = 3, 
             col  = "blue")

# Criando poligono
lat_map  <- df_map$lat
long_map <- df_map$long
cidade   <- df_map$Cidade

p2 <- ggplot2::ggplot(df_map) +
  geom_polygon(data  = shape, 
               aes(x = long, 
                   y = lat, 
                   group = group), 
               fill = "gray", 
               col  = "black") +
  geom_point(aes(x = long, 
                 y = lat), 
             size = 3, 
             col  = "blue") +
  coord_map(xlim = c(max(df_map$long + 0.5), min(df_map$long - 0.5)),
            ylim = c(max(df_map$lat + 0.5), min((df_map$lat - 0.5)))) + 
  annotate("text", x = long_map + 0.05, y = lat_map + 0.05, label = cidade, 
           fontface = "bold", size = 5)

p1

p2

levels(df_map$Cidade) <- c("Alagoinhas",
                           "Cruz das Almas",
                           "Feira de Santana",
                           "São Francisco do Conde",
                           "Salvador")

# https://gis.stackexchange.com/questions/225102/calculate-distance-between-points-and-nearest-polygon-in-r
# https://gis.stackexchange.com/questions/212879/what-are-the-coordinates-on-my-rectangle-of-the-closest-point-to-a-polygon-in-r
# https://stackoverflow.com/questions/51837454/r-measuring-distance-from-a-coastline


df_map <- df_map %>% 
  dplyr::select(long, lat)

df_map <- data.frame(Cidades = c("Alagoinhas x São Francisco do Conde",
                                 "Cruz das Almas x São Francisco do Conde",
                                 "Feira de Santana x São Francisco do Conde",
                                 "Salvador x São Francisco do Conde"),
                     Distancias_km = c(geosphere::distGeo(df_map[4, ], df_map[5, ]),
                                       geosphere::distGeo(df_map[2, ], df_map[5, ]),
                                       geosphere::distGeo(df_map[1, ], df_map[5, ]),
                                       geosphere::distGeo(df_map[3, ], df_map[5, ]))) %>% 
  dplyr::mutate(Distancias_km = round((Distancias_km / 1000), 2))



Análise Descritiva

modeling <- met_data %>% 
  dplyr::mutate(Dia = as.Date(date)) %>% 
  na.omit() %>% 
  dplyr::full_join(Data_real %>% 
                     dplyr::filter(is.na(mm_chuva)) %>% 
                     dplyr::mutate(mm_chuva = "xx"), 
                   by = "Dia") %>% 
  dplyr::select(-date) %>%
  dplyr::mutate(mm_chuva = ifelse(mm_chuva == "xx", 1, NA)) %>% #
  replace(is.na(.), 0) %>%  #
  dplyr::filter(mm_chuva == 0) %>% 
  dplyr::select(-mm_chuva)

modeling_EA <- modeling %>% 
  dplyr::left_join(Data_real, by = "Dia") %>% 
  dplyr::rename("SFC" = mm_chuva)

# Tema base para plots
tema <- theme_bw() +
  theme(axis.title   = element_text(color = "black", size = 11, face = "bold"),
        axis.text.x  = element_text(color = "black", size = 11, face = "bold"),
        axis.text.y  = element_text(color = "black", size = 11, face = "bold"),
        legend.text  = element_text(color = "black", size = 8,  face = "bold"),
        legend.title = element_text(color = "black", size = 9,  face = "bold"),
        plot.title   = element_text(color = "black", size = 14, face = "bold"))

# modeling_EA %>% 
#   dplyr::rename("São Francisco do Conde" = SFC) %>% 
#   reshape2::melt(id.vars = "Dia") %>%
#   dplyr::rename("Cidade" = variable,
#                 "Precipitacao" = value) %>% 
#   ggplot2::ggplot() +
#   geom_line(aes(x = Dia, y = Precipitacao, col = Cidade),
#             size = 1.2, alpha = 0.4) +
#   ylab("Precipitação") +
#   tema 
# 
# modeling_EA %>% 
#   reshape2::melt(id.vars = c("Dia", "SFC")) %>%
#   dplyr::rename("São Francisco do Conde" = SFC) %>% 
#   dplyr::rename("Cidade" = variable,
#                 "Precipitacao" = value) %>% 
#   ggplot2::ggplot() +
#   geom_line(aes(x = Dia, y = `São Francisco do Conde`, col = "S. Franc. Conde"),
#             size = 0.8, alpha = 0.6, col = "black") +
#   geom_line(aes(x = Dia, y = Precipitacao, col = Cidade),
#             size = 1.2, alpha = 0.4) +
#   facet_wrap(~Cidade) +
#   ylab("Precipitação") +
#   tema 
# 
# modeling_EA %>% 
#   reshape2::melt(id.vars = c("Dia", "SFC")) %>%
#   dplyr::rename("São Francisco do Conde" = SFC) %>% 
#   dplyr::rename("Cidade" = variable,
#                 "Precipitacao" = value) %>% 
#   ggplot2::ggplot() +
#   geom_point(aes(x = Precipitacao, y = `São Francisco do Conde`, col = Cidade),
#               alpha = 0.6) +
#   facet_wrap(~Cidade) +
#   ylab("Precipitação") +
#   tema 
  
 modeling_EA %>% 
  reshape2::melt(id.vars = c("Dia", "SFC")) %>%
  dplyr::rename("São Francisco do Conde" = SFC) %>% 
  dplyr::rename("Cidade" = variable,
                "Precipitacao" = value) %>% 
  ggplot2::ggplot() +
  geom_boxplot(aes(x = Cidade, y = Precipitacao, col = Cidade)) +
  ylab("Precipitação") +
  tema + theme(axis.text.x = element_text(angle = 90))

 K <- round(1 + 3.322 * log10(nrow(modeling)))
 
modeling_EA %>% 
  reshape2::melt(id.vars = c("Dia", "SFC")) %>%
  dplyr::rename("São Francisco do Conde" = SFC) %>% 
  dplyr::rename("Cidade" = variable,
                "Precipitacao" = value) %>% 
  ggplot2::ggplot() +
  geom_histogram(aes(x = Precipitacao,
                     y = ..density..), 
                 binwidth = .5,
                 bins = 9,
                 colour = "black", 
                 fill = "white") +
  geom_density(aes(x = Precipitacao), 
               alpha  = .2, 
               fill   = "#FF6666") +
  facet_wrap(~Cidade) +
  ylab("Precipitação") +
  tema



### IDW


Para verificação do idw, seis combinações de estações metereológicas serão testadas, sendo:
* Todas as estações; * Apenas as estações de Salvador e Cruz das Almas (mesamas classificações climatológicas);


# stations
# 1 = Alagoinhas
# 2 = Cruz
# 3 = FSA
# 4 = SSA

combinat <- base::expand.grid(1:4, 1:4, 1:4) %>% 
  dplyr::mutate(check = ifelse(Var1 == Var2 | Var1 == Var3 |
                                 Var2 == Var3 | Var3 == Var2 | 
                                 Var2 == Var1 | Var3 == Var1,
                               NA, "--")) %>% na.omit() %>% 
  dplyr::mutate(check = rowSums(.[1:3])) %>% 
  dplyr::distinct(check, .keep_all = T) %>% 
  dplyr::select(-check)

for (i in 1:length(stations)) {
  
  combinat[combinat == i] <- stations[i]
  
}





IDW

# Preparando dataset para validacao do modelo idw

# Data para validacção do método idw
modeling <- modeling


Para facilitar a manipulação de dados, e como a cross-validation será com \(k = 5\) (número de folds), o dataset de modelagem será particionado em cinco partes aleatórias.

modeling <- modeling %>% 
  dplyr::mutate(id = 1:nrow(modeling)) # atribuir coluna de identificacao

id <- modeling$id

{
  nfold <- 5
  set.seed(1)
  aleat_cv <- caret::createFolds(id, k = nfold, # criar folds para cv 
                                 list  = TRUE, returnTrain = FALSE)
}

#
aleat_cv_ <- list()

for(i in 1:nfold){
  
  df_x <- data.frame(id = aleat_cv[[i]]) # extrair folds
  
  df_x <- df_x %>% 
  dplyr::mutate(cod = rep("cv", nrow(df_x))) # Validação do id no proximo 'for'
  
  aleat_cv_[[paste0("aleat_cv_", i)]] <- df_x
  
}

#
modeling_cv <- list()

for(i in 1:nfold){
  
  modeling_cv_ <- modeling %>% 
  dplyr::full_join(aleat_cv_[[i]]) %>%  # validação do id
  na.omit() %>% 
  dplyr::select(-id, -cod) %>% 
  reshape2::melt(id.vars = "Dia") %>% 
  dplyr::mutate(Latitude = ifelse(variable == "Feira de Santana",
                                  -12.196111,
                                  ifelse(variable == "Cruz das Almas",
                                         -12.666667,
                                         ifelse(variable == "Salvador",
                                                -13.005278,
                                                ifelse(variable == "Alagoinhas",
                                                       -12.148333, "Erro")))),
                Longitude = ifelse(variable == "Feira de Santana",
                                   -38.967222,
                                   ifelse(variable == "Cruz das Almas",
                                          -39.083333,
                                         ifelse(variable == "Salvador",
                                                -38.505833,
                                                ifelse(variable == "Alagoinhas",
                                                       -38.425556, "Erro"))))) %>% 
  dplyr::select(-variable) %>% 
  dplyr::rename("Prec" = value) %>% 
  dplyr::mutate(Latitude  = as.numeric(Latitude),
                Longitude = as.numeric(Longitude))
  
  #
  modeling_cv[[paste0("cv", i)]] <- modeling_cv_
  
}

# head(modeling_cv$cv1, 3)


#### Cross-Validation do IDW e Estimativas para Quatro Estações

modeling_cv1_ <- modeling_cv1 <- data.frame()
modeling_cv2_ <- modeling_cv2 <- data.frame()
modeling_cv3_ <- modeling_cv3 <- data.frame()
modeling_cv4_ <- modeling_cv4 <- data.frame()
modeling_cv5_ <- modeling_cv5 <- data.frame()

for (i in 1:length(modeling_cv)) {
  
  df <- modeling_cv[[i]]
  assign(paste0("modeling_cv",i), df)
  assign(paste0("modeling_cv",i,"_"), df)
  
}

# Ajustando a localização de acordo com a projetação Darum WGS - 84
for(i in 1:nfold){
  
  sp::coordinates(modeling_cv[[i]]) <- ~Latitude+Longitude
  sp::proj4string(modeling_cv[[i]]) <- sp::CRS("+proj=longlat +datum=WGS84") 

}

dplyr::glimpse(modeling_cv[[1]])
## Formal class 'SpatialPointsDataFrame' [package "sp"] with 5 slots
##   ..@ data       :'data.frame':  200 obs. of  2 variables:
##   .. ..$ Dia : Date[1:200], format: "2011-01-05" ...
##   .. ..$ Prec: num [1:200] 34.3 0 0 0 0 0 0 0 0 0 ...
##   ..@ coords.nrs : int [1:2] 3 4
##   ..@ coords     : num [1:200, 1:2] -12.2 -12.2 -12.2 -12.2 -12.2 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   ..@ bbox       : num [1:2, 1:2] -13 -39.1 -12.1 -38.4
##   .. ..- attr(*, "dimnames")=List of 2
##   ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot
# São Francisco do Conde
# Lozalização
SFC_lati  <- (-12.627778)
SFC_longe <- (-38.68)
df_SFC    <- data.frame(SFC_lati, SFC_longe)

sp::coordinates(df_SFC) <- ~SFC_lati+SFC_longe
sp::proj4string(df_SFC) <- sp::CRS("+proj=longlat +datum=WGS84")

#
idws <- list()
passo <- 0.5
idp_b <- seq(0.5, 4, passo) # potência do idw variando de 0.5 à 4 com passo de 0.5

# Variação de informação entre os folds!
Dia_1 <- modeling_cv1_$Dia
Dia_2 <- modeling_cv2_$Dia
Dia_3 <- modeling_cv3_$Dia 
Dia_4 <- modeling_cv4_$Dia
Dia_5 <- modeling_cv5_$Dia

df <- list(Dia_1, Dia_2, Dia_3, Dia_4, Dia_5)

lvl <- list()

for (i in 1:length(df)) {
  
  dfx <- as.data.frame(table(df[[i]]))
  dfx <- unique(dfx$Freq)
  
  lvl[[paste0("fold",i)]]  <- dfx
  
}

#
dfx <- c()

for (i in 1:nfold) {
  
  dfx[i] <- length(df[[i]]) / (lvl[[i]]) # lvl => level de datas
  
}

#

for(i in 1:length(idp_b)) { # Variacao da potencia
  
  for(x in 1:nfold){        # Variacao dos folds
    
    for(j in 1:(dfx[[x]])) {     # Regularizacao do tamanho dos folds
      
      modeling_cv_r <- modeling_cv[[x]] %>% # x   # list => Objeto class
                                                  # SpatialPointsDataFrame 
        dplyr::filter(Dia == Dia[j]) # j # Filter proporcionado pelo pacote spdplyr
      
      idw_ = gstat::idw(formula    = modeling_cv_r$Prec ~ 1, 
                         locations = modeling_cv_r,
                         newdata   = df_SFC, idp = idp_b[i]) # i
      
      idws[[paste0("cv",x,"_pot",i, "_Dia", j)]]  <- idw_$var1.pred
      
    }
  }
}
# Transformando uma lista em um tibble "pré-data frame"
idws <- tibble::enframe(idws)

# Ajustando data frame
idws <- idws %>% 
  tidyr::separate(name, into = c("x1", "x"), sep = 2) %>% 
  tidyr::separate(x,  into = c("Fold", "x"), sep = 1) %>% 
  tidyr::separate(x,  into = c("x", "x2"),   sep = 4) %>% 
  tidyr::separate(x2, into = c("Potencia", "x2"), sep = 2) %>% 
  tidyr::separate(x2, into = c("x2", "Dia"), sep = 3) %>% 
  dplyr::rename("Pred_Precipitacao" = value) %>% 
  dplyr::select(-x, -x1, -x2) %>% 
  as.data.frame() %>% 
  dplyr::mutate(Pred_Precipitacao = as.numeric(Pred_Precipitacao),
                Pred_Precipitacao = round(Pred_Precipitacao, 2))

idws$Potencia <- as.numeric(base::gsub("([0-9]+).*$", "\\1", idws$Potencia))
idws$Dia <- as.numeric(gsub("a", "", idws$Dia))

# View(idws)
#
Fold <- list()

for(i in 1:nfold){
  
  Foldz <- idws %>% 
    dplyr::filter(Fold == i)   # Filtrando valores preditos por fold
  
  Fold[[paste0("cv", i)]] <- Foldz
  
}

#
idp_bx <- base::gsub(".", "", idp_b, fixed = TRUE)
#
{
  {dfx1_05 <- dfx1_1 <- dfx1_15 <- dfx1_2 <- dfx1_25 <- dfx1_3 <- dfx1_35 <- dfx1_4  <-data.frame()}

  {dfx2_05 <- dfx2_1 <- dfx2_15 <- dfx2_2 <- dfx2_25 <- dfx2_3 <- dfx2_35 <- dfx2_4  <-data.frame()}

  {dfx3_05 <- dfx3_1 <- dfx3_15 <- dfx3_2 <- dfx3_25 <- dfx3_3 <- dfx3_35 <- dfx3_4  <-data.frame()}

  {dfx4_05 <- dfx4_1 <- dfx4_15 <- dfx4_2 <- dfx4_25 <- dfx4_3 <- dfx4_35 <- dfx4_4  <-data.frame()}

  {dfx5_05 <- dfx5_1 <- dfx5_15 <- dfx5_2 <- dfx5_25 <- dfx5_3 <- dfx5_35 <- dfx5_4  <-data.frame()}
}
Foldx <- list()

for(j in 1:nfold){
  
  for(i in 1:length(idp_b)){
    
    Foldz <- Fold[[j]]
    
    pot <- Foldz %>% 
      dplyr::filter(Potencia == i) #  Filtrando valores preditos por potência em 
                                   # em cada fold.
    
    Foldx[[paste0("cvF",j,"_pot", i * passo)]] <- pot
  
  }
  
}


dfx1_050 <- Foldx[["cvF1_pot0.5"]]

# head(dfx1_050, 7)
# View(dfx1_050)

#
# Logo:

for(j in 1:nfold){
  
  for(i in 1:length(idp_b)){
    
    Foldz <- Fold[[j]]
    
    pot <- Foldz %>% 
      dplyr::filter(Potencia == i) #  Filtrando valores preditos por potência em 
                                   # em cada fold.
      
    assign(paste0("dfx",j,"_",idp_bx[i]), pot)
  
  }
  
}

# table(modeling_cv1_$Dia)

dfx1 <- modeling_cv1_ %>% 
  dplyr::slice(1:(nrow(modeling_cv1_) / lvl[[1]] )) %>% 
  dplyr::select(Dia) %>% 
  dplyr::left_join(Data_real, by = "Dia") %>% # Dados observados
  dplyr::rename("Chuva_real" = mm_chuva) %>% # Chuva_real => Precipitação em SFC
  dplyr::mutate(dfx1_050 = dfx1_05$Pred_Precipitacao,
                dfx1_100 = dfx1_1$Pred_Precipitacao, 
                dfx1_150 = dfx1_15$Pred_Precipitacao,
                dfx1_200 = dfx1_2$Pred_Precipitacao,
                dfx1_250 = dfx1_25$Pred_Precipitacao,
                dfx1_300 = dfx1_3$Pred_Precipitacao,
                dfx1_350 = dfx1_35$Pred_Precipitacao,
                dfx1_400 = dfx1_4$Pred_Precipitacao)

dfx2 <- modeling_cv2_ %>% 
  dplyr::slice(1:(nrow(modeling_cv2_) / lvl[[2]] )) %>% 
  dplyr::select(Dia) %>% 
  dplyr::left_join(Data_real, by = "Dia") %>% # Dados observados
  dplyr::rename("Chuva_real" = mm_chuva) %>% # Chuva_real => Precipitação em SFC
  dplyr::mutate(dfx2_050 = dfx2_05$Pred_Precipitacao,
                dfx2_100 = dfx2_1$Pred_Precipitacao, 
                dfx2_150 = dfx2_15$Pred_Precipitacao,
                dfx2_200 = dfx2_2$Pred_Precipitacao,
                dfx2_250 = dfx2_25$Pred_Precipitacao,
                dfx2_300 = dfx2_3$Pred_Precipitacao,
                dfx2_350 = dfx2_35$Pred_Precipitacao,
                dfx2_400 = dfx2_4$Pred_Precipitacao)

dfx3 <- modeling_cv3_ %>% 
  dplyr::slice(1:(nrow(modeling_cv3_) / lvl[[3]] )) %>% 
  dplyr::select(Dia) %>% 
  dplyr::left_join(Data_real, by = "Dia") %>% # Dados observados
  dplyr::rename("Chuva_real" = mm_chuva) %>% # Chuva_real => Precipitação em SFC
  dplyr::mutate(dfx3_050 = dfx3_05$Pred_Precipitacao,
                dfx3_100 = dfx3_1$Pred_Precipitacao, 
                dfx3_150 = dfx3_15$Pred_Precipitacao,
                dfx3_200 = dfx3_2$Pred_Precipitacao,
                dfx3_250 = dfx3_25$Pred_Precipitacao,
                dfx3_300 = dfx3_3$Pred_Precipitacao,
                dfx3_350 = dfx3_35$Pred_Precipitacao,
                dfx3_400 = dfx3_4$Pred_Precipitacao)

dfx4 <- modeling_cv4_ %>% 
  dplyr::slice(1:(nrow(modeling_cv4_) / lvl[[4]] )) %>% 
  dplyr::select(Dia) %>% 
  dplyr::left_join(Data_real, by = "Dia") %>% # Dados observados
  dplyr::rename("Chuva_real" = mm_chuva) %>% # Chuva_real => Precipitação em SFC
  dplyr::mutate(dfx4_050 = dfx4_05$Pred_Precipitacao,
                dfx4_100 = dfx4_1$Pred_Precipitacao, 
                dfx4_150 = dfx4_15$Pred_Precipitacao,
                dfx4_200 = dfx4_2$Pred_Precipitacao,
                dfx4_250 = dfx4_25$Pred_Precipitacao,
                dfx4_300 = dfx4_3$Pred_Precipitacao,
                dfx4_350 = dfx4_35$Pred_Precipitacao,
                dfx4_400 = dfx4_4$Pred_Precipitacao)

dfx5 <- modeling_cv5_ %>% 
  dplyr::slice(1:(nrow(modeling_cv5_) / lvl[[5]] )) %>% 
  dplyr::select(Dia) %>% 
  dplyr::left_join(Data_real, by = "Dia") %>% # Dados observados
  dplyr::rename("Chuva_real" = mm_chuva) %>% # Chuva_real => Precipitação em SFC
  dplyr::mutate(dfx5_050 = dfx5_05$Pred_Precipitacao,
                dfx5_100 = dfx5_1$Pred_Precipitacao, 
                dfx5_150 = dfx5_15$Pred_Precipitacao,
                dfx5_200 = dfx5_2$Pred_Precipitacao,
                dfx5_250 = dfx5_25$Pred_Precipitacao,
                dfx5_300 = dfx5_3$Pred_Precipitacao,
                dfx5_350 = dfx5_35$Pred_Precipitacao,
                dfx5_400 = dfx5_4$Pred_Precipitacao)


Realizado a manipulação de dados para o fold, serão calculados as métricas \(RMSE\) e \(MAE\) para posterior tomada de decisão. Para tal, o pacote Metrics será utilizado a fim de facilitar a programação.

erro_fold1 <- dfx1 %>% 
  dplyr::mutate(b050_rmse = Metrics::rmse(dfx1$Chuva_real, dfx1$dfx1_050),
                b100_rmse = Metrics::rmse(dfx1$Chuva_real, dfx1$dfx1_100),
                b150_rmse = Metrics::rmse(dfx1$Chuva_real, dfx1$dfx1_150),
                b200_rmse = Metrics::rmse(dfx1$Chuva_real, dfx1$dfx1_200),
                b250_rmse = Metrics::rmse(dfx1$Chuva_real, dfx1$dfx1_250),
                b300_rmse = Metrics::rmse(dfx1$Chuva_real, dfx1$dfx1_300),
                b350_rmse = Metrics::rmse(dfx1$Chuva_real, dfx1$dfx1_350),
                b400_rmse = Metrics::rmse(dfx1$Chuva_real, dfx1$dfx1_400),
                
                b050_mae = Metrics::mae(dfx1$Chuva_real, dfx1$dfx1_050),
                b100_mae = Metrics::mae(dfx1$Chuva_real, dfx1$dfx1_100),
                b150_mae = Metrics::mae(dfx1$Chuva_real, dfx1$dfx1_150),
                b200_mae = Metrics::mae(dfx1$Chuva_real, dfx1$dfx1_200),
                b250_mae = Metrics::mae(dfx1$Chuva_real, dfx1$dfx1_250),
                b300_mae = Metrics::mae(dfx1$Chuva_real, dfx1$dfx1_300),
                b350_mae = Metrics::mae(dfx1$Chuva_real, dfx1$dfx1_350),
                b400_mae = Metrics::mae(dfx1$Chuva_real, dfx1$dfx1_400)) %>% 
  dplyr::select(b050_rmse, b100_rmse, b150_rmse, b200_rmse,
                b250_rmse, b300_rmse, b350_rmse, b400_rmse,
                
                b050_mae, b100_mae, b150_mae, b200_mae,
                b250_mae, b300_mae, b350_mae, b400_mae) %>% 
  dplyr::slice(1) %>% 
  reshape2::melt() %>% 
  dplyr::rename("IDW"  = variable,
                "Valor" = value) %>% 
  tidyr::separate(IDW, c("IDW", "Erro"))

# levels(erro_fold1$IDW)
erro_fold1$IDW <- as.factor(erro_fold1$IDW)

levels(erro_fold1$IDW) <- c("b = 0.50", "b = 1.00", "b = 1.50", "b = 2.00",
                            "b = 2.50", "b = 3.00", "b = 3.50", "b = 4.00")

# levels(erro_fold1$Erro)
erro_fold1$Erro <- as.factor(erro_fold1$Erro)

levels(erro_fold1$Erro) <- c("MAE", "RMSE")

erro_fold1 %>% 
  ggplot2::ggplot() + 
  geom_bar(aes(x = IDW,
               y = Valor,
               fill = IDW),
           col  = "black",
           stat = "identity",
           show.legend = F) +
  facet_wrap(~ Erro, scales = "free") +
  scale_fill_viridis_d() +
  guides(fill = guide_legend(title = "IDW")) +
  ggtitle("Fold 01") +
  xlab("Potência `b`") + ylab("RMSE") +
  tema + theme(axis.text.x     = element_text(angle = 90))

erro_fold1x <- erro_fold1 %>% 
  tidyr::spread(Erro, Valor)

p1 <- erro_fold1x %>% 
  ggplot2::ggplot() + 
  geom_point(aes(x = RMSE,
                 y = MAE,
               fill = IDW),
           col  = "black",
           size = 3,
           stroke = 1.2,
           shape  = 22) +
  scale_fill_viridis_d() +
  guides(fill = guide_legend(title = "IDW")) +
  ggtitle("Fold 01") +
  xlab("RMSE") + ylab("MAE") +
  tema
erro_fold2 <- dfx2 %>% 
  dplyr::mutate(b050_rmse = Metrics::rmse(dfx2$Chuva_real, dfx2$dfx2_050),
                b100_rmse = Metrics::rmse(dfx2$Chuva_real, dfx2$dfx2_100),
                b150_rmse = Metrics::rmse(dfx2$Chuva_real, dfx2$dfx2_150),
                b200_rmse = Metrics::rmse(dfx2$Chuva_real, dfx2$dfx2_200),
                b250_rmse = Metrics::rmse(dfx2$Chuva_real, dfx2$dfx2_250),
                b300_rmse = Metrics::rmse(dfx2$Chuva_real, dfx2$dfx2_300),
                b350_rmse = Metrics::rmse(dfx2$Chuva_real, dfx2$dfx2_350),
                b400_rmse = Metrics::rmse(dfx2$Chuva_real, dfx2$dfx2_400),
                
                b050_mae = Metrics::mae(dfx2$Chuva_real, dfx2$dfx2_050),
                b100_mae = Metrics::mae(dfx2$Chuva_real, dfx2$dfx2_100),
                b150_mae = Metrics::mae(dfx2$Chuva_real, dfx2$dfx2_150),
                b200_mae = Metrics::mae(dfx2$Chuva_real, dfx2$dfx2_200),
                b250_mae = Metrics::mae(dfx2$Chuva_real, dfx2$dfx2_250),
                b300_mae = Metrics::mae(dfx2$Chuva_real, dfx2$dfx2_300),
                b350_mae = Metrics::mae(dfx2$Chuva_real, dfx2$dfx2_350),
                b400_mae = Metrics::mae(dfx2$Chuva_real, dfx2$dfx2_400)) %>% 
  dplyr::select(b050_rmse, b100_rmse, b150_rmse, b200_rmse,
                b250_rmse, b300_rmse, b350_rmse, b400_rmse,
                
                b050_mae, b100_mae, b150_mae, b200_mae,
                b250_mae, b300_mae, b350_mae, b400_mae) %>% 
  dplyr::slice(1) %>% 
  reshape2::melt() %>% 
  dplyr::rename("IDW"  = variable,
                "Valor" = value) %>% 
  tidyr::separate(IDW, c("IDW", "Erro"))

erro_fold2$IDW <- as.factor(erro_fold2$IDW)

levels(erro_fold2$IDW) <- c("b = 0.50", "b = 1.00", "b = 1.50", "b = 2.00",
                            "b = 2.50", "b = 3.00", "b = 3.50", "b = 4.00")

erro_fold2$Erro <- as.factor(erro_fold2$Erro)

levels(erro_fold2$Erro) <- c("MAE", "RMSE")

erro_fold2x <- erro_fold2 %>% 
  tidyr::spread(Erro, Valor)

p2 <- erro_fold2x %>% 
  ggplot2::ggplot() + 
  geom_point(aes(x = RMSE,
                 y = MAE,
               fill = IDW),
           col  = "black",
           size = 3,
           stroke = 1.2,
           shape  = 22) +
  scale_fill_viridis_d() +
  guides(fill = guide_legend(title = "IDW")) +
  ggtitle("Fold 02") +
  xlab("RMSE") + ylab("MAE") +
  tema + theme(legend.position = "bottom")

#

erro_fold3 <- dfx3 %>% 
  dplyr::mutate(b050_rmse = Metrics::rmse(dfx3$Chuva_real, dfx3$dfx3_050),
                b100_rmse = Metrics::rmse(dfx3$Chuva_real, dfx3$dfx3_100),
                b150_rmse = Metrics::rmse(dfx3$Chuva_real, dfx3$dfx3_150),
                b200_rmse = Metrics::rmse(dfx3$Chuva_real, dfx3$dfx3_200),
                b250_rmse = Metrics::rmse(dfx3$Chuva_real, dfx3$dfx3_250),
                b300_rmse = Metrics::rmse(dfx3$Chuva_real, dfx3$dfx3_300),
                b350_rmse = Metrics::rmse(dfx3$Chuva_real, dfx3$dfx3_350),
                b400_rmse = Metrics::rmse(dfx3$Chuva_real, dfx3$dfx3_400),
                
                b050_mae = Metrics::mae(dfx3$Chuva_real, dfx3$dfx3_050),
                b100_mae = Metrics::mae(dfx3$Chuva_real, dfx3$dfx3_100),
                b150_mae = Metrics::mae(dfx3$Chuva_real, dfx3$dfx3_150),
                b200_mae = Metrics::mae(dfx3$Chuva_real, dfx3$dfx3_200),
                b250_mae = Metrics::mae(dfx3$Chuva_real, dfx3$dfx3_250),
                b300_mae = Metrics::mae(dfx3$Chuva_real, dfx3$dfx3_300),
                b350_mae = Metrics::mae(dfx3$Chuva_real, dfx3$dfx3_350),
                b400_mae = Metrics::mae(dfx3$Chuva_real, dfx3$dfx3_400)) %>% 
  dplyr::select(b050_rmse, b100_rmse, b150_rmse, b200_rmse,
                b250_rmse, b300_rmse, b350_rmse, b400_rmse,
                
                b050_mae, b100_mae, b150_mae, b200_mae,
                b250_mae, b300_mae, b350_mae, b400_mae) %>% 
  dplyr::slice(1) %>% 
  reshape2::melt() %>% 
  dplyr::rename("IDW"  = variable,
                "Valor" = value) %>% 
  tidyr::separate(IDW, c("IDW", "Erro"))

erro_fold3$IDW <- as.factor(erro_fold3$IDW)

levels(erro_fold3$IDW) <- c("b = 0.50", "b = 1.00", "b = 1.50", "b = 2.00",
                            "b = 2.50", "b = 3.00", "b = 3.50", "b = 4.00")

erro_fold3$Erro <- as.factor(erro_fold3$Erro)

levels(erro_fold3$Erro) <- c("MAE", "RMSE")

erro_fold3x <- erro_fold3 %>% 
  tidyr::spread(Erro, Valor)

p3 <- erro_fold3x %>% 
  ggplot2::ggplot() + 
  geom_point(aes(x = RMSE,
                 y = MAE,
               fill = IDW),
           col  = "black",
           size = 3,
           stroke = 1.2,
           shape  = 22) +
  scale_fill_viridis_d() +
  guides(fill = guide_legend(title = "IDW")) +
  ggtitle("Fold 03") +
  xlab("RMSE") + ylab("MAE") +
  tema + theme(legend.position = "bottom")

#
erro_fold4 <- dfx4 %>% 
  dplyr::mutate(b050_rmse = Metrics::rmse(dfx4$Chuva_real, dfx4$dfx4_050),
                b100_rmse = Metrics::rmse(dfx4$Chuva_real, dfx4$dfx4_100),
                b150_rmse = Metrics::rmse(dfx4$Chuva_real, dfx4$dfx4_150),
                b200_rmse = Metrics::rmse(dfx4$Chuva_real, dfx4$dfx4_200),
                b250_rmse = Metrics::rmse(dfx4$Chuva_real, dfx4$dfx4_250),
                b300_rmse = Metrics::rmse(dfx4$Chuva_real, dfx4$dfx4_300),
                b350_rmse = Metrics::rmse(dfx4$Chuva_real, dfx4$dfx4_350),
                b400_rmse = Metrics::rmse(dfx4$Chuva_real, dfx4$dfx4_400),
                
                b050_mae = Metrics::mae(dfx4$Chuva_real, dfx4$dfx4_050),
                b100_mae = Metrics::mae(dfx4$Chuva_real, dfx4$dfx4_100),
                b150_mae = Metrics::mae(dfx4$Chuva_real, dfx4$dfx4_150),
                b200_mae = Metrics::mae(dfx4$Chuva_real, dfx4$dfx4_200),
                b250_mae = Metrics::mae(dfx4$Chuva_real, dfx4$dfx4_250),
                b300_mae = Metrics::mae(dfx4$Chuva_real, dfx4$dfx4_300),
                b350_mae = Metrics::mae(dfx4$Chuva_real, dfx4$dfx4_350),
                b400_mae = Metrics::mae(dfx4$Chuva_real, dfx4$dfx4_400)) %>% 
  dplyr::select(b050_rmse, b100_rmse, b150_rmse, b200_rmse,
                b250_rmse, b300_rmse, b350_rmse, b400_rmse,
                
                b050_mae, b100_mae, b150_mae, b200_mae,
                b250_mae, b300_mae, b350_mae, b400_mae) %>% 
  dplyr::slice(1) %>% 
  reshape2::melt() %>% 
  dplyr::rename("IDW"  = variable,
                "Valor" = value) %>% 
  tidyr::separate(IDW, c("IDW", "Erro"))

erro_fold4$IDW <- as.factor(erro_fold4$IDW)

levels(erro_fold4$IDW) <- c("b = 0.50", "b = 1.00", "b = 1.50", "b = 2.00",
                            "b = 2.50", "b = 3.00", "b = 3.50", "b = 4.00")

erro_fold4$Erro <- as.factor(erro_fold4$Erro)

levels(erro_fold4$Erro) <- c("MAE", "RMSE")

erro_fold4x <- erro_fold4 %>% 
  tidyr::spread(Erro, Valor)

p4 <- erro_fold4x %>% 
  ggplot2::ggplot() + 
  geom_point(aes(x = RMSE,
                 y = MAE,
               fill = IDW),
           col  = "black",
           size = 3,
           stroke = 1.2,
           shape  = 22) +
  scale_fill_viridis_d() +
  guides(fill = guide_legend(title = "IDW")) +
  ggtitle("Fold 04") +
  xlab("RMSE") + ylab("MAE") +
  tema + theme(legend.position = "bottom")

#
erro_fold5 <- dfx5 %>% 
  dplyr::mutate(b050_rmse = Metrics::rmse(dfx5$Chuva_real, dfx5$dfx5_050),
                b100_rmse = Metrics::rmse(dfx5$Chuva_real, dfx5$dfx5_100),
                b150_rmse = Metrics::rmse(dfx5$Chuva_real, dfx5$dfx5_150),
                b200_rmse = Metrics::rmse(dfx5$Chuva_real, dfx5$dfx5_200),
                b250_rmse = Metrics::rmse(dfx5$Chuva_real, dfx5$dfx5_250),
                b300_rmse = Metrics::rmse(dfx5$Chuva_real, dfx5$dfx5_300),
                b350_rmse = Metrics::rmse(dfx5$Chuva_real, dfx5$dfx5_350),
                b400_rmse = Metrics::rmse(dfx5$Chuva_real, dfx5$dfx5_400),
                
                b050_mae = Metrics::mae(dfx5$Chuva_real, dfx5$dfx5_050),
                b100_mae = Metrics::mae(dfx5$Chuva_real, dfx5$dfx5_100),
                b150_mae = Metrics::mae(dfx5$Chuva_real, dfx5$dfx5_150),
                b200_mae = Metrics::mae(dfx5$Chuva_real, dfx5$dfx5_200),
                b250_mae = Metrics::mae(dfx5$Chuva_real, dfx5$dfx5_250),
                b300_mae = Metrics::mae(dfx5$Chuva_real, dfx5$dfx5_300),
                b350_mae = Metrics::mae(dfx5$Chuva_real, dfx5$dfx5_350),
                b400_mae = Metrics::mae(dfx5$Chuva_real, dfx5$dfx5_400)) %>% 
  dplyr::select(b050_rmse, b100_rmse, b150_rmse, b200_rmse,
                b250_rmse, b300_rmse, b350_rmse, b400_rmse,
                
                b050_mae, b100_mae, b150_mae, b200_mae,
                b250_mae, b300_mae, b350_mae, b400_mae) %>% 
  dplyr::slice(1) %>% 
  reshape2::melt() %>% 
  dplyr::rename("IDW"  = variable,
                "Valor" = value) %>% 
  tidyr::separate(IDW, c("IDW", "Erro"))

erro_fold5$IDW <- as.factor(erro_fold5$IDW)

levels(erro_fold5$IDW) <- c("b = 0.50", "b = 1.00", "b = 1.50", "b = 2.00",
                            "b = 2.50", "b = 3.00", "b = 3.50", "b = 4.00")

erro_fold5$Erro <- as.factor(erro_fold5$Erro)

levels(erro_fold5$Erro) <- c("MAE", "RMSE")

erro_fold5x <- erro_fold5 %>% 
  tidyr::spread(Erro, Valor)

p5 <- erro_fold5x %>% 
  ggplot2::ggplot() + 
  geom_point(aes(x = RMSE,
                 y = MAE,
               fill = IDW),
           col  = "black",
           size = 3,
           stroke = 1.2,
           shape  = 22) +
  scale_fill_viridis_d() +
  guides(fill = guide_legend(title = "IDW")) +
  ggtitle("Fold 05") +
  xlab("RMSE") + ylab("MAE") +
  tema + theme(legend.position = "bottom")

xplot <- cowplot::plot_grid(p1 + theme(legend.position = "none"),
                            p2 + theme(legend.position = "none"),
                            p3 + theme(legend.position = "none"),
                            p4 + theme(legend.position = "none"),
                            p5 + theme(legend.position = "none"),
                            align = 'vh',
                            hjust = -1, nrow = 1)

legend <- cowplot::get_legend(p1)

xplot  <- cowplot::plot_grid(xplot, legend, rel_widths = c(5, .3), nrow = 1)

shiny::div(plotly::ggplotly(xplot), align = "center")
erro_fold <- erro_fold1 %>% 
  dplyr::rename("CV1" = Valor) %>% 
  dplyr::mutate(CV2 = erro_fold2$Valor,
                CV3 = erro_fold3$Valor,
                CV4 = erro_fold4$Valor,
                CV5 = erro_fold5$Valor) %>% 
  reshape2::melt(id.vars = c("IDW", "Erro")) %>% 
  dplyr::rename("Fold" = variable,
                "Valor" = value)

plotx <- erro_fold %>%
  ggplot2::ggplot() +
  geom_line(aes(x = Fold,
                y = Valor,
                col = IDW,
                group = IDW)) +
  facet_wrap(~Erro, scales = "free") +
  ggtitle("Erros na Validação Cruzada") +
  xlab("Folds") + ylab("Valor dos Erros") +
  tema

shiny::div(plotly::ggplotly(plotx), align = "center")




Autor:
Brenner Silva;
****