Aplicação de IDW para escolha da melhor potência.
Os dados utilizados foram obtidos utilizando web scraping a partir do API inmtetr
.
if (!require("pacman")) install.packages("pacman")
pacman::p_load(dplyr, tidyr, tibble, stringr, gstat, sp, spm, raster,
rasterVis, ggplot2, spdplyr, Metrics, viridis,
gridExtra, grid, caret, sf, rgdal, mapproj, geosphere,
spdep, cowplot, plyr,
DT, plotly, shiny)
## package 'spdep' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\BRENNER BIASI\AppData\Local\Temp\RtmpKA7uNS\downloaded_packages
# if(!require("devtools")) install.packages("devtools") ; library(devtools) # Necessário para o pacote inmetr
if(!require("inmetr")) devtools::install_github('lhmet/inmetr') ; library(inmetr)
# O pacote DT foi utilizado exclusivamente para criação de tabelas dinâmicas,
# facilitando a demonstração de alguns dados utilizados.
# Os pacotes plotly e shiny são utilizados apenas para promover uma análise mais interativa
# dos visitantes com alguns plots.
dataset <- dataset # Banco de dados
head(dataset, 5)
## Data_1 Group.1 Dia Hora LI73002 LI73003 LIC7309 LIC7307
## 1 2011-01-01 01/01/2011 NA NA 31.23821 27.08799 10.560590 18.55438
## 2 2011-01-02 02/01/2011 NA NA 27.38272 29.53459 9.742799 25.35912
## 3 2011-01-03 03/01/2011 NA NA 22.70694 34.24997 19.677336 22.84778
## 4 2011-01-04 04/01/2011 NA NA 25.61978 41.52428 39.178556 26.50447
## 5 2011-01-05 05/01/2011 NA NA 29.52663 45.92729 65.202267 23.53899
## LIC7310 LIC7306 LIC7315 LI73017 LI73016 Data
## 1 36.60356 63.59569 53.25386 52.45145 38.91401 2011-01-01 11:59:30
## 2 36.10735 65.15974 61.28047 51.83789 35.23613 2011-01-02 11:59:30
## 3 57.36837 63.20326 59.59342 49.65450 42.12386 2011-01-03 11:59:30
## 4 75.08241 73.28996 71.88722 67.27992 38.36544 2011-01-04 11:59:30
## 5 74.28950 62.11171 77.35249 63.57197 40.99165 2011-01-05 11:59:30
## Quan_chu Chuva Ind_chu
## 1 0 Não 0
## 2 0 Não 0
## 3 0 Não 0
## 4 0 Não 0
## 5 0 Não 0
O objeto met_data
deve ser gerado para pleno desenvolvimento do script. Este objeto foi gerado para não expor informacoes pessoais quanto ao login no BDMEP/INMET. O código do objeto met_data
foi gerado com dput()
.
* Fica a sugestão de utilzar a opção Hide
no chunck
abaixo, de modo a tornar mais agradável a visualização do script.
{
met_data <- structure(list(date = structure(c(1293883200, 1293969600, 1294056000, 1294142400,
1294228800, 1294315200, 1294401600, 1294488000,
1294574400, 1294660800, 1294747200, 1294833600,
1294920000, 1295006400, 1295092800, 1295179200,
1295265600, 1295352000, 1295438400, 1295524800,
1295611200, 1295697600, 1295784000, 1295870400,
1295956800, 1296043200, 1296129600, 1296216000,
1296302400, 1296388800, 1296475200, 1296561600,
1296648000, 1296734400, 1296820800, 1296907200,
1296993600, 1297080000, 1297166400, 1297252800,
1297339200, 1297425600, 1297512000, 1297598400,
1297684800, 1297771200, 1297857600, 1297944000,
1298030400, 1298116800, 1298203200, 1298289600,
1298376000, 1298462400, 1298548800, 1298635200,
1298721600, 1298808000, 1298894400, 1298980800,
1299067200, 1299153600, 1299240000, 1299326400,
1299412800, 1299499200, 1299585600, 1299672000,
1299758400, 1299844800, 1299931200, 1300017600,
1300104000, 1300190400, 1300276800, 1300363200,
1300449600, 1300536000, 1300622400, 1300708800,
1300795200, 1300881600, 1300968000, 1301054400,
1301140800, 1301227200, 1301313600, 1301400000,
1301486400, 1301572800, 1301659200, 1301745600,
1301832000, 1301918400, 1302004800, 1302091200,
1302177600, 1302264000, 1302350400, 1302436800,
1302523200, 1302609600, 1302696000, 1302782400,
1302868800, 1302955200, 1303041600, 1303128000,
1303214400, 1303300800, 1303387200, 1303473600,
1303560000, 1303646400, 1303732800, 1303819200,
1303905600, 1303992000, 1304078400, 1304164800,
1304251200, 1304337600, 1304424000, 1304510400,
1304596800, 1304683200, 1304769600, 1304856000,
1304942400, 1305028800, 1305115200, 1305201600,
1305288000, 1305374400, 1305460800, 1305547200,
1305633600, 1305720000, 1305806400, 1305892800,
1305979200, 1306065600, 1306152000, 1306238400,
1306324800, 1306411200, 1306497600, 1306584000,
1306670400, 1306756800, 1306843200, 1306929600,
1307016000, 1307102400, 1307188800, 1307275200,
1307361600, 1307448000, 1307534400, 1307620800,
1307707200, 1307793600, 1307880000, 1307966400,
1308052800, 1308139200, 1308225600, 1308312000,
1308398400, 1308484800, 1308571200, 1308657600,
1308744000, 1308830400, 1308916800, 1309003200,
1309089600, 1309176000, 1309262400, 1309348800,
1309435200, 1309521600, 1309608000, 1309694400,
1309780800, 1309867200, 1309953600, 1310040000,
1310126400, 1310212800, 1310299200, 1310385600,
1310472000, 1310558400, 1310644800, 1310731200,
1310817600, 1310904000, 1310990400, 1311076800,
1311163200, 1311249600, 1311336000, 1311422400,
1311508800, 1311595200, 1311681600, 1311768000,
1311854400, 1311940800, 1312027200, 1312113600,
1312200000, 1312286400, 1312372800, 1312459200,
1312545600, 1312632000, 1312718400, 1312804800,
1312891200, 1312977600, 1313064000, 1313150400,
1313236800, 1313323200, 1313409600, 1313496000,
1313582400, 1313668800, 1313755200, 1313841600,
1313928000, 1314014400, 1314100800, 1314187200,
1314273600, 1314360000, 1314446400, 1314532800,
1314619200, 1314705600, 1314792000, 1314878400,
1314964800, 1315051200, 1315137600, 1315224000,
1315310400, 1315396800, 1315483200, 1315569600,
1315656000, 1315742400, 1315828800, 1315915200,
1316001600),
class = c("POSIXct", "POSIXt"), tzone = "UTC"),
`Feira de Santana` = c(0, 0, 0, 0, 34.3, 0, 0, 3.8, 5.2, 0, 0, 24,
0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3.7, 1.3, 0,
0, 2, 0, 0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0.8, 14.3, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 2.9, 0, 1.6, 100, 0, 0, 0, 0,
0, 22, 8.3, 14, 0, 0, 1.2, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 10, 0, 11, 0, 3.2, 3.1, 0.4, 0,
0, 26.4, 2.5, 3.5, 0, 2.5, 2, 5.6, 0, 0, 0,
0.4, 0, 43.5, 3.2, 2.3, 0, 0.2, 0, 0, 0.6,
0, 0, 1.1, 0, 0, 0.4, 2.5, 5.9, 0, 0, 0, 0,
5, 4.2, 3.4, 1.2, 0.3, 7.1, 0, 0, 0.1, 0, 0,
0, 0.8, 0, 1.7, 5.9, 1.2, 0, 8.8, 2.8, 0, 0,
0, 0, 0, 11.7, 5.4, 12.5, 4.3, 4.6, 0.4, 2.2,
0.5, 0, 0, 0, 0.3, 0, 0, 0, 0, 0, 2.7, 2.3,
0.1, 0.1, 0.1, 0, 0, 0, 0, 0, 5.7, 0, 0, 0,
0, 0, 0, 0, 0, 4, 1.2, 0, 0, 0, 0.7, 2.7, 0,
0.7, 2.2, 0.5, 0, 1.2, 4.4, 0, 0, 0, 0, 5, 0.9,
0, 0, 0, 0, 0, 1.2, 1.2, 0, 0.2, 3.2, 0, 2.6,
0, 0, 0, 1.8, 4.7, 0.2, 0, 0, 0, 0, 0, 0, 0, 0,
0, 6.2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
`Cruz das Almas` = c(0, 0, 0, 0, 15.4, 1.2, 0, 13.1, 3.4, 0, 0, 9.3,
0.1, 0, 0, 0, 0, 0, 0, 0.4, 2, 4, 4.6, 5.8, 9.4,
0.6, 0, 0.4, 4.7, 0.2, 0.1, 0, 0, 2.8, 0, 0, 0,
0, 1.2, 15.9, 0, 1.7, 0, 0, 0, 0.1, 0, 6.5, 8.1,
14.6, 1.4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 2, 5.8, 0, 0.2, 0, 0, 0,
0, 0, 0, 28.6, 20, 1.3, 0.2, 11.4, 13, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 16.9, 3.4, 20.2, 0, 17.8,
3.6, 5, 3.3, 3.4, 28.5, 5.9, 15.6, 6.6, 51, 24.2,
11.3, 0.2, 0.1, 0.2, 0.8, 3.5, 67.8, 8.4, 10.4,
0.8, 2, 0.7, 7, 10.6, 19.5, 0, 5.8, 0, 14, 3.8,
0.2, 0, 1.4, 0.4, 0, 10.9, 11.2, 3.8, 6, 9.1, 4.3,
11.1, 0, 0.2, 0, 0.1, 0.1, 0, 1.6, 1.3, 3.5, 2,
11.2, 0, 1, 8.2, 0, 0, 0, 0.2, 0, 38.3, 30.4,
19.8, 0, 11.4, 1, 3.4, 0.5, 0, 0, 0, 0.2, 2.2,
0.3, 0, 0.5, 0, 23.6, 1.1, 4.5, 2.6, 2.1, 0,
0, 0.5, 0, 0.1, 7, 0, 0, 0, 0, 0, 0, 0, 0.1,
8.6, 0, 0.3, 0, 0, 6.8, 3.6, 2, 4.2, 11.2, 2.2,
2.8, 4.6, 0, 0, 0, 0, 0, 22.4, 1.6, 0, 0, 4.6,
0, 0, 6.1, 0.5, 0, 7.6, 5.7, 1.5, 11.8, 0, 0,
0, 5.1, 1.2, 0, 8.9, 5, 0.4, 5.3, 5, 0, 0, 0,
10.6, 8.2, 0, 0.1, 0, 0.1, 0, 0, 0, 0, 0, 0),
Salvador = c(0, 0, 0, 0, 0, 0, 6.6, 12.4, 1.1, 0, 0, 31.6, 4, 6, 0,
0, 16.2, 0, 6.3, 1.5, 16.9, 0.1, 0.9, 1.3, 10.8, 1.5,
39.7, 4, 7.6,1.8, 0, 0, 0.1, 0, 0, 0, 0, 0, 0.6, 0, 1.4,
0, 0, 0.1, 0.5, 0.7, 0, 0.4, 5.8, 1.4, 0, 0, 0.8, 0, 0,
0, 0, 34.2, 0.2, 0, 0, 12.7, 0, 1.1, 0, 0, 0, 5, 0, 0,
0, 0, 6.8, 0, 0, 0, 0, 79, 0.3, 14.7, 2.4, 37.3, 10.3,
8, 0.4, 10.4, 12.1, 0, 0, 0.2, 0, 0, 0, 0, 0.2, 0, 0,
13.2, 8.2, 22.2, 1, 20, 1.4, 4.4, 18.5, 3.3, 4.6, 2.2,
7.1, 5.6, 42.8, 7.9, 4.2, 8.5, 0.2, 1.7, 1.2, 3.8, 97.6,
51.2, 32.7, 0.6, 9.1, 1, 8.4, 3.2, 29.6, 0, 1.4, 1, 8,
15.7, 11.9, 13.8, 3.6, 21.3, 0, 22.5, 82.4, 17, 2.2, 1.8,
7.5, 4.2, 0, 0.7, 0.5, 0, 1, 1, 2.1, 0, 5.8, 15.4, 0.1,
0, 15.6, 4, 0, 0, 0, 0, 26.6, 32.2, 22.1, 20.3, 1.3,
31.2, 10.6, 51, 1.6, 0, 1.8, 5.6, 6.4, 1.4, 0, 0, 0, 0,
24.7, 0.2, 6.9, 0.7, 1, 0, 0, 0, 0, 0.2, 9.2, 0, 1.1,
0, 0.3, 0, 0, 0, 0, 10, 0, 1, 0, 0, 0.3, 2.6, 0, 3.1,
5.9, 6.3, 3.3, 5.5, 0, 0, 0, 0, 0, 11, 3, 0, 4.2, 0.2,
0, 0, 20.8, 0, 0, 9.7, 0, 5.8, 6.6, 2.7, 3.1, 0.8, 9.1,
2, 3.8, 1.3, 5.8, 1, 0, 0.4, 0, 0, 0, 1.6, 0.8, 0, 0,
0.1, 0, 0, 6.1, 0, 0, 0, 0),
Alagoinhas = c(0, 0, 0, 0, 1.1, 3.4, 0, 3.1, 21.9, 3.7, 0, 121.7, 0,
0, 0, 0, 0, 0, 0, 0, 8.6, 2.3, 6.9, 0.4, 2.3, 0.3,
3.2, 1.4, 0, 0, 0, 0, 0, 0, 0.2, 0, 0, 0, 0, 0, 2.3,
0.2, 0, 0, 0, 0.7, 0.5, 5.1, 2.4, 2.4, 1.6, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1.9, 0, 0, 0, 0.3, 0, 0, 1.2, 4.2, 5.7, 9, 0, 0.6,
1.9, 3.2, 35.3, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 12.2,
7.2, 5.5, 0, 3.6, 0.5, 1, 3.4, 1.1, 23.7, 4.2, 3.7,
0.2, 6.9, 8.1, 5.1, 0.8, 3.2, 0, 5, 7.8, 67.9, 9.6,
11.3, 3, 6.8, 0.7, 3.3, 2.5, 0.2, 1, 0, 0, 4.2, 2.5,
3.1, 5.9, 0, 0, 1.7, 0, 10.2, 6.3, 1.3, 2.6, 5.3,
37.2, 6, 0.7, 0, 0, 0, 0, 0, 0, 4.8, 4.5, 13.8, 0,
13.4, 0.9, 0, 0, 0, 0, 0, 27.9, 9.5, 11, 13.2, 10.7,
2.3, 0, 1.4, 0, 0, 0, 3.1, 0.2, 0, 0, 0, 0, 6.9, 0.4,
2.3, 0.6, 0.4, 0, 0, 0, 0, 0, 4.4, 0, 0.2, 1.4, 0.5,
0, 0, 0, 0, 9.5, 0.4, 1.6, 0, 0, 6.3, 4.8, 5.1, 0,
1.7, 6.1, 0.2, 12.1, 5.6, 0, 0, 0, 0, 24.8, 2.7, 0,
1.6, 0, 0, 0, 3.2, 4.9, 0, 1.6, 1.8, 3, 0.7, 0, 0.5,
0, 4.8, 5.8, 4.6, 0.9, 0, 0, 0, 0, 0, 0, 0, 0, 2.5,
0.7, 0, 0, 0, 0, 0.3, 0, 0, 0, 0.4)),
class = c("tbl_df", "tbl", "data.frame"),
row.names = c(NA, -257L))
}
dataset <- dataset %>%
dplyr::select(Data_1, Quan_chu) %>%
dplyr::mutate(Data_1 = as.Date(Data_1))
Data_real <- data.frame(Data_1 = seq(as.Date('2011-01-01'),
as.Date('2011-09-14'), by = 1))
Data_real <- Data_real %>%
dplyr::full_join(dataset, by = "Data_1") %>%
dplyr::rename("Dia" = Data_1,
"mm_chuva" = Quan_chu)
Nesta parte do script, caso seja desejado utilizar o BDMEP para realizar um novo web scraping deve-se prestar atenção ao uso suprimido da função bdmep_import
do pacote inmetr
. No chunck
abaixo que está localizado o passo no qual deveria ser criado o objeto met_data
.
Para a modelagem aqui desenvolvida as estações escolinhas para suporte a implementação dos métodos de interpolação espacial, são: Alagoinhas (Alag), Cruz das Almas (Cruz), Feira de Santana (FSA) e Salvador (SSA).
A escolha dessas estações ocorreu em função da boa localização geo-espacial em relação ao município de São Francisco do Conde (SFC).
Para mais detalhes quanto ao emprego do web scraping aqui utilizado, sugere-se o acesso a página do API inmetr.
# Buscando Dados Faltantes no BDMEP/INMET
# Estações de interesse
stations <- c("Alagoinhas", "Cruz das Almas", "Feira de Santana", "Salvador")
stations_rows <- pmatch(stations, inmetr::bdmep_meta$name) # Busca a estação
stns_codes <- inmetr::bdmep_meta[stations_rows, "id"] # Retorna o id
# Datas deinteresse
start_date <- "01/01/2011"
end_date <- "14/09/2011"
# Para o emprego efetivo do script, deve-se remover os comentarios (#) dos codigos a seguir.
# O codigo foi comentado em funcao da opcao de ocutacao de informacoes pessoais referente a login e senha
# de acesso ao BDMEP via pacote inmetr.
# data_inmet <- inmetr::bdmep_import(id = stns_codes,
# sdate = start_date,
# edate = end_date,
# email = "seu email entre aspas", # Preencher
# passwd = "sua senha entre aspas", # Preencher
# verbose = TRUE)
# met_data <- data_inmet %>%
# dplyr::select(date, prec, id) %>%
# dplyr::mutate(id = as.factor(id))
# levels(met_data$id)
# levels(met_data$id) <- c("Feira de Santana",
# "Cruz das Almas",
# "Salvador",
# "Alagoinhas")
#
# met_data <- met_data %>%
# na.omit() %>%
# tidyr::spread(id, prec)
# Filtrando datas para os valores faltantes
filling <- met_data %>%
dplyr::mutate(Dia = as.Date(date)) %>%
dplyr::full_join(Data_real %>%
dplyr::filter(is.na(mm_chuva)) %>%
dplyr::mutate(mm_chuva = "xx"),
by = "Dia") %>%
dplyr::select(-date) %>%
replace(is.na(.), 0) %>%
dplyr::filter(mm_chuva == "xx") %>%
dplyr::select(-mm_chuva)
# Localização - *incompleto!
# By: Tarssio Barreto
# https://github.com/barretokra
shape <- rgdal::readOGR("C:/Users/BRENNER BIASI/Desktop/Mestrado/SER UFF/Estados_do_Brasil/Brasil.shp") %>%
dplyr::filter(ESTADOS == "Bahia") %>%
ggplot2::fortify() %>%
dplyr::mutate(long = round(long, 2)) %>%
dplyr::mutate(lat = round(lat, 2))
## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\BRENNER BIASI\Desktop\Mestrado\SER UFF\Estados_do_Brasil\Brasil.shp", layer: "Brasil"
## with 27 features
## It has 4 fields
# Localização das estações - Coordenadas em graus
# http://www.inmet.gov.br/portal/index.php?r=estacoes/estacoesConvencionais
df_map <- data.frame(lat = c(-12.196111, -12.666667,
-13.005278, -12.148333, -12.627778),
long = c(-38.967222, -39.083333,
-38.505833, -38.425556, -38.68),
Cidade = c("FSA", "Cruz", "SSA", "ALAG", "SFC")) %>%
dplyr::mutate(group = 1)
p1 <- ggplot2::ggplot(df_map) +
geom_polygon(data = shape, aes(x = long,
y = lat,
group = group),
fill = "white",
col = "black") +
geom_point(aes(x = long,
y = lat),
size = 3,
col = "blue")
# Criando poligono
lat_map <- df_map$lat
long_map <- df_map$long
cidade <- df_map$Cidade
p2 <- ggplot2::ggplot(df_map) +
geom_polygon(data = shape,
aes(x = long,
y = lat,
group = group),
fill = "gray",
col = "black") +
geom_point(aes(x = long,
y = lat),
size = 3,
col = "blue") +
coord_map(xlim = c(max(df_map$long + 0.5), min(df_map$long - 0.5)),
ylim = c(max(df_map$lat + 0.5), min((df_map$lat - 0.5)))) +
annotate("text", x = long_map + 0.05, y = lat_map + 0.05, label = cidade,
fontface = "bold", size = 5)
p1
p2
levels(df_map$Cidade) <- c("Alagoinhas",
"Cruz das Almas",
"Feira de Santana",
"São Francisco do Conde",
"Salvador")
# https://gis.stackexchange.com/questions/225102/calculate-distance-between-points-and-nearest-polygon-in-r
# https://gis.stackexchange.com/questions/212879/what-are-the-coordinates-on-my-rectangle-of-the-closest-point-to-a-polygon-in-r
# https://stackoverflow.com/questions/51837454/r-measuring-distance-from-a-coastline
df_map <- df_map %>%
dplyr::select(long, lat)
df_map <- data.frame(Cidades = c("Alagoinhas x São Francisco do Conde",
"Cruz das Almas x São Francisco do Conde",
"Feira de Santana x São Francisco do Conde",
"Salvador x São Francisco do Conde"),
Distancias_km = c(geosphere::distGeo(df_map[4, ], df_map[5, ]),
geosphere::distGeo(df_map[2, ], df_map[5, ]),
geosphere::distGeo(df_map[1, ], df_map[5, ]),
geosphere::distGeo(df_map[3, ], df_map[5, ]))) %>%
dplyr::mutate(Distancias_km = round((Distancias_km / 1000), 2))
modeling <- met_data %>%
dplyr::mutate(Dia = as.Date(date)) %>%
na.omit() %>%
dplyr::full_join(Data_real %>%
dplyr::filter(is.na(mm_chuva)) %>%
dplyr::mutate(mm_chuva = "xx"),
by = "Dia") %>%
dplyr::select(-date) %>%
dplyr::mutate(mm_chuva = ifelse(mm_chuva == "xx", 1, NA)) %>% #
replace(is.na(.), 0) %>% #
dplyr::filter(mm_chuva == 0) %>%
dplyr::select(-mm_chuva)
modeling_EA <- modeling %>%
dplyr::left_join(Data_real, by = "Dia") %>%
dplyr::rename("SFC" = mm_chuva)
# Tema base para plots
tema <- theme_bw() +
theme(axis.title = element_text(color = "black", size = 11, face = "bold"),
axis.text.x = element_text(color = "black", size = 11, face = "bold"),
axis.text.y = element_text(color = "black", size = 11, face = "bold"),
legend.text = element_text(color = "black", size = 8, face = "bold"),
legend.title = element_text(color = "black", size = 9, face = "bold"),
plot.title = element_text(color = "black", size = 14, face = "bold"))
# modeling_EA %>%
# dplyr::rename("São Francisco do Conde" = SFC) %>%
# reshape2::melt(id.vars = "Dia") %>%
# dplyr::rename("Cidade" = variable,
# "Precipitacao" = value) %>%
# ggplot2::ggplot() +
# geom_line(aes(x = Dia, y = Precipitacao, col = Cidade),
# size = 1.2, alpha = 0.4) +
# ylab("Precipitação") +
# tema
#
# modeling_EA %>%
# reshape2::melt(id.vars = c("Dia", "SFC")) %>%
# dplyr::rename("São Francisco do Conde" = SFC) %>%
# dplyr::rename("Cidade" = variable,
# "Precipitacao" = value) %>%
# ggplot2::ggplot() +
# geom_line(aes(x = Dia, y = `São Francisco do Conde`, col = "S. Franc. Conde"),
# size = 0.8, alpha = 0.6, col = "black") +
# geom_line(aes(x = Dia, y = Precipitacao, col = Cidade),
# size = 1.2, alpha = 0.4) +
# facet_wrap(~Cidade) +
# ylab("Precipitação") +
# tema
#
# modeling_EA %>%
# reshape2::melt(id.vars = c("Dia", "SFC")) %>%
# dplyr::rename("São Francisco do Conde" = SFC) %>%
# dplyr::rename("Cidade" = variable,
# "Precipitacao" = value) %>%
# ggplot2::ggplot() +
# geom_point(aes(x = Precipitacao, y = `São Francisco do Conde`, col = Cidade),
# alpha = 0.6) +
# facet_wrap(~Cidade) +
# ylab("Precipitação") +
# tema
modeling_EA %>%
reshape2::melt(id.vars = c("Dia", "SFC")) %>%
dplyr::rename("São Francisco do Conde" = SFC) %>%
dplyr::rename("Cidade" = variable,
"Precipitacao" = value) %>%
ggplot2::ggplot() +
geom_boxplot(aes(x = Cidade, y = Precipitacao, col = Cidade)) +
ylab("Precipitação") +
tema + theme(axis.text.x = element_text(angle = 90))
K <- round(1 + 3.322 * log10(nrow(modeling)))
modeling_EA %>%
reshape2::melt(id.vars = c("Dia", "SFC")) %>%
dplyr::rename("São Francisco do Conde" = SFC) %>%
dplyr::rename("Cidade" = variable,
"Precipitacao" = value) %>%
ggplot2::ggplot() +
geom_histogram(aes(x = Precipitacao,
y = ..density..),
binwidth = .5,
bins = 9,
colour = "black",
fill = "white") +
geom_density(aes(x = Precipitacao),
alpha = .2,
fill = "#FF6666") +
facet_wrap(~Cidade) +
ylab("Precipitação") +
tema
### IDW
Para verificação do idw, seis combinações de estações metereológicas serão testadas, sendo:
* Todas as estações; * Apenas as estações de Salvador e Cruz das Almas (mesamas classificações climatológicas);
# stations
# 1 = Alagoinhas
# 2 = Cruz
# 3 = FSA
# 4 = SSA
combinat <- base::expand.grid(1:4, 1:4, 1:4) %>%
dplyr::mutate(check = ifelse(Var1 == Var2 | Var1 == Var3 |
Var2 == Var3 | Var3 == Var2 |
Var2 == Var1 | Var3 == Var1,
NA, "--")) %>% na.omit() %>%
dplyr::mutate(check = rowSums(.[1:3])) %>%
dplyr::distinct(check, .keep_all = T) %>%
dplyr::select(-check)
for (i in 1:length(stations)) {
combinat[combinat == i] <- stations[i]
}
# Preparando dataset para validacao do modelo idw
# Data para validacção do método idw
modeling <- modeling
Para facilitar a manipulação de dados, e como a cross-validation será com \(k = 5\) (número de folds), o dataset de modelagem será particionado em cinco partes aleatórias.
modeling <- modeling %>%
dplyr::mutate(id = 1:nrow(modeling)) # atribuir coluna de identificacao
id <- modeling$id
{
nfold <- 5
set.seed(1)
aleat_cv <- caret::createFolds(id, k = nfold, # criar folds para cv
list = TRUE, returnTrain = FALSE)
}
#
aleat_cv_ <- list()
for(i in 1:nfold){
df_x <- data.frame(id = aleat_cv[[i]]) # extrair folds
df_x <- df_x %>%
dplyr::mutate(cod = rep("cv", nrow(df_x))) # Validação do id no proximo 'for'
aleat_cv_[[paste0("aleat_cv_", i)]] <- df_x
}
#
modeling_cv <- list()
for(i in 1:nfold){
modeling_cv_ <- modeling %>%
dplyr::full_join(aleat_cv_[[i]]) %>% # validação do id
na.omit() %>%
dplyr::select(-id, -cod) %>%
reshape2::melt(id.vars = "Dia") %>%
dplyr::mutate(Latitude = ifelse(variable == "Feira de Santana",
-12.196111,
ifelse(variable == "Cruz das Almas",
-12.666667,
ifelse(variable == "Salvador",
-13.005278,
ifelse(variable == "Alagoinhas",
-12.148333, "Erro")))),
Longitude = ifelse(variable == "Feira de Santana",
-38.967222,
ifelse(variable == "Cruz das Almas",
-39.083333,
ifelse(variable == "Salvador",
-38.505833,
ifelse(variable == "Alagoinhas",
-38.425556, "Erro"))))) %>%
dplyr::select(-variable) %>%
dplyr::rename("Prec" = value) %>%
dplyr::mutate(Latitude = as.numeric(Latitude),
Longitude = as.numeric(Longitude))
#
modeling_cv[[paste0("cv", i)]] <- modeling_cv_
}
# head(modeling_cv$cv1, 3)
#### Cross-Validation do IDW e Estimativas para Quatro Estações
modeling_cv1_ <- modeling_cv1 <- data.frame()
modeling_cv2_ <- modeling_cv2 <- data.frame()
modeling_cv3_ <- modeling_cv3 <- data.frame()
modeling_cv4_ <- modeling_cv4 <- data.frame()
modeling_cv5_ <- modeling_cv5 <- data.frame()
for (i in 1:length(modeling_cv)) {
df <- modeling_cv[[i]]
assign(paste0("modeling_cv",i), df)
assign(paste0("modeling_cv",i,"_"), df)
}
# Ajustando a localização de acordo com a projetação Darum WGS - 84
for(i in 1:nfold){
sp::coordinates(modeling_cv[[i]]) <- ~Latitude+Longitude
sp::proj4string(modeling_cv[[i]]) <- sp::CRS("+proj=longlat +datum=WGS84")
}
dplyr::glimpse(modeling_cv[[1]])
## Formal class 'SpatialPointsDataFrame' [package "sp"] with 5 slots
## ..@ data :'data.frame': 200 obs. of 2 variables:
## .. ..$ Dia : Date[1:200], format: "2011-01-05" ...
## .. ..$ Prec: num [1:200] 34.3 0 0 0 0 0 0 0 0 0 ...
## ..@ coords.nrs : int [1:2] 3 4
## ..@ coords : num [1:200, 1:2] -12.2 -12.2 -12.2 -12.2 -12.2 ...
## .. ..- attr(*, "dimnames")=List of 2
## ..@ bbox : num [1:2, 1:2] -13 -39.1 -12.1 -38.4
## .. ..- attr(*, "dimnames")=List of 2
## ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot
# São Francisco do Conde
# Lozalização
SFC_lati <- (-12.627778)
SFC_longe <- (-38.68)
df_SFC <- data.frame(SFC_lati, SFC_longe)
sp::coordinates(df_SFC) <- ~SFC_lati+SFC_longe
sp::proj4string(df_SFC) <- sp::CRS("+proj=longlat +datum=WGS84")
#
idws <- list()
passo <- 0.5
idp_b <- seq(0.5, 4, passo) # potência do idw variando de 0.5 à 4 com passo de 0.5
# Variação de informação entre os folds!
Dia_1 <- modeling_cv1_$Dia
Dia_2 <- modeling_cv2_$Dia
Dia_3 <- modeling_cv3_$Dia
Dia_4 <- modeling_cv4_$Dia
Dia_5 <- modeling_cv5_$Dia
df <- list(Dia_1, Dia_2, Dia_3, Dia_4, Dia_5)
lvl <- list()
for (i in 1:length(df)) {
dfx <- as.data.frame(table(df[[i]]))
dfx <- unique(dfx$Freq)
lvl[[paste0("fold",i)]] <- dfx
}
#
dfx <- c()
for (i in 1:nfold) {
dfx[i] <- length(df[[i]]) / (lvl[[i]]) # lvl => level de datas
}
#
for(i in 1:length(idp_b)) { # Variacao da potencia
for(x in 1:nfold){ # Variacao dos folds
for(j in 1:(dfx[[x]])) { # Regularizacao do tamanho dos folds
modeling_cv_r <- modeling_cv[[x]] %>% # x # list => Objeto class
# SpatialPointsDataFrame
dplyr::filter(Dia == Dia[j]) # j # Filter proporcionado pelo pacote spdplyr
idw_ = gstat::idw(formula = modeling_cv_r$Prec ~ 1,
locations = modeling_cv_r,
newdata = df_SFC, idp = idp_b[i]) # i
idws[[paste0("cv",x,"_pot",i, "_Dia", j)]] <- idw_$var1.pred
}
}
}
# Transformando uma lista em um tibble "pré-data frame"
idws <- tibble::enframe(idws)
# Ajustando data frame
idws <- idws %>%
tidyr::separate(name, into = c("x1", "x"), sep = 2) %>%
tidyr::separate(x, into = c("Fold", "x"), sep = 1) %>%
tidyr::separate(x, into = c("x", "x2"), sep = 4) %>%
tidyr::separate(x2, into = c("Potencia", "x2"), sep = 2) %>%
tidyr::separate(x2, into = c("x2", "Dia"), sep = 3) %>%
dplyr::rename("Pred_Precipitacao" = value) %>%
dplyr::select(-x, -x1, -x2) %>%
as.data.frame() %>%
dplyr::mutate(Pred_Precipitacao = as.numeric(Pred_Precipitacao),
Pred_Precipitacao = round(Pred_Precipitacao, 2))
idws$Potencia <- as.numeric(base::gsub("([0-9]+).*$", "\\1", idws$Potencia))
idws$Dia <- as.numeric(gsub("a", "", idws$Dia))
# View(idws)
#
Fold <- list()
for(i in 1:nfold){
Foldz <- idws %>%
dplyr::filter(Fold == i) # Filtrando valores preditos por fold
Fold[[paste0("cv", i)]] <- Foldz
}
#
idp_bx <- base::gsub(".", "", idp_b, fixed = TRUE)
#
{
{dfx1_05 <- dfx1_1 <- dfx1_15 <- dfx1_2 <- dfx1_25 <- dfx1_3 <- dfx1_35 <- dfx1_4 <-data.frame()}
{dfx2_05 <- dfx2_1 <- dfx2_15 <- dfx2_2 <- dfx2_25 <- dfx2_3 <- dfx2_35 <- dfx2_4 <-data.frame()}
{dfx3_05 <- dfx3_1 <- dfx3_15 <- dfx3_2 <- dfx3_25 <- dfx3_3 <- dfx3_35 <- dfx3_4 <-data.frame()}
{dfx4_05 <- dfx4_1 <- dfx4_15 <- dfx4_2 <- dfx4_25 <- dfx4_3 <- dfx4_35 <- dfx4_4 <-data.frame()}
{dfx5_05 <- dfx5_1 <- dfx5_15 <- dfx5_2 <- dfx5_25 <- dfx5_3 <- dfx5_35 <- dfx5_4 <-data.frame()}
}
Foldx <- list()
for(j in 1:nfold){
for(i in 1:length(idp_b)){
Foldz <- Fold[[j]]
pot <- Foldz %>%
dplyr::filter(Potencia == i) # Filtrando valores preditos por potência em
# em cada fold.
Foldx[[paste0("cvF",j,"_pot", i * passo)]] <- pot
}
}
dfx1_050 <- Foldx[["cvF1_pot0.5"]]
# head(dfx1_050, 7)
# View(dfx1_050)
#
# Logo:
for(j in 1:nfold){
for(i in 1:length(idp_b)){
Foldz <- Fold[[j]]
pot <- Foldz %>%
dplyr::filter(Potencia == i) # Filtrando valores preditos por potência em
# em cada fold.
assign(paste0("dfx",j,"_",idp_bx[i]), pot)
}
}
# table(modeling_cv1_$Dia)
dfx1 <- modeling_cv1_ %>%
dplyr::slice(1:(nrow(modeling_cv1_) / lvl[[1]] )) %>%
dplyr::select(Dia) %>%
dplyr::left_join(Data_real, by = "Dia") %>% # Dados observados
dplyr::rename("Chuva_real" = mm_chuva) %>% # Chuva_real => Precipitação em SFC
dplyr::mutate(dfx1_050 = dfx1_05$Pred_Precipitacao,
dfx1_100 = dfx1_1$Pred_Precipitacao,
dfx1_150 = dfx1_15$Pred_Precipitacao,
dfx1_200 = dfx1_2$Pred_Precipitacao,
dfx1_250 = dfx1_25$Pred_Precipitacao,
dfx1_300 = dfx1_3$Pred_Precipitacao,
dfx1_350 = dfx1_35$Pred_Precipitacao,
dfx1_400 = dfx1_4$Pred_Precipitacao)
dfx2 <- modeling_cv2_ %>%
dplyr::slice(1:(nrow(modeling_cv2_) / lvl[[2]] )) %>%
dplyr::select(Dia) %>%
dplyr::left_join(Data_real, by = "Dia") %>% # Dados observados
dplyr::rename("Chuva_real" = mm_chuva) %>% # Chuva_real => Precipitação em SFC
dplyr::mutate(dfx2_050 = dfx2_05$Pred_Precipitacao,
dfx2_100 = dfx2_1$Pred_Precipitacao,
dfx2_150 = dfx2_15$Pred_Precipitacao,
dfx2_200 = dfx2_2$Pred_Precipitacao,
dfx2_250 = dfx2_25$Pred_Precipitacao,
dfx2_300 = dfx2_3$Pred_Precipitacao,
dfx2_350 = dfx2_35$Pred_Precipitacao,
dfx2_400 = dfx2_4$Pred_Precipitacao)
dfx3 <- modeling_cv3_ %>%
dplyr::slice(1:(nrow(modeling_cv3_) / lvl[[3]] )) %>%
dplyr::select(Dia) %>%
dplyr::left_join(Data_real, by = "Dia") %>% # Dados observados
dplyr::rename("Chuva_real" = mm_chuva) %>% # Chuva_real => Precipitação em SFC
dplyr::mutate(dfx3_050 = dfx3_05$Pred_Precipitacao,
dfx3_100 = dfx3_1$Pred_Precipitacao,
dfx3_150 = dfx3_15$Pred_Precipitacao,
dfx3_200 = dfx3_2$Pred_Precipitacao,
dfx3_250 = dfx3_25$Pred_Precipitacao,
dfx3_300 = dfx3_3$Pred_Precipitacao,
dfx3_350 = dfx3_35$Pred_Precipitacao,
dfx3_400 = dfx3_4$Pred_Precipitacao)
dfx4 <- modeling_cv4_ %>%
dplyr::slice(1:(nrow(modeling_cv4_) / lvl[[4]] )) %>%
dplyr::select(Dia) %>%
dplyr::left_join(Data_real, by = "Dia") %>% # Dados observados
dplyr::rename("Chuva_real" = mm_chuva) %>% # Chuva_real => Precipitação em SFC
dplyr::mutate(dfx4_050 = dfx4_05$Pred_Precipitacao,
dfx4_100 = dfx4_1$Pred_Precipitacao,
dfx4_150 = dfx4_15$Pred_Precipitacao,
dfx4_200 = dfx4_2$Pred_Precipitacao,
dfx4_250 = dfx4_25$Pred_Precipitacao,
dfx4_300 = dfx4_3$Pred_Precipitacao,
dfx4_350 = dfx4_35$Pred_Precipitacao,
dfx4_400 = dfx4_4$Pred_Precipitacao)
dfx5 <- modeling_cv5_ %>%
dplyr::slice(1:(nrow(modeling_cv5_) / lvl[[5]] )) %>%
dplyr::select(Dia) %>%
dplyr::left_join(Data_real, by = "Dia") %>% # Dados observados
dplyr::rename("Chuva_real" = mm_chuva) %>% # Chuva_real => Precipitação em SFC
dplyr::mutate(dfx5_050 = dfx5_05$Pred_Precipitacao,
dfx5_100 = dfx5_1$Pred_Precipitacao,
dfx5_150 = dfx5_15$Pred_Precipitacao,
dfx5_200 = dfx5_2$Pred_Precipitacao,
dfx5_250 = dfx5_25$Pred_Precipitacao,
dfx5_300 = dfx5_3$Pred_Precipitacao,
dfx5_350 = dfx5_35$Pred_Precipitacao,
dfx5_400 = dfx5_4$Pred_Precipitacao)
Realizado a manipulação de dados para o fold, serão calculados as métricas \(RMSE\) e \(MAE\) para posterior tomada de decisão. Para tal, o pacote Metrics
será utilizado a fim de facilitar a programação.
erro_fold1 <- dfx1 %>%
dplyr::mutate(b050_rmse = Metrics::rmse(dfx1$Chuva_real, dfx1$dfx1_050),
b100_rmse = Metrics::rmse(dfx1$Chuva_real, dfx1$dfx1_100),
b150_rmse = Metrics::rmse(dfx1$Chuva_real, dfx1$dfx1_150),
b200_rmse = Metrics::rmse(dfx1$Chuva_real, dfx1$dfx1_200),
b250_rmse = Metrics::rmse(dfx1$Chuva_real, dfx1$dfx1_250),
b300_rmse = Metrics::rmse(dfx1$Chuva_real, dfx1$dfx1_300),
b350_rmse = Metrics::rmse(dfx1$Chuva_real, dfx1$dfx1_350),
b400_rmse = Metrics::rmse(dfx1$Chuva_real, dfx1$dfx1_400),
b050_mae = Metrics::mae(dfx1$Chuva_real, dfx1$dfx1_050),
b100_mae = Metrics::mae(dfx1$Chuva_real, dfx1$dfx1_100),
b150_mae = Metrics::mae(dfx1$Chuva_real, dfx1$dfx1_150),
b200_mae = Metrics::mae(dfx1$Chuva_real, dfx1$dfx1_200),
b250_mae = Metrics::mae(dfx1$Chuva_real, dfx1$dfx1_250),
b300_mae = Metrics::mae(dfx1$Chuva_real, dfx1$dfx1_300),
b350_mae = Metrics::mae(dfx1$Chuva_real, dfx1$dfx1_350),
b400_mae = Metrics::mae(dfx1$Chuva_real, dfx1$dfx1_400)) %>%
dplyr::select(b050_rmse, b100_rmse, b150_rmse, b200_rmse,
b250_rmse, b300_rmse, b350_rmse, b400_rmse,
b050_mae, b100_mae, b150_mae, b200_mae,
b250_mae, b300_mae, b350_mae, b400_mae) %>%
dplyr::slice(1) %>%
reshape2::melt() %>%
dplyr::rename("IDW" = variable,
"Valor" = value) %>%
tidyr::separate(IDW, c("IDW", "Erro"))
# levels(erro_fold1$IDW)
erro_fold1$IDW <- as.factor(erro_fold1$IDW)
levels(erro_fold1$IDW) <- c("b = 0.50", "b = 1.00", "b = 1.50", "b = 2.00",
"b = 2.50", "b = 3.00", "b = 3.50", "b = 4.00")
# levels(erro_fold1$Erro)
erro_fold1$Erro <- as.factor(erro_fold1$Erro)
levels(erro_fold1$Erro) <- c("MAE", "RMSE")
erro_fold1 %>%
ggplot2::ggplot() +
geom_bar(aes(x = IDW,
y = Valor,
fill = IDW),
col = "black",
stat = "identity",
show.legend = F) +
facet_wrap(~ Erro, scales = "free") +
scale_fill_viridis_d() +
guides(fill = guide_legend(title = "IDW")) +
ggtitle("Fold 01") +
xlab("Potência `b`") + ylab("RMSE") +
tema + theme(axis.text.x = element_text(angle = 90))
erro_fold1x <- erro_fold1 %>%
tidyr::spread(Erro, Valor)
p1 <- erro_fold1x %>%
ggplot2::ggplot() +
geom_point(aes(x = RMSE,
y = MAE,
fill = IDW),
col = "black",
size = 3,
stroke = 1.2,
shape = 22) +
scale_fill_viridis_d() +
guides(fill = guide_legend(title = "IDW")) +
ggtitle("Fold 01") +
xlab("RMSE") + ylab("MAE") +
tema
erro_fold2 <- dfx2 %>%
dplyr::mutate(b050_rmse = Metrics::rmse(dfx2$Chuva_real, dfx2$dfx2_050),
b100_rmse = Metrics::rmse(dfx2$Chuva_real, dfx2$dfx2_100),
b150_rmse = Metrics::rmse(dfx2$Chuva_real, dfx2$dfx2_150),
b200_rmse = Metrics::rmse(dfx2$Chuva_real, dfx2$dfx2_200),
b250_rmse = Metrics::rmse(dfx2$Chuva_real, dfx2$dfx2_250),
b300_rmse = Metrics::rmse(dfx2$Chuva_real, dfx2$dfx2_300),
b350_rmse = Metrics::rmse(dfx2$Chuva_real, dfx2$dfx2_350),
b400_rmse = Metrics::rmse(dfx2$Chuva_real, dfx2$dfx2_400),
b050_mae = Metrics::mae(dfx2$Chuva_real, dfx2$dfx2_050),
b100_mae = Metrics::mae(dfx2$Chuva_real, dfx2$dfx2_100),
b150_mae = Metrics::mae(dfx2$Chuva_real, dfx2$dfx2_150),
b200_mae = Metrics::mae(dfx2$Chuva_real, dfx2$dfx2_200),
b250_mae = Metrics::mae(dfx2$Chuva_real, dfx2$dfx2_250),
b300_mae = Metrics::mae(dfx2$Chuva_real, dfx2$dfx2_300),
b350_mae = Metrics::mae(dfx2$Chuva_real, dfx2$dfx2_350),
b400_mae = Metrics::mae(dfx2$Chuva_real, dfx2$dfx2_400)) %>%
dplyr::select(b050_rmse, b100_rmse, b150_rmse, b200_rmse,
b250_rmse, b300_rmse, b350_rmse, b400_rmse,
b050_mae, b100_mae, b150_mae, b200_mae,
b250_mae, b300_mae, b350_mae, b400_mae) %>%
dplyr::slice(1) %>%
reshape2::melt() %>%
dplyr::rename("IDW" = variable,
"Valor" = value) %>%
tidyr::separate(IDW, c("IDW", "Erro"))
erro_fold2$IDW <- as.factor(erro_fold2$IDW)
levels(erro_fold2$IDW) <- c("b = 0.50", "b = 1.00", "b = 1.50", "b = 2.00",
"b = 2.50", "b = 3.00", "b = 3.50", "b = 4.00")
erro_fold2$Erro <- as.factor(erro_fold2$Erro)
levels(erro_fold2$Erro) <- c("MAE", "RMSE")
erro_fold2x <- erro_fold2 %>%
tidyr::spread(Erro, Valor)
p2 <- erro_fold2x %>%
ggplot2::ggplot() +
geom_point(aes(x = RMSE,
y = MAE,
fill = IDW),
col = "black",
size = 3,
stroke = 1.2,
shape = 22) +
scale_fill_viridis_d() +
guides(fill = guide_legend(title = "IDW")) +
ggtitle("Fold 02") +
xlab("RMSE") + ylab("MAE") +
tema + theme(legend.position = "bottom")
#
erro_fold3 <- dfx3 %>%
dplyr::mutate(b050_rmse = Metrics::rmse(dfx3$Chuva_real, dfx3$dfx3_050),
b100_rmse = Metrics::rmse(dfx3$Chuva_real, dfx3$dfx3_100),
b150_rmse = Metrics::rmse(dfx3$Chuva_real, dfx3$dfx3_150),
b200_rmse = Metrics::rmse(dfx3$Chuva_real, dfx3$dfx3_200),
b250_rmse = Metrics::rmse(dfx3$Chuva_real, dfx3$dfx3_250),
b300_rmse = Metrics::rmse(dfx3$Chuva_real, dfx3$dfx3_300),
b350_rmse = Metrics::rmse(dfx3$Chuva_real, dfx3$dfx3_350),
b400_rmse = Metrics::rmse(dfx3$Chuva_real, dfx3$dfx3_400),
b050_mae = Metrics::mae(dfx3$Chuva_real, dfx3$dfx3_050),
b100_mae = Metrics::mae(dfx3$Chuva_real, dfx3$dfx3_100),
b150_mae = Metrics::mae(dfx3$Chuva_real, dfx3$dfx3_150),
b200_mae = Metrics::mae(dfx3$Chuva_real, dfx3$dfx3_200),
b250_mae = Metrics::mae(dfx3$Chuva_real, dfx3$dfx3_250),
b300_mae = Metrics::mae(dfx3$Chuva_real, dfx3$dfx3_300),
b350_mae = Metrics::mae(dfx3$Chuva_real, dfx3$dfx3_350),
b400_mae = Metrics::mae(dfx3$Chuva_real, dfx3$dfx3_400)) %>%
dplyr::select(b050_rmse, b100_rmse, b150_rmse, b200_rmse,
b250_rmse, b300_rmse, b350_rmse, b400_rmse,
b050_mae, b100_mae, b150_mae, b200_mae,
b250_mae, b300_mae, b350_mae, b400_mae) %>%
dplyr::slice(1) %>%
reshape2::melt() %>%
dplyr::rename("IDW" = variable,
"Valor" = value) %>%
tidyr::separate(IDW, c("IDW", "Erro"))
erro_fold3$IDW <- as.factor(erro_fold3$IDW)
levels(erro_fold3$IDW) <- c("b = 0.50", "b = 1.00", "b = 1.50", "b = 2.00",
"b = 2.50", "b = 3.00", "b = 3.50", "b = 4.00")
erro_fold3$Erro <- as.factor(erro_fold3$Erro)
levels(erro_fold3$Erro) <- c("MAE", "RMSE")
erro_fold3x <- erro_fold3 %>%
tidyr::spread(Erro, Valor)
p3 <- erro_fold3x %>%
ggplot2::ggplot() +
geom_point(aes(x = RMSE,
y = MAE,
fill = IDW),
col = "black",
size = 3,
stroke = 1.2,
shape = 22) +
scale_fill_viridis_d() +
guides(fill = guide_legend(title = "IDW")) +
ggtitle("Fold 03") +
xlab("RMSE") + ylab("MAE") +
tema + theme(legend.position = "bottom")
#
erro_fold4 <- dfx4 %>%
dplyr::mutate(b050_rmse = Metrics::rmse(dfx4$Chuva_real, dfx4$dfx4_050),
b100_rmse = Metrics::rmse(dfx4$Chuva_real, dfx4$dfx4_100),
b150_rmse = Metrics::rmse(dfx4$Chuva_real, dfx4$dfx4_150),
b200_rmse = Metrics::rmse(dfx4$Chuva_real, dfx4$dfx4_200),
b250_rmse = Metrics::rmse(dfx4$Chuva_real, dfx4$dfx4_250),
b300_rmse = Metrics::rmse(dfx4$Chuva_real, dfx4$dfx4_300),
b350_rmse = Metrics::rmse(dfx4$Chuva_real, dfx4$dfx4_350),
b400_rmse = Metrics::rmse(dfx4$Chuva_real, dfx4$dfx4_400),
b050_mae = Metrics::mae(dfx4$Chuva_real, dfx4$dfx4_050),
b100_mae = Metrics::mae(dfx4$Chuva_real, dfx4$dfx4_100),
b150_mae = Metrics::mae(dfx4$Chuva_real, dfx4$dfx4_150),
b200_mae = Metrics::mae(dfx4$Chuva_real, dfx4$dfx4_200),
b250_mae = Metrics::mae(dfx4$Chuva_real, dfx4$dfx4_250),
b300_mae = Metrics::mae(dfx4$Chuva_real, dfx4$dfx4_300),
b350_mae = Metrics::mae(dfx4$Chuva_real, dfx4$dfx4_350),
b400_mae = Metrics::mae(dfx4$Chuva_real, dfx4$dfx4_400)) %>%
dplyr::select(b050_rmse, b100_rmse, b150_rmse, b200_rmse,
b250_rmse, b300_rmse, b350_rmse, b400_rmse,
b050_mae, b100_mae, b150_mae, b200_mae,
b250_mae, b300_mae, b350_mae, b400_mae) %>%
dplyr::slice(1) %>%
reshape2::melt() %>%
dplyr::rename("IDW" = variable,
"Valor" = value) %>%
tidyr::separate(IDW, c("IDW", "Erro"))
erro_fold4$IDW <- as.factor(erro_fold4$IDW)
levels(erro_fold4$IDW) <- c("b = 0.50", "b = 1.00", "b = 1.50", "b = 2.00",
"b = 2.50", "b = 3.00", "b = 3.50", "b = 4.00")
erro_fold4$Erro <- as.factor(erro_fold4$Erro)
levels(erro_fold4$Erro) <- c("MAE", "RMSE")
erro_fold4x <- erro_fold4 %>%
tidyr::spread(Erro, Valor)
p4 <- erro_fold4x %>%
ggplot2::ggplot() +
geom_point(aes(x = RMSE,
y = MAE,
fill = IDW),
col = "black",
size = 3,
stroke = 1.2,
shape = 22) +
scale_fill_viridis_d() +
guides(fill = guide_legend(title = "IDW")) +
ggtitle("Fold 04") +
xlab("RMSE") + ylab("MAE") +
tema + theme(legend.position = "bottom")
#
erro_fold5 <- dfx5 %>%
dplyr::mutate(b050_rmse = Metrics::rmse(dfx5$Chuva_real, dfx5$dfx5_050),
b100_rmse = Metrics::rmse(dfx5$Chuva_real, dfx5$dfx5_100),
b150_rmse = Metrics::rmse(dfx5$Chuva_real, dfx5$dfx5_150),
b200_rmse = Metrics::rmse(dfx5$Chuva_real, dfx5$dfx5_200),
b250_rmse = Metrics::rmse(dfx5$Chuva_real, dfx5$dfx5_250),
b300_rmse = Metrics::rmse(dfx5$Chuva_real, dfx5$dfx5_300),
b350_rmse = Metrics::rmse(dfx5$Chuva_real, dfx5$dfx5_350),
b400_rmse = Metrics::rmse(dfx5$Chuva_real, dfx5$dfx5_400),
b050_mae = Metrics::mae(dfx5$Chuva_real, dfx5$dfx5_050),
b100_mae = Metrics::mae(dfx5$Chuva_real, dfx5$dfx5_100),
b150_mae = Metrics::mae(dfx5$Chuva_real, dfx5$dfx5_150),
b200_mae = Metrics::mae(dfx5$Chuva_real, dfx5$dfx5_200),
b250_mae = Metrics::mae(dfx5$Chuva_real, dfx5$dfx5_250),
b300_mae = Metrics::mae(dfx5$Chuva_real, dfx5$dfx5_300),
b350_mae = Metrics::mae(dfx5$Chuva_real, dfx5$dfx5_350),
b400_mae = Metrics::mae(dfx5$Chuva_real, dfx5$dfx5_400)) %>%
dplyr::select(b050_rmse, b100_rmse, b150_rmse, b200_rmse,
b250_rmse, b300_rmse, b350_rmse, b400_rmse,
b050_mae, b100_mae, b150_mae, b200_mae,
b250_mae, b300_mae, b350_mae, b400_mae) %>%
dplyr::slice(1) %>%
reshape2::melt() %>%
dplyr::rename("IDW" = variable,
"Valor" = value) %>%
tidyr::separate(IDW, c("IDW", "Erro"))
erro_fold5$IDW <- as.factor(erro_fold5$IDW)
levels(erro_fold5$IDW) <- c("b = 0.50", "b = 1.00", "b = 1.50", "b = 2.00",
"b = 2.50", "b = 3.00", "b = 3.50", "b = 4.00")
erro_fold5$Erro <- as.factor(erro_fold5$Erro)
levels(erro_fold5$Erro) <- c("MAE", "RMSE")
erro_fold5x <- erro_fold5 %>%
tidyr::spread(Erro, Valor)
p5 <- erro_fold5x %>%
ggplot2::ggplot() +
geom_point(aes(x = RMSE,
y = MAE,
fill = IDW),
col = "black",
size = 3,
stroke = 1.2,
shape = 22) +
scale_fill_viridis_d() +
guides(fill = guide_legend(title = "IDW")) +
ggtitle("Fold 05") +
xlab("RMSE") + ylab("MAE") +
tema + theme(legend.position = "bottom")
xplot <- cowplot::plot_grid(p1 + theme(legend.position = "none"),
p2 + theme(legend.position = "none"),
p3 + theme(legend.position = "none"),
p4 + theme(legend.position = "none"),
p5 + theme(legend.position = "none"),
align = 'vh',
hjust = -1, nrow = 1)
legend <- cowplot::get_legend(p1)
xplot <- cowplot::plot_grid(xplot, legend, rel_widths = c(5, .3), nrow = 1)
shiny::div(plotly::ggplotly(xplot), align = "center")
erro_fold <- erro_fold1 %>%
dplyr::rename("CV1" = Valor) %>%
dplyr::mutate(CV2 = erro_fold2$Valor,
CV3 = erro_fold3$Valor,
CV4 = erro_fold4$Valor,
CV5 = erro_fold5$Valor) %>%
reshape2::melt(id.vars = c("IDW", "Erro")) %>%
dplyr::rename("Fold" = variable,
"Valor" = value)
plotx <- erro_fold %>%
ggplot2::ggplot() +
geom_line(aes(x = Fold,
y = Valor,
col = IDW,
group = IDW)) +
facet_wrap(~Erro, scales = "free") +
ggtitle("Erros na Validação Cruzada") +
xlab("Folds") + ylab("Valor dos Erros") +
tema
shiny::div(plotly::ggplotly(plotx), align = "center")
Autor:
Brenner Silva;
****