library(readr)
dacc_daily_tmin <- read_csv("data/dacc-daily-tmin.csv",
col_types = cols(X1 = col_skip(), date = col_date(format = "%Y-%m-%d")))
#View(dacc_daily_tmin)
estaciones <- c("junin","tunuyan","agua_amarga","las_paredes","la_llave")
get_dataset_for_regression <- function(station_name)
{
if(is.null(station_name) | !(station_name %in% estaciones) ) stop("station_name must be a valid name from estaciones")
junin <- dacc_daily_tmin %>%
select(starts_with(station_name)) %>%
rename_with( ~ tolower(gsub(paste0(station_name,"."), "", .x, fixed = TRUE)))
# quita el junin. de los nombres de las columnas
# Quiero predecir la temperatura mÃnima, si es o no es helada.
tmin <- junin %>%
select(temp_min)
colnames(tmin) <- "tmin"
# para regresion
data_tmin <- cbind(junin[1:(nrow(junin)-1),],tmin[2:nrow(tmin),])
return(data_tmin)
}
get_dataset_for_regression("tunuyan") %>% head()
## temp_max humedad_max temp_med radiacion_med humedad_med temp_min humedad_min
## 1 33.41 87 23.20833 249.0000 57.66667 13.65 25
## 2 35.16 83 23.85833 344.9583 54.54167 13.65 27
## 3 29.28 83 23.25417 310.2083 59.20833 17.48 41
## 4 34.57 90 22.92917 347.2917 61.29167 11.47 25
## 5 34.10 85 24.42083 332.4167 61.00000 15.83 4
## 6 32.40 87 22.95000 323.8750 60.50000 15.55 28
## tmin
## 1 13.65
## 2 17.48
## 3 11.47
## 4 15.83
## 5 15.55
## 6 13.21
estacion <- "tunuyan"
dataset <- get_dataset_for_regression(estacion)
Plot de tmin y temp_min
n <- seq(1:nrow(dataset))
plot(n, dataset$tmin, col="blue", pch=20, cex=.9)
points(n, dataset$temp_min, col="red", pch=5, cex=.9)
## split en training y testing
data_split <- initial_split(dataset, prop = 4/5, strata = NULL)
training_data <- training(data_split)
testing_data <- testing(data_split)
rec = recipe(tmin ~ ., data = training_data) %>%
step_center(all_predictors()) %>%
step_scale(all_predictors()) %>%
prep()
library(keras)
##
## Attaching package: 'keras'
## The following object is masked from 'package:yardstick':
##
## get_weights
build_model = function(){
model = keras_model_sequential() %>%
layer_dense(units = 64, activation = "relu", input_shape = ncol(training_data)-1) %>%
layer_dense(units = 64, activation = "relu") %>%
layer_dense(units = 1)
model %>%
compile(loss = "mse",
optimizer = "rmsprop",
metrics = list("mae","mse"))
return(model)
}
model1 = build_model()
summary(model1)
## Model: "sequential"
## ________________________________________________________________________________
## Layer (type) Output Shape Param #
## ================================================================================
## dense_2 (Dense) (None, 64) 512
## ________________________________________________________________________________
## dense_1 (Dense) (None, 64) 4160
## ________________________________________________________________________________
## dense (Dense) (None, 1) 65
## ================================================================================
## Total params: 4,737
## Trainable params: 4,737
## Non-trainable params: 0
## ________________________________________________________________________________
training_baked = bake(rec, training_data, composition = "matrix")
x = subset(training_baked, select = -tmin)
y = training_baked[,"tmin"]
history1 = model1 %>% fit(
x = x,
y = y,
batch_size = 50,
epoch = 100
)
plot(history1)+
labs(title = "Neural Net Metrics")+
theme(strip.placement = "outside",
strip.text.y = element_text(size = 9),
plot.title = element_text(hjust = 0.5))
## `geom_smooth()` using formula 'y ~ x'
testing_baked = bake(rec,testing_data, composition = "matrix")
xx = subset(testing_baked, select = -tmin)
y_pred = predict(model1, xx)[,1]
add predictions to dataset
predicted = testing_data %>%
mutate(tmin_pred = y_pred,
resid = tmin-tmin_pred)
calcular algunas métricas del ajuste (fit metrics)
mae(predicted, truth = tmin, estimate = tmin_pred)
## # A tibble: 1 x 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 mae standard 1.88
rmse(predicted, truth = tmin, estimate = tmin_pred)
## # A tibble: 1 x 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 rmse standard 2.44
rsq(predicted, truth = tmin, estimate = tmin_pred)
## # A tibble: 1 x 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 rsq standard 0.876
plot the predictions
ggplot(data = predicted, aes(x = tmin))+
geom_abline(intercept = 0, slope = 1, color = "navy")+
geom_point(aes(y = tmin_pred))+
coord_fixed()
ggplot() +
geom_line(data = predicted, aes(x = 1:(nrow(predicted)), y = tmin), color = "blue") +
geom_line(data = predicted, aes(x = 1:(nrow(predicted)), y = tmin_pred), color = "red") +
xlab('Dates') +
ylab('Tmin')
heladas <- predicted %>% select(tmin,tmin_pred) %>% filter(tmin <=0 )
mae(heladas, truth = tmin, estimate = tmin_pred)
## # A tibble: 1 x 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 mae standard 1.72
rmse(heladas, truth = tmin, estimate = tmin_pred)
## # A tibble: 1 x 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 rmse standard 2.31
rsq(heladas, truth = tmin, estimate = tmin_pred)
## # A tibble: 1 x 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 rsq standard 0.537
valor_real <- cut(predicted$tmin,breaks = c(-50,0,50))
valor_predicho <- cut(predicted$tmin_pred,breaks = c(-50,0,50))
table(valor_real,valor_predicho)
## valor_predicho
## valor_real (-50,0] (0,50]
## (-50,0] 253 64
## (0,50] 30 756
pesos <- case_when(
training_data$tmin <= 0 ~ 1,
TRUE ~ 0.01
)
history2 = model1 %>% fit(
x = x,
y = y,
batch_size = 50,
epoch = 100,
sample_weight = pesos
)
plot(history2)+
labs(title = "Neural Net Metrics")+
theme(strip.placement = "outside",
strip.text.y = element_text(size = 9),
plot.title = element_text(hjust = 0.5))
## `geom_smooth()` using formula 'y ~ x'
testing_baked = bake(rec,testing_data, composition = "matrix")
xx = subset(testing_baked, select = -tmin)
y_pred = predict(model1, xx)[,1]
add predictions to dataset
predicted = testing_data %>%
mutate(tmin_pred = y_pred,
resid = tmin-tmin_pred)
calcular algunas métricas del ajuste (fit metrics)
mae(predicted, truth = tmin, estimate = tmin_pred)
## # A tibble: 1 x 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 mae standard 2.73
rmse(predicted, truth = tmin, estimate = tmin_pred)
## # A tibble: 1 x 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 rmse standard 3.62
rsq(predicted, truth = tmin, estimate = tmin_pred)
## # A tibble: 1 x 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 rsq standard 0.808
plot the predictions
ggplot(data = predicted, aes(x = tmin))+
geom_abline(intercept = 0, slope = 1, color = "navy")+
geom_point(aes(y = tmin_pred))+
coord_fixed()
ggplot() +
geom_line(data = predicted, aes(x = 1:(nrow(predicted)), y = tmin), color = "blue") +
geom_line(data = predicted, aes(x = 1:(nrow(predicted)), y = tmin_pred), color = "red") +
xlab('Dates') +
ylab('Tmin')
heladas <- predicted %>% select(tmin,tmin_pred) %>% filter(tmin <=0 )
nrow(heladas)
## [1] 317
mae(heladas, truth = tmin, estimate = tmin_pred)
## # A tibble: 1 x 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 mae standard 1.14
rmse(heladas, truth = tmin, estimate = tmin_pred)
## # A tibble: 1 x 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 rmse standard 1.64
rsq(heladas, truth = tmin, estimate = tmin_pred)
## # A tibble: 1 x 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 rsq standard 0.515
valor_real <- cut(predicted$tmin,breaks = c(-50,0,50))
valor_predicho <- cut(predicted$tmin_pred,breaks = c(-50,0,50))
table(valor_real,valor_predicho)
## valor_predicho
## valor_real (-50,0] (0,50]
## (-50,0] 306 11
## (0,50] 141 645