Dataset

library(readr)
dacc_daily_tmin <- read_csv("data/dacc-daily-tmin.csv", 
    col_types = cols(X1 = col_skip(), date = col_date(format = "%Y-%m-%d")))
#View(dacc_daily_tmin)

Nombres de las estaciones meteorológicas

estaciones <- c("junin","tunuyan","agua_amarga","las_paredes","la_llave")

Función que genera el dataset para clasificación

get_dataset_for_regression <- function(station_name)
{
  
  if(is.null(station_name) | !(station_name %in% estaciones) ) stop("station_name must be a valid name from estaciones")

  junin <- dacc_daily_tmin %>% 
  select(starts_with(station_name)) %>% 
  rename_with( ~ tolower(gsub(paste0(station_name,"."), "", .x, fixed = TRUE))) 
  # quita el junin. de los nombres de las columnas

# Quiero predecir la temperatura mínima, si es o no es helada.

  tmin <- junin %>% 
    select(temp_min) 
  
  colnames(tmin) <- "tmin"
  
  
  
  
  # para regresion
   data_tmin <- cbind(junin[1:(nrow(junin)-1),],tmin[2:nrow(tmin),])
  
  
  
  return(data_tmin)
}
get_dataset_for_regression("tunuyan") %>% head()
##   temp_max humedad_max temp_med radiacion_med humedad_med temp_min humedad_min
## 1    33.41          87 23.20833      249.0000    57.66667    13.65          25
## 2    35.16          83 23.85833      344.9583    54.54167    13.65          27
## 3    29.28          83 23.25417      310.2083    59.20833    17.48          41
## 4    34.57          90 22.92917      347.2917    61.29167    11.47          25
## 5    34.10          85 24.42083      332.4167    61.00000    15.83           4
## 6    32.40          87 22.95000      323.8750    60.50000    15.55          28
##    tmin
## 1 13.65
## 2 17.48
## 3 11.47
## 4 15.83
## 5 15.55
## 6 13.21

Obtener dataset para regresión

estacion <- "tunuyan"
dataset <- get_dataset_for_regression(estacion)

Plot de tmin y temp_min

n <- seq(1:nrow(dataset))
plot(n, dataset$tmin, col="blue", pch=20, cex=.9)
points(n, dataset$temp_min, col="red", pch=5, cex=.9)

## split en training y testing

data_split <- initial_split(dataset, prop = 4/5, strata = NULL)
training_data <- training(data_split)
testing_data <- testing(data_split)

Receta

rec = recipe(tmin ~ ., data = training_data) %>% 
  step_center(all_predictors()) %>% 
  step_scale(all_predictors()) %>% 
  prep()

keras model

library(keras)
## 
## Attaching package: 'keras'
## The following object is masked from 'package:yardstick':
## 
##     get_weights
build_model = function(){
  
model = keras_model_sequential() %>% 
  layer_dense(units = 64, activation = "relu", input_shape = ncol(training_data)-1) %>% 
  layer_dense(units = 64, activation = "relu") %>% 
  layer_dense(units = 1)
  
model %>% 
  compile(loss = "mse",
          optimizer = "rmsprop",
          metrics = list("mae","mse"))

return(model)
}


model1 = build_model()

summary(model1)
## Model: "sequential"
## ________________________________________________________________________________
## Layer (type)                        Output Shape                    Param #     
## ================================================================================
## dense_2 (Dense)                     (None, 64)                      512         
## ________________________________________________________________________________
## dense_1 (Dense)                     (None, 64)                      4160        
## ________________________________________________________________________________
## dense (Dense)                       (None, 1)                       65          
## ================================================================================
## Total params: 4,737
## Trainable params: 4,737
## Non-trainable params: 0
## ________________________________________________________________________________

input and outputs

training_baked = bake(rec, training_data, composition = "matrix")
x = subset(training_baked, select = -tmin)
y = training_baked[,"tmin"]

fit the model

history1 = model1 %>% fit(
  x = x,
  y = y,
  batch_size = 50,
  epoch = 100
)
plot(history1)+
  labs(title = "Neural Net Metrics")+
  theme(strip.placement = "outside",
        strip.text.y = element_text(size = 9),
        plot.title = element_text(hjust = 0.5))
## `geom_smooth()` using formula 'y ~ x'

Predictions

testing_baked = bake(rec,testing_data, composition = "matrix")
xx = subset(testing_baked, select = -tmin)
y_pred = predict(model1, xx)[,1]

add predictions to dataset

predicted = testing_data %>% 
  mutate(tmin_pred = y_pred,
         resid = tmin-tmin_pred)

calcular algunas métricas del ajuste (fit metrics)

mae(predicted, truth = tmin, estimate = tmin_pred)
## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 mae     standard        1.88
rmse(predicted, truth = tmin, estimate = tmin_pred)
## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rmse    standard        2.44
rsq(predicted, truth = tmin, estimate = tmin_pred)
## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rsq     standard       0.876

plot the predictions

ggplot(data = predicted, aes(x = tmin))+
  geom_abline(intercept = 0, slope = 1, color = "navy")+
  geom_point(aes(y = tmin_pred))+
  coord_fixed()

ggplot() + 
  geom_line(data = predicted, aes(x = 1:(nrow(predicted)), y = tmin), color = "blue") +
  geom_line(data = predicted, aes(x = 1:(nrow(predicted)), y = tmin_pred), color = "red") +
  xlab('Dates') +
  ylab('Tmin')

heladas <- predicted %>% select(tmin,tmin_pred) %>% filter(tmin <=0  )
mae(heladas, truth = tmin, estimate = tmin_pred)
## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 mae     standard        1.72
rmse(heladas, truth = tmin, estimate = tmin_pred)
## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rmse    standard        2.31
rsq(heladas, truth = tmin, estimate = tmin_pred)
## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rsq     standard       0.537
valor_real <- cut(predicted$tmin,breaks = c(-50,0,50))
valor_predicho <- cut(predicted$tmin_pred,breaks = c(-50,0,50))
table(valor_real,valor_predicho)
##           valor_predicho
## valor_real (-50,0] (0,50]
##    (-50,0]     253     64
##    (0,50]       30    756

Ahora con entrenamiento dando más peso a los números negativos

fit the model

pesos <- case_when(
          training_data$tmin <= 0 ~ 1,
          TRUE ~ 0.01
        )
history2 = model1 %>% fit(
  x = x,
  y = y,
  batch_size = 50,
  epoch = 100,
  sample_weight = pesos
)
plot(history2)+
  labs(title = "Neural Net Metrics")+
  theme(strip.placement = "outside",
        strip.text.y = element_text(size = 9),
        plot.title = element_text(hjust = 0.5))
## `geom_smooth()` using formula 'y ~ x'

Predictions

testing_baked = bake(rec,testing_data, composition = "matrix")
xx = subset(testing_baked, select = -tmin)
y_pred = predict(model1, xx)[,1]

add predictions to dataset

predicted = testing_data %>% 
  mutate(tmin_pred = y_pred,
         resid = tmin-tmin_pred)

calcular algunas métricas del ajuste (fit metrics)

mae(predicted, truth = tmin, estimate = tmin_pred)
## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 mae     standard        2.73
rmse(predicted, truth = tmin, estimate = tmin_pred)
## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rmse    standard        3.62
rsq(predicted, truth = tmin, estimate = tmin_pred)
## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rsq     standard       0.808

plot the predictions

ggplot(data = predicted, aes(x = tmin))+
  geom_abline(intercept = 0, slope = 1, color = "navy")+
  geom_point(aes(y = tmin_pred))+
  coord_fixed()

ggplot() + 
  geom_line(data = predicted, aes(x = 1:(nrow(predicted)), y = tmin), color = "blue") +
  geom_line(data = predicted, aes(x = 1:(nrow(predicted)), y = tmin_pred), color = "red") +
  xlab('Dates') +
  ylab('Tmin')

heladas <- predicted %>% select(tmin,tmin_pred) %>% filter(tmin <=0  )
nrow(heladas)
## [1] 317
mae(heladas, truth = tmin, estimate = tmin_pred)
## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 mae     standard        1.14
rmse(heladas, truth = tmin, estimate = tmin_pred)
## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rmse    standard        1.64
rsq(heladas, truth = tmin, estimate = tmin_pred)
## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rsq     standard       0.515
valor_real <- cut(predicted$tmin,breaks = c(-50,0,50))
valor_predicho <- cut(predicted$tmin_pred,breaks = c(-50,0,50))
table(valor_real,valor_predicho)
##           valor_predicho
## valor_real (-50,0] (0,50]
##    (-50,0]     306     11
##    (0,50]      141    645