Dataset

library(readr)
dacc_daily_tmin <- read_csv("data/dacc-daily-tmin.csv", 
    col_types = cols(X1 = col_skip(), date = col_date(format = "%Y-%m-%d")))
#View(dacc_daily_tmin)

Nombres de las estaciones meteorológicas

estaciones <- c("junin","tunuyan","agua_amarga","las_paredes","la_llave")

Función que genera el dataset para clasificación

get_dataset_for_regression <- function(station_name)
{
  
  if(is.null(station_name) | !(station_name %in% estaciones) ) stop("station_name must be a valid name from estaciones")

  junin <- dacc_daily_tmin %>% 
  select(starts_with(station_name)) %>% 
  rename_with( ~ tolower(gsub(paste0(station_name,"."), "", .x, fixed = TRUE))) 
  # quita el junin. de los nombres de las columnas

# Quiero predecir la temperatura mínima, si es o no es helada.

  tmin <- junin %>% 
    select(temp_min) 
  
  colnames(tmin) <- "tmin"
  
  
  
  
  # para regresion
   data_tmin <- cbind(junin[1:(nrow(junin)-1),],tmin[2:nrow(tmin),])
  
  
  
  return(data_tmin)
}

get_dataset_for_regression("tunuyan") %>% head()

##   temp_max humedad_max temp_med radiacion_med humedad_med temp_min humedad_min
## 1    33.41          87 23.20833      249.0000    57.66667    13.65          25
## 2    35.16          83 23.85833      344.9583    54.54167    13.65          27
## 3    29.28          83 23.25417      310.2083    59.20833    17.48          41
## 4    34.57          90 22.92917      347.2917    61.29167    11.47          25
## 5    34.10          85 24.42083      332.4167    61.00000    15.83           4
## 6    32.40          87 22.95000      323.8750    60.50000    15.55          28
##    tmin
## 1 13.65
## 2 17.48
## 3 11.47
## 4 15.83
## 5 15.55
## 6 13.21

Obtener dataset para regresión

estacion <- "tunuyan"
dataset <- get_dataset_for_regression(estacion)

Plot de tmin y temp_min

n <- seq(1:nrow(dataset))
plot(n, dataset$tmin, col="blue", pch=20, cex=.9)
points(n, dataset$temp_min, col="red", pch=5, cex=.9)

## split en training y testing

data_split <- initial_split(dataset, prop = 4/5, strata = NULL)
training_data <- training(data_split)
testing_data <- testing(data_split)

Receta

rec = recipe(tmin ~ ., data = training_data) %>% 
  step_center(all_predictors()) %>% 
  step_scale(all_predictors()) %>% 
  prep()

keras model

library(keras)

## 
## Attaching package: 'keras'

## The following object is masked from 'package:yardstick':
## 
##     get_weights

build_model = function(){
  
model = keras_model_sequential() %>% 
  layer_dense(units = 64, activation = "relu", input_shape = ncol(training_data)-1) %>% 
  layer_dense(units = 64, activation = "relu") %>% 
  layer_dense(units = 1)
  
model %>% 
  compile(loss = "mse",
          optimizer = "rmsprop",
          metrics = list("mae","mse"))

return(model)
}


model1 = build_model()

summary(model1)

## Model: "sequential"
## ________________________________________________________________________________
## Layer (type)                        Output Shape                    Param #     
## ================================================================================
## dense_2 (Dense)                     (None, 64)                      512         
## ________________________________________________________________________________
## dense_1 (Dense)                     (None, 64)                      4160        
## ________________________________________________________________________________
## dense (Dense)                       (None, 1)                       65          
## ================================================================================
## Total params: 4,737
## Trainable params: 4,737
## Non-trainable params: 0
## ________________________________________________________________________________

input and outputs

training_baked = bake(rec, training_data, composition = "matrix")
x = subset(training_baked, select = -tmin)
y = training_baked[,"tmin"]

fit the model

history1 = model1 %>% fit(
  x = x,
  y = y,
  batch_size = 50,
  epoch = 100
)

plot(history1)+
  labs(title = "Neural Net Metrics")+
  theme(strip.placement = "outside",
        strip.text.y = element_text(size = 9),
        plot.title = element_text(hjust = 0.5))

## `geom_smooth()` using formula 'y ~ x'

Predictions

testing_baked = bake(rec,testing_data, composition = "matrix")
xx = subset(testing_baked, select = -tmin)

y_pred = predict(model1, xx)[,1]

add predictions to dataset

predicted = testing_data %>% 
  mutate(tmin_pred = y_pred,
         resid = tmin-tmin_pred)

calcular algunas métricas del ajuste (fit metrics)

mae(predicted, truth = tmin, estimate = tmin_pred)

## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 mae     standard        1.88

rmse(predicted, truth = tmin, estimate = tmin_pred)

## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rmse    standard        2.44

rsq(predicted, truth = tmin, estimate = tmin_pred)

## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rsq     standard       0.876

plot the predictions

ggplot(data = predicted, aes(x = tmin))+
  geom_abline(intercept = 0, slope = 1, color = "navy")+
  geom_point(aes(y = tmin_pred))+
  coord_fixed()

ggplot() + 
  geom_line(data = predicted, aes(x = 1:(nrow(predicted)), y = tmin), color = "blue") +
  geom_line(data = predicted, aes(x = 1:(nrow(predicted)), y = tmin_pred), color = "red") +
  xlab('Dates') +
  ylab('Tmin')

heladas <- predicted %>% select(tmin,tmin_pred) %>% filter(tmin <=0  )

mae(heladas, truth = tmin, estimate = tmin_pred)

## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 mae     standard        1.72

rmse(heladas, truth = tmin, estimate = tmin_pred)

## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rmse    standard        2.31

rsq(heladas, truth = tmin, estimate = tmin_pred)

## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rsq     standard       0.537

valor_real <- cut(predicted$tmin,breaks = c(-50,0,50))
valor_predicho <- cut(predicted$tmin_pred,breaks = c(-50,0,50))

table(valor_real,valor_predicho)

##           valor_predicho
## valor_real (-50,0] (0,50]
##    (-50,0]     253     64
##    (0,50]       30    756

Ahora con entrenamiento dando más peso a los números negativos

fit the model

pesos <- case_when(
          training_data$tmin <= 0 ~ 1,
          TRUE ~ 0.01
        )

history2 = model1 %>% fit(
  x = x,
  y = y,
  batch_size = 50,
  epoch = 100,
  sample_weight = pesos
)

plot(history2)+
  labs(title = "Neural Net Metrics")+
  theme(strip.placement = "outside",
        strip.text.y = element_text(size = 9),
        plot.title = element_text(hjust = 0.5))

## `geom_smooth()` using formula 'y ~ x'

Predictions

testing_baked = bake(rec,testing_data, composition = "matrix")
xx = subset(testing_baked, select = -tmin)

y_pred = predict(model1, xx)[,1]

add predictions to dataset

predicted = testing_data %>% 
  mutate(tmin_pred = y_pred,
         resid = tmin-tmin_pred)

calcular algunas métricas del ajuste (fit metrics)

mae(predicted, truth = tmin, estimate = tmin_pred)

## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 mae     standard        2.73

rmse(predicted, truth = tmin, estimate = tmin_pred)

## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rmse    standard        3.62

rsq(predicted, truth = tmin, estimate = tmin_pred)

## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rsq     standard       0.808

plot the predictions

ggplot(data = predicted, aes(x = tmin))+
  geom_abline(intercept = 0, slope = 1, color = "navy")+
  geom_point(aes(y = tmin_pred))+
  coord_fixed()

ggplot() + 
  geom_line(data = predicted, aes(x = 1:(nrow(predicted)), y = tmin), color = "blue") +
  geom_line(data = predicted, aes(x = 1:(nrow(predicted)), y = tmin_pred), color = "red") +
  xlab('Dates') +
  ylab('Tmin')

heladas <- predicted %>% select(tmin,tmin_pred) %>% filter(tmin <=0  )

nrow(heladas)

## [1] 317

mae(heladas, truth = tmin, estimate = tmin_pred)

## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 mae     standard        1.14

rmse(heladas, truth = tmin, estimate = tmin_pred)

## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rmse    standard        1.64

rsq(heladas, truth = tmin, estimate = tmin_pred)

## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rsq     standard       0.515

valor_real <- cut(predicted$tmin,breaks = c(-50,0,50))
valor_predicho <- cut(predicted$tmin_pred,breaks = c(-50,0,50))

table(valor_real,valor_predicho)

##           valor_predicho
## valor_real (-50,0] (0,50]
##    (-50,0]     306     11
##    (0,50]      141    645

weighted neural net regression

Ana Laura Diedrichs

2/2/2021

Dataset

Nombres de las estaciones meteorológicas

Función que genera el dataset para clasificación

Obtener dataset para regresión

Receta

keras model

input and outputs

fit the model

Predictions

Ahora con entrenamiento dando más peso a los números negativos

fit the model

Predictions