library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(magrittr)Nota: no vamos a usar tildes.
En esta publicacion se muestra como construir modelos estadisticos para estimar el SOH de baterias de litio.
El primer paso es leer los datos.
bat_05 <- read.csv(file='raw_data_05.csv')
bat_06 <- read.csv(file='raw_data_06.csv')
bat_07 <- read.csv(file='raw_data_07.csv')
bat_18 <- read.csv(file='raw_data_18.csv')El SOH se puede obtener usando la siguiente expresion:
\[ \text{SOH} = \frac{\text{Capatity}}{\text{Initial capacity}} \]
Vamos a crear la variable SOH y la variable bateria, esas dos nuevas variables se van a agregar a los datos originales.
bat_05 %<>% mutate(soh = capacity/capacity[1], bateria=5)
bat_06 %<>% mutate(soh = capacity/capacity[1], bateria=6)
bat_07 %<>% mutate(soh = capacity/capacity[1], bateria=7)
bat_18 %<>% mutate(soh = capacity/capacity[1], bateria=18)Vamos a crear un marco de datos consolidado para hacer graficos comparativos.
datos <- rbind(bat_05, bat_06, bat_07, bat_18)
datos$bateria <- factor(datos$bateria)Vamos a explorar los datos.
library(tibble)
glimpse(datos)## Rows: 185,721
## Columns: 12
## $ cycle <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
## $ ambient_temperature <int> 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 2~
## $ datetime <chr> "2008-04-02 15:25:41", "2008-04-02 15:25:41", "20~
## $ capacity <dbl> 1.856487, 1.856487, 1.856487, 1.856487, 1.856487,~
## $ voltage_measured <dbl> 4.191492, 4.190749, 3.974871, 3.951717, 3.934352,~
## $ current_measured <dbl> -0.004901589, -0.001478006, -2.012528324, -2.0139~
## $ temperature_measured <dbl> 24.33003, 24.32599, 24.38909, 24.54475, 24.73139,~
## $ current_load <dbl> -0.0006, -0.0006, -1.9982, -1.9982, -1.9982, -1.9~
## $ voltage_load <dbl> 0.000, 4.206, 3.062, 3.030, 3.011, 2.991, 2.977, ~
## $ time <dbl> 0.000, 16.781, 35.703, 53.781, 71.922, 90.094, 10~
## $ soh <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
## $ bateria <fct> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5~
Vamos a explorar la cantidad de ciclos y el tiempo promedio de cada ciclo por bateria.
datos %>% group_by(bateria) %>% summarise(num_ciclos=max(cycle),
tiempo_prom_ciclo=mean(max(time)))## # A tibble: 4 x 3
## bateria num_ciclos tiempo_prom_ciclo
## <fct> <int> <dbl>
## 1 5 168 3690.
## 2 6 168 3690.
## 3 7 168 3690.
## 4 18 132 3435.
Cada bateria fue observada durante 168 ciclos y cada ciclo duro en promedio 3690 segundos o un poco mas de una hora.
Vamos a calcular el número de observaciones dentro de cada ciclo para la bateria 05.
bat_05 %>% group_by(cycle) %>% summarise(n())## # A tibble: 168 x 2
## cycle `n()`
## <int> <int>
## 1 1 197
## 2 2 196
## 3 3 195
## 4 4 194
## 5 5 194
## 6 6 195
## 7 7 195
## 8 8 191
## 9 9 190
## 10 10 190
## # ... with 158 more rows
Vamos a explorar los valores de capacidad, soh, time, voltage_load y current_load para las lineas 1, 2, 3, 4 y 168 de cada ciclo.
bat_05 %>% group_by(cycle) %>%
select(cycle, capacity, soh, time, voltage_load, current_load) %>%
slice(c(1:4, 168))## # A tibble: 840 x 6
## # Groups: cycle [168]
## cycle capacity soh time voltage_load current_load
## <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 1.86 1 0 0 -0.0006
## 2 1 1.86 1 16.8 4.21 -0.0006
## 3 1 1.86 1 35.7 3.06 -2.00
## 4 1 1.86 1 53.8 3.03 -2.00
## 5 1 1.86 1 3112. 2.36 -2.00
## 6 2 1.85 0.995 0 0 -0.0006
## 7 2 1.85 0.995 16.7 4.20 -0.0006
## 8 2 1.85 0.995 35.7 3.06 -2.00
## 9 2 1.85 0.995 53.8 3.02 -2.00
## 10 2 1.85 0.995 3113. 2.35 -2.00
## # ... with 830 more rows
De la tabla anterior se observa que la capacidad y el SOH son constantes dentro del ciclo. Vamos a construir una figura para apreciar mejor esta situacion.
bat_05 %>% filter(cycle <= 16) %>%
ggplot(mapping = aes(x=time, y=capacity, col=factor(cycle))) +
geom_line() +
geom_point() +
facet_wrap( ~ cycle)Vamos a crear un dataframe mas pequeno que solo van a tener la informacion en el tiempo 0 para todos los ciclos.
d0 <- datos %>% filter(time == 0)Vamos a graficar la capacidad versus ciclo.
ggplot(d0, mapping = aes(x=cycle, y=capacity, col=bateria)) +
geom_line()Vamos a graficar la SOH versus ciclo. La linea roja corresponde al umbral del 70% en el cual la bateria ya cumple su ciclo de vida y ya es recomendable hacer el cambio.
ggplot(d0, mapping = aes(x=cycle, y=soh, col=bateria)) +
geom_line() +
geom_hline(yintercept = 0.7, col='blue', lty='dashed') +
labs(title='SOH versus cycle. La lÃnea horizontal azul corresponde al umbral.')Vamos a explorar las variables temperature_measured, voltage_measured, current_measured, voltage_load, current_load en la base de datos completa.
datos %>%
select(temperature_measured, voltage_measured, current_measured, voltage_load, current_load) %>%
summary()## temperature_measured voltage_measured current_measured voltage_load
## Min. :22.35 Min. :1.737 Min. :-2.02910 Min. :0.000
## 1st Qu.:29.57 1st Qu.:3.378 1st Qu.:-2.01142 1st Qu.:2.410
## Median :32.36 Median :3.501 Median :-2.00902 Median :2.558
## Mean :32.38 Mean :3.497 Mean :-1.83257 Mean :2.366
## 3rd Qu.:35.42 3rd Qu.:3.656 3rd Qu.:-1.98997 3rd Qu.:2.718
## Max. :42.33 Max. :4.233 Max. : 0.01431 Max. :4.249
## current_load
## Min. :-2.000
## 1st Qu.: 1.998
## Median : 1.999
## Mean : 1.465
## 3rd Qu.: 1.999
## Max. : 2.000
Para hacer las predicciones del SOH vamos a utilizar solo la informacion de los ciclos 2 en adelante, el primer ciclo no se va a considerar porque la bateria esta nueva.
data_train <- datos %>% filter(cycle > 1)Vamos a iniciar con un modelo sencillo y que se puede resumir en la siguiente expresion:
\[ \begin{align*} SOH_i &\sim N(\mu_i, \sigma), \\ \mu_i &= \beta_0 + \beta_1 cycle + \beta_2 time + \beta_3 temp + \beta_4 volt + \beta_5 current + \beta_6 voltage_{load} + \beta_7 current_{load}, \\ \sigma &= constante. \end{align*} \]
mod1 <- lm(soh ~ cycle + time + temperature_measured + voltage_measured +
current_measured + voltage_load + current_load,
data=data_train)
summary(mod1)##
## Call:
## lm(formula = soh ~ cycle + time + temperature_measured + voltage_measured +
## current_measured + voltage_load + current_load, data = data_train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.126092 -0.035766 0.009155 0.034756 0.234710
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.702e-01 6.296e-03 58.79 <2e-16 ***
## cycle -1.831e-03 3.568e-06 -513.17 <2e-16 ***
## time 3.242e-05 3.905e-07 83.03 <2e-16 ***
## temperature_measured 1.709e-03 1.021e-04 16.74 <2e-16 ***
## voltage_measured 1.271e-01 1.143e-03 111.17 <2e-16 ***
## current_measured -1.608e-02 6.474e-04 -24.83 <2e-16 ***
## voltage_load 1.242e-02 5.356e-04 23.19 <2e-16 ***
## current_load -1.931e-03 1.116e-04 -17.30 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04757 on 184756 degrees of freedom
## Multiple R-squared: 0.8008, Adjusted R-squared: 0.8008
## F-statistic: 1.061e+05 on 7 and 184756 DF, p-value: < 2.2e-16
Vamos a comparar visualmente los resultados de SOH y \(\widehat{SOH}\).
data_train %<>% mutate(yhat1 = fitted(mod1))ggplot(data_train, aes(x=soh, y=yhat1, col=bateria)) +
geom_point(size=0.5) +
geom_abline(intercept = 0, slope = 1, size = 1.0, col='blue') +
geom_hline(yintercept = 1, col='red', lty='dashed') +
labs(main='Predicciones versus SOH, la linea azul es la referencia.')Vamos ahora a considerar otro modelo que tiene la siguiente expresion:
\[ \begin{align*} SOH_i &\sim Beta(\mu_i, \sigma), \\ \mu_i &= \beta_0 + \beta_1 cycle + \beta_2 time + \beta_3 temp + \beta_4 volt + \beta_5 current + \beta_6 voltage_{load} + \beta_7 current_{load}, \\ \sigma &= constante. \end{align*} \]
library(gamlss)
mod2 <- gamlss(soh ~ cycle + time + temperature_measured + voltage_measured +
current_measured + voltage_load + current_load,
family=BE,
data=data_train)## GAMLSS-RS iteration 1: Global Deviance = -415221.1
## GAMLSS-RS iteration 2: Global Deviance = -572800.4
## GAMLSS-RS iteration 3: Global Deviance = -601138.3
## GAMLSS-RS iteration 4: Global Deviance = -601789.4
## GAMLSS-RS iteration 5: Global Deviance = -601793.3
## GAMLSS-RS iteration 6: Global Deviance = -601793.4
## GAMLSS-RS iteration 7: Global Deviance = -601793.4
summary(mod2)## ******************************************************************
## Family: c("BE", "Beta")
##
## Call: gamlss(formula = soh ~ cycle + time + temperature_measured +
## voltage_measured + current_measured + voltage_load +
## current_load, family = BE, data = data_train)
##
## Fitting method: RS()
##
## ------------------------------------------------------------------
## Mu link function: logit
## Mu Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.368e-01 4.839e-02 -17.295 < 2e-16 ***
## cycle -1.303e-02 2.787e-05 -467.733 < 2e-16 ***
## time 2.508e-04 3.378e-06 74.246 < 2e-16 ***
## temperature_measured 4.703e-03 8.211e-04 5.727 1.02e-08 ***
## voltage_measured 7.656e-01 8.666e-03 88.341 < 2e-16 ***
## current_measured -2.552e-01 5.858e-03 -43.567 < 2e-16 ***
## voltage_load 1.094e-01 4.755e-03 23.001 < 2e-16 ***
## current_load -1.962e-01 1.521e-03 -128.933 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## ------------------------------------------------------------------
## Sigma link function: logit
## Sigma Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.738856 0.001894 -918 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## ------------------------------------------------------------------
## No. of observations in the fit: 184764
## Degrees of Freedom for the fit: 9
## Residual Deg. of Freedom: 184755
## at cycle: 7
##
## Global Deviance: -601793.4
## AIC: -601775.4
## SBC: -601684.2
## ******************************************************************
data_train %<>% mutate(yhat2 = predict(object=mod2, what='mu', type='response'))ggplot(data_train, aes(x=soh, y=yhat2, col=bateria)) +
geom_point(size=0.5) +
geom_abline(intercept = 0, slope = 1, size = 1.0, col='blue') +
labs(main='Predicciones versus SOH, la linea azul es la referencia.')Para resumir los resultados obtenidos usamos lo siguiente:
data_train %>% dplyr::select(soh, yhat1, yhat2) %>% cor() %>% round(digits=2)## soh yhat1 yhat2
## soh 1.00 0.89 0.87
## yhat1 0.89 1.00 0.98
## yhat2 0.87 0.98 1.00