| library(tidyverse) |
| library(ggridges) |
| library(patchwork) |
| library(viridis) |
| library(hrbrthemes) |
| library(forcats) |
| library(lubridate) |
| library(naniar) |
Turbine <- read.csv("Turbine_Data.csv" , header = TRUE , stringsAsFactors = TRUE)
dim(Turbine)
## [1] 118224 22
names(Turbine)
## [1] "X" "ActivePower"
## [3] "AmbientTemperatue" "BearingShaftTemperature"
## [5] "Blade1PitchAngle" "Blade2PitchAngle"
## [7] "Blade3PitchAngle" "ControlBoxTemperature"
## [9] "GearboxBearingTemperature" "GearboxOilTemperature"
## [11] "GeneratorRPM" "GeneratorWinding1Temperature"
## [13] "GeneratorWinding2Temperature" "HubTemperature"
## [15] "MainBoxTemperature" "NacellePosition"
## [17] "ReactivePower" "RotorRPM"
## [19] "TurbineStatus" "WTG"
## [21] "WindDirection" "WindSpeed"
summary(Turbine)
## X ActivePower AmbientTemperatue
## 2017-12-31 00:00:00+00:00: 1 Min. : -38.52 Min. : 0.00
## 2017-12-31 00:10:00+00:00: 1 1st Qu.: 79.64 1st Qu.:25.63
## 2017-12-31 00:20:00+00:00: 1 Median : 402.65 Median :28.34
## 2017-12-31 00:30:00+00:00: 1 Mean : 619.11 Mean :28.77
## 2017-12-31 00:40:00+00:00: 1 3rd Qu.:1074.59 3rd Qu.:31.66
## 2017-12-31 00:50:00+00:00: 1 Max. :1779.03 Max. :42.41
## (Other) :118218 NA's :23474 NA's :24407
## BearingShaftTemperature Blade1PitchAngle Blade2PitchAngle Blade3PitchAngle
## Min. : 0.00 Min. :-43.16 Min. :-26.44 Min. :-26.44
## 1st Qu.:39.84 1st Qu.: -0.94 1st Qu.: -0.43 1st Qu.: -0.43
## Median :42.91 Median : 0.39 Median : 0.89 Median : 0.89
## Mean :43.01 Mean : 9.75 Mean : 10.04 Mean : 10.04
## 3rd Qu.:47.01 3rd Qu.: 8.10 3rd Qu.: 8.48 3rd Qu.: 8.48
## Max. :55.09 Max. : 90.14 Max. : 90.02 Max. : 90.02
## NA's :55706 NA's :76228 NA's :76333 NA's :76333
## ControlBoxTemperature GearboxBearingTemperature GearboxOilTemperature
## Min. :0 Min. : 0.00 Min. : 0.00
## 1st Qu.:0 1st Qu.:57.87 1st Qu.:53.94
## Median :0 Median :64.83 Median :57.20
## Mean :0 Mean :64.23 Mean :57.56
## 3rd Qu.:0 3rd Qu.:71.08 3rd Qu.:61.31
## Max. :0 Max. :82.24 Max. :70.76
## NA's :56064 NA's :55684 NA's :55786
## GeneratorRPM GeneratorWinding1Temperature GeneratorWinding2Temperature
## Min. : 0 Min. : 0.00 Min. : 0.00
## 1st Qu.:1030 1st Qu.: 55.49 1st Qu.: 54.76
## Median :1125 Median : 65.79 Median : 65.00
## Mean :1102 Mean : 72.46 Mean : 71.83
## 3rd Qu.:1515 3rd Qu.: 85.87 3rd Qu.: 85.34
## Max. :1810 Max. :126.77 Max. :126.04
## NA's :55929 NA's :55797 NA's :55775
## HubTemperature MainBoxTemperature NacellePosition ReactivePower
## Min. : 0.00 Min. : 0.00 Min. : 0.0 Min. :-203.183
## 1st Qu.:33.94 1st Qu.:35.81 1st Qu.:145.0 1st Qu.: -0.432
## Median :37.00 Median :39.49 Median :182.0 Median : 35.884
## Mean :36.90 Mean :39.55 Mean :196.3 Mean : 88.134
## 3rd Qu.:40.01 3rd Qu.:43.36 3rd Qu.:271.0 3rd Qu.: 147.359
## Max. :48.00 Max. :54.25 Max. :357.0 Max. : 403.714
## NA's :55818 NA's :55717 NA's :45946 NA's :23476
## RotorRPM TurbineStatus WTG WindDirection
## Min. : 0.00 Min. : 0 G01:118224 Min. : 0.0
## 1st Qu.: 9.23 1st Qu.: 2 1st Qu.:145.0
## Median :10.10 Median : 2 Median :182.0
## Mean : 9.91 Mean : 2280 Mean :196.3
## 3rd Qu.:13.60 3rd Qu.: 2 3rd Qu.:271.0
## Max. :16.27 Max. :65746528 Max. :357.0
## NA's :56097 NA's :55316 NA's :45946
## WindSpeed
## Min. : 0.000
## 1st Qu.: 3.823
## Median : 5.558
## Mean : 5.879
## 3rd Qu.: 7.507
## Max. :22.971
## NA's :23629
## ActivePower
bw <- Turbine %>%
select(ActivePower) %>%
filter(!is.na(ActivePower))
bw <- 2 * IQR(bw$ActivePower) / length(bw$ActivePower)^(1/3)
g1 <- Turbine %>%
ggplot(aes(x = ActivePower)) +
geom_histogram(binwidth = bw,
fill = 'green',
color = 'black',
show.legend = F,
alpha = .5) +
labs(title = "(g1) Distribution of ActivePower",
x = "ActivePower",
y = "Frequency") +
hrbrthemes::theme_ft_rc()
## AmbientTemperature
bw <- Turbine %>%
select(AmbientTemperatue) %>%
filter(!is.na(AmbientTemperatue))
bw <- 2 * IQR(bw$AmbientTemperatue) / length(bw$AmbientTemperatue)^(1/3)
g2 <- Turbine %>%
ggplot(aes(x = AmbientTemperatue)) +
geom_histogram(binwidth = bw,
fill = 'green',
color = 'black',
show.legend = F,
alpha = .5) +
labs(title = "(g2) Distribution of AmbientTemperatue",
x = "AmbientTemperatue",
y = "Frequency") +
hrbrthemes::theme_ft_rc()
## BearingShaftTemperature
bw <- Turbine %>%
select(BearingShaftTemperature) %>%
filter(!is.na(BearingShaftTemperature))
bw <- 2 * IQR(bw$BearingShaftTemperature) / length(bw$BearingShaftTemperature)^(1/3)
g3 <- Turbine %>%
ggplot(aes(x = BearingShaftTemperature)) +
geom_histogram(binwidth = bw,
fill = 'green',
color = 'black',
show.legend = F,
alpha = .5) +
labs(title = "(g3) Distribution of BearingShaftTemperature",
x = "BearingShaftTemperature",
y = "Frequency") +
hrbrthemes::theme_ft_rc()
## Blade1PitchAngle
bw <- Turbine %>%
select(Blade1PitchAngle) %>%
filter(!is.na(Blade1PitchAngle))
bw <- 2 * IQR(bw$Blade1PitchAngle) / length(bw$Blade1PitchAngle)^(1/3)
g4 <- Turbine %>%
ggplot(aes(x = Blade1PitchAngle)) +
geom_histogram(binwidth = bw,
fill = 'green',
color = 'black',
show.legend = F,
alpha = .5) +
labs(title = "(g4) Distribution of Blade1PitchAngle",
x = "Blade1PitchAngle",
y = "Frequency") +
hrbrthemes::theme_ft_rc()
## Histo
(g1 /g2 | g3 / g4) +
plot_annotation(theme = theme(plot.title = element_text(size = 18,
colour = "black"))) +
theme(text = element_text('mono'))
class(Turbine)
## [1] "data.frame"
glimpse(Turbine)
## Rows: 118,224
## Columns: 22
## $ X <fct> 2017-12-31 00:00:00+00:00, 2017-12-31 00:…
## $ ActivePower <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ AmbientTemperatue <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ BearingShaftTemperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ Blade1PitchAngle <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ Blade2PitchAngle <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ Blade3PitchAngle <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ ControlBoxTemperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ GearboxBearingTemperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ GearboxOilTemperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ GeneratorRPM <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ GeneratorWinding1Temperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ GeneratorWinding2Temperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ HubTemperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ MainBoxTemperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ NacellePosition <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ ReactivePower <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ RotorRPM <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ TurbineStatus <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ WTG <fct> G01, G01, G01, G01, G01, G01, G01, G01, G…
## $ WindDirection <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ WindSpeed <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
Turbine <- Turbine %>%
rename(Date = X) %>%
mutate(Date = ymd_hms(Date)) %>%
select(-WTG) %>%
as_tsibble(index = Date)
glimpse(Turbine$Date)
## POSIXct[1:118224], format: "2017-12-31 00:00:00" "2017-12-31 00:10:00" "2017-12-31 00:20:00" ...
head(Turbine)
## # A tsibble: 6 x 21 [10m] <UTC>
## Date ActivePower AmbientTemperatue BearingShaftTemperature
## <dttm> <dbl> <dbl> <dbl>
## 1 2017-12-31 00:00:00 NA NA NA
## 2 2017-12-31 00:10:00 NA NA NA
## 3 2017-12-31 00:20:00 NA NA NA
## 4 2017-12-31 00:30:00 NA NA NA
## 5 2017-12-31 00:40:00 NA NA NA
## 6 2017-12-31 00:50:00 NA NA NA
## # … with 17 more variables: Blade1PitchAngle <dbl>, Blade2PitchAngle <dbl>,
## # Blade3PitchAngle <dbl>, ControlBoxTemperature <dbl>,
## # GearboxBearingTemperature <dbl>, GearboxOilTemperature <dbl>,
## # GeneratorRPM <dbl>, GeneratorWinding1Temperature <dbl>,
## # GeneratorWinding2Temperature <dbl>, HubTemperature <dbl>,
## # MainBoxTemperature <dbl>, NacellePosition <dbl>, ReactivePower <dbl>,
## # RotorRPM <dbl>, TurbineStatus <dbl>, WindDirection <dbl>, WindSpeed <dbl>
gg_miss_var(Turbine)
## Correlation Matrix Plot
corrmatrix <- as.data.frame(Turbine)
colfunc <- colorRampPalette(brewer.pal(9,"BrBG"))
heatmap.2(cor(Filter(is.numeric, corrmatrix), use = "complete.obs"), Rowv = FALSE,
Colv = FALSE, dendrogram = "none", lwid=c(0.1,2), lhei=c(0.1,2),
col = colfunc(15),
cellnote = round(cor(Filter(is.numeric, corrmatrix), use = "complete.obs"),2),
notecol = "black", key = FALSE, trace = 'none')
| ActivePower |
| BearingShaftTemperature |
| GearboxBearingTemperature |
| GeneratorRPM |
| GeneratorWinding2Temperature |
| HubTemperature |
| MainBoxTemperature |
| ReactivePower |
| WindDirection |
| WindSpeed |
Turbine <- Turbine %>%
select(ActivePower, BearingShaftTemperature, GearboxBearingTemperature, GeneratorRPM,
GeneratorWinding2Temperature, HubTemperature, MainBoxTemperature, ReactivePower,
WindDirection, WindSpeed) %>%
drop_na(ActivePower)
sum(is.na(Turbine))
## [1] 219543
gg_miss_var(Turbine)
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
Turbine <- Turbine %>%
mutate_if(is.numeric, function(x) ifelse(is.na(x), median(x, na.rm = T), x))
gg_miss_var(Turbine)
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
ggplot(Turbine) +
aes(x = Date, y = ActivePower) +
geom_line(size = .1, colour = "#1B901C") +
labs(
x = "Date [10 min]",
title = "Active Power ",
subtitle = "Every 10 min",
caption = "2018 - 2020"
) +
ggthemes::theme_solarized()
ggplot(Turbine) +
aes(
x = ActivePower,
y = WindSpeed,
color = WindSpeed
) +
geom_point(shape = "circle") +
geom_smooth(method = lm, color = "#17752a") +
labs(x = "ActivePower",
y = "WindSpeed",
title = "Power vs. Windspeed") +
scale_color_distiller(palette = "BuGn", direction = 1) +
ggthemes::theme_solarized() +
theme(legend.position = "bottom")
## `geom_smooth()` using formula 'y ~ x'
Turbine %>%
fill_gaps(.full = TRUE) %>%
gg_season(ActivePower)
gg_lag(Turbine,ActivePower)
Turbine %>%
fill_gaps(.full = TRUE) %>%
ACF(ActivePower, lag_max = 30) %>%
autoplot()
Turbine %>%
fill_gaps(.full = TRUE) %>%
PACF(ActivePower, lag_max = 30) %>%
autoplot()
Turbine %>%
features(ActivePower, unitroot_kpss)
## # A tibble: 1 × 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 6.99 0.01
Turbine %>%
mutate(diff_ActivePower = difference(ActivePower)) %>%
features(diff_ActivePower, unitroot_kpss)
## # A tibble: 1 × 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.000680 0.1
Turbine %>%
fill_gaps(.full = TRUE) %>%
mutate(diff_ActivePower = difference(ActivePower)) %>%
ACF(diff_ActivePower, lag_max = 30) %>%
autoplot()
Turbine %>%
fill_gaps(.full = TRUE) %>%
mutate(diff_ActivePower = difference(ActivePower)) %>%
PACF(diff_ActivePower, lag_max = 30) %>%
autoplot()
Turbine <- Turbine %>%
mutate(Diff_ActivePower = difference(ActivePower))
train <- Turbine %>%
filter(Date < '2019-08-22')
test <- Turbine %>%
filter(Date >= '2019-08-22')
models <- train %>%
fill_gaps(.full = TRUE) %>%
model(
lm = TSLM(ActivePower ~ trend()),
lm_pred = TSLM(ActivePower ~ BearingShaftTemperature + GearboxBearingTemperature + GeneratorRPM + GeneratorWinding2Temperature + HubTemperature + MainBoxTemperature + ReactivePower + WindDirection + WindSpeed),
Auto_Exhaustive = ARIMA(log(ActivePower), stepwise = F)
)
## Warning in log(ActivePower): NaNs produced
glance(models)
## # A tibble: 3 × 17
## .model r_squared adj_r_squared sigma2 statistic p_value df log_lik
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <dbl>
## 1 lm 0.0211 0.0211 394911. 1430. 1.17e-309 2 -521071.
## 2 lm_pred 0.890 0.890 44283. 59724. 0 10 -448559.
## 3 Auto_Exha… NA NA Inf NA NA NA NaN
## # … with 9 more variables: AIC <dbl>, AICc <dbl>, BIC <dbl>, CV <dbl>,
## # deviance <dbl>, df.residual <int>, rank <int>, ar_roots <list>,
## # ma_roots <list>
augment(models)
## # A tsibble: 258,408 x 6 [10m] <UTC>
## # Key: .model [3]
## .model Date ActivePower .fitted .resid .innov
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl>
## 1 lm 2018-01-01 00:00:00 -5.36 492. -497. -497.
## 2 lm 2018-01-01 00:10:00 -5.82 492. -498. -498.
## 3 lm 2018-01-01 00:20:00 -5.28 492. -497. -497.
## 4 lm 2018-01-01 00:30:00 -4.65 492. -496. -496.
## 5 lm 2018-01-01 00:40:00 -4.68 492. -496. -496.
## 6 lm 2018-01-01 00:50:00 -4.76 492. -497. -497.
## 7 lm 2018-01-01 01:00:00 -5.09 492. -497. -497.
## 8 lm 2018-01-01 01:10:00 -5.11 492. -497. -497.
## 9 lm 2018-01-01 01:20:00 -5.28 492. -497. -497.
## 10 lm 2018-01-01 01:30:00 -4.44 492. -496. -496.
## # … with 258,398 more rows
aug <- augment(models)
aug %>%
ggplot(aes(x=Date)) +
geom_line(aes(y = ActivePower, color = "Data")) +
geom_line(aes(y = .fitted, color = "Fitted")) +
scale_color_manual(values = c(Data = "Black", Fitted = "Blue"))
## Warning: Removed 1 row(s) containing missing values (geom_path).
augment(models) %>%
features(.innov, unitroot_kpss)
## # A tibble: 3 × 3
## .model kpss_stat kpss_pvalue
## <chr> <dbl> <dbl>
## 1 Auto_Exhaustive NaN NaN
## 2 lm 13.8 0.01
## 3 lm_pred 15.7 0.01
Benchmarks <- train %>%
fill_gaps(.full = TRUE) %>%
model(
mean = MEAN(Diff_ActivePower),
naive = NAIVE(Diff_ActivePower),
drift = RW(Diff_ActivePower ~ drift())
)
glance(Benchmarks)
## # A tibble: 3 × 2
## .model sigma2
## <chr> <dbl>
## 1 mean 25429.
## 2 naive 56005.
## 3 drift 56005.
final <- train %>%
model(
lm = TSLM(ActivePower ~ trend())
)
fc <- final %>%
forecast(new_data = test)
#plot forecast
fc %>% autoplot(test, level = NULL)
#review accuracy to select final model
fc %>% accuracy(Turbine)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 lm Test -380. 659. 600. -Inf Inf 3.05 2.02 0.972