library(lubridate)
library(gplots)  # heatmap.2
library(forecast)
package 'forecast' was built under R version 3.5.2
frameData <- function(){
    dat <- read.csv("TrainData1.csv", header = T)
    vec_tStamp_raw <- ymd_hm(dat[2: 26305, 1], "%Y%m%d %H:%M", tz="UTC")
    vec_tStamp <- vec_tStamp_raw[1: 26304]
    seq(1, 26304)[is.na(vec_tStamp)]
    dat.f <- data.frame("series" = seq(1, 26304), "year" = year(vec_tStamp), "month" = month(vec_tStamp), 
                        "day" = day(vec_tStamp), "hour" = hour(vec_tStamp), "power" = dat[2: 26305, 2], 
                        "u10" = dat[2: 26305, 3], "v10" = dat[2: 26305, 4], "u100" = dat[2: 26305, 5], 
                        "v100" = dat[2: 26305, 6])
    return(dat.f)
}
dat.f <- frameData()
rm(frameData)
plotBoxPlot <- function(dat, index = T, str_fact, str_y, str_con = "all"){
    par(bty="n")
    plot(factor(dat[str_fact][index,1]), dat.f[str_y][index,1], xlab = paste(str_fact), ylab = "Power Output",
        main = paste("Box Plot of Power Output By (", str_fact, ") when (", str_con, ")", sep = ""))
    # Add an average line to the box plot
    # y.ave <- ave(dat.f[str_y][index,1])
    # lines(c(1, 12), c(y.ave, y.ave), lty = 2, col = "red", lwd = 3)
    # legend("bottomleft", inset = .02, legend = "Average", col = "red", lty = 2, lwd = 3)    
}
plotBoxPlot(dat=dat.f, index=dat.f$year == 2016, str_fact="month", str_y="power", str_con="year 2016")

plotBoxPlot(dat=dat.f, index=dat.f$year == 2017, str_fact="month", str_y="power", str_con="year 2017")

plotBoxPlot(dat=dat.f, str_fact="hour", str_y="power")

Assumptions

Regressional Model

calNominalWindSpeed <- function(vec_windSpeed_1, vec_windSpeed_2, vec_windSpeed_3, vec_windSpeed_4, alpha = 0.5){
    vec_windSpeed.max_10 <- sqrt(vec_windSpeed_1^2 + vec_windSpeed_2^2)
    vec_windSpeed.max_100 <- sqrt(vec_windSpeed_3^2 + vec_windSpeed_4^2)
    vec_windSpeed.nominal <- alpha * vec_windSpeed.max_10 + (1 - alpha) * vec_windSpeed.max_100
    return(vec_windSpeed.nominal)
}
vec_windSpeed.nominal <- calNominalWindSpeed(dat.f$u10, dat.f$v10, dat.f$u100, dat.f$v100, 0.6)

mod_regress <- glm(dat.f$power ~ dat.f$speed + I(dat.f$speed^2) + I(dat.f$speed^3))
plot(dat.f$speed, dat.f$power, xlab = 'Wind Speed', ylab = 'Normalized Power Production')
lines(seq(0, 25, by = 0.1), mod_regress$coefficients[1] + 
          mod_regress$coefficients[2] * seq(0, 25, by = 0.1) +
          mod_regress$coefficients[3] * seq(0, 25, by = 0.1)^2 + 
          mod_regress$coefficients[4] * seq(0, 25, by = 0.1)^3, col = "red")

mod_regress <- glm(dat.f$power ~ dat.f$speed + I(dat.f$speed^3))
plot(dat.f$speed, dat.f$power, xlab = 'Wind Speed', ylab = 'Normalized Power Production')
lines(seq(0, 25, by = 0.1), mod_regress$coefficients[1] + 
          mod_regress$coefficients[2] * seq(0, 25, by = 0.1) +
          mod_regress$coefficients[3] * seq(0, 25, by = 0.1)^3, col = "red")

ARIMA Model

# Plot the time series / residuals and its ACF, PACF, and Ljung-Box plot
plotTimeSeriesResidual <- function(dat, num_lag = 20, str_name = "Model", ...){
    # If the input dat is arima model, just take its residuals are the dat.
    if(class(dat) == "Arima")
        dat <- dat$residuals
    par(mfrow = c(4, 1), mgp = c(2, 0.7, 0), mar = c(1, 1, 1, 1), oma = c(2, 2, 4, 2), 
        cex.lab = 0.8, cex.axis = 0.8)
    plot(dat, type = "l", bty = "n")
    title(main = paste("TS, ACF, PACF and p Values for Ljung-Box Statistic of", str_name), 
          cex.main = 1.5, line = 1, outer = TRUE)
    # ACF and PACF
    acf(dat, lag.max = num_lag, xaxt = "n", bty = "n", na.action = na.pass)
    axis(1, at = seq(0, num_lag, by = 1), las = 0, outer = FALSE)
    pacf(dat, lag.max = num_lag, xaxt = "n", bty = "n", na.action = na.pass)
    axis(1, at = seq(1, num_lag, by = 1), las = 0, outer = FALSE)
    # Ljung-Box Plot
    value_p <- sapply(1: num_lag, function(i) Box.test(dat, i, type = "Ljung-Box")$p.value)
    plot(1L: num_lag, value_p, xlab = "lag", ylab = "p value", ylim = c(0, 1), bty = "n", xaxt = "n")
    axis(1, at = seq(1, num_lag, by = 1), las = 0, outer = FALSE)
    abline(h = 0.05, lty = 2, col = "blue")
}

mod_arima_1 <- arima(dat.f$power, order = c(0, 1, 0), include.mean = F)
plotTimeSeriesResidual(mod_arima_1$residuals, num_lag = 48)

mod_arima_2 <- arima(mod_arima_1$residuals, order = c(0, 0, 1), seasonal = list(order = c(0, 0, 1), period = 48), 
                     include.mean = F)
plotTimeSeriesResidual(mod_arima_2$residuals, num_lag = 48)

# acf(mod_arima_2$residuals, lag.max = 48, na.action = na.pass)
# ts_power <- ts(dat.f$power, frequency = 24)
plotTimeSeriesResidual(dat.f$speed, num_lag = 48)

mod_arima_3 <- arima(dat.f$speed, order = c(0, 1, 0), include.mean = F)
plotTimeSeriesResidual(mod_arima_3$residuals, num_lag = 48)
mod_arima_4 <- arima(mod_arima_3$residuals, order = c(3, 0, 0), include.mean = F)
plotTimeSeriesResidual(mod_arima_4$residuals, num_lag = 48)

ccf(dat.f$speed, dat.f$power, na.action = na.pass)

ts_power <- ts(dat.f$power, frequency = 24)
y <- residuals(Arima(ts_power, model = mod_arima_5))
ccf(mod_arima_5$residuals, y, na.action = na.pass)

ts_power <- ts(dat.f$power[dat.f$month == 1], frequency = 24)
y <- residuals(Arima(ts_power, model = mod_arima_7))
ccf(mod_arima_7$residuals, y, na.action = na.pass)

ts_power <- ts(dat.f$power[dat.f$month == 1], frequency = 24)
y <- residuals(Arima(ts_power, model = mod_arima_9))
ccf(mod_arima_9$residuals, y, na.action = na.pass)
ccf(dat.f$u10, dat.f$v10)

ccf(dat.f$u10, dat.f$u100)

Ridge Regression

calRidgeRegress <- function(mat_x, vec_y, lambda){
    n <- length(mat_x[1,])
    mat_x[is.na(mat_x)] <- 0
    vec_y[is.na(vec_y)] <- 0
    vec_theta <- solve(t(mat_x) %*% mat_x + lambda * diag(n)) %*% t(mat_x) %*% vec_y
    return(vec_theta)
}
mat_x <- cbind(dat.f$u10[2: 26304], dat.f$v10[2: 26304], dat.f$u100[2: 26304], dat.f$v100[2: 26304],
               dat.f$u10[1: 26303], dat.f$v10[1: 26303], dat.f$u100[1: 26303], dat.f$v100[1: 26303])
vec_theta <- calRidgeRegress(mat_x, dat.f$power[2: 26304], 0.1)
frameData <- function(){
    dat <- read.csv("WeatherForecastInput1.csv", header = T)
    vec_tStamp_raw <- ymd_hm(dat[1: 24, 1], "%Y%m%d %H:%M", tz="UTC")
    vec_tStamp <- vec_tStamp_raw[1: 24]
    dat.f <- data.frame("series" = seq(1, 24), "year" = year(vec_tStamp), "month" = month(vec_tStamp), 
                        "day" = day(vec_tStamp), "hour" = hour(vec_tStamp), "u10" = dat[, 2], "v10" = dat[, 3], 
                        "u100" = dat[, 4], "v100" = dat[, 5])
    return(dat.f)
}
dat.f_pred <- frameData()
rm(frameData)
predPowerOutput <- function(mat_x, vec_theta_hat){
    vec_y_hat <- mat_x %*% vec_theta_hat
    return(vec_y_hat)
}
mat_x_pred <- cbind(dat.f_pred$u10, dat.f_pred$v10, dat.f_pred$u100, dat.f_pred$v100, 
    c(dat.f[26304,]$u10, dat.f_pred$u10[1: 23]), c(dat.f[26304,]$v10, dat.f_pred$v10[1: 23]), 
    c(dat.f[26304,]$u100, dat.f_pred$u100[1: 23]), c(dat.f[26304,]$v100, dat.f_pred$v100[1: 23]))
predPowerOutput(mat_x_pred, vec_theta)
           [,1]
 [1,] 0.4838088
 [2,] 0.4525539
 [3,] 0.5559471
 [4,] 0.6226099
 [5,] 0.5641014
 [6,] 0.5609152
 [7,] 0.5590238
 [8,] 0.6647319
 [9,] 0.8218065
[10,] 0.8912047
[11,] 0.8617088
[12,] 0.8974107
[13,] 0.7678256
[14,] 0.6689318
[15,] 0.6808180
[16,] 0.6932964
[17,] 0.6466008
[18,] 0.6154091
[19,] 0.5979905
[20,] 0.5572132
[21,] 0.5009179
[22,] 0.4996018
[23,] 0.4775083
[24,] 0.4206646
vec_x <- 0.5 * sqrt(dat.f_pred$u10^2 +  dat.f_pred$v10^2) + 0.5 * sqrt(dat.f_pred$u100^2 + dat.f_pred$v100^2)
plot(vec_x, vec_y_hat, xlim = c(0, 20), ylim = c(0, 1))

---
title: "Wind Power Output Forecast"
output: html_notebook
author: Edward J. Xu
date: April 22th, 2019
---

```{r, include=FALSE}
# Clear variables
rm(list=ls())
library(knitr)
```

```{r}
library(lubridate)
library(gplots)  # heatmap.2
library(forecast)
```

```{r, warning=FALSE}
frameData <- function(){
    dat <- read.csv("TrainData1.csv", header = T)
    vec_tStamp_raw <- ymd_hm(dat[2: 26305, 1], "%Y%m%d %H:%M", tz="UTC")
    vec_tStamp <- vec_tStamp_raw[1: 26304]
    seq(1, 26304)[is.na(vec_tStamp)]
    dat.f <- data.frame("series" = seq(1, 26304), "year" = year(vec_tStamp), "month" = month(vec_tStamp), 
                        "day" = day(vec_tStamp), "hour" = hour(vec_tStamp), "power" = dat[2: 26305, 2], 
                        "u10" = dat[2: 26305, 3], "v10" = dat[2: 26305, 4], "u100" = dat[2: 26305, 5], 
                        "v100" = dat[2: 26305, 6])
    return(dat.f)
}
dat.f <- frameData()
rm(frameData)
```

```{r, fig.height = 6, fig.width = 11}
plotBoxPlot <- function(dat, index = T, str_fact, str_y, str_con = "all"){
    par(bty="n")
    plot(factor(dat[str_fact][index,1]), dat.f[str_y][index,1], xlab = paste(str_fact), ylab = "Power Output",
        main = paste("Box Plot of Power Output By (", str_fact, ") when (", str_con, ")", sep = ""))
    # Add an average line to the box plot
    # y.ave <- ave(dat.f[str_y][index,1])
    # lines(c(1, 12), c(y.ave, y.ave), lty = 2, col = "red", lwd = 3)
    # legend("bottomleft", inset = .02, legend = "Average", col = "red", lty = 2, lwd = 3)    
}
plotBoxPlot(dat=dat.f, index=dat.f$year == 2016, str_fact="month", str_y="power", str_con="year 2016")
```

```{r, fig.height = 6, fig.width = 11}
plotBoxPlot(dat=dat.f, index=dat.f$year == 2017, str_fact="month", str_y="power", str_con="year 2017")
```

```{r, fig.height = 6, fig.width = 11}
plotBoxPlot(dat=dat.f, str_fact="hour", str_y="power")
```

```{r, fig.height = 6, fig.width = 6}
num_day <- length(dat.f[(dat.f$hour == 2), ]$power)
mat_power.h <- matrix(nrow = num_day, ncol = 24)
for (i in 1: 24){
    # print(length(dat.f[(dat.f$hour == i-1), ]$power))
    mat_power.h[ , i] <- dat.f[(dat.f$hour == i-1), ]$power
}; rm(i)
# Using function heatmap.2 in library gplots to visualize the correlation matrix
heatmap.2(cor(mat_power.h[ , 1: 24], use = "complete.obs"), dendrogram = "none", trace="none", Rowv = F, Colv = F)
```

# Assumptions

- the wind turbine will always towards the direction with max wind speed
- directly related to the 3rd power of wind speed
- optimal representative wind speed is linear combination of max wind speed in different height

## Regressional Model

```{r}
calNominalWindSpeed <- function(vec_windSpeed_1, vec_windSpeed_2, vec_windSpeed_3, vec_windSpeed_4, alpha = 0.5){
    vec_windSpeed.max_10 <- sqrt(vec_windSpeed_1^2 + vec_windSpeed_2^2)
    vec_windSpeed.max_100 <- sqrt(vec_windSpeed_3^2 + vec_windSpeed_4^2)
    vec_windSpeed.nominal <- alpha * vec_windSpeed.max_10 + (1 - alpha) * vec_windSpeed.max_100
    return(vec_windSpeed.nominal)
}
dat.f["speed"] <- calNominalWindSpeed(dat.f$u10, dat.f$v10, dat.f$u100, dat.f$v100, 0.6)
```

```{r,  fig.height = 6, fig.width = 11}
mod_regress <- glm(dat.f$power ~ I(dat.f$speed^2) + I(dat.f$speed^3))
plot(dat.f$speed, dat.f$power, xlab = 'Wind Speed', ylab = 'Normalized Power Production')
```

```{r,  fig.height = 6, fig.width = 11}
mod_regress <- glm(dat.f$power ~ dat.f$speed + I(dat.f$speed^2) + I(dat.f$speed^3))
plot(dat.f$speed, dat.f$power, xlab = 'Wind Speed', ylab = 'Normalized Power Production')
lines(seq(0, 25, by = 0.1), mod_regress$coefficients[1] + 
          mod_regress$coefficients[2] * seq(0, 25, by = 0.1) +
          mod_regress$coefficients[3] * seq(0, 25, by = 0.1)^2 + 
          mod_regress$coefficients[4] * seq(0, 25, by = 0.1)^3, col = "red")
```

```{r,  fig.height = 6, fig.width = 11}
mod_regress <- glm(dat.f$power ~ dat.f$speed +3 I(dat.f$speed^3))
plot(dat.f$speed, dat.f$power, xlab = 'Wind Speed', ylab = 'Normalized Power Production')
lines(seq(0, 25, by = 0.1), mod_regress$coefficients[1] + 
          mod_regress$coefficients[2] * seq(0, 25, by = 0.1) +
          mod_regress$coefficients[3] * seq(0, 25, by = 0.1)^3, col = "red")
```

```{r, echo = FALSE}
# Function to plot the prediction result of regression model based on wind speed.
predictRegression.windSpeed <- function(x1.pred, x2, y, modelRegression){
    dataFramePredict <- data.frame(x1 = x1.pred, x2 = rep(mean(x2), length(x1.pred)))
    valuePred <- predict.lm(modelRegression, dataFramePredict, se.fit = TRUE)
    intervalPred <- predict.lm(modelRegression, newdata = dataFramePredict, interval = "pred", level = 0.95)
    intervalConf <- predict.lm(modelRegression, newdata = dataFramePredict, interval = "conf", level = 0.95)
    plot(dataFramePredict$x1, valuePred$fit, pch = 20, ylim = c(min(y, intervalPred), max(y, intervalPred)), 
         xlim = c(min(x1, dataFramePredict$x1), max(x1, dataFramePredict$x1)), 
         xlab = 'Wind Speed', ylab = 'Normalized Power Production')
    matlines(dataFramePredict$x1, intervalPred, lty = c(1, 2, 2), col = c("black", "blue", "blue"), lwd = 2)
    points(x1, y)  # Transformed observations
    lines(dataFramePredict$x1, intervalConf[, 2], lty = 3, col = "red", lwd = 2)
    lines(dataFramePredict$x1, intervalConf[, 3], lty = 3, col = "red", lwd = 2)
    legend("bottomright", inset = .02, legend = c("Regression Curve", "Prediction Interval", "Confidence Intercal"), 
           col = c("black", "blue", "red"), lty = c(1, 2, 3), lwd = c(2, 2, 2), cex = 0.8)
}
```

# ARIMA Model

```{r}
# Plot the time series / residuals and its ACF, PACF, and Ljung-Box plot
plotTimeSeriesResidual <- function(dat, num_lag = 20, str_name = "Model", ...){
    # If the input dat is arima model, just take its residuals are the dat.
    if(class(dat) == "Arima")
        dat <- dat$residuals
    par(mfrow = c(4, 1), mgp = c(2, 0.7, 0), mar = c(1, 1, 1, 1), oma = c(2, 2, 4, 2), 
        cex.lab = 0.8, cex.axis = 0.8)
    plot(dat, type = "l", bty = "n")
    title(main = paste("TS, ACF, PACF and p Values for Ljung-Box Statistic of", str_name), 
          cex.main = 1.5, line = 1, outer = TRUE)
    # ACF and PACF
    acf(dat, lag.max = num_lag, xaxt = "n", bty = "n", na.action = na.pass)
    axis(1, at = seq(0, num_lag, by = 1), las = 0, outer = FALSE)
    pacf(dat, lag.max = num_lag, xaxt = "n", bty = "n", na.action = na.pass)
    axis(1, at = seq(1, num_lag, by = 1), las = 0, outer = FALSE)
    # Ljung-Box Plot
    value_p <- sapply(1: num_lag, function(i) Box.test(dat, i, type = "Ljung-Box")$p.value)
    plot(1L: num_lag, value_p, xlab = "lag", ylab = "p value", ylim = c(0, 1), bty = "n", xaxt = "n")
    axis(1, at = seq(1, num_lag, by = 1), las = 0, outer = FALSE)
    abline(h = 0.05, lty = 2, col = "blue")
}
```

```{r, fig.height = 11, fig.width = 11}
# ts_power <- ts(dat.f$power, frequency = 24)
plotTimeSeriesResidual(dat.f$power, num_lag = 48)
```

```{r, fig.height = 11, fig.width = 11}
mod_arima_1 <- arima(dat.f$power, order = c(0, 1, 0), include.mean = F)
plotTimeSeriesResidual(mod_arima_1$residuals, num_lag = 48)
```

```{r, fig.height = 11, fig.width = 11}
mod_arima_2 <- arima(mod_arima_1$residuals, order = c(0, 0, 1), seasonal = list(order = c(0, 0, 1), period = 48), 
                     include.mean = F)
plotTimeSeriesResidual(mod_arima_2$residuals, num_lag = 48)
# acf(mod_arima_2$residuals, lag.max = 48, na.action = na.pass)
```

```{r, fig.height = 11, fig.width = 11}
# ts_power <- ts(dat.f$power, frequency = 24)
plotTimeSeriesResidual(dat.f$speed, num_lag = 48)
```

```{r, fig.height = 11, fig.width = 11}
mod_arima_3 <- arima(dat.f$speed, order = c(0, 1, 0), include.mean = F)
plotTimeSeriesResidual(mod_arima_3$residuals, num_lag = 48)
```

```{r, fig.height = 11, fig.width = 11}
mod_arima_4 <- arima(mod_arima_3$residuals, order = c(3, 0, 0), include.mean = F)
plotTimeSeriesResidual(mod_arima_4$residuals, num_lag = 48)
```

```{r, fig.height = 11, fig.width = 11}
ts_speed <- ts(dat.f$speed, frequency = 24)
mod_arima_5 <- auto.arima(ts_speed)
plotTimeSeriesResidual(mod_arima_5$residuals, num_lag = 48)
```

```{r, fig.height = 6, fig.width = 11}
ccf(dat.f$speed, dat.f$power, na.action = na.pass)
```

```{r, fig.height = 6, fig.width = 11}
ts_power <- ts(dat.f$power, frequency = 24)
y <- residuals(Arima(ts_power, model = mod_arima_5))
ccf(mod_arima_5$residuals, y, na.action = na.pass)
```

```{r, fig.height = 11, fig.width = 11}
ts_speed <- ts(dat.f$speed[dat.f$month == 1], frequency = 24)
mod_arima_6 <- arima(ts_speed, order = c(0, 0, 0), seasonal = list(order = c(0, 0, 1), period = 48))
plotTimeSeriesResidual(mod_arima_6$residuals, num_lag = 48)
mod_arima_7 <- auto.arima(mod_arima_6$residuals)
plotTimeSeriesResidual(mod_arima_7$residuals, num_lag = 48)
```

```{r, fig.height = 6, fig.width = 11}
ts_power <- ts(dat.f$power[dat.f$month == 1], frequency = 24)
y <- residuals(Arima(ts_power, model = mod_arima_7))
ccf(mod_arima_7$residuals, y, na.action = na.pass)
```

```{r, fig.height = 11, fig.width = 11}
ts_speed.3 <- ts(dat.f$speed[dat.f$month == 1]^3, frequency = 24)
mod_arima_9 <- auto.arima(ts_speed.3)
plotTimeSeriesResidual(mod_arima_9$residuals, num_lag = 48)
```

```{r, fig.height = 6, fig.width = 11}
ts_power <- ts(dat.f$power[dat.f$month == 1], frequency = 24)
y <- residuals(Arima(ts_power, model = mod_arima_9))
ccf(mod_arima_9$residuals, y, na.action = na.pass)
```

```{r, fig.height = 6, fig.width = 11}
ccf(dat.f$u10, dat.f$v10)
ccf(dat.f$u10, dat.f$u100)
```

# Ridge Regression

```{r}
calRidgeRegress <- function(mat_x, vec_y, lambda){
    n <- length(mat_x[1,])
    mat_x[is.na(mat_x)] <- 0
    vec_y[is.na(vec_y)] <- 0
    vec_theta <- solve(t(mat_x) %*% mat_x + lambda * diag(n)) %*% t(mat_x) %*% vec_y
    return(vec_theta)
}
mat_x <- cbind(dat.f$u10[2: 26304], dat.f$v10[2: 26304], dat.f$u100[2: 26304], dat.f$v100[2: 26304],
               dat.f$u10[1: 26303], dat.f$v10[1: 26303], dat.f$u100[1: 26303], dat.f$v100[1: 26303])
vec_theta <- calRidgeRegress(mat_x, dat.f$power[2: 26304], 0.1)
```

```{r, warning=FALSE}
frameData <- function(){
    dat <- read.csv("WeatherForecastInput1.csv", header = T)
    vec_tStamp_raw <- ymd_hm(dat[1: 24, 1], "%Y%m%d %H:%M", tz="UTC")
    vec_tStamp <- vec_tStamp_raw[1: 24]
    dat.f <- data.frame("series" = seq(1, 24), "year" = year(vec_tStamp), "month" = month(vec_tStamp), 
                        "day" = day(vec_tStamp), "hour" = hour(vec_tStamp), "u10" = dat[, 2], "v10" = dat[, 3], 
                        "u100" = dat[, 4], "v100" = dat[, 5])
    return(dat.f)
}
dat.f_pred <- frameData()
rm(frameData)
```

```{r}
predPowerOutput <- function(mat_x, vec_theta_hat){
    vec_y_hat <- mat_x %*% vec_theta_hat
    return(vec_y_hat)
}
mat_x_pred <- cbind(dat.f_pred$u10, dat.f_pred$v10, dat.f_pred$u100, dat.f_pred$v100, 
    c(dat.f[26304,]$u10, dat.f_pred$u10[1: 23]), c(dat.f[26304,]$v10, dat.f_pred$v10[1: 23]), 
    c(dat.f[26304,]$u100, dat.f_pred$u100[1: 23]), c(dat.f[26304,]$v100, dat.f_pred$v100[1: 23]))
vec_y_hat <- predPowerOutput(mat_x_pred, vec_theta)
```

```{r}
vec_x <- 0.5 * sqrt(dat.f_pred$u10^2 +  dat.f_pred$v10^2) + 0.5 * sqrt(dat.f_pred$u100^2 + dat.f_pred$v100^2)
plot(vec_x, vec_y_hat, xlim = c(0, 20), ylim = c(0, 1))
```

