##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'xgboost'
## The following object is masked from 'package:dplyr':
##
## slice
# Fill the gaps in data collection so the dataset has one row per day for each
# buoy; 'lat' and 'lon' are passed as static features of each group.
data <- forecastML::fill_gaps(
  data_buoy_gaps,
  date_col = 1,
  frequency = "1 day",
  groups = "buoy_id",
  static_features = c("lat", "lon")
)

# Report the number of rows before and after the gaps were filled.
print(
  list(
    paste0(
      "The original dataset with gaps in data collection is ",
      nrow(data_buoy_gaps),
      " rows."
    ),
    paste0(
      "The modified dataset with no gaps in data collection from fill_gaps() is ",
      nrow(data),
      " rows."
    )
  )
)
## [[1]]
## [1] "The original dataset with gaps in data collection is 23646 rows."
##
## [[2]]
## [1] "The modified dataset with no gaps in data collection from fill_gaps() is 31225 rows."
## Warning: Removed 848 rows containing missing values (geom_path).

# Settings for building the lagged model-training dataset.
outcome_col <- 1 # The column position of our 'wind_spd' outcome.
horizons <- c(1, 7, 30) # Forecast 1, 1:7, and 1:30 days into the future.
lookback <- c(1:30, 360:370) # Features from 1 to 30 days in the past and annually.

# Save the dates before removing the column: the order of these two lines matters.
dates <- data$date # Grouped time series forecasting requires dates.
data$date <- NULL # Dates, however, don't need to be in the input data.

frequency <- "1 day" # A string that works in base::seq(..., by = "frequency").
dynamic_features <- c("day", "year") # Features that change through time but which we will not lag.
groups <- "buoy_id" # 1 forecast for each group or buoy.
static_features <- c("lat", "lon") # Features that do not change through time.
type <- "train" # Create a model-training dataset.

data_train <- forecastML::create_lagged_df(
  data,
  type = type,
  outcome_col = outcome_col,
  horizons = horizons,
  lookback = lookback,
  dates = dates,
  frequency = frequency,
  dynamic_features = dynamic_features,
  groups = groups,
  static_features = static_features,
  use_future = FALSE
)

# class() returns one string per class, so collapse the vector into a single
# string; pasting it directly would repeat the whole sentence once per class.
print(
  paste0(
    "The class of `data_train` is ",
    paste(class(data_train), collapse = ", ")
  )
)
## [1] "The class of `data_train` is grouped_lagged_df, lagged_df, list"

## Warning: Removed 4977 rows containing missing values (geom_path).
## Warning: Removed 11872 rows containing missing values (geom_point).

## Warning in min(data_plot[, 1], na.rm = TRUE): no non-missing arguments to
## min; returning Inf
## Warning in max(data_plot[, 1], na.rm = TRUE): no non-missing arguments to
## max; returning -Inf
## Warning in min(data_plot[, 1], na.rm = TRUE): no non-missing arguments to
## min; returning Inf
## Warning: Factor `ggplot_color_group` contains implicit NA, consider using
## `forcats::fct_explicit_na`
## Warning: Removed 3 rows containing missing values (geom_label).
