\[Y=β_0+β_1X+ε\]
library(TSstudio)
## Warning: package 'TSstudio' was built under R version 4.1.1
data("USgas")
ts_plot(USgas,
title = "US Monthly Natural Gas consumption",
Ytitle = "Billion Cubic Feet",
Xtitle = "Year")
ts_info(USgas)
## The USgas series is a ts object with 1 variable and 238 observations
## Frequency: 12
## Start time: 2000 1
## End time: 2019 10
ts_decompose(USgas)
USgas_df <- ts_to_prophet(USgas)
head(USgas_df)
## ds y
## 1 2000-01-01 2510.5
## 2 2000-02-01 2330.7
## 3 2000-03-01 2050.6
## 4 2000-04-01 1783.3
## 5 2000-05-01 1632.9
## 6 2000-06-01 1513.1
USgas_df$trend <- 1:nrow(USgas_df)
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.1.1
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
USgas_df$seasonal <- factor(month(USgas_df$ds, label = T), ordered = FALSE)
head(USgas_df)
## ds y trend seasonal
## 1 2000-01-01 2510.5 1 ene
## 2 2000-02-01 2330.7 2 feb
## 3 2000-03-01 2050.6 3 mar
## 4 2000-04-01 1783.3 4 abr
## 5 2000-05-01 1632.9 5 may
## 6 2000-06-01 1513.1 6 jun
h <- 12 # setting a testing partition length
train <- USgas_df[1:(nrow(USgas_df) - h), ]
test <- USgas_df[(nrow(USgas_df) - h + 1):nrow(USgas_df), ]
md_trend <- lm(y ~ trend, data = train)
summary(md_trend)
##
## Call:
## lm(formula = y ~ trend, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -547.2 -307.4 -153.2 333.1 1052.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1751.0074 52.6435 33.26 < 2e-16 ***
## trend 2.4489 0.4021 6.09 4.86e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 394.4 on 224 degrees of freedom
## Multiple R-squared: 0.1421, Adjusted R-squared: 0.1382
## F-statistic: 37.09 on 1 and 224 DF, p-value: 4.861e-09
train$yhat <- predict(md_trend, newdata = train)
test$yhat <- predict(md_trend, newdata = test)
library(plotly)
## Warning: package 'plotly' was built under R version 4.1.1
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
plot_lm <- function(data, train, test, title = NULL){
p <- plot_ly(data = data,
x = ~ ds,
y = ~ y,
type = "scatter",
mode = "line",
name = "Actual") %>%
add_lines(x = ~ train$ds,
y = ~ train$yhat,
line = list(color = "red"),
name = "Fitted") %>%
add_lines(x = ~ test$ds,
y = ~ test$yhat,
line = list(color = "green", dash = "dot", width = 3),
name = "Forecasted") %>%
layout(title = title,
xaxis = list(title = ""),
yaxis = list(title = "Billion Cubic Feet"),
legend = list(x = 0.05, y = 0.95))
return(p)
}
plot_lm(data = USgas_df,
train = train,
test = test,
title = "Predicting the Trend Component of the Series")
mape_trend <- c(mean(abs(train$y - train$yhat) / train$y),
mean(abs(test$y - test$yhat) / test$y))
mape_trend
## [1] 0.1644088 0.1299951
md_seasonal <- lm(y ~ seasonal, data = train)
summary(md_seasonal)
##
## Call:
## lm(formula = y ~ seasonal, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -608.98 -162.34 -50.77 148.40 566.89
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2774.28 49.75 55.759 < 2e-16 ***
## seasonalfeb -297.92 70.36 -4.234 3.41e-05 ***
## seasonalmar -479.10 70.36 -6.809 9.77e-11 ***
## seasonalabr -905.28 70.36 -12.866 < 2e-16 ***
## seasonalmay -1088.42 70.36 -15.468 < 2e-16 ***
## seasonaljun -1105.49 70.36 -15.711 < 2e-16 ***
## seasonaljul -939.35 70.36 -13.350 < 2e-16 ***
## seasonalago -914.12 70.36 -12.991 < 2e-16 ***
## seasonalsept -1114.74 70.36 -15.843 < 2e-16 ***
## seasonaloct -1022.21 70.36 -14.527 < 2e-16 ***
## seasonalnov -797.53 71.33 -11.180 < 2e-16 ***
## seasonaldic -256.67 71.33 -3.598 0.000398 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 216.9 on 214 degrees of freedom
## Multiple R-squared: 0.7521, Adjusted R-squared: 0.7394
## F-statistic: 59.04 on 11 and 214 DF, p-value: < 2.2e-16
train$yhat <- predict(md_seasonal, newdata = train)
test$yhat <- predict(md_seasonal, newdata = test)
plot_lm(data = USgas_df,
train = train,
test = test,
title = "Predicting the Seasonal Component of the Series")
mape_seasonal <- c(mean(abs(train$y - train$yhat) / train$y),
mean(abs(test$y - test$yhat) / test$y))
mape_seasonal
## [1] 0.08628973 0.21502100
md1 <- lm(y ~ seasonal + trend, data = train)
summary(md1)
##
## Call:
## lm(formula = y ~ seasonal + trend, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -514.73 -77.17 -17.70 85.80 336.95
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2488.8994 32.6011 76.344 < 2e-16 ***
## seasonalfeb -300.5392 41.4864 -7.244 7.84e-12 ***
## seasonalmar -484.3363 41.4870 -11.674 < 2e-16 ***
## seasonalabr -913.1334 41.4880 -22.010 < 2e-16 ***
## seasonalmay -1098.8884 41.4895 -26.486 < 2e-16 ***
## seasonaljun -1118.5855 41.4913 -26.960 < 2e-16 ***
## seasonaljul -955.0563 41.4936 -23.017 < 2e-16 ***
## seasonalago -932.4482 41.4962 -22.471 < 2e-16 ***
## seasonalsept -1135.6874 41.4993 -27.366 < 2e-16 ***
## seasonaloct -1045.7687 41.5028 -25.198 < 2e-16 ***
## seasonalnov -808.0016 42.0617 -19.210 < 2e-16 ***
## seasonaldic -269.7642 42.0635 -6.413 9.05e-10 ***
## trend 2.6182 0.1305 20.065 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 127.9 on 213 degrees of freedom
## Multiple R-squared: 0.9142, Adjusted R-squared: 0.9094
## F-statistic: 189.2 on 12 and 213 DF, p-value: < 2.2e-16
train$yhat <- predict(md1, newdata = train)
test$yhat <- predict(md1, newdata = test)
plot_lm(data = USgas_df,
train = train,
test = test,
title = "Predicting the Seasonal Component of the Series")
mape_md1 <- c(mean(abs(train$y - train$yhat) / train$y),
mean(abs(test$y - test$yhat) / test$y))
mape_md1
## [1] 0.04774945 0.09143290
md2 <- lm(y ~ seasonal + trend + I(trend^2), data = train)
summary(md2)
##
## Call:
## lm(formula = y ~ seasonal + trend + I(trend^2), data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -468.47 -54.66 -2.21 63.11 294.32
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.635e+03 3.224e+01 81.738 < 2e-16 ***
## seasonalfeb -3.004e+02 3.540e+01 -8.487 3.69e-15 ***
## seasonalmar -4.841e+02 3.540e+01 -13.676 < 2e-16 ***
## seasonalabr -9.128e+02 3.540e+01 -25.787 < 2e-16 ***
## seasonalmay -1.099e+03 3.540e+01 -31.033 < 2e-16 ***
## seasonaljun -1.118e+03 3.540e+01 -31.588 < 2e-16 ***
## seasonaljul -9.547e+02 3.540e+01 -26.968 < 2e-16 ***
## seasonalago -9.322e+02 3.541e+01 -26.329 < 2e-16 ***
## seasonalsept -1.136e+03 3.541e+01 -32.070 < 2e-16 ***
## seasonaloct -1.046e+03 3.541e+01 -29.532 < 2e-16 ***
## seasonalnov -8.001e+02 3.590e+01 -22.286 < 2e-16 ***
## seasonaldic -2.618e+02 3.590e+01 -7.293 5.95e-12 ***
## trend -1.270e+00 4.472e-01 -2.840 0.00494 **
## I(trend^2) 1.713e-02 1.908e-03 8.977 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 109.1 on 212 degrees of freedom
## Multiple R-squared: 0.9379, Adjusted R-squared: 0.9341
## F-statistic: 246.1 on 13 and 212 DF, p-value: < 2.2e-16
train$yhat <- predict(md2, newdata = train)
test$yhat <- predict(md2, newdata = test)
plot_lm(data = USgas_df,
train = train,
test = test,
title = "Predicting the Trend (Polynomial) and Seasonal Components
of the Series")
mape_md2 <- c(mean(abs(train$y - train$yhat) / train$y),
mean(abs(test$y - test$yhat) / test$y))
mape_md2
## [1] 0.03688770 0.04212618
USgas_split <- ts_split(USgas, sample.out = h)
train.ts <- USgas_split$train
test.ts <- USgas_split$test
library(forecast)
## Warning: package 'forecast' was built under R version 4.1.1
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
md3 <- tslm(train.ts ~ season + trend + I(trend^2))
summary(md3)
##
## Call:
## tslm(formula = train.ts ~ season + trend + I(trend^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -468.47 -54.66 -2.21 63.11 294.32
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.635e+03 3.224e+01 81.738 < 2e-16 ***
## season2 -3.004e+02 3.540e+01 -8.487 3.69e-15 ***
## season3 -4.841e+02 3.540e+01 -13.676 < 2e-16 ***
## season4 -9.128e+02 3.540e+01 -25.787 < 2e-16 ***
## season5 -1.099e+03 3.540e+01 -31.033 < 2e-16 ***
## season6 -1.118e+03 3.540e+01 -31.588 < 2e-16 ***
## season7 -9.547e+02 3.540e+01 -26.968 < 2e-16 ***
## season8 -9.322e+02 3.541e+01 -26.329 < 2e-16 ***
## season9 -1.136e+03 3.541e+01 -32.070 < 2e-16 ***
## season10 -1.046e+03 3.541e+01 -29.532 < 2e-16 ***
## season11 -8.001e+02 3.590e+01 -22.286 < 2e-16 ***
## season12 -2.618e+02 3.590e+01 -7.293 5.95e-12 ***
## trend -1.270e+00 4.472e-01 -2.840 0.00494 **
## I(trend^2) 1.713e-02 1.908e-03 8.977 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 109.1 on 212 degrees of freedom
## Multiple R-squared: 0.9379, Adjusted R-squared: 0.9341
## F-statistic: 246.1 on 13 and 212 DF, p-value: < 2.2e-16
r <- which(USgas_df$ds == as.Date("2014-01-01"))
USgas_df$s_break <- ifelse(year(USgas_df$ds) >= 2010, 1, 0)
USgas_df$s_break[r] <- 1
md3 <- tslm(USgas ~ season + trend + I(trend^2) + s_break, data = USgas_df)
summary(md3)
##
## Call:
## tslm(formula = USgas ~ season + trend + I(trend^2) + s_break,
## data = USgas_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -469.25 -50.68 -2.66 63.63 275.89
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.661e+03 3.200e+01 83.164 < 2e-16 ***
## season2 -3.054e+02 3.448e+01 -8.858 2.61e-16 ***
## season3 -4.849e+02 3.448e+01 -14.062 < 2e-16 ***
## season4 -9.272e+02 3.449e+01 -26.885 < 2e-16 ***
## season5 -1.108e+03 3.449e+01 -32.114 < 2e-16 ***
## season6 -1.127e+03 3.450e+01 -32.660 < 2e-16 ***
## season7 -9.568e+02 3.450e+01 -27.730 < 2e-16 ***
## season8 -9.340e+02 3.451e+01 -27.061 < 2e-16 ***
## season9 -1.138e+03 3.452e+01 -32.972 < 2e-16 ***
## season10 -1.040e+03 3.453e+01 -30.122 < 2e-16 ***
## season11 -7.896e+02 3.497e+01 -22.577 < 2e-16 ***
## season12 -2.649e+02 3.498e+01 -7.571 9.72e-13 ***
## trend -1.928e+00 4.479e-01 -4.304 2.51e-05 ***
## I(trend^2) 1.862e-02 1.676e-03 11.113 < 2e-16 ***
## s_break 6.060e+01 2.836e+01 2.137 0.0337 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 109 on 223 degrees of freedom
## Multiple R-squared: 0.9423, Adjusted R-squared: 0.9387
## F-statistic: 260.3 on 14 and 223 DF, p-value: < 2.2e-16
library(UKgrid)
## Warning: package 'UKgrid' was built under R version 4.1.1
UKdaily <- extract_grid(type = "data.frame",
columns = "ND",
aggregate = "daily")
head(UKdaily)
## TIMESTAMP ND
## 1 2005-04-01 1920069
## 2 2005-04-02 1674699
## 3 2005-04-03 1631352
## 4 2005-04-04 1916693
## 5 2005-04-05 1952082
## 6 2005-04-06 1964584
ts_plot(UKdaily,
title = "The UK National Demand for Electricity",
Ytitle = "MW",
Xtitle = "Year")
ts_heatmap(UKdaily[which(year(UKdaily$TIMESTAMP) >= 2016),],
title = "UK the Daily National Grid Demand Heatmap")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
UKdaily <- UKdaily %>%
mutate(wday = wday(TIMESTAMP, label = TRUE),
month = month(TIMESTAMP, label = TRUE),
lag365 = dplyr::lag(ND, 365)) %>%
filter(!is.na(lag365)) %>%
arrange(TIMESTAMP)
str(UKdaily)
## 'data.frame': 4939 obs. of 5 variables:
## $ TIMESTAMP: Date, format: "2006-04-01" "2006-04-02" ...
## $ ND : int 1718405 1691341 1960325 2023886 2026204 2008422 1981175 1770440 1749715 2012865 ...
## $ wday : Ord.factor w/ 7 levels "dom\\."<"lun\\."<..: 7 1 2 3 4 5 6 7 1 2 ...
## $ month : Ord.factor w/ 12 levels "ene"<"feb"<"mar"<..: 4 4 4 4 4 4 4 4 4 4 ...
## $ lag365 : int 1920069 1674699 1631352 1916693 1952082 1964584 1990895 2003982 1811436 1684720 ...
start_date <- min(UKdaily$TIMESTAMP)
start <- c(year(start_date), yday(start_date))
UK_ts <- ts(UKdaily$ND,
start = start,
frequency = 365)
library(TSstudio)
data(UK_ts)
## Warning in data(UK_ts): data set 'UK_ts' not found
acf(UK_ts, lag.max = 365 * 4)
h <- 365
UKpartitions <- ts_split(UK_ts, sample.out = h)
train_ts <- UKpartitions$train
test_ts <- UKpartitions$test
train_df <- UKdaily[1:(nrow(UKdaily) - h), ]
test_df <- UKdaily[(nrow(UKdaily) - h + 1):nrow(UKdaily), ]
md_tslm1 <- tslm(train_ts ~ season + trend)
fc_tslm1 <- forecast(md_tslm1, h = h)
test_forecast(actual = UK_ts,
forecast.obj = fc_tslm1,
test = test_ts)
accuracy(fc_tslm1, test_ts)
## ME RMSE MAE MPE MAPE MASE
## Training set -9.030002e-12 121551.4 100439.64 -0.5721337 6.294111 0.8418677
## Test set -1.740215e+04 123156.6 96785.27 -1.8735336 7.160573 0.8112374
## ACF1 Theil's U
## Training set 0.5277884 NA
## Test set 0.5106681 1.071899
md_tslm2 <- tslm(train_ts ~ season + trend + wday + month,
data = train_df)
accuracy(fc_tslm2, test_ts)
md_tslm3 <- tslm(train_ts ~ season + trend + wday + month + lag365,
data = train_df)
fc_tslm3 <- forecast(md_tslm3, h = h, newdata = test_df)
test_forecast(actual = UK_ts,
forecast.obj = fc_tslm3,
test = test_ts)
accuracy(fc_tslm3, test_ts)
## ME RMSE MAE MPE MAPE MASE
## Training set -1.175109e-11 69904.92 52006.75 -0.1717563 3.163385 0.4359116
## Test set -1.754061e+04 81783.55 65957.66 -1.3613252 4.722083 0.5528457
## ACF1 Theil's U
## Training set 0.7500146 NA
## Test set 0.6094666 0.6925767
summary(md_tslm3)$coefficients %>% tail(1)
## Estimate Std. Error t value Pr(>|t|)
## lag365 -0.06038328 0.01545495 -3.907051 9.490321e-05
anova(md_tslm3)
## Analysis of Variance Table
##
## Response: train_ts
## Df Sum Sq Mean Sq F value Pr(>F)
## season 364 1.4279e+14 3.9227e+11 73.5348 < 2.2e-16 ***
## trend 1 7.2634e+13 7.2634e+13 13615.7078 < 2.2e-16 ***
## wday 6 4.5009e+13 7.5016e+12 1406.2214 < 2.2e-16 ***
## month 11 1.3721e+11 1.2473e+10 2.3382 0.007266 **
## lag365 1 8.1432e+10 8.1432e+10 15.2650 9.49e-05 ***
## Residuals 4190 2.2352e+13 5.3345e+09
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final_md <- tslm(UK_ts ~ season + trend + wday + month + lag365,
data = UKdaily)
checkresiduals(final_md)
##
## Breusch-Godfrey test for serial correlation of order up to 730
##
## data: Residuals from Linear regression model
## LM test = 3301.1, df = 730, p-value < 2.2e-16
UK_fc_df <- data.frame(date = seq.Date(from = max(UKdaily$TIMESTAMP) +
days(1),
by = "day",
length.out = h))
UK_fc_df$wday <- factor(wday(UK_fc_df$date, label = TRUE), ordered = FALSE)
UK_fc_df$month <- factor(month(UK_fc_df$date, label = TRUE), ordered =
FALSE)
UK_fc_df$lag365 <- tail(UKdaily$ND, h)
UKgrid_fc <- forecast(final_md, h = h, newdata = UK_fc_df)
plot_forecast(UKgrid_fc,
title = "The UK National Demand for Electricity Forecast",
Ytitle = "MW",
Xtitle = "Year")
df<- read.csv("C:\\Users\\USUARIO\\Downloads\\database.csv")
library(EDAWR)
##
## Attaching package: 'EDAWR'
## The following object is masked from 'package:dplyr':
##
## storms
data <- dplyr::select(df, Date, Magnitude)
c <- as.Date(data$Date,format="%d/%m/%Y")
library(zoo)
## Warning: package 'zoo' was built under R version 4.1.1
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
dt <- ts(data$Magnitude, start = 1965, end = 2016, freq = 12)
library(TSstudio)
data(dt)
## Warning in data(dt): data set 'dt' not found
ts_plot(dt,
title = "Earthquakes",
Ytitle = "Magnitude",
Xtitle = "Date")
ts_info(dt)
## The dt series is a ts object with 1 variable and 613 observations
## Frequency: 12
## Start time: 1965 1
## End time: 2016 1
ts_decompose(dt)
dt_df <- ts_to_prophet(dt)
head(dt_df)
## ds y
## 1 1965-01-01 6.0
## 2 1965-02-01 5.8
## 3 1965-03-01 6.2
## 4 1965-04-01 5.8
## 5 1965-05-01 5.8
## 6 1965-06-01 6.7
dt_df$trend <- 1:nrow(dt_df)
library(lubridate)
dt_df$seasonal <- factor(months.Date(dt_df$ds), ordered = FALSE)
head(dt_df)
## ds y trend seasonal
## 1 1965-01-01 6.0 1 enero
## 2 1965-02-01 5.8 2 febrero
## 3 1965-03-01 6.2 3 marzo
## 4 1965-04-01 5.8 4 abril
## 5 1965-05-01 5.8 5 mayo
## 6 1965-06-01 6.7 6 junio
h <- 12
train <- dt_df[1:(nrow(dt_df) - h), ]
test <- dt_df[(nrow(dt_df) - h + 1):nrow(dt_df),]
md_trend <- lm(y ~ trend, data = train)
summary(md_trend)
##
## Call:
## lm(formula = y ~ trend, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5517 -0.3130 -0.1193 0.1691 2.7055
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.993e+00 3.712e-02 161.430 <2e-16 ***
## trend 9.875e-05 1.068e-04 0.924 0.356
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4545 on 599 degrees of freedom
## Multiple R-squared: 0.001424, Adjusted R-squared: -0.0002432
## F-statistic: 0.8541 on 1 and 599 DF, p-value: 0.3558
train$yhat <- predict(md_trend, newdata = train)
test$yhat <- predict(md_trend, newdata = test)
library(plotly)
plot_lm <- function(data, train, test, title = NULL){
p <- plot_ly(data = data,
x = ~ ds,
y = ~ y,
type = "scatter",
mode = "line",
name = "Actual") %>%
add_lines(x = ~ train$ds,
y = ~ train$yhat,
line = list(color = "red"),
name = "Fitted") %>%
add_lines(x = ~ test$ds,
y = ~ test$yhat,
line = list(color = "green", dash = "dot", width = 3),
name = "Forecasted") %>%
layout(title = title,
xaxis = list(title = "Date"),
yaxis = list(title = "Magnitude"),
legend = list(x = 0.05, y = 0.95))
return(p)
}
plot_lm(data = dt_df,
train = train,
test = test,
title = "Earthquakes")
mape_trend <- c(mean(abs(train$y - train$yhat) / train$y),
mean(abs(test$y - test$yhat) / test$y))
mape_trend
## [1] 0.05242448 0.06844580
md_seasonal <- lm(y ~ seasonal, data = train)
summary(md_seasonal)
##
## Call:
## lm(formula = y ~ seasonal, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.590 -0.316 -0.123 0.160 2.610
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.07000 0.06458 93.988 <2e-16 ***
## seasonalagosto -0.05600 0.09133 -0.613 0.540
## seasonaldiciembre -0.05400 0.09133 -0.591 0.555
## seasonalenero -0.09745 0.09088 -1.072 0.284
## seasonalfebrero -0.09160 0.09133 -1.003 0.316
## seasonaljulio -0.12800 0.09133 -1.401 0.162
## seasonaljunio 0.00800 0.09133 0.088 0.930
## seasonalmarzo -0.05200 0.09133 -0.569 0.569
## seasonalmayo 0.02000 0.09133 0.219 0.827
## seasonalnoviembre -0.03000 0.09133 -0.328 0.743
## seasonaloctubre -0.04700 0.09133 -0.515 0.607
## seasonalseptiembre -0.04000 0.09133 -0.438 0.662
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4567 on 589 degrees of freedom
## Multiple R-squared: 0.00862, Adjusted R-squared: -0.009895
## F-statistic: 0.4656 on 11 and 589 DF, p-value: 0.9243
train$yhat <- predict(md_seasonal, newdata = train)
test$yhat <- predict(md_seasonal, newdata = test)
plot_lm(data = dt_df,
train = train,
test = test,
title = "Earthquakes")
mape_seasonal <- c(mean(abs(train$y - train$yhat) / train$y),
mean(abs(test$y - test$yhat) / test$y))
mape_seasonal
## [1] 0.05245147 0.06608669
md1 <- lm(y ~ seasonal + trend, data = train)
summary(md1)
##
## Call:
## lm(formula = y ~ seasonal + trend, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6059 -0.3071 -0.1187 0.1611 2.6379
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.041e+00 7.209e-02 83.797 <2e-16 ***
## seasonalagosto -5.640e-02 9.135e-02 -0.617 0.537
## seasonaldiciembre -5.479e-02 9.135e-02 -0.600 0.549
## seasonalenero -9.775e-02 9.090e-02 -1.075 0.283
## seasonalfebrero -9.140e-02 9.135e-02 -1.001 0.317
## seasonaljulio -1.283e-01 9.135e-02 -1.405 0.161
## seasonaljunio 7.802e-03 9.135e-02 0.085 0.932
## seasonalmarzo -5.190e-02 9.135e-02 -0.568 0.570
## seasonalmayo 1.990e-02 9.135e-02 0.218 0.828
## seasonalnoviembre -3.069e-02 9.135e-02 -0.336 0.737
## seasonaloctubre -4.759e-02 9.135e-02 -0.521 0.603
## seasonalseptiembre -4.049e-02 9.135e-02 -0.443 0.658
## trend 9.892e-05 1.074e-04 0.921 0.357
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4567 on 588 degrees of freedom
## Multiple R-squared: 0.01005, Adjusted R-squared: -0.01015
## F-statistic: 0.4974 on 12 and 588 DF, p-value: 0.9167
train$yhat <- predict(md1, newdata = train)
test$yhat <- predict(md1, newdata = test)
plot_lm(data = dt_df,
train = train,
test = test,
title = "Predicting the Trend and Seasonal Components of the
Series")
mape_md1 <- c(mean(abs(train$y - train$yhat) / train$y),
mean(abs(test$y - test$yhat) / test$y))
mape_md1
## [1] 0.05228441 0.06808885
md2 <- lm(y ~ seasonal + trend + I(trend^2), data = train)
summary(md2)
##
## Call:
## lm(formula = y ~ seasonal + trend + I(trend^2), data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6106 -0.3074 -0.1171 0.1632 2.6330
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.046e+00 8.347e-02 72.439 <2e-16 ***
## seasonalagosto -5.639e-02 9.142e-02 -0.617 0.538
## seasonaldiciembre -5.479e-02 9.143e-02 -0.599 0.549
## seasonalenero -9.786e-02 9.098e-02 -1.076 0.283
## seasonalfebrero -9.140e-02 9.142e-02 -1.000 0.318
## seasonaljulio -1.283e-01 9.142e-02 -1.403 0.161
## seasonaljunio 7.803e-03 9.142e-02 0.085 0.932
## seasonalmarzo -5.190e-02 9.142e-02 -0.568 0.570
## seasonalmayo 1.990e-02 9.142e-02 0.218 0.828
## seasonalnoviembre -3.069e-02 9.142e-02 -0.336 0.737
## seasonaloctubre -4.759e-02 9.142e-02 -0.521 0.603
## seasonalseptiembre -4.049e-02 9.142e-02 -0.443 0.658
## trend 4.121e-05 4.306e-04 0.096 0.924
## I(trend^2) 9.586e-08 6.926e-07 0.138 0.890
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4571 on 587 degrees of freedom
## Multiple R-squared: 0.01008, Adjusted R-squared: -0.01184
## F-statistic: 0.4598 on 13 and 587 DF, p-value: 0.9461
train$yhat <- predict(md2, newdata = train)
test$yhat <- predict(md2, newdata = test)
plot_lm(data = dt_df,
train = train,
test = test,
title = "Predicting the Trend (Polynomial) and Seasonal Components
of the Series")
mape_md2 <- c(mean(abs(train$y - train$yhat) / train$y),
mean(abs(test$y - test$yhat) / test$y))
mape_md2
## [1] 0.05230284 0.06849585
dt_split <- ts_split(dt, sample.out = h)
train.ts <- dt_split$train
test.ts <- dt_split$test
library(forecast)
md3 <- tslm(train.ts ~ season + trend + I(trend^2))
summary(md3)
##
## Call:
## tslm(formula = train.ts ~ season + trend + I(trend^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6106 -0.3074 -0.1171 0.1632 2.6330
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.948e+00 8.268e-02 71.943 <2e-16 ***
## season2 6.459e-03 9.098e-02 0.071 0.943
## season3 4.596e-02 9.098e-02 0.505 0.614
## season4 9.786e-02 9.098e-02 1.076 0.283
## season5 1.178e-01 9.098e-02 1.294 0.196
## season6 1.057e-01 9.098e-02 1.161 0.246
## season7 -3.043e-02 9.098e-02 -0.335 0.738
## season8 4.147e-02 9.098e-02 0.456 0.649
## season9 5.737e-02 9.098e-02 0.631 0.529
## season10 5.027e-02 9.098e-02 0.553 0.581
## season11 6.717e-02 9.098e-02 0.738 0.461
## season12 4.307e-02 9.098e-02 0.473 0.636
## trend 4.121e-05 4.306e-04 0.096 0.924
## I(trend^2) 9.586e-08 6.926e-07 0.138 0.890
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4571 on 587 degrees of freedom
## Multiple R-squared: 0.01008, Adjusted R-squared: -0.01184
## F-statistic: 0.4598 on 13 and 587 DF, p-value: 0.9461
r <- which(dt_df$ds == as.Date("1965-02-01"))
dt_df$s_break <- ifelse(c(dt_df$ds) >= 2016, 1, 0)
dt_df$s_break[r] <- 1
library(forecast)
md3 <- tslm(dt ~ season + trend + I(trend^2) + s_break, data = dt_df)
summary(md3)
##
## Call:
## tslm(formula = dt ~ season + trend + I(trend^2) + s_break, data = dt_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5943 -0.3069 -0.1195 0.1672 2.6209
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.966e+00 8.350e-02 71.451 <2e-16 ***
## season2 -1.901e-02 9.026e-02 -0.211 0.833
## season3 3.556e-02 9.025e-02 0.394 0.694
## season4 7.665e-02 9.026e-02 0.849 0.396
## season5 1.119e-01 9.026e-02 1.239 0.216
## season6 7.650e-02 9.026e-02 0.848 0.397
## season7 -5.299e-02 9.026e-02 -0.587 0.557
## season8 1.346e-02 9.025e-02 0.149 0.881
## season9 2.907e-02 9.025e-02 0.322 0.747
## season10 3.193e-02 9.025e-02 0.354 0.724
## season11 3.872e-02 9.025e-02 0.429 0.668
## season12 1.707e-02 9.025e-02 0.189 0.850
## trend 4.336e-05 7.172e-04 0.060 0.952
## I(trend^2) 5.748e-08 9.667e-07 0.059 0.953
## s_break 6.683e-03 9.347e-02 0.072 0.943
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4579 on 598 degrees of freedom
## Multiple R-squared: 0.009974, Adjusted R-squared: -0.0132
## F-statistic: 0.4303 on 14 and 598 DF, p-value: 0.9651
df<- read.csv("C:\\Users\\USUARIO\\Downloads\\database.csv")
library(EDAWR)
data <- dplyr::select(df, Date, Magnitude)
c <- as.Date(data$Date,format="%d/%m/%Y")
library(zoo)
dt <- ts(data$Magnitude, start = 1965, end = 2016, freq = 12)
library(TSstudio)
data(dt)
## Warning in data(dt): data set 'dt' not found
ts_plot(dt,
title = "Earthquakes",
Ytitle = "Magnitude",
Xtitle = "Date")
library(dplyr)
UKdaily <- dt_df %>%
mutate(wday = wday(ds, label = TRUE),
month = month(ds, label = TRUE),
lag365 = dplyr::lag(y, 6)) %>%
filter(!is.na(lag365)) %>%
arrange(ds)
str(UKdaily)
## 'data.frame': 607 obs. of 8 variables:
## $ ds : Date, format: "1965-07-01" "1965-08-01" ...
## $ y : num 5.9 6 6 5.8 5.9 8.2 5.5 5.6 6 6.1 ...
## $ trend : int 7 8 9 10 11 12 13 14 15 16 ...
## $ seasonal: Factor w/ 12 levels "abril","agosto",..: 6 2 12 11 10 3 4 5 8 1 ...
## $ s_break : num 0 0 0 0 0 0 0 0 0 0 ...
## $ wday : Ord.factor w/ 7 levels "dom\\."<"lun\\."<..: 5 1 4 6 2 4 7 3 3 6 ...
## $ month : Ord.factor w/ 12 levels "ene"<"feb"<"mar"<..: 7 8 9 10 11 12 1 2 3 4 ...
## $ lag365 : num 6 5.8 6.2 5.8 5.8 6.7 5.9 6 6 5.8 ...
start_date <- min(UKdaily$ds)
start <- c(year(start_date), yday(start_date))
UK_ts <- ts(UKdaily$y, start = start, frequency = 6)
library(TSstudio)
data(UK_ts)
## Warning in data(UK_ts): data set 'UK_ts' not found
acf(UK_ts, lag.max = 6*4)
### Ahora, después de haber creado las nuevas características para el modelo y establecer el objeto ts, estamos estamos listos para dividir la serie de entrada y el objeto de características externas correspondiente que hemos creado (UKdaily) en una partición de entrenamiento y otra de prueba. Como nuestro objetivo es predecir las próximas 365 y la longitud de la serie es lo suficientemente grande (2.540 observaciones), podemos podemos permitirnos fijar la partición de prueba en la longitud del horizonte de previsión: 365 observaciones. Fijaremos h, una variable indicadora, en 365 y la utilizaremos para definir las particiones y, más adelante el horizonte de previsión:
h <- 6
UKpartitions <- ts_split(UK_ts, sample.out = h)
train_ts <- UKpartitions$train
test_ts <- UKpartitions$test
train_df <- dt_df[1:(nrow(dt_df) -h), ]
test_df <- dt_df[(nrow(dt_df) - h + 1):nrow(dt_df), ]
library(forecast)
md_tslm1 <- tslm(train_ts ~ season + trend)
fc_tslm1 <- forecast(md_tslm1,h=h)
test_forecast(actual = UK_ts,
forecast.obj = fc_tslm1,
test = test_ts)
accuracy(fc_tslm1, test_ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 7.401461e-18 0.4530068 0.3276904 -0.5031674 5.258810 0.721972
## Test set -1.214004e-01 0.5493034 0.4654311 -2.7560871 7.684636 1.025444
## ACF1 Theil's U
## Training set 0.03579129 NA
## Test set -0.10643223 0.8592929
ts_heatmap (dt , last = NULL , wday = TRUE , color = "Blues" ,
title = "Earthquakes" , padding = TRUE )
library(TSstudio)
data(dt)
## Warning in data(dt): data set 'dt' not found
acf(dt, lag.max = 365*51)
checkresiduals(dt)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.