Chapter 9 Code

# -------- Code Chank 1 --------
library(TSstudio)

data("USgas")

ts_plot(USgas,
        title = "US Monthly Natural Gas consumption",
        Ytitle = "Billion Cubic Feet",
        Xtitle = "Year")
# -------- Code Chank 2 --------
ts_info(USgas)
##  The USgas series is a ts object with 1 variable and 238 observations
##  Frequency: 12 
##  Start time: 2000 1 
##  End time: 2019 10

The USgas series is a ts object with 1 variable and 227 observations

Frequency: 12

Start time: 2000 1

End time: 2018 11

# -------- Code Chank 3 --------
ts_decompose(USgas)
# -------- Code Chank 4 --------
USgas_df <- ts_to_prophet(USgas)

ds y

1 2000-01-01 2510.5

2 2000-02-01 2330.7

3 2000-03-01 2050.6

4 2000-04-01 1783.3

5 2000-05-01 1632.9

6 2000-06-01 1513.1

# -------- Code Chank 5 --------
head(USgas_df)
##           ds      y
## 1 2000-01-01 2510.5
## 2 2000-02-01 2330.7
## 3 2000-03-01 2050.6
## 4 2000-04-01 1783.3
## 5 2000-05-01 1632.9
## 6 2000-06-01 1513.1
# -------- Code Chank 6 --------
USgas_df$trend <- 1:nrow(USgas_df)


# -------- Code Chank 7 --------
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
USgas_df$seasonal <- month(USgas_df$ds, label = T)

ds y trend seasonal

1 2000-01-01 2510.5 1 Jan

2 2000-02-01 2330.7 2 Feb

3 2000-03-01 2050.6 3 Mar

4 2000-04-01 1783.3 4 Apr

5 2000-05-01 1632.9 5 May

6 2000-06-01 1513.1 6 Jun

# -------- Code Chank 8 --------
head(USgas_df)
##           ds      y trend seasonal
## 1 2000-01-01 2510.5     1      Jan
## 2 2000-02-01 2330.7     2      Feb
## 3 2000-03-01 2050.6     3      Mar
## 4 2000-04-01 1783.3     4      Apr
## 5 2000-05-01 1632.9     5      May
## 6 2000-06-01 1513.1     6      Jun
# -------- Code Chank 9 --------
h <- 12 # setting a testing partition length

train <- USgas_df[1:(nrow(USgas_df) - h), ]

test <- USgas_df[(nrow(USgas_df) - h + 1):nrow(USgas_df), ]


# -------- Code Chank 10 --------
md_trend <- lm(y ~ trend, data = train)

summary(md_trend)
## 
## Call:
## lm(formula = y ~ trend, data = train)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -547.2 -307.4 -153.2  333.1 1052.6 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1751.0074    52.6435   33.26  < 2e-16 ***
## trend          2.4489     0.4021    6.09 4.86e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 394.4 on 224 degrees of freedom
## Multiple R-squared:  0.1421, Adjusted R-squared:  0.1382 
## F-statistic: 37.09 on 1 and 224 DF,  p-value: 4.861e-09

Call:

lm(formula = y ~ trend, data = train)

——

Residuals:

Min 1Q Median 3Q Max

-537.6 -305.3 -150.1 317.1 1067.7

Coefficients:

Estimate Std. Error t value Pr(>|t|)

(Intercept) 1772.2648 53.3781 33.202 < 0.0000000000000002 ***

trend 2.1548 0.4285 5.029 0.00000105 ***

Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1

Residual standard error: 390 on 213 degrees of freedom

Multiple R-squared: 0.1061, Adjusted R-squared: 0.1019

F-statistic: 25.29 on 1 and 213 DF, p-value: 0.000001048

# -------- Code Chank 11 --------
train$yhat <- predict(md_trend, newdata = train)

test$yhat <- predict(md_trend, newdata = test)


# -------- Code Chank 12 --------
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
plot_lm <- function(data, train, test, title = NULL){
  p <- plot_ly(data = data, 
               x = ~ ds, 
               y = ~ y, 
               type = "scatter",
               mode = "line",
               name = "Actual") %>%
    add_lines(x =  ~ train$ds,
              y = ~ train$yhat,
              line = list(color = "red"),
              name = "Fitted") %>%
    add_lines(x =  ~ test$ds,
              y = ~ test$yhat,
              line = list(color = "green", dash = "dot", width = 3),
              name = "Forecasted") %>%
    layout(title = title,
           xaxis = list(title = ""),
           yaxis = list(title = "Billion Cubic Feet"),
           legend = list(x = 0.05, y = 0.95))
  return(p)
}



# -------- Code Chank 13 --------
plot_lm(data = USgas_df, 
        train = train, 
        test = test,
        title = "Predicting the Trend Component of the Series")
# -------- Code Chank 14 --------
mape_trend <- c(mean(abs(train$y - train$yhat) / train$y),
                mean(abs(test$y - test$yhat) / test$y))

mape_trend
## [1] 0.1644088 0.1299951

[1] 0.1646270 0.1201788

# -------- Code Chank 15 --------
md_seasonal <- lm(y ~ seasonal, data = train)

summary(md_seasonal)
## 
## Call:
## lm(formula = y ~ seasonal, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -608.98 -162.34  -50.77  148.40  566.89 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2030.88      14.43 140.747  < 2e-16 ***
## seasonal.L   -480.00      50.24  -9.554  < 2e-16 ***
## seasonal.Q   1103.83      50.17  22.000  < 2e-16 ***
## seasonal.C     72.42      50.05   1.447 0.149389    
## seasonal^4    174.60      50.07   3.487 0.000592 ***
## seasonal^5    288.01      50.13   5.745 3.13e-08 ***
## seasonal^6    -44.67      50.09  -0.892 0.373460    
## seasonal^7   -187.91      49.96  -3.762 0.000218 ***
## seasonal^8     84.95      49.84   1.705 0.089706 .  
## seasonal^9     46.16      49.78   0.927 0.354828    
## seasonal^10    77.55      49.76   1.559 0.120587    
## seasonal^11   -11.09      49.75  -0.223 0.823856    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 216.9 on 214 degrees of freedom
## Multiple R-squared:  0.7521, Adjusted R-squared:  0.7394 
## F-statistic: 59.04 on 11 and 214 DF,  p-value: < 2.2e-16

Call:

lm(formula = y ~ seasonal, data = train)

Residuals:

Min 1Q Median 3Q Max

-577.1 -141.1 -41.9 130.0 462.2

Coefficients:

Estimate Std. Error t value Pr(>|t|)

(Intercept) 2742.4 45.3 60.52 < 2e-16 ***

seasonalFeb -279.4 64.1 -4.36 2.1e-05 ***

seasonalMar -474.5 64.1 -7.41 3.4e-12 ***

seasonalApr -900.2 64.1 -14.05 < 2e-16 ***

seasonalMay -1076.6 64.1 -16.80 < 2e-16 ***

seasonalJun -1095.2 64.1 -17.09 < 2e-16 ***

seasonalJul -936.3 64.1 -14.61 < 2e-16 ***

seasonalAug -906.5 64.1 -14.15 < 2e-16 ***

seasonalSep -1110.1 64.1 -17.32 < 2e-16 ***

seasonalOct -1019.3 64.1 -15.91 < 2e-16 ***

seasonalNov -766.0 64.1 -11.95 < 2e-16 ***

seasonalDec -258.1 65.0 -3.97 1.0e-04 ***

Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1

Residual standard error: 192 on 203 degrees of freedom

Multiple R-squared: 0.793, Adjusted R-squared: 0.782

F-statistic: 70.7 on 11 and 203 DF, p-value: <2e-16

# -------- Code Chank 16 --------
train$yhat <- predict(md_seasonal, newdata = train)
test$yhat <- predict(md_seasonal, newdata = test)

plot_lm(data = USgas_df, 
        train = train, 
        test = test,
        title = "Predicting the Seasonal Component of the Series")
# -------- Code Chank 17 --------
mape_seasonal <- c(mean(abs(train$y - train$yhat) / train$y),
                   mean(abs(test$y - test$yhat) / test$y))

mape_seasonal
## [1] 0.08628973 0.21502100

[1] 0.07786439 0.19906796

# -------- Code Chank 18 --------
md1 <- lm(y ~ seasonal + trend, data = train)

summary(md1)
## 
## Call:
## lm(formula = y ~ seasonal + trend, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -514.73  -77.17  -17.70   85.80  336.95 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1733.7153    17.0794 101.509  < 2e-16 ***
## seasonal.L  -498.1709    29.6354 -16.810  < 2e-16 ***
## seasonal.Q  1115.2951    29.5872  37.695  < 2e-16 ***
## seasonal.C    78.9835    29.5103   2.676  0.00802 ** 
## seasonal^4   175.6505    29.5196   5.950 1.09e-08 ***
## seasonal^5   285.0192    29.5568   9.643  < 2e-16 ***
## seasonal^6   -49.3611    29.5319  -1.671  0.09610 .  
## seasonal^7  -192.3050    29.4540  -6.529 4.77e-10 ***
## seasonal^8    81.8787    29.3835   2.787  0.00581 ** 
## seasonal^9    44.4849    29.3480   1.516  0.13106    
## seasonal^10   76.8636    29.3372   2.620  0.00943 ** 
## seasonal^11  -11.2755    29.3353  -0.384  0.70109    
## trend          2.6182     0.1305  20.065  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 127.9 on 213 degrees of freedom
## Multiple R-squared:  0.9142, Adjusted R-squared:  0.9094 
## F-statistic: 189.2 on 12 and 213 DF,  p-value: < 2.2e-16

Call:

lm(formula = y ~ seasonal + trend, data = train)

Residuals:

Min 1Q Median 3Q Max

-506.7 -71.2 -13.8 79.0 328.5

Coefficients:

Estimate Std. Error t value Pr(>|t|)

(Intercept) 2500.682 31.620 79.09 < 2e-16 ***

seasonalFeb -281.769 40.302 -6.99 3.9e-11 ***

seasonalMar -479.227 40.303 -11.89 < 2e-16 ***

seasonalApr -907.201 40.304 -22.51 < 2e-16 ***

seasonalMay -1085.948 40.305 -26.94 < 2e-16 ***

seasonalJun -1106.933 40.307 -27.46 < 2e-16 ***

seasonalJul -950.374 40.310 -23.58 < 2e-16 ***

seasonalAug -922.932 40.312 -22.89 < 2e-16 ***

seasonalSep -1128.862 40.316 -28.00 < 2e-16 ***

seasonalOct -1040.442 40.319 -25.81 < 2e-16 ***

seasonalNov -789.461 40.324 -19.58 < 2e-16 ***

seasonalDec -269.863 40.895 -6.60 3.6e-10 ***

trend 2.347 0.133 17.64 < 2e-16 ***

Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1

Residual standard error: 121 on 202 degrees of freedom

Multiple R-squared: 0.919, Adjusted R-squared: 0.914

F-statistic: 190 on 12 and 202 DF, p-value: <2e-16

# -------- Code Chank 19 --------
train$yhat <- predict(md1, newdata = train)
test$yhat <- predict(md1, newdata = test)


plot_lm(data = USgas_df, 
        train = train, 
        test = test,
        title = "Predicting the Seasonal Component of the Series")
# -------- Code Chank 20 --------
mape_md1 <- c(mean(abs(train$y - train$yhat) / train$y),
              mean(abs(test$y - test$yhat) / test$y))
mape_md1
## [1] 0.04774945 0.09143290

[1] 0.04501471 0.09192438

# -------- Code Chank 21 --------
md2 <- lm(y ~ seasonal + trend + I(trend^2), data = train)

summary(md2)
## 
## Call:
## lm(formula = y ~ seasonal + trend + I(trend^2), data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -468.47  -54.66   -2.21   63.11  294.32 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.882e+03  2.199e+01  85.568  < 2e-16 ***
## seasonal.L  -4.917e+02  2.530e+01 -19.438  < 2e-16 ***
## seasonal.Q   1.121e+03  2.525e+01  44.381  < 2e-16 ***
## seasonal.C   8.247e+01  2.518e+01   3.275  0.00123 ** 
## seasonal^4   1.763e+02  2.519e+01   6.999 3.35e-11 ***
## seasonal^5   2.835e+02  2.522e+01  11.243  < 2e-16 ***
## seasonal^6  -5.175e+01  2.520e+01  -2.054  0.04123 *  
## seasonal^7  -1.946e+02  2.513e+01  -7.741 3.97e-13 ***
## seasonal^8   8.030e+01  2.507e+01   3.203  0.00157 ** 
## seasonal^9   4.362e+01  2.504e+01   1.742  0.08293 .  
## seasonal^10  7.651e+01  2.503e+01   3.057  0.00253 ** 
## seasonal^11 -1.137e+01  2.503e+01  -0.454  0.65005    
## trend       -1.270e+00  4.472e-01  -2.840  0.00494 ** 
## I(trend^2)   1.713e-02  1.908e-03   8.977  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 109.1 on 212 degrees of freedom
## Multiple R-squared:  0.9379, Adjusted R-squared:  0.9341 
## F-statistic: 246.1 on 13 and 212 DF,  p-value: < 2.2e-16

Call:

lm(formula = y ~ seasonal + trend + I(trend^2), data = train)

Residuals:

Min 1Q Median 3Q Max

-466.5 -55.5 -5.1 60.1 309.5

Coefficients:

Estimate Std. Error t value Pr(>|t|)

(Intercept) 2617.20174 33.00223 79.30 < 2e-16 ***

seasonalFeb -281.63380 36.26107 -7.77 4.0e-13 ***

seasonalMar -478.98651 36.26167 -13.21 < 2e-16 ***

seasonalApr -906.88591 36.26267 -25.01 < 2e-16 ***

seasonalMay -1085.58755 36.26406 -29.94 < 2e-16 ***

seasonalJun -1106.55810 36.26584 -30.51 < 2e-16 ***

seasonalJul -950.01422 36.26801 -26.19 < 2e-16 ***

seasonalAug -922.61703 36.27057 -25.44 < 2e-16 ***

seasonalSep -1128.62207 36.27352 -31.11 < 2e-16 ***

seasonalOct -1040.30714 36.27686 -28.68 < 2e-16 ***

seasonalNov -789.46112 36.28061 -21.76 < 2e-16 ***

seasonalDec -263.18413 36.80761 -7.15 1.6e-11 ***

trend -0.89541 0.48054 -1.86 0.064 .

I(trend^2) 0.01501 0.00215 6.97 4.5e-11 ***

Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1

Residual standard error: 109 on 201 degrees of freedom

Multiple R-squared: 0.934, Adjusted R-squared: 0.93

F-statistic: 220 on 13 and 201 DF, p-value: <2e-16

# -------- Code Chank 22 --------
train$yhat <- predict(md2, newdata = train)
test$yhat <- predict(md2, newdata = test)


plot_lm(data = USgas_df, 
        train = train, 
        test = test,
        title = "Predicting the Trend (Polynomial) and Seasonal Components
of the Series")
mape_md2 <- c(mean(abs(train$y - train$yhat) / train$y),
              mean(abs(test$y - test$yhat) / test$y))

mape_md2
## [1] 0.03688770 0.04212618

[1] 0.03706897 0.04559134

# -------- Code Chank 23 --------
USgas_split <- ts_split(USgas, sample.out = h)

train.ts <- USgas_split$train

test.ts <- USgas_split$test
# -------- Code Chank 24 --------
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
md3 <- tslm(train.ts ~ season + trend + I(trend^2))

summary(md3)
## 
## Call:
## tslm(formula = train.ts ~ season + trend + I(trend^2))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -468.47  -54.66   -2.21   63.11  294.32 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.635e+03  3.224e+01  81.738  < 2e-16 ***
## season2     -3.004e+02  3.540e+01  -8.487 3.69e-15 ***
## season3     -4.841e+02  3.540e+01 -13.676  < 2e-16 ***
## season4     -9.128e+02  3.540e+01 -25.787  < 2e-16 ***
## season5     -1.099e+03  3.540e+01 -31.033  < 2e-16 ***
## season6     -1.118e+03  3.540e+01 -31.588  < 2e-16 ***
## season7     -9.547e+02  3.540e+01 -26.968  < 2e-16 ***
## season8     -9.322e+02  3.541e+01 -26.329  < 2e-16 ***
## season9     -1.136e+03  3.541e+01 -32.070  < 2e-16 ***
## season10    -1.046e+03  3.541e+01 -29.532  < 2e-16 ***
## season11    -8.001e+02  3.590e+01 -22.286  < 2e-16 ***
## season12    -2.618e+02  3.590e+01  -7.293 5.95e-12 ***
## trend       -1.270e+00  4.472e-01  -2.840  0.00494 ** 
## I(trend^2)   1.713e-02  1.908e-03   8.977  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 109.1 on 212 degrees of freedom
## Multiple R-squared:  0.9379, Adjusted R-squared:  0.9341 
## F-statistic: 246.1 on 13 and 212 DF,  p-value: < 2.2e-16

Call:

tslm(formula = train.ts ~ season + trend + I(trend^2))

Residuals:

Min 1Q Median 3Q Max

-466.52 -55.46 -5.13 60.06 309.53

Coefficients:

Estimate Std. Error t value Pr(>|t|)

(Intercept) 2617.201742 33.002235 79.304 < 0.0000000000000002 ***

season2 -281.633803 36.261068 -7.767 0.000000000000404 ***

season3 -478.986514 36.261672 -13.209 < 0.0000000000000002 ***

season4 -906.885912 36.262671 -25.009 < 0.0000000000000002 ***

season5 -1085.587550 36.264062 -29.936 < 0.0000000000000002 ***

season6 -1106.558097 36.265843 -30.512 < 0.0000000000000002 ***

season7 -950.014218 36.268012 -26.194 < 0.0000000000000002 ***

season8 -922.617026 36.270570 -25.437 < 0.0000000000000002 ***

season9 -1128.622074 36.273520 -31.114 < 0.0000000000000002 ***

season10 -1040.307142 36.276865 -28.677 < 0.0000000000000002 ***

season11 -789.461118 36.280610 -21.760 < 0.0000000000000002 ***

season12 -263.184126 36.807605 -7.150 0.000000000015734 ***

trend -0.895408 0.480540 -1.863 0.0639 .

I(trend^2) 0.015010 0.002155 6.966 0.000000000045450 ***

Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1

Residual standard error: 108.8 on 201 degrees of freedom

Multiple R-squared: 0.9344, Adjusted R-squared: 0.9301

F-statistic: 220.1 on 13 and 201 DF, p-value: < 0.00000000000000022

# -------- Code Chank 25 --------
r <- which(USgas_df$ds == as.Date("2014-01-01"))
USgas_df$s_break <- ifelse(year(USgas_df$ds) >= 2010, 1, 0)
USgas_df$s_break[r] <- 1
md3 <- tslm(USgas ~ season + trend + I(trend^2) + s_break, data = USgas_df)
summary(md3)
## 
## Call:
## tslm(formula = USgas ~ season + trend + I(trend^2) + s_break, 
##     data = USgas_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -469.25  -50.68   -2.66   63.63  275.89 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.661e+03  3.200e+01  83.164  < 2e-16 ***
## season2     -3.054e+02  3.448e+01  -8.858 2.61e-16 ***
## season3     -4.849e+02  3.448e+01 -14.062  < 2e-16 ***
## season4     -9.272e+02  3.449e+01 -26.885  < 2e-16 ***
## season5     -1.108e+03  3.449e+01 -32.114  < 2e-16 ***
## season6     -1.127e+03  3.450e+01 -32.660  < 2e-16 ***
## season7     -9.568e+02  3.450e+01 -27.730  < 2e-16 ***
## season8     -9.340e+02  3.451e+01 -27.061  < 2e-16 ***
## season9     -1.138e+03  3.452e+01 -32.972  < 2e-16 ***
## season10    -1.040e+03  3.453e+01 -30.122  < 2e-16 ***
## season11    -7.896e+02  3.497e+01 -22.577  < 2e-16 ***
## season12    -2.649e+02  3.498e+01  -7.571 9.72e-13 ***
## trend       -1.928e+00  4.479e-01  -4.304 2.51e-05 ***
## I(trend^2)   1.862e-02  1.676e-03  11.113  < 2e-16 ***
## s_break      6.060e+01  2.836e+01   2.137   0.0337 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 109 on 223 degrees of freedom
## Multiple R-squared:  0.9423, Adjusted R-squared:  0.9387 
## F-statistic: 260.3 on 14 and 223 DF,  p-value: < 2.2e-16

Call:

tslm(formula = USgas ~ season + trend + I(trend^2) + s_break,

data = USgas_df)

Residuals:

Min 1Q Median 3Q Max

-461.4 -55.9 -6.4 67.4 285.8

Coefficients:

Estimate Std. Error t value Pr(>|t|)

(Intercept) 2647.44816 32.30603 81.95 < 2e-16 ***

season2 -298.73237 35.05548 -8.52 2.9e-15 ***

season3 -481.80356 35.05753 -13.74 < 2e-16 ***

season4 -910.06095 35.06096 -25.96 < 2e-16 ***

season5 -1094.53085 35.06576 -31.21 < 2e-16 *

season6 -1114.15537 35.07195 -31.77 < 2e-16 ***

season7 -950.33977 35.07952 -27.09 < 2e-16 ***

season8 -925.83668 35.08849 -26.39 < 2e-16 ***

season9 -1129.21978 35.09886 -32.17 < 2e-16 ***

season10 -1039.14697 35.11065 -29.60 < 2e-16 ***

season11 -783.59194 35.12386 -22.31 < 2e-16 ***

season12 -256.28337 35.59344 -7.20 1.0e-11 ***

trend -1.67443 0.46052 -3.64 0.00035 ***

I(trend^2) 0.01678 0.00188 8.91 2.4e-16 ***

s_break 74.57388 28.93187 2.58 0.01063 *

Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1

Residual standard error: 108 on 212 degrees of freedom

Multiple R-squared: 0.939, Adjusted R-squared: 0.935

F-statistic: 234 on 14 and 212 DF, p-value: <2e-16

library(UKgrid)

UKdaily <- extract_grid(type = "data.frame",
                        columns = "ND",
                        aggregate = "daily")

head(UKdaily)
##    TIMESTAMP      ND
## 1 2005-04-01 1920069
## 2 2005-04-02 1674699
## 3 2005-04-03 1631352
## 4 2005-04-04 1916693
## 5 2005-04-05 1952082
## 6 2005-04-06 1964584

TIMESTAMP ND

1 2011-01-01 1671744

2 2011-01-02 1760123

3 2011-01-03 1878748

4 2011-01-04 2076052

5 2011-01-05 2103866

6 2011-01-06 2135202

ts_plot(UKdaily,
        title = "The UK National Demand for Electricity",
        Ytitle = "MW",
        Xtitle = "Year")
# -------- Code Chank 26 --------
ts_heatmap(UKdaily[which(year(UKdaily$TIMESTAMP) >= 2016),],
           title = "UK the Daily National Grid Demand Heatmap")
# -------- Code Chank 27 --------
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
UKdaily <- UKdaily %>%
  mutate(wday = wday(TIMESTAMP, label = TRUE),
         month = month(TIMESTAMP, label = TRUE),
         lag365 = dplyr::lag(ND, 365)) %>%
  filter(!is.na(lag365)) %>%
  arrange(TIMESTAMP)
str(UKdaily)
## 'data.frame':    4939 obs. of  5 variables:
##  $ TIMESTAMP: Date, format: "2006-04-01" "2006-04-02" ...
##  $ ND       : int  1718405 1691341 1960325 2023886 2026204 2008422 1981175 1770440 1749715 2012865 ...
##  $ wday     : Ord.factor w/ 7 levels "Sun"<"Mon"<"Tue"<..: 7 1 2 3 4 5 6 7 1 2 ...
##  $ month    : Ord.factor w/ 12 levels "Jan"<"Feb"<"Mar"<..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ lag365   : int  1920069 1674699 1631352 1916693 1952082 1964584 1990895 2003982 1811436 1684720 ...

‘data.frame’: 2540 obs. of 5 variables:

$ TIMESTAMP: Date, format: “2012-01-01” “2012-01-02” …

$ ND : int 1478868 1608394 1881072 1956360 1936635 1939424 1698505 1679311 1898593 1922898 …

$ wday : Factor w/ 7 levels “Sun”,“Mon”,“Tue”,..: 1 2 3 4 5 6 7 1 2 3 …

$ month : Factor w/ 12 levels “Jan”,“Feb”,“Mar”,..: 1 1 1 1 1 1 1 1 1 1 …

$ lag365 : int 1671744 1760123 1878748 2076052 2103866 2135202 2121523 1861515 1837427 2093269 …