library(fpp3)## Warning in system("timedatectl", intern = TRUE): running command 'timedatectl'
## had status 1
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble 3.1.8 ✔ tsibble 1.1.5
## ✔ dplyr 1.1.0 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.0 ✔ feasts 0.4.1
## ✔ lubridate 1.9.2 ✔ fable 0.4.0
## ✔ ggplot2 3.5.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(tidyverse)## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ purrr 1.0.1 ✔ stringr 1.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
pacman::p_load(tidyverse, fpp3)
pacman::p_load(dplyr, ggplot2)
Sys.time()## [1] "2024-11-03 23:13:10 EST"
Consider the GDP information in data set called global_economy, which is already embedded in fpp3 package (no need to upload externally)
global_economy # see the data.## # A tsibble: 15,150 x 9 [1Y]
## # Key: Country [263]
## Country Code Year GDP Growth CPI Imports Exports Population
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan AFG 1960 537777811. NA NA 7.02 4.13 8996351
## 2 Afghanistan AFG 1961 548888896. NA NA 8.10 4.45 9166764
## 3 Afghanistan AFG 1962 546666678. NA NA 9.35 4.88 9345868
## 4 Afghanistan AFG 1963 751111191. NA NA 16.9 9.17 9533954
## 5 Afghanistan AFG 1964 800000044. NA NA 18.1 8.89 9731361
## 6 Afghanistan AFG 1965 1006666638. NA NA 21.4 11.3 9938414
## 7 Afghanistan AFG 1966 1399999967. NA NA 18.6 8.57 10152331
## 8 Afghanistan AFG 1967 1673333418. NA NA 14.2 6.77 10372630
## 9 Afghanistan AFG 1968 1373333367. NA NA 15.2 8.90 10604346
## 10 Afghanistan AFG 1969 1408888922. NA NA 15.0 10.1 10854428
## # … with 15,140 more rows
# 1.Answer:
global_economy %>%
filter(Code == "ITA")%>% mutate(GDPpc = (GDP/Population)) %>% autoplot(GDPpc)For each of the following series, make a graph of the data. If transforming seems appropriate, do so and describe the effect. Comment below in answer:
# 2a.Answer:
Italy = global_economy %>% filter(Country == "Italy") %>% mutate(GDPpc = (GDP/Population))
Italy %>% features(GDPpc, features = guerrero) # the lamda is 0, so use 0 in box-cox transformation## # A tibble: 1 × 2
## Country lambda_guerrero
## <fct> <dbl>
## 1 Italy -0.0656
Italy %>% autoplot(box_cox(GDPpc, 0))United States GDP from global_economy.
# 2b.Answer:
us_economy <- global_economy %>%
filter(Country == "United States")
us_economy %>%
autoplot(box_cox(GDP, 0))us_economy %>%
autoplot(box_cox(GDP, 0.3))us_economy %>%
features(GDP, features = guerrero)## # A tibble: 1 × 2
## Country lambda_guerrero
## <fct> <dbl>
## 1 United States 0.282
us_economy %>%
autoplot(box_cox(GDP, 0.2819714))Slaughter of Victorian “Bulls, bullocks and steers” in aus_livestock
# 2c.Answer:
vic_bulls <- aus_livestock %>% filter(State == "Victoria", Animal == "Bulls, bullocks and steers")
vic_bulls %>%
autoplot(Count)vic_bulls %>%
features(Count, features = guerrero)## # A tibble: 1 × 3
## Animal State lambda_guerrero
## <fct> <fct> <dbl>
## 1 Bulls, bullocks and steers Victoria -0.0446
Victorian Electricity Demand from vic_elec.
# 2d.Answer:
vic_elec %>%
autoplot(box_cox(Demand, 0))Gas production from aus_production.
# 2e.Answer:
aus_production %>%
autoplot(Gas)aus_production %>%
autoplot(box_cox(Gas, 0))aus_production %>%
features(Gas, features = guerrero)## # A tibble: 1 × 1
## lambda_guerrero
## <dbl>
## 1 0.110
aus_production %>%
autoplot(box_cox(Gas, 0.1095))# 3a.Answer:
canadian_gas %>% autoplot(Volume)canadian_gas %>% gg_subseries(Volume) canadian_gas %>% gg_season(Volume) Do an STL decomposition of the data. You will need to choose a seasonal window to allow for the changing shape of the seasonal component.
# 3b.Answer:
# STL model:
fit <- canadian_gas %>%
model(STL(Volume)) %>%
components()
fit## # A dable: 542 x 7 [1M]
## # Key: .model [1]
## # : Volume = trend + season_year + remainder
## .model Month Volume trend season_year remainder season_adjust
## <chr> <mth> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 STL(Volume) 1960 Jan 1.43 1.08 0.520 -0.172 0.911
## 2 STL(Volume) 1960 Feb 1.31 1.11 0.215 -0.0178 1.09
## 3 STL(Volume) 1960 Mar 1.40 1.13 0.307 -0.0395 1.09
## 4 STL(Volume) 1960 Apr 1.17 1.16 0.0161 -0.00627 1.15
## 5 STL(Volume) 1960 May 1.12 1.18 -0.116 0.0476 1.23
## 6 STL(Volume) 1960 Jun 1.01 1.21 -0.356 0.159 1.37
## 7 STL(Volume) 1960 Jul 0.966 1.23 -0.403 0.136 1.37
## 8 STL(Volume) 1960 Aug 0.977 1.26 -0.349 0.0677 1.33
## 9 STL(Volume) 1960 Sep 1.03 1.28 -0.340 0.0870 1.37
## 10 STL(Volume) 1960 Oct 1.25 1.31 -0.0899 0.0329 1.34
## # … with 532 more rows
names(fit)## [1] ".model" "Month" "Volume" "trend"
## [5] "season_year" "remainder" "season_adjust"
fit %>% autoplot()How does the seasonal shape change over time? [Hint: Try plotting the seasonal component using gg_season().]
# 3c.Answer:
fit %>% gg_season(season_year)produce a plausible seasonally adjusted series? What are these numbers, plot the series.
# 3d.Answer:
SeasonallyAdj <- fit$season_adjust # seasonally adjusted series.
# original series:
canadian_gas %>%
autoplot(Volume) +
autolayer(fit, season_adjust, col = "blue")For retail time series, use the below code:
# run the code
set.seed(12345678)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
head(myseries,2)## # A tsibble: 2 x 5 [1M]
## # Key: State, Industry [1]
## State Industry Serie…¹ Month Turno…²
## <chr> <chr> <chr> <mth> <dbl>
## 1 Northern Territory Clothing, footwear and personal a… A33497… 1988 Apr 2.3
## 2 Northern Territory Clothing, footwear and personal a… A33497… 1988 May 2.9
## # … with abbreviated variable names ¹`Series ID`, ²Turnover
Create a training dataset consisting of observations before 2011
myseries_train <- myseries %>%
filter(year(Month) < 2011)Check that your data have been split appropriately by producing the following plot.
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, colour = "red")Fit a seasonal naïve model using SNAIVE() applied to your training data (myseries_train).
#Answer:
fit <- myseries_train %>%
model(SNAIVE(Turnover))Check the residuals.
# 4d Answer:
fit %>% gg_tsresiduals()## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_bin()`).
# Do the residuals appear to be uncorrelated and normally distributed?
# Answ: yes, normally distributed.Produce forecasts for the test data with given code below:
# 4e Answer:
fc <- fit %>%
forecast(new_data = anti_join(myseries, myseries_train))## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
fc %>% autoplot(myseries)Joining by = c(“State”, “Industry”, “Series ID”, “Month”, “Turnover”)
Compare the accuracy of your forecasts against the actual values with given code below:
fit %>% accuracy()## # A tibble: 1 × 12
## State Indus…¹ .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Northern… Clothi… SNAIV… Trai… 0.439 1.21 0.915 5.23 12.4 1 1 0.768
## # … with abbreviated variable name ¹Industry
fc %>% accuracy(myseries)## # A tibble: 1 × 12
## .model State Indus…¹ .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Tu… Nort… Clothi… Test 0.836 1.55 1.24 5.94 9.06 1.36 1.28 0.601
## # … with abbreviated variable name ¹Industry
# 4f Answ:RMSE is lower in first series, as 1.21, and RMSE is 1.55 in myseries.How sensitive are the accuracy measures to the amount of training data used?
# 4g Answer:
#The RMSE values are 1.21 and 1.55, indicating some sensitivity. The model performs much better on the training data compared to the out-of-sample data. This difference is common and, in this case, is noticeable because the model hasn’t captured the trend in the data. This is clear from the mean error being above zero, showing that the predictions are falling short of the actual upward trend in the data.Create a training set for Australian takeaway food turnover (aus_retail) by withholding the last four years as a test set.
# 5a.Answer:
takeaway <- aus_retail %>%
filter(Industry == "Takeaway food services") %>%
summarise(Turnover = sum(Turnover))
train <- takeaway %>%
filter(year(Month) <= 2014)
tail(train)## # A tsibble: 6 x 2 [1M]
## Month Turnover
## <mth> <dbl>
## 1 2014 Jul 1328.
## 2 2014 Aug 1335.
## 3 2014 Sep 1338.
## 4 2014 Oct 1390.
## 5 2014 Nov 1391.
## 6 2014 Dec 1494.
Fit all the appropriate benchmark methods to the training set and forecast the periods covered by the test set.
# 5b.Answer:
fit <- train %>%
model(
naive = NAIVE(Turnover),
drift = RW(Turnover ~ drift()),
mean = MEAN(Turnover),
snaive = SNAIVE(Turnover)
)
fc <- fit %>% forecast(h = "4 years")Compute the accuracy of your forecasts. Which method does best?
# 5c.Answer:
fc %>%
accuracy(takeaway) %>%
arrange(MASE)## # A tibble: 4 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 naive Test -12.4 119. 96.4 -1.49 6.66 2.30 2.25 0.613
## 2 drift Test -93.7 130. 108. -6.82 7.67 2.58 2.46 0.403
## 3 snaive Test 177. 192. 177. 11.7 11.7 4.22 3.64 0.902
## 4 mean Test 829. 838. 829. 55.7 55.7 19.8 15.8 0.613
#The naive method is best here according to MASE value. It's minimum at naive.Do the residuals from the best method resemble white noise?
# 5d.Answer:
fit %>%
select(naive) %>%
gg_tsresiduals()## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).
# This is far from white noise. There is strong seasonality and increasing variance that has not been accounted for by the naive model.
# look at the acf values, it has peak at different lags, showing seasonality.Using the code below, get a series (it gets a series randomly by using sample() function):
set.seed(12345678)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))see head of your series to check it is a tsibble data, and remove NA’s if there is any with these commands:
head(myseries)## # A tsibble: 6 x 5 [1M]
## # Key: State, Industry [1]
## State Industry Serie…¹ Month Turno…²
## <chr> <chr> <chr> <mth> <dbl>
## 1 Northern Territory Clothing, footwear and personal a… A33497… 1988 Apr 2.3
## 2 Northern Territory Clothing, footwear and personal a… A33497… 1988 May 2.9
## 3 Northern Territory Clothing, footwear and personal a… A33497… 1988 Jun 2.6
## 4 Northern Territory Clothing, footwear and personal a… A33497… 1988 Jul 2.8
## 5 Northern Territory Clothing, footwear and personal a… A33497… 1988 Aug 2.9
## 6 Northern Territory Clothing, footwear and personal a… A33497… 1988 Sep 3
## # … with abbreviated variable names ¹`Series ID`, ²Turnover
myseries = myseries %>% filter(!is.na(`Series ID`))What is the name of the series you randomly choose? Write it.
# 6a.Answer:
# It is A3349767W .Run a linear regression of Turnover on trend.(Hint: use TSLM() and trend() functions)
# 6b.Answer:
fit = myseries %>% model(TSLM(Turnover~ trend()))See the regression result by report() command.
# 6c.Answer:
report(fit)## Series: Turnover
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.0795 -1.1704 -0.1640 0.9683 7.4514
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.5313376 0.1983464 17.80 <2e-16 ***
## trend() 0.0307747 0.0009291 33.12 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.901 on 367 degrees of freedom
## Multiple R-squared: 0.7493, Adjusted R-squared: 0.7486
## F-statistic: 1097 on 1 and 367 DF, p-value: < 2.22e-16
By using this model, forecast it for the next 3 years. What are the values of the next 3 years, monthly values?
# 6d.Answer:
fit %>% forecast(h=36)## # A fable: 36 x 6 [1M]
## # Key: State, Industry, .model [1]
## State Industry .model Month
## <chr> <chr> <chr> <mth>
## 1 Northern Territory Clothing, footwear and personal accessory… TSLM(… 2019 Jan
## 2 Northern Territory Clothing, footwear and personal accessory… TSLM(… 2019 Feb
## 3 Northern Territory Clothing, footwear and personal accessory… TSLM(… 2019 Mar
## 4 Northern Territory Clothing, footwear and personal accessory… TSLM(… 2019 Apr
## 5 Northern Territory Clothing, footwear and personal accessory… TSLM(… 2019 May
## 6 Northern Territory Clothing, footwear and personal accessory… TSLM(… 2019 Jun
## 7 Northern Territory Clothing, footwear and personal accessory… TSLM(… 2019 Jul
## 8 Northern Territory Clothing, footwear and personal accessory… TSLM(… 2019 Aug
## 9 Northern Territory Clothing, footwear and personal accessory… TSLM(… 2019 Sep
## 10 Northern Territory Clothing, footwear and personal accessory… TSLM(… 2019 Oct
## # … with 26 more rows, and 2 more variables: Turnover <dist>, .mean <dbl>
Plot the forecast values along with the original data.
# 6d.Answer:
fit %>% forecast(h=36 )%>% autoplot(myseries)fit %>% forecast(h=36 )%>% autoplot()Get the residuals from the model. And check the residuals to check whether or not it satisfies the requirements for white noise error terms.(hint: augment() and gg_tsresiduals() functions)
# 6e.Answer:
fit %>% gg_tsresiduals()Half-hourly electricity demand for Victoria, Australia is contained in vic_elec. Extract the January 2014 electricity demand, and aggregate this data to daily with daily total demands and maximum temperatures. Run the code below:
jan_vic_elec <- vic_elec %>%
filter(yearmonth(Time) == yearmonth("2014 Jan")) %>%
index_by(Date = as_date(Time)) %>%
summarise(Demand = sum(Demand), Temperature = max(Temperature))Plot the data and find the regression model for Demand with temperature as a predictor variable. Why is there a positive relationship?
# 7a.Answer:
jan_vic_elec %>%
ggplot(aes(x = Temperature, y = Demand)) +
geom_point()fit <- jan_vic_elec %>%
model(TSLM(Demand ~ Temperature))
fit %>% report()## Series: Demand
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -49978.2 -10218.9 -121.3 18533.2 35440.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 59083.9 17424.8 3.391 0.00203 **
## Temperature 6154.3 601.3 10.235 3.89e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24540 on 29 degrees of freedom
## Multiple R-squared: 0.7832, Adjusted R-squared: 0.7757
## F-statistic: 104.7 on 1 and 29 DF, p-value: 3.8897e-11
Produce a residual plot. Is the model adequate? Are there any outliers or influential observations?
# 7b.Answer:
fit %>% gg_tsresiduals()#The residuals from this section of the data suggest that the model is lacking a trend (although looking at a larger window of the data, this is clearly not a real pattern). Some large variability suggests that there are some outliers in this data.Use the model to forecast the electricity demand that you would expect for the next day if the maximum temperature was 15∘C and compare it with the forecast if the with maximum temperature was 35∘C. Do you believe these forecasts?
# 7c.Answer:
Next_Day <- scenarios(
Coldday = new_data(jan_vic_elec, 1) %>% mutate(Temperature = 15),
Hotday = new_data(jan_vic_elec, 1) %>% mutate(Temperature = 35)
)
fc <- fit %>%
forecast(new_data = Next_Day)
autoplot(jan_vic_elec, Demand) +
autolayer(fc, series = "Forecast", PI = TRUE, alpha = 0.5) +
labs(title = "Demand Forecast for electricity",
x = "Date",
y = "Demand")## Warning in ggdist::geom_interval(intvl_mapping, data =
## dist_qi_frame(object[single_row[["TRUE"]], : Ignoring unknown parameters:
## `series` and `PI`
## Warning in ggplot2::geom_point(mapping = without(mapping, "linetype"), data =
## unpack_data(object[single_row[["TRUE"]], : Ignoring unknown parameters:
## `series` and `PI`
jan_vic_elec %>%
model(TSLM(Demand ~ Temperature)) %>%
forecast(
new_data(jan_vic_elec, 1) %>%
mutate(Temperature = 15)
) %>%
autoplot(jan_vic_elec)#The forecasts seem reasonable. However we should be aware that there is not much data to support the forecasts at these temperature extremes, especially in that no daily maximum below 20∘C
#is observed during January (a summer month in Victoria).Do you believe these forecasts? The following R code will get you started:
# 7d.Answer:
jan_vic_elec %>%
model(TSLM(Demand ~ Temperature)) %>%
forecast(new_data(jan_vic_elec, 1) %>%
mutate(Temperature = 15)
) %>%
autoplot(jan_vic_elec)Give prediction intervals for your forecasts.
# 7e.Answer:
fc %>%
hilo(95) %>%
select(-.model)## # A tsibble: 2 x 6 [1D]
## # Key: .scenario [2]
## .scenario Date
## <chr> <date>
## 1 Coldday 2014-02-01
## 2 Hotday 2014-02-01
## # … with 4 more variables: Demand <dist>, .mean <dbl>, Temperature <dbl>,
## # `95%` <hilo>
Read the shampoo data given in excel (Import Dataset as Excel)
# a. View the shampoo sales data. How many variables are there? Find how many rows and column in the data?
library(readxl)
shampoo_2 <- read_excel("shampoo-2.xlsx")#b. Is the data annual, monthly, quarterly?
str(shampoo_2) #data frame## tibble [36 × 2] (S3: tbl_df/tbl/data.frame)
## $ Month: POSIXct[1:36], format: "1995-01-01" "1995-02-01" ...
## $ sales: num [1:36] 266 146 183 119 180 ...
# Answ: Monthly data
#c. Convert the data into tibble , then tsibble
mydata = shampoo_2 %>%
mutate(MONTHLY = yearmonth(Month)) %>% as_tsibble(index = MONTHLY) %>% select(-Month)
str(mydata)## tbl_ts [36 × 2] (S3: tbl_ts/tbl_df/tbl/data.frame)
## $ sales : num [1:36] 266 146 183 119 180 ...
## $ MONTHLY: mth [1:36] 1995 Jan, 1995 Feb, 1995 Mar, 1995 Apr, 1995 May, 1995 Jun,...
## - attr(*, "key")= tibble [1 × 1] (S3: tbl_df/tbl/data.frame)
## ..$ .rows: list<int> [1:1]
## .. ..$ : int [1:36] 1 2 3 4 5 6 7 8 9 10 ...
## .. ..@ ptype: int(0)
## - attr(*, "index")= chr "MONTHLY"
## ..- attr(*, "ordered")= logi TRUE
## - attr(*, "index2")= chr "MONTHLY"
## - attr(*, "interval")= interval [1:1] 1M
## ..@ .regular: logi TRUE
mydata## # A tsibble: 36 x 2 [1M]
## sales MONTHLY
## <dbl> <mth>
## 1 266 1995 Jan
## 2 146. 1995 Feb
## 3 183. 1995 Mar
## 4 119. 1995 Apr
## 5 180. 1995 May
## 6 168. 1995 Jun
## 7 232. 1995 Jul
## 8 224. 1995 Aug
## 9 193. 1995 Sep
## 10 123. 1995 Oct
## # … with 26 more rows
#d. Plot the shampoo sales. What do you see from the data pattern? What does x-axis represent?
plot(shampoo_2$sales, type ="l")mydata %>%
autoplot(sales) +
labs(title = "Monthly Shampoo Sales", y= "in $")# Comment here. Use plot() and autoplot().Put the name for y axis, and a title for the graph.
#e. What is the average, and median of shampoo sales. Put it on a histogram.
meanSales = mean(shampoo_2$sales)
meanSales## [1] 312.6
median(shampoo_2$sales)## [1] 280.15
hist(shampoo_2$sales)mydata %>%
ggplot(aes(x=sales)) +
geom_histogram(bin=10)## Warning in geom_histogram(bin = 10): Ignoring unknown parameters: `bin`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#f. Get seasonal plot. What do you see/ is there any pattern, is there any seasonality.
mydata %>% gg_season(sales) # yes, there is a trend. 1997 is higher than 1996 and than 1995.mydata %>% gg_subseries(sales) # it looks like there is monthly effect. it is higher in jul-december interval.#g. Get a linear regression line with trend and dummy for each month (Hint: use trend and season in regression equation).
fit = mydata %>% model(TSLM(sales ~ trend() + season()))
#h. Comment on each estimated coefficient of the model.Are they statistically significant at 5 % significance level?
report(fit)## Series: sales
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -129.60 -62.32 -4.84 53.76 152.72
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 113.867 55.740 2.043 0.0527 .
## trend() 11.754 1.534 7.664 8.88e-08 ***
## season()year2 -33.154 73.630 -0.450 0.6567
## season()year3 -53.808 73.678 -0.730 0.4726
## season()year4 -24.628 73.757 -0.334 0.7415
## season()year5 -56.015 73.869 -0.758 0.4560
## season()year6 -27.802 74.012 -0.376 0.7106
## season()year7 7.244 74.187 0.098 0.9231
## season()year8 -37.043 74.393 -0.498 0.6233
## season()year9 27.536 74.629 0.369 0.7155
## season()year10 -32.518 74.897 -0.434 0.6682
## season()year11 9.895 75.194 0.132 0.8964
## season()year12 -4.259 75.522 -0.056 0.9555
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 90.16 on 23 degrees of freedom
## Multiple R-squared: 0.7592, Adjusted R-squared: 0.6336
## F-statistic: 6.043 on 12 and 23 DF, p-value: 0.00011612
# Answ: no it is not significant. no monthly effect, but trend is statistically significant at 5 %.
#i. Which month has the highest sales?
# december and november.
#j. Forecast it for the next year. What are the values
forecasts = fit %>% forecast(h=12)
forecasts## # A fable: 12 x 4 [1M]
## # Key: .model [1]
## .model MONTHLY
## <chr> <mth>
## 1 TSLM(sales ~ trend() + season()) 1998 Jan
## 2 TSLM(sales ~ trend() + season()) 1998 Feb
## 3 TSLM(sales ~ trend() + season()) 1998 Mar
## 4 TSLM(sales ~ trend() + season()) 1998 Apr
## 5 TSLM(sales ~ trend() + season()) 1998 May
## 6 TSLM(sales ~ trend() + season()) 1998 Jun
## 7 TSLM(sales ~ trend() + season()) 1998 Jul
## 8 TSLM(sales ~ trend() + season()) 1998 Aug
## 9 TSLM(sales ~ trend() + season()) 1998 Sep
## 10 TSLM(sales ~ trend() + season()) 1998 Oct
## 11 TSLM(sales ~ trend() + season()) 1998 Nov
## 12 TSLM(sales ~ trend() + season()) 1998 Dec
## # … with 2 more variables: sales <dist>, .mean <dbl>
#k. Plot the forecast with original data.
fit %>% forecast(h=12) %>% autoplot(mydata) forecasts %>% autoplot(mydata)#l. Check if the residuals of the model is white noise.
fit %>% gg_tsresiduals # it is autocorrelation at 1 lag.
### Answ: not white noise.
#m By using the regression model, check the accuracy of the forecast. What is MSE, RMSE values?
accuracy(fit)## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 TSLM(sales ~ tren… Trai… -3.99e-16 72.1 59.6 -4.55 23.1 0.388 0.378 -0.0363