library(tidyverse)
library(rstatix)
library(fpp3)
library(naniar)
candy=read.csv("D:\\wallpapers and photos\\csv\\candy_production.csv")
library(lubridate)
vis_miss(candy)

candy_new=candy %>%
rename('production'='IPG3113N') %>% mutate(observation_date=yearmonth(observation_date))
cndy=candy_new %>%
as_tsibble(index =observation_date,regular = T)
cndy
## # A tsibble: 548 x 2 [1M]
## observation_date production
## <mth> <dbl>
## 1 1972 Jan 85.7
## 2 1972 Feb 71.8
## 3 1972 Mar 66.0
## 4 1972 Apr 64.6
## 5 1972 May 65.0
## 6 1972 Jun 67.6
## 7 1972 Jul 69.0
## 8 1972 Aug 70.8
## 9 1972 Sep 75.0
## 10 1972 Oct 107.
## # … with 538 more rows
cdy=candy_new %>%
tsibble(index =observation_date,regular = T) %>%
fill_gaps()
cndy
## # A tsibble: 548 x 2 [1M]
## observation_date production
## <mth> <dbl>
## 1 1972 Jan 85.7
## 2 1972 Feb 71.8
## 3 1972 Mar 66.0
## 4 1972 Apr 64.6
## 5 1972 May 65.0
## 6 1972 Jun 67.6
## 7 1972 Jul 69.0
## 8 1972 Aug 70.8
## 9 1972 Sep 75.0
## 10 1972 Oct 107.
## # … with 538 more rows
cndy %>%
autoplot(production)

cndy %>%
gg_season(production,labels = "both")

cndy %>%
gg_subseries(production)

cndy %>%
ACF(production) %>% autoplot()

dcmp=cndy %>%
model(stl=STL(production))
components(dcmp) %>% autoplot()

test=cndy %>%
filter(year(observation_date)>2010)
train=cndy %>%
filter(year(observation_date)<2010)
H=test %>%
length()
train
## # A tsibble: 456 x 2 [1M]
## observation_date production
## <mth> <dbl>
## 1 1972 Jan 85.7
## 2 1972 Feb 71.8
## 3 1972 Mar 66.0
## 4 1972 Apr 64.6
## 5 1972 May 65.0
## 6 1972 Jun 67.6
## 7 1972 Jul 69.0
## 8 1972 Aug 70.8
## 9 1972 Sep 75.0
## 10 1972 Oct 107.
## # … with 446 more rows
fit=train %>%
model(trend=TSLM(production~trend()+season())) %>%
forecast()
fit %>%
autoplot(train)

fit_1 =train %>%
model(
mean=MEAN(production),
naive=NAIVE(production),
snaive=SNAIVE(production),
drift=RW(production~drift())
) %>%
forecast(h="8 years")
fit_1 %>%
autoplot(train)

fit6=train %>%
model(stlf=decomposition_model(STL(production),NAIVE(season_adjust))) %>%
forecast(h=" 8 years")
fit6 %>%
autoplot(train)

accuracy(fit,cndy)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 trend Test -18.8 19.7 18.8 -18.7 18.7 3.74 2.96 0.654
train %>%
model(stlf=decomposition_model(STL(production),NAIVE(season_adjust))) %>%
gg_tsresiduals()

fit7=train %>%
model(ETS(production))
fit7 %>%
report()
## Series: production
## Model: ETS(A,N,A)
## Smoothing parameters:
## alpha = 0.5772439
## gamma = 0.2617478
##
## Initial states:
## l[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5] s[-6]
## 79.90465 22.93952 27.03025 19.34519 -5.877565 -9.032948 -9.927101 -9.638709
## s[-7] s[-8] s[-9] s[-10] s[-11]
## -13.23402 -15.45266 -10.59256 -3.604592 8.045193
##
## sigma^2: 15.0567
##
## AIC AICc BIC
## 4044.229 4045.320 4106.066
fit7 %>% forecast(h="7.8 years") %>% autoplot(train)

pred=fit7 %>% forecast(h="7 years")
accuracy(pred,cndy)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(production) Test 0.746 5.62 4.50 0.444 4.33 0.894 0.844 0.731
train %>%
mutate(production=difference(log(production),12) %>% difference(1)) %>%
features(production,unitroot_kpss)
## # A tibble: 1 × 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.0270 0.1
train %>%
mutate(production=difference(log(production),12) %>% difference(1)) %>%
gg_tsdisplay()

#features(production,unitroot_ndiffs)
train %>%
mutate(production=difference(log(production),12) %>% difference(1)) %>%
ACF(lag_max = 50) %>%
autoplot()

train %>%
mutate(production=difference(log(production),12) %>% difference(1)) %>%
PACF(lag_max = 50) %>%
autoplot()

fit8=train %>%
model(ARIMA(production,stepwise = F,approximation = F)) %>%
forecast(h="8 years")
fit8 %>%
autoplot(train)

train %>%
model(ARIMA(production,stepwise = F,approximation = F)) %>%
report()
## Series: production
## Model: ARIMA(3,1,2)(0,1,1)[12]
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 sma1
## 0.1307 0.3908 0.2716 -0.4651 -0.5056 -0.6655
## s.e. 0.1446 0.1090 0.0497 0.1459 0.1391 0.0420
##
## sigma^2 estimated as 13.69: log likelihood=-1209.63
## AIC=2433.27 AICc=2433.53 BIC=2461.92
train %>%
model(ARIMA(production,stepwise = F,approximation = F)) %>%
gg_tsresiduals()

accuracy(fit8,cndy)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(production, stepw… Test 11.9 15.1 12.3 11.2 11.6 2.45 2.27 0.853
accuracy(fit8,cndy)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(production, stepw… Test 11.9 15.1 12.3 11.2 11.6 2.45 2.27 0.853
fit9=train %>%
model(ARIMA(production)) %>%
forecast(h="8 years")
fit9%>%
autoplot(train)

train %>%
model(ARIMA(production)) %>%
report()
## Series: production
## Model: ARIMA(1,1,2)(0,1,2)[12]
##
## Coefficients:
## ar1 ma1 ma2 sma1 sma2
## -0.6247 0.3294 -0.3375 -0.5802 -0.1393
## s.e. 0.1929 0.1871 0.0570 0.0507 0.0493
##
## sigma^2 estimated as 13.85: log likelihood=-1212.28
## AIC=2436.56 AICc=2436.76 BIC=2461.13
train %>%
model(ARIMA(production)) %>%
gg_tsresiduals()

accuracy(fit9,cndy)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(production) Test 5.41 9.00 6.79 4.98 6.38 1.35 1.35 0.812