library(tidyverse)
library(rstatix)
library(fpp3)
library(naniar)
candy=read.csv("D:\\wallpapers and photos\\csv\\candy_production.csv")
library(lubridate)
vis_miss(candy)

candy_new=candy %>% 
  rename('production'='IPG3113N') %>%  mutate(observation_date=yearmonth(observation_date))
  




cndy=candy_new %>%
as_tsibble(index =observation_date,regular = T) 

cndy
## # A tsibble: 548 x 2 [1M]
##    observation_date production
##               <mth>      <dbl>
##  1         1972 Jan       85.7
##  2         1972 Feb       71.8
##  3         1972 Mar       66.0
##  4         1972 Apr       64.6
##  5         1972 May       65.0
##  6         1972 Jun       67.6
##  7         1972 Jul       69.0
##  8         1972 Aug       70.8
##  9         1972 Sep       75.0
## 10         1972 Oct      107. 
## # … with 538 more rows
cdy=candy_new %>%
tsibble(index =observation_date,regular = T) %>% 
  fill_gaps()
cndy
## # A tsibble: 548 x 2 [1M]
##    observation_date production
##               <mth>      <dbl>
##  1         1972 Jan       85.7
##  2         1972 Feb       71.8
##  3         1972 Mar       66.0
##  4         1972 Apr       64.6
##  5         1972 May       65.0
##  6         1972 Jun       67.6
##  7         1972 Jul       69.0
##  8         1972 Aug       70.8
##  9         1972 Sep       75.0
## 10         1972 Oct      107. 
## # … with 538 more rows
cndy %>% 
  autoplot(production)

cndy %>% 
  gg_season(production,labels = "both")

cndy %>% 
  gg_subseries(production)

cndy %>% 
  ACF(production) %>% autoplot()

dcmp=cndy %>% 
  model(stl=STL(production))
components(dcmp) %>% autoplot()

test=cndy %>% 
  filter(year(observation_date)>2010)
train=cndy %>% 
  filter(year(observation_date)<2010)
H=test %>% 
  length()
train
## # A tsibble: 456 x 2 [1M]
##    observation_date production
##               <mth>      <dbl>
##  1         1972 Jan       85.7
##  2         1972 Feb       71.8
##  3         1972 Mar       66.0
##  4         1972 Apr       64.6
##  5         1972 May       65.0
##  6         1972 Jun       67.6
##  7         1972 Jul       69.0
##  8         1972 Aug       70.8
##  9         1972 Sep       75.0
## 10         1972 Oct      107. 
## # … with 446 more rows
fit=train %>% 
  model(trend=TSLM(production~trend()+season())) %>% 
  forecast()
fit %>%
  autoplot(train)

fit_1 =train %>% 
  model(
    mean=MEAN(production),
    naive=NAIVE(production),
    snaive=SNAIVE(production),
    drift=RW(production~drift())
  ) %>% 
  forecast(h="8 years")


fit_1 %>% 
  
  
  autoplot(train) 

fit6=train %>% 
  model(stlf=decomposition_model(STL(production),NAIVE(season_adjust))) %>% 
  
  forecast(h=" 8 years") 
fit6 %>% 
  autoplot(train)

accuracy(fit,cndy)
## # A tibble: 1 × 10
##   .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 trend  Test  -18.8  19.7  18.8 -18.7  18.7  3.74  2.96 0.654
train %>% 
  model(stlf=decomposition_model(STL(production),NAIVE(season_adjust))) %>% 
  gg_tsresiduals()

fit7=train %>% 
  model(ETS(production))
fit7 %>% 
  report()
## Series: production 
## Model: ETS(A,N,A) 
##   Smoothing parameters:
##     alpha = 0.5772439 
##     gamma = 0.2617478 
## 
##   Initial states:
##      l[0]     s[0]    s[-1]    s[-2]     s[-3]     s[-4]     s[-5]     s[-6]
##  79.90465 22.93952 27.03025 19.34519 -5.877565 -9.032948 -9.927101 -9.638709
##      s[-7]     s[-8]     s[-9]    s[-10]   s[-11]
##  -13.23402 -15.45266 -10.59256 -3.604592 8.045193
## 
##   sigma^2:  15.0567
## 
##      AIC     AICc      BIC 
## 4044.229 4045.320 4106.066
fit7 %>% forecast(h="7.8 years") %>% autoplot(train)

pred=fit7 %>% forecast(h="7 years")
accuracy(pred,cndy)
## # A tibble: 1 × 10
##   .model          .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>           <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(production) Test  0.746  5.62  4.50 0.444  4.33 0.894 0.844 0.731
train %>% 
  mutate(production=difference(log(production),12) %>% difference(1)) %>% 
  features(production,unitroot_kpss) 
## # A tibble: 1 × 2
##   kpss_stat kpss_pvalue
##       <dbl>       <dbl>
## 1    0.0270         0.1
train %>% 
  mutate(production=difference(log(production),12) %>% difference(1)) %>% 
  gg_tsdisplay()

  #features(production,unitroot_ndiffs) 
train %>% 
  mutate(production=difference(log(production),12) %>% difference(1)) %>% 
  ACF(lag_max = 50) %>% 
  autoplot()

train %>% 
  mutate(production=difference(log(production),12) %>% difference(1)) %>% 
  PACF(lag_max = 50) %>% 
  autoplot()

fit8=train %>% 
  model(ARIMA(production,stepwise = F,approximation = F)) %>% 
  forecast(h="8 years")
fit8 %>% 
  autoplot(train)

train %>% 
  model(ARIMA(production,stepwise = F,approximation = F)) %>% 
  report()
## Series: production 
## Model: ARIMA(3,1,2)(0,1,1)[12] 
## 
## Coefficients:
##          ar1     ar2     ar3      ma1      ma2     sma1
##       0.1307  0.3908  0.2716  -0.4651  -0.5056  -0.6655
## s.e.  0.1446  0.1090  0.0497   0.1459   0.1391   0.0420
## 
## sigma^2 estimated as 13.69:  log likelihood=-1209.63
## AIC=2433.27   AICc=2433.53   BIC=2461.92
train %>% 
  model(ARIMA(production,stepwise = F,approximation = F)) %>% 
gg_tsresiduals()

accuracy(fit8,cndy)
## # A tibble: 1 × 10
##   .model                   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>                    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(production, stepw… Test   11.9  15.1  12.3  11.2  11.6  2.45  2.27 0.853
accuracy(fit8,cndy)
## # A tibble: 1 × 10
##   .model                   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>                    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(production, stepw… Test   11.9  15.1  12.3  11.2  11.6  2.45  2.27 0.853
fit9=train %>% 
  model(ARIMA(production)) %>% 
  forecast(h="8 years")
fit9%>% 
  autoplot(train)

train %>% 
  model(ARIMA(production)) %>% 
  report()
## Series: production 
## Model: ARIMA(1,1,2)(0,1,2)[12] 
## 
## Coefficients:
##           ar1     ma1      ma2     sma1     sma2
##       -0.6247  0.3294  -0.3375  -0.5802  -0.1393
## s.e.   0.1929  0.1871   0.0570   0.0507   0.0493
## 
## sigma^2 estimated as 13.85:  log likelihood=-1212.28
## AIC=2436.56   AICc=2436.76   BIC=2461.13
train %>% 
  model(ARIMA(production)) %>% 
gg_tsresiduals()

accuracy(fit9,cndy)
## # A tibble: 1 × 10
##   .model            .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>             <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(production) Test   5.41  9.00  6.79  4.98  6.38  1.35  1.35 0.812