Base Chunks
Libraries
## Loading required package: dplyr
## Warning: package 'dplyr' was built under R version 4.3.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Warning: package 'fpp3' was built under R version 4.3.2
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble 3.2.1 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.0 ✔ feasts 0.3.2
## ✔ lubridate 1.9.3 ✔ fable 0.3.3
## ✔ ggplot2 3.5.1 ✔ fabletools 0.4.2
## ✔ tsibble 1.1.4
## Warning: package 'tibble' was built under R version 4.3.2
## Warning: package 'tidyr' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tsibble' was built under R version 4.3.3
## Warning: package 'tsibbledata' was built under R version 4.3.2
## Warning: package 'feasts' was built under R version 4.3.3
## Warning: package 'fabletools' was built under R version 4.3.3
## Warning: package 'fable' was built under R version 4.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
## Loading required package: fredr
## Warning: package 'fredr' was built under R version 4.3.2
require(ggplot2)
require(httr)
## Loading required package: httr
## Warning: package 'httr' was built under R version 4.3.2
## Warning: package 'readxl' was built under R version 4.3.2
## Loading required package: tidyverse
## Warning: package 'tidyverse' was built under R version 4.3.2
## Warning: package 'readr' was built under R version 4.3.2
## Warning: package 'purrr' was built under R version 4.3.2
## Warning: package 'stringr' was built under R version 4.3.2
## Warning: package 'forcats' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ purrr 1.0.2 ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
API Keys (Hidden)
Load Data
setwd('c:/users/lfult/downloads/climate')
all=read.csv('all.csv')
all$date=as.Date(all$date, "%m/%d/%y")
train=all[1:1461,]
test=all[1462:nrow(all),]
Convert to tsibble
all$date=ymd(all$date)
train$date=ymd(train$date)
test$date=ymd(test$date)
ts=as_tsibble(all, index = date)
train=as_tsibble(train, index=date)
test=as_tsibble(test, index=date)
all2=all
all2$date=yearmonth(all2$date)
all2=all2%>% group_by(yearmonth(date)) %>%
summarise(across(everything(), ~ mean(.x, na.rm = TRUE)))
ts2=as_tsibble(all2, index = date)
train2=as_tsibble(all2[1:36,],index = date)
test2=as_tsibble(all2[37:52,],index = date)
Plots
## Plot variable not specified, automatically selected `.vars = meantemp`

ts%>%ACF(meantemp)%>%autoplot()

ts%>%PACF(meantemp)%>%autoplot()

ts2%>%ACF(meantemp)%>%autoplot()

ts2%>%PACF(meantemp)%>%autoplot()

Build Models
m1=train %>%model(ARIMA(meantemp))
## Warning in sqrt(diag(best$var.coef)): NaNs produced
m2=train %>% model(ETS(meantemp))
m1a=train2 %>%model(ARIMA(meantemp))
m2a=train2 %>% model(ETS(meantemp))
Forecast
f1=m1 %>% forecast(test)
f2=m2 %>% forecast(test)
f3=fitted(m1, h=12) #12 month forward forecasts
f4=fitted(m2,h=12) #12 month forward forecasts
f1a=m1a %>% forecast(test2)
f2a=m2a %>% forecast(test2)
f3a=fitted(m1a, h=12) #12 month forward forecasts
f4a=fitted(m2a,h=12) #12 month forward forecasts
Plot
ts |>
autoplot(meantemp) +
autolayer(f3, .fitted, col = "red") +
#autolayer(f4, .fitted, col = "blue") +
autolayer(f1, color='pink')+
#autolayer(f2, color='cyan')+
labs(title = "Daily Temp, New Delhi", y = "Temp")
## Warning: Removed 13 rows containing missing values or values outside the scale range
## (`geom_line()`).

ts |>
autoplot(meantemp) +
#autolayer(f3, .fitted, col = "red") +
autolayer(f4, .fitted, col = "blue") +
#autolayer(f1, color='pink')+
autolayer(f2, color='cyan')+
labs(title = "Daily Temp, New Delhi", y = "Temp")
## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_line()`).

ts2 |>
autoplot(meantemp) +
autolayer(f3a, .fitted, col = "red") +
#autolayer(f4, .fitted, col = "blue") +
autolayer(f1a, color='pink')+
#autolayer(f2, color='cyan')+
labs(title = "Monthly Temp, New Delhi", y = "Temp")
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_line()`).

ts2 |>
autoplot(meantemp) +
#autolayer(f3, .fitted, col = "red") +
autolayer(f4a, .fitted, col = "blue") +
#autolayer(f1, color='pink')+
autolayer(f2a, color='cyan')+
labs(title = "Monthly Temp, New Delhi", y = "Temp")
## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_line()`).

Accuracy on Test Set
a1=accuracy(f1, test)
a2=accuracy(f2, test)
rbind(a1, a2)
## # A tibble: 2 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(meantemp) Test 6.15 8.82 6.67 22.0 26.2 NaN NaN 0.950
## 2 ETS(meantemp) Test 6.80 9.29 7.14 25.3 28.1 NaN NaN 0.949
a1a=accuracy(f1a, test2)
a2a=accuracy(f2a, test2)
rbind(a1a, a2a)
## # A tibble: 2 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(meantemp) Test 2.07 2.49 2.16 8.80 9.09 NaN NaN 0.540
## 2 ETS(meantemp) Test 2.17 2.46 2.24 9.37 9.59 NaN NaN 0.471
Report
## Series: meantemp
## Model: ARIMA(1,1,4)(1,0,0)[7]
##
## Coefficients:
## ar1 ma1 ma2 ma3 ma4 sar1
## -0.8361 0.613 -0.3139 -0.2566 -0.1283 -0.0427
## s.e. NaN NaN NaN NaN NaN 0.0270
##
## sigma^2 estimated as 2.571: log likelihood=-2757.96
## AIC=5529.91 AICc=5529.99 BIC=5566.92
## # A mable: 1 x 1
## `ARIMA(meantemp)`
## <model>
## 1 <ARIMA(1,1,4)(1,0,0)[7]>
## Series: meantemp
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.7807523
##
## Initial states:
## l[0]
## 9.436231
##
## sigma^2: 2.6849
##
## AIC AICc BIC
## 12093.06 12093.08 12108.92
## # A mable: 1 x 1
## `ETS(meantemp)`
## <model>
## 1 <ETS(A,N,N)>
## Series: meantemp
## Model: ARIMA(0,0,0)(1,1,0)[12]
##
## Coefficients:
## sar1
## -0.7318
## s.e. 0.1342
##
## sigma^2 estimated as 0.9364: log likelihood=-37.35
## AIC=78.7 AICc=79.28 BIC=81.06
## # A mable: 1 x 1
## `ARIMA(meantemp)`
## <model>
## 1 <ARIMA(0,0,0)(1,1,0)[12]>
## Series: meantemp
## Model: ETS(A,N,A)
## Smoothing parameters:
## alpha = 0.0001196536
## gamma = 0.0002627982
##
## Initial states:
## l[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5] s[-6]
## 24.92901 -9.842285 -5.097958 1.520274 4.864411 5.479954 6.49505 8.685293
## s[-7] s[-8] s[-9] s[-10] s[-11]
## 7.787554 3.320113 -3.111843 -7.790092 -12.31047
##
## sigma^2: 0.9448
##
## AIC AICc BIC
## 139.2326 163.2326 162.9854
## # A mable: 1 x 1
## `ETS(meantemp)`
## <model>
## 1 <ETS(A,N,A)>
Plot
# Plot the forecasts
f1df <- as.data.frame(f1)
f2df <- as.data.frame(f2)
tsdf <- as.data.frame(ts)
# Plot using ggplot2
ggplot(tsdf, aes(x = date, y = meantemp)) +
geom_line(color = "black", linewidth = .5) +
geom_line(data = f1df, aes(x = date, y = .mean, color = "ARIMA Forecast"), linetype = "dashed") +
geom_line(data = f2df, aes(x = date, y = .mean, color = "ETS Forecast"), linetype = "dotted") +
ggtitle("Forecast Comparison") +
xlab("Year") +
ylab("Value") +
scale_color_manual(name = "Forecast",
values = c("ARIMA Forecast" = "blue", "ETS Forecast" = "red")) +
theme_minimal() +
theme(legend.position = "bottom")

f1adf <- as.data.frame(f1a)
f2adf <- as.data.frame(f2a)
tsadf <- as.data.frame(ts2)
# Plot using ggplot2
ggplot(tsadf, aes(x = date, y = meantemp)) +
geom_line(color = "black", linewidth = .5) +
geom_line(data = f1adf, aes(x = date, y = .mean, color = "ARIMA Forecast"), linetype = "dashed") +
geom_line(data = f2adf, aes(x = date, y = .mean, color = "ETS Forecast"), linetype = "dotted") +
ggtitle("Forecast Comparison") +
xlab("Year") +
ylab("Value") +
scale_color_manual(name = "Forecast",
values = c("ARIMA Forecast" = "blue", "ETS Forecast" = "red")) +
theme_minimal() +
theme(legend.position = "bottom")
