suppressWarnings({
  library(fpp3)
  library(dplyr)
  library(tidyr)
  library(fable)
  library(fabletools)
  library(lubridate)
  library(VIM)
})
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.0 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.5
## ✔ dplyr       1.1.2     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.0     ✔ feasts      0.3.2
## ✔ lubridate   1.9.2     ✔ fable       0.3.4
## ✔ ggplot2     3.5.1     ✔ fabletools  0.4.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
## 
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
## 
##     sleep

Part 1: ATM Data

Task: “In part A, I want you to forecast how much cash is taken out of 4 different ATM machines for May 2010. The data is given in a single file. The variable ‘Cash’ is provided in hundreds of dollars, other than that it is straight forward. I am being somewhat ambiguous on purpose to make this have a little more business feeling. Explain and demonstrate your process, techniques used and not used, and your actual forecast. I am giving you data via an excel file, please provide your written report on your findings, visuals, discussion and your R code via an RPubs link along with the actual .rmd file. Also please submit the forecast which you will put in an Excel readable file.”

Data Cleaning and Exploratory Data Analysis

atm_url <- "https://raw.githubusercontent.com/mollysiebecker/DATA_624/refs/heads/main/ATM624Data.csv"
atm <- read.csv(atm_url)
# drop unnecessary columns
atm <- atm[,1:3]
summary(atm)
##      DATE               ATM                 Cash        
##  Length:1474        Length:1474        Min.   :    0.0  
##  Class :character   Class :character   1st Qu.:    0.5  
##  Mode  :character   Mode  :character   Median :   73.0  
##                                        Mean   :  155.6  
##                                        3rd Qu.:  114.0  
##                                        Max.   :10920.0  
##                                        NA's   :19
unique(atm$ATM)
## [1] "ATM1" "ATM2" ""     "ATM3" "ATM4"

Some rows are missing the ATM label, and there are 19 NA’s in the Cash variable.

# eliminate redundant time stamp from date column
atm$DATE <- as.Date(mdy_hms(atm$DATE))

From inspecting the data, we can see that all 4 ATMs have data up until the end of April 2010. Fourteen rows cover the first half of May 2010, but in those rows both the ATM label and the cash amount are missing, so imputation is not a reasonable option and I will drop them. Instead, I will forecast for the month that includes those two missing weeks.
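
We can confirm this directly:

# rows with a blank ATM label: all in the first half of May 2010, with Cash also missing
atm[atm$ATM == "", ]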

# drop the 14 unlabeled rows from May 2010
atm <- atm[atm$ATM != "", ]
summary(atm)
##       DATE                ATM                 Cash        
##  Min.   :2009-05-01   Length:1460        Min.   :    0.0  
##  1st Qu.:2009-07-31   Class :character   1st Qu.:    0.5  
##  Median :2009-10-30   Mode  :character   Median :   73.0  
##  Mean   :2009-10-30                      Mean   :  155.6  
##  3rd Qu.:2010-01-29                      3rd Qu.:  114.0  
##  Max.   :2010-04-30                      Max.   :10920.0  
##                                          NA's   :5
unique(atm$ATM)
## [1] "ATM1" "ATM2" "ATM3" "ATM4"
ggplot(atm, aes(x = Cash)) +
  geom_boxplot(fill = "cornflowerblue", color = "black") +
  facet_wrap(~ ATM, scales = "free_x") +
  theme_minimal()
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

ATMs 1 and 2 have roughly symmetric or slightly skewed distributions. ATM 3 is almost entirely 0, with three non-zero values at the end of the time series. ATM 4 appears roughly symmetric, except for a single extreme outlier.

Before doing any transformations or imputation, I will pivot the data to a wide format.

atm <- pivot_wider(atm, names_from = "ATM", values_from = "Cash")
summary(atm)
##       DATE                 ATM1             ATM2             ATM3        
##  Min.   :2009-05-01   Min.   :  1.00   Min.   :  0.00   Min.   : 0.0000  
##  1st Qu.:2009-07-31   1st Qu.: 73.00   1st Qu.: 25.50   1st Qu.: 0.0000  
##  Median :2009-10-30   Median : 91.00   Median : 67.00   Median : 0.0000  
##  Mean   :2009-10-30   Mean   : 83.89   Mean   : 62.58   Mean   : 0.7206  
##  3rd Qu.:2010-01-29   3rd Qu.:108.00   3rd Qu.: 93.00   3rd Qu.: 0.0000  
##  Max.   :2010-04-30   Max.   :180.00   Max.   :147.00   Max.   :96.0000  
##                       NA's   :3        NA's   :2                         
##       ATM4      
##  Min.   :    2  
##  1st Qu.:  124  
##  Median :  404  
##  Mean   :  474  
##  3rd Qu.:  705  
##  Max.   :10920  
## 

For the 5 remaining NA’s, I will fill in the missing data for ATM 1 and ATM 2 with the median of each column; the median is robust to outliers and should give a reasonable estimate of those missing values.

atm1_med <- median(atm$ATM1, na.rm = TRUE)
atm$ATM1[is.na(atm$ATM1)] <- atm1_med

atm2_med <- median(atm$ATM2, na.rm = TRUE)
atm$ATM2[is.na(atm$ATM2)] <- atm2_med
summary(atm)
##       DATE                 ATM1             ATM2            ATM3        
##  Min.   :2009-05-01   Min.   :  1.00   Min.   :  0.0   Min.   : 0.0000  
##  1st Qu.:2009-07-31   1st Qu.: 73.00   1st Qu.: 26.0   1st Qu.: 0.0000  
##  Median :2009-10-30   Median : 91.00   Median : 67.0   Median : 0.0000  
##  Mean   :2009-10-30   Mean   : 83.95   Mean   : 62.6   Mean   : 0.7206  
##  3rd Qu.:2010-01-29   3rd Qu.:108.00   3rd Qu.: 93.0   3rd Qu.: 0.0000  
##  Max.   :2010-04-30   Max.   :180.00   Max.   :147.0   Max.   :96.0000  
##       ATM4      
##  Min.   :    2  
##  1st Qu.:  124  
##  Median :  404  
##  Mean   :  474  
##  3rd Qu.:  705  
##  Max.   :10920
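
Median imputation is deliberately simple. Since VIM is already loaded, k-nearest-neighbour imputation would be one alternative; a minimal sketch, not used for the results below (k = 5 is an arbitrary choice):

# alternative: kNN imputation from VIM, using the other columns as donors
atm_knn <- kNN(as.data.frame(atm), variable = c("ATM1", "ATM2"), k = 5)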

I will also replace the outlier in ATM 4 with the median, to prevent it from having an outsize effect on the model. The outlier represents only a single day, and it may be due to the cash amount being recorded in dollars rather than hundreds of dollars.

# replace the single extreme value with the ATM 4 median
atm4_med <- median(atm$ATM4, na.rm = TRUE)
atm$ATM4[atm$ATM4 == 10920] <- atm4_med

Next, I pivot the data longer again, convert to a tsibble, and visualize each time series.

atm <- pivot_longer(atm, cols = c("ATM1", "ATM2", "ATM3", "ATM4"), names_to = "ATM", values_to = "cash")

atm <- as_tsibble(atm, key = ATM, index = DATE)

atm |>
  ggplot(aes(x = DATE, y = cash, colour = ATM)) +
  geom_line() +
  facet_grid(ATM ~ ., scales = "free_y") +
  labs(title = "Cash Withdrawn at Four ATMs, May 2009 to April 2010",
       y = "Cash (Hundreds of Dollars)",
       x = "Date")

Fitting and Forecasting

I split the data into train and test sets to use for fitting models to ATMs 1, 2, and 4, using the first 10 months as the training set and the last 2 months as the test set.

atm_train <- atm |>
  filter(DATE < as.Date("2010-03-01"))

atm_test <- atm %>%
  filter(DATE >= as.Date("2010-03-01"))

ATM 1

Below, I fit both an ARIMA model and an ETS model to the ATM 1 training set. I do not think the data require a transformation such as a Box-Cox transformation, since the variance appears relatively stable.
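
As a quick check on that judgment, Guerrero’s method can suggest a Box-Cox lambda, with a value near 1 indicating that a transformation would change little (a sketch, not part of the original workflow):

# estimate a Box-Cox lambda for ATM 1 via Guerrero's method
atm_train |>
  filter(ATM == "ATM1") |>
  features(cash, features = guerrero)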

atm1_fit_arima <- atm_train |>
  filter(ATM == "ATM1") %>%
  model('arima' = ARIMA(cash))

atm1_fit_ets <- atm_train |>
  filter(ATM == "ATM1") %>%
  model('ets' = ETS(cash))

report(atm1_fit_arima)
## Series: cash 
## Model: ARIMA(0,0,1)(0,1,2)[7] 
## 
## Coefficients:
##          ma1     sma1     sma2
##       0.2069  -0.7431  -0.1752
## s.e.  0.0589   0.0618   0.0593
## 
## sigma^2 estimated as 577.5:  log likelihood=-1370.26
## AIC=2748.51   AICc=2748.65   BIC=2763.29
gg_tsresiduals(atm1_fit_arima)

report(atm1_fit_ets)
## Series: cash 
## Model: ETS(A,N,A) 
##   Smoothing parameters:
##     alpha = 0.07057582 
##     gamma = 0.0001126394 
## 
##   Initial states:
##      l[0]      s[0]    s[-1]    s[-2]    s[-3]    s[-4]    s[-5]    s[-6]
##  77.84934 -65.15314 -6.56159 22.45949 4.881169 16.59085 12.71855 15.06467
## 
##   sigma^2:  584.8749
## 
##      AIC     AICc      BIC 
## 3685.746 3686.496 3722.916
gg_tsresiduals(atm1_fit_ets)

For each model, the residuals appear to be white noise and approximately normally distributed, with a mean of 0. Next we can test the accuracy of each model on the test set.
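
The visual check can be backed up with a formal Ljung-Box test on the innovation residuals, where a large p-value is consistent with white noise. A sketch for the ARIMA model, with my own choices of lag (two seasonal periods) and dof (the three estimated coefficients):

# Ljung-Box test on the ARIMA residuals
augment(atm1_fit_arima) |>
  features(.innov, ljung_box, lag = 14, dof = 3)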

atm1_fit_arima |> forecast(h = 61) |> accuracy(atm %>% filter(ATM == "ATM1"))
## # A tibble: 1 × 11
##   .model ATM   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>  <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 arima  ATM1  Test  -7.09  43.6  32.6 -343.  371.  1.72  1.47 -0.0533
atm1_fit_ets |> forecast(h = 61) |> accuracy(atm %>% filter(ATM == "ATM1"))
## # A tibble: 1 × 11
##   .model ATM   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE     ACF1
##   <chr>  <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 ets    ATM1  Test  -15.9  51.6  38.2 -426.  451.  2.01  1.74 -0.00978

The ARIMA model performs better, with the lower RMSE on the test set. Below I refit an ARIMA model to the full series and use it to forecast the next month.
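
As an aside, both candidates could have been fit in a single call to model() and compared in one accuracy table; a sketch of that more compact workflow:

atm_train |>
  filter(ATM == "ATM1") |>
  model(arima = ARIMA(cash), ets = ETS(cash)) |>
  forecast(h = 61) |>
  accuracy(atm |> filter(ATM == "ATM1"))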

atm1_fit_final <- atm |>
  filter(ATM == "ATM1") %>%
  model('arima' = ARIMA(cash))

atm1_fc <- atm1_fit_final %>%
  forecast(h = 31)

atm1_fc %>%
  autoplot(atm %>% filter(ATM == "ATM1"), level = NULL) +
  labs(title = "ATM 1 Forecast for May 2010",
       y = "Cash (hundreds of dollars)",
       x = "Date")

ATM 2

Below, I fit both an ARIMA model and an ETS model to the ATM 2 training set. I do not think the data require a transformation such as a Box-Cox transformation, since the variance appears relatively stable.

atm2_fit_arima <- atm_train |>
  filter(ATM == "ATM2") %>%
  model('arima' = ARIMA(cash))

atm2_fit_ets <- atm_train |>
  filter(ATM == "ATM2") %>%
  model('ets' = ETS(cash))

report(atm2_fit_arima)
## Series: cash 
## Model: ARIMA(0,0,0)(0,1,1)[7] 
## 
## Coefficients:
##          sma1
##       -0.7993
## s.e.   0.0475
## 
## sigma^2 estimated as 644.5:  log likelihood=-1385.06
## AIC=2774.13   AICc=2774.17   BIC=2781.52
gg_tsresiduals(atm2_fit_arima)

report(atm2_fit_ets)
## Series: cash 
## Model: ETS(A,N,A) 
##   Smoothing parameters:
##     alpha = 0.004173448 
##     gamma = 0.1503045 
## 
##   Initial states:
##      l[0]      s[0]     s[-1]    s[-2]       s[-3]    s[-4]  s[-5]    s[-6]
##  71.56437 -59.97451 -43.77681 27.41279 -0.07127845 8.960524 26.995 40.45429
## 
##   sigma^2:  645.7035
## 
##      AIC     AICc      BIC 
## 3715.824 3716.575 3752.994
gg_tsresiduals(atm2_fit_ets)

For each model, the residuals appear to be white noise and approximately normally distributed, with a mean of 0. Next we can test the accuracy of each model on the test set.

atm2_fit_arima |> forecast(h = 61) |> accuracy(atm %>% filter(ATM == "ATM2"))
## # A tibble: 1 × 11
##   .model ATM   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>  <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 arima  ATM2  Test   1.39  43.9  40.0  -Inf   Inf  1.83  1.40 0.0416
atm2_fit_ets |> forecast(h = 61) |> accuracy(atm %>% filter(ATM == "ATM2"))
## # A tibble: 1 × 11
##   .model ATM   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>  <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 ets    ATM2  Test   1.17  48.4  44.0  -Inf   Inf  2.01  1.54 0.0319

The ARIMA model performs better, with the lower RMSE on the test set. Below I refit an ARIMA model to the full series and use it to forecast the next month.

atm2_fit_final <- atm |>
  filter(ATM == "ATM2") %>%
  model('arima' = ARIMA(cash))

atm2_fc <- atm2_fit_final %>%
  forecast(h = 31)

atm2_fc %>%
  autoplot(atm %>% filter(ATM == "ATM2"), level = NULL) +
  labs(title = "ATM 2 Forecast for May 2010",
       y = "Cash (hundreds of dollars)",
       x = "Date")

ATM 3

I think the 0’s in the ATM 3 time series should be excluded: ATM 3 was likely only opened in the last three days of the recorded data, so the 0’s provide no meaningful insight into future cash withdrawals. With only three usable observations, there is not enough data for a train/test split or for a model like ARIMA or ETS, so instead I will forecast the next month using the mean of those three days.
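
A quick look at the non-zero observations confirms how little data there is:

# the only non-zero days for ATM 3
atm |> filter(ATM == "ATM3", cash > 0)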

atm3_fit <- atm |>
  filter(ATM == "ATM3") %>%
  filter(DATE >= as.Date("2010-04-28")) %>%
  model('mean' = MEAN(cash))


atm3_fc <- atm3_fit %>%
  forecast(h = 31)

atm3_fc %>%
  autoplot(atm %>% filter(ATM == "ATM3"), level = NULL) +
  labs(title = "ATM 3 Forecast for May 2010",
       y = "Cash (hundreds of dollars)",
       x = "Date")

ATM 4

Below, I fit both an ARIMA model and an ETS model to the ATM 4 training set. I do not think the data require a transformation such as a Box-Cox transformation, since the variance appears relatively stable.

atm4_fit_arima <- atm_train |>
  filter(ATM == "ATM4") %>%
  model('arima' = ARIMA(cash))

atm4_fit_ets <- atm_train |>
  filter(ATM == "ATM4") %>%
  model('ets' = ETS(cash))

report(atm4_fit_arima)
## Series: cash 
## Model: ARIMA(0,0,0)(1,0,0)[7] w/ mean 
## 
## Coefficients:
##         sar1  constant
##       0.1793  371.1252
## s.e.  0.0575   20.3080
## 
## sigma^2 estimated as 127468:  log likelihood=-2217.32
## AIC=4440.65   AICc=4440.73   BIC=4451.8
gg_tsresiduals(atm4_fit_arima)

report(atm4_fit_ets)
## Series: cash 
## Model: ETS(A,N,A) 
##   Smoothing parameters:
##     alpha = 0.00010001 
##     gamma = 0.0001000912 
## 
##   Initial states:
##      l[0]      s[0]     s[-1]    s[-2]    s[-3]   s[-4]    s[-5]    s[-6]
##  462.1316 -336.1958 -74.20708 84.35488 40.09682 110.683 61.92582 113.3424
## 
##   sigma^2:  111690.1
## 
##      AIC     AICc      BIC 
## 5282.379 5283.130 5319.550
gg_tsresiduals(atm4_fit_ets)

For each model, the residuals appear to be white noise and approximately normally distributed, with a mean of 0. Next we can test the accuracy of each model on the test set.

atm4_fit_arima |> forecast(h = 61) |> accuracy(atm %>% filter(ATM == "ATM4"))
## # A tibble: 1 × 11
##   .model ATM   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>  <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 arima  ATM4  Test  -41.9  293.  250. -802.  830. 0.708 0.635 -0.0918
atm4_fit_ets |> forecast(h = 61) |> accuracy(atm %>% filter(ATM == "ATM4"))
## # A tibble: 1 × 11
##   .model ATM   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>  <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 ets    ATM4  Test  -46.3  343.  292. -930.  969. 0.828 0.743 0.0482

The ARIMA model performs better, with the lower RMSE on the test set. Below I refit an ARIMA model to the full series and use it to forecast the next month.

atm4_fit_final <- atm |>
  filter(ATM == "ATM4") %>%
  model('arima' = ARIMA(cash))

atm4_fc <- atm4_fit_final %>%
  forecast(h = 31)

atm4_fc %>%
  autoplot(atm %>% filter(ATM == "ATM4"), level = NULL) +
  labs(title = "ATM 4 Forecast for May 2010",
       y = "Cash (hundreds of dollars)",
       x = "Date")

Even though the ARIMA model had the lower test-set RMSE, this forecast does not seem reasonable for the data: the selected specification, ARIMA(0,0,0)(1,0,0)[7] with a mean, has no differencing and only a weak seasonal AR term, so its forecasts decay geometrically toward the series mean and the seasonal variation dies out almost immediately. I will try forecasting with the ETS model instead.

atm4_fit_final_ets <- atm |>
  filter(ATM == "ATM4") %>%
  model('ets' = ETS(cash))

atm4_fc_ets <- atm4_fit_final_ets %>%
  forecast(h = 31)

atm4_fc_ets %>%
  autoplot(atm %>% filter(ATM == "ATM4"), level = NULL) +
  labs(title = "ATM 4 Forecast for May 2010",
       y = "Cash (hundreds of dollars)",
       x = "Date")

atm_fc <- bind_rows(atm1_fc, atm2_fc, atm3_fc, atm4_fc_ets)

# keep only the date, ATM, and point forecast columns
atm_fc <- atm_fc |> as_tibble() |> select(DATE, ATM, .mean)

atm_fc <- pivot_wider(atm_fc, names_from = "ATM", values_from = ".mean")

write.csv(atm_fc, file = "/Users/mollysiebecker/DATA 624/project_1/siebecker_atm_fc.csv", row.names = FALSE)

Based on the forecasts, ATM 4 will continue to require much more cash for withdrawal on a daily basis than the other three ATMs: its daily forecasts are on the order of 500 (hundreds of dollars, i.e., roughly $50,000), while the daily forecasts for the other three ATMs tend to stay below 100 (roughly $10,000).

Part 2: Residential Power Usage Data

Task: “Part B consists of a simple dataset of residential power usage for January 1998 until December 2013. Your assignment is to model these data and a monthly forecast for 2014. The data is given in a single file. The variable ‘KWH’ is power consumption in Kilowatt hours, the rest is straight forward. Add this to your existing files above.”

power_url <- "https://raw.githubusercontent.com/mollysiebecker/DATA_624/refs/heads/main/ResidentialCustomerForecastLoad-624.csv"
power_usage <- read.csv(power_url)

# keep only the date and KWH columns
power_usage <- power_usage[,2:3]
summary(power_usage)
##    YYYY.MMM              KWH          
##  Length:192         Min.   :  770523  
##  Class :character   1st Qu.: 5429912  
##  Mode  :character   Median : 6283324  
##                     Mean   : 6502475  
##                     3rd Qu.: 7620524  
##                     Max.   :10655730  
##                     NA's   :1
# convert date into year-month format
power_usage <- power_usage %>%
  rename(Month = YYYY.MMM)
power_usage$Month <- yearmonth(power_usage$Month)
ggplot(power_usage, aes(x = KWH)) +
  geom_boxplot(fill = "cornflowerblue", color = "black") +
  theme_minimal()
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

The data have one outlier and one missing value, and are otherwise roughly symmetric.

power_usage <- as_tsibble(power_usage, index = Month)
autoplot(power_usage, KWH) +
  labs(title = "Monthly Power Usage, January 1998 to December 2013",
       y = "Power Consumption (Kilowatt Hours)",
       x = "Month")

Since the time series appears to have a strong seasonal component, I decided to replace the missing value with the value from the same month in the previous year. I chose to leave the outlier unchanged: as a monthly total, it is less likely to be a simple recording error that could safely be disregarded.

# fill the missing value (September 2008) with the value from the same
# month of the previous year
power_usage$KWH[is.na(power_usage$KWH)] <- power_usage$KWH[power_usage$Month == yearmonth("2007 Sep")]
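
Had the gap been left in place, a model-based alternative would be fabletools::interpolate(), which replaces missing values with fitted values from a model; a sketch:

# interpolate the missing value from a fitted seasonal ARIMA model
power_usage |>
  model(ARIMA(KWH)) |>
  interpolate(power_usage)
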
power_usage |>
  model(
    classical_decomposition(KWH, type = "additive")
  ) |>
  components() |>
  autoplot() +
  labs(title = "Classical additive decomposition of power usage")
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_line()`).

Below I split the data into train and test sets, using 1998 through 2010 for training and 2011 through 2013 (36 months) for testing; I fit ARIMA and ETS models to the training set and test their accuracy on the test set.

# hold out 2011-2013 as the test set
power_train <- power_usage |>
  filter(Month <= yearmonth("2010 Dec"))

power_test <- power_usage %>%
  filter(Month > yearmonth("2010 Dec"))
power_fit_arima <- power_train |>
  model('arima' = ARIMA(KWH))

power_fit_ets <- power_train |>
  model('ets' = ETS(KWH))

report(power_fit_arima)
## Series: KWH 
## Model: ARIMA(0,0,0)(0,1,2)[12] 
## 
## Coefficients:
##          sma1    sma2
##       -0.7823  0.2870
## s.e.   0.1087  0.1423
## 
## sigma^2 estimated as 6.421e+11:  log likelihood=-2164.66
## AIC=4335.31   AICc=4335.48   BIC=4344.22
gg_tsresiduals(power_fit_arima)

report(power_fit_ets)
## Series: KWH 
## Model: ETS(M,N,M) 
##   Smoothing parameters:
##     alpha = 0.04237548 
##     gamma = 0.0001005016 
## 
##   Initial states:
##     l[0]      s[0]    s[-1]     s[-2]    s[-3]    s[-4]    s[-5]     s[-6]
##  6137427 0.9223936 0.750095 0.8948627 1.207363 1.267996 1.194141 0.9755688
##      s[-7]     s[-8]     s[-9]   s[-10]   s[-11]
##  0.7661072 0.8158632 0.9250675 1.051925 1.228616
## 
##   sigma^2:  0.0136
## 
##      AIC     AICc      BIC 
## 5009.421 5012.850 5055.169
gg_tsresiduals(power_fit_ets)

power_fit_arima |> forecast(h = 36) |> accuracy(power_usage)
## # A tibble: 1 × 10
##   .model .type       ME     RMSE      MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>    <dbl>    <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima  Test  1047509. 1643678. 1092006.  12.5  13.2  1.80  1.79 0.292
power_fit_ets |> forecast(h = 36) |> accuracy(power_usage)
## # A tibble: 1 × 10
##   .model .type       ME     RMSE      MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>    <dbl>    <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ets    Test  1091008. 1387593. 1118115.  13.5  14.0  1.84  1.51 0.129

Since the ETS model has the lower RMSE, I will use this model to forecast the next year.

power_fit_final <- power_usage |>
  model('ets' = ETS(KWH))

power_fc <- power_fit_final %>%
  forecast(h = 12)

power_fc %>%
  autoplot(power_usage) +
  labs(title = "Power Usage Forecast",
       y = "KWH",
       x = "Month")

# keep only the month and point forecast, renamed to KWH
power_fc <- power_fc |> as_tibble() |> select(Month, KWH = .mean)

write.csv(power_fc, file = "/Users/mollysiebecker/DATA 624/project_1/siebecker_power_fc.csv", row.names = FALSE)

Based on the forecast, power consumption will continue to follow a slight upward trend and exhibit strong seasonality throughout 2014, with peaks in January and August and lows in May and November.