# Location of the Electric Production CSV.
# NOTE(review): absolute, user-specific path; adjust before running elsewhere.
data_path <- "C:/Users/shanata/Downloads/Electric_Production.csv"
# Load the raw table; later steps read its DATE and IPG2211A2N columns.
data <- read.csv(data_path)

Electricity Production dataset

Choose a column

I have chosen the column IPG2211A2N, which indicates the electricity produced.

Filtering

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Keep only the date and production columns, dropping exact duplicate rows.
# NOTE(review): the steps visible after this one read `data`, not `data_`;
# confirm which table is meant to feed the rest of the analysis.
data_ <- distinct(select(data, DATE, IPG2211A2N))

Creating a tsibble object

library(tsibble)
## Warning: package 'tsibble' was built under R version 4.3.2
## 
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union
# Parse the month/day/year date strings into Date values.
data$DATE <- as.Date(data$DATE, format = "%m/%d/%Y")
# Build a single-series tsibble indexed by DATE. IPG2211A2N is the measured
# value, not a series identifier, so it must not be the key: keying on it
# would split the data into one series per distinct value.
ts_data <- as_tsibble(data, index = DATE)

Plotting data

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.2
# Line chart of the IPG2211A2N series across the full date range.
ggplot(data = ts_data, mapping = aes(x = DATE, y = IPG2211A2N)) +
  geom_line() +
  ggtitle("Time Series Plot of IPG2211A2N") +
  xlab("Date") +
  ylab("IPG2211A2N") +
  theme_minimal()

We can see how electricity production has increased over the years.

Plotting over different windows of time

library(lubridate)
## Warning: package 'lubridate' was built under R version 4.3.2
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:tsibble':
## 
##     interval
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Same series split into one panel per calendar year, stacked in a single
# column; each panel gets its own x-axis range.
ggplot(data = ts_data, mapping = aes(x = DATE, y = IPG2211A2N)) +
  geom_line() +
  facet_wrap(~ year(DATE), ncol = 1, scales = "free_x") +
  ggtitle("Time Series Plot of IPG2211A2N - Different Windows") +
  xlab("Date") +
  ylab("IPG2211A2N") +
  theme_minimal()
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

Using Linear regression

# Fit a straight-line trend: IPG2211A2N regressed on the calendar date.
linear_model <- lm(formula = IPG2211A2N ~ DATE, data = ts_data)
# Print the coefficient table, residual summary and fit statistics.
linear_model |> summary()
## 
## Call:
## lm(formula = IPG2211A2N ~ DATE, data = ts_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -21.6166  -6.3156  -0.4768   5.5729  21.5213 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 4.660e+01  1.473e+00   31.64   <2e-16 ***
## DATE        3.672e-03  1.225e-04   29.97   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.514 on 395 degrees of freedom
## Multiple R-squared:  0.6946, Adjusted R-squared:  0.6938 
## F-statistic: 898.4 on 1 and 395 DF,  p-value: < 2.2e-16

The linear regression suggests a significant positive relationship between time (DATE) and the response variable (IPG2211A2N). The R-squared value (about 0.69) indicates that the model explains a substantial proportion of the variability in the response variable. The p-values for both coefficients (< 2e-16) suggest that they are highly significant.

library(lmtest)
## Warning: package 'lmtest' was built under R version 4.3.2
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.3.2
## 
## Attaching package: 'zoo'
## The following object is masked from 'package:tsibble':
## 
##     index
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
# Coefficient t-tests for the fitted trend, using lmtest.
linear_model |> coeftest()
## 
## t test of coefficients:
## 
##               Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) 46.6037120  1.4727232  31.645 < 2.2e-16 ***
## DATE         0.0036719  0.0001225  29.974 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

There is strong evidence of a linear relationship between the “DATE” variable and the response variable “IPG2211A2N”.

# Overlay the least-squares line (no confidence band) on the series.
ggplot(data = ts_data, mapping = aes(x = DATE, y = IPG2211A2N)) +
  geom_line() +
  geom_smooth(method = "lm", se = FALSE, colour = "red") +
  ggtitle("Time Series Plot with Linear Regression Line") +
  xlab("Date") +
  ylab("IPG2211A2N") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.2
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble      3.2.1     ✔ feasts      0.3.1
## ✔ tidyr       1.3.0     ✔ fable       0.3.3
## ✔ tsibbledata 0.4.1     ✔ fabletools  0.3.4
## Warning: package 'tibble' was built under R version 4.3.2
## Warning: package 'tidyr' was built under R version 4.3.2
## Warning: package 'tsibbledata' was built under R version 4.3.2
## Warning: package 'feasts' was built under R version 4.3.2
## Warning: package 'fabletools' was built under R version 4.3.2
## Warning: package 'fable' was built under R version 4.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()     masks base::date()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ zoo::index()          masks tsibble::index()
## ✖ tsibble::intersect()  masks base::intersect()
## ✖ lubridate::interval() masks tsibble::interval()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ tsibble::setdiff()    masks base::setdiff()
## ✖ tsibble::union()      masks base::union()
library(fable)
# DATE is already converted earlier in this file; only re-parse when the
# column is not yet a Date, so the step is safe to run on its own.
if (!inherits(data$DATE, "Date")) {
  data$DATE <- as.Date(data$DATE, format = "%m/%d/%Y")
}

# Create a monthly ts object. Rows are put in chronological order and the
# start period is taken from the earliest date rather than hard-coded, so the
# time axis stays correct if the file's range or row order changes.
monthly <- data[order(data$DATE), ]
first_date <- monthly$DATE[1]
ts_data <- ts(
  monthly$IPG2211A2N,
  frequency = 12,
  start = c(
    as.integer(format(first_date, "%Y")),
    as.integer(format(first_date, "%m"))
  )
)

# Decompose the time series to detect seasonality
stl_result <- stl(ts_data, s.window = "periodic")

# Plot the seasonal component
plot(stl_result$time.series[, "seasonal"], main = "Seasonal Component")

# Plot ACF
acf(ts_data)