# Location of the electricity production CSV.
# NOTE(review): absolute, user-specific path - adjust before running elsewhere.
data_path <- "C:/Users/shanata/Downloads/Electric_Production.csv"

# Load the raw file into a data frame; later steps read its DATE and
# IPG2211A2N columns.
data <- read.csv(data_path)
I have chosen the column IPG2211A2N, which indicates the electricity produced.
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep only the date and production columns and drop duplicated rows.
# distinct() with explicit columns returns just those columns, one row per
# unique (DATE, IPG2211A2N) combination.
data_ <- data |>
  distinct(DATE, IPG2211A2N)
library(tsibble)
## Warning: package 'tsibble' was built under R version 4.3.2
##
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
# Parse the month/day/year strings into Date values, in place on `data`.
data$DATE <- as.Date(data$DATE, format = "%m/%d/%Y")

# Build a single-series tsibble indexed by DATE.
# No `key` is given: a key identifies separate series, and IPG2211A2N is the
# measured value, so using it as the key would turn every observation into its
# own one-row series.
# The index is left as a Date so the regression on DATE further down keeps its
# per-day slope.
ts_data <- as_tsibble(data, index = DATE)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.2
# Line chart of the full IPG2211A2N series against its date index.
ts_data |>
  ggplot(aes(x = DATE, y = IPG2211A2N)) +
  geom_line() +
  ggtitle("Time Series Plot of IPG2211A2N") +
  xlab("Date") +
  ylab("IPG2211A2N") +
  theme_minimal()
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.3.2
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:tsibble':
##
## interval
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Same series split into one panel per year of DATE, stacked in a single
# column; each panel gets its own x-axis range.
ts_data |>
  ggplot(aes(x = DATE, y = IPG2211A2N)) +
  geom_line() +
  facet_wrap(~ year(DATE), scales = "free_x", ncol = 1) +
  ggtitle("Time Series Plot of IPG2211A2N - Different Windows") +
  xlab("Date") +
  ylab("IPG2211A2N") +
  theme_minimal()
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
# Fit a straight-line trend: IPG2211A2N regressed on the DATE index.
linear_model <- lm(formula = IPG2211A2N ~ DATE, data = ts_data)

# Print residual summary, coefficient table and fit statistics.
summary(object = linear_model)
##
## Call:
## lm(formula = IPG2211A2N ~ DATE, data = ts_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.6166 -6.3156 -0.4768 5.5729 21.5213
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.660e+01 1.473e+00 31.64 <2e-16 ***
## DATE 3.672e-03 1.225e-04 29.97 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.514 on 395 degrees of freedom
## Multiple R-squared: 0.6946, Adjusted R-squared: 0.6938
## F-statistic: 898.4 on 1 and 395 DF, p-value: < 2.2e-16
The linear regression suggests a significant positive relationship between time (DATE) and the response variable (IPG2211A2N): the estimated slope is about 0.0037 units of IPG2211A2N per day. The R-squared value of 0.69 indicates that the model explains a substantial proportion of the variability in the response variable. The p-values for both coefficients (< 2e-16) indicate that they are highly significant.
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.3.2
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.3.2
##
## Attaching package: 'zoo'
## The following object is masked from 'package:tsibble':
##
## index
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# Coefficient t-tests from lmtest. No `vcov.` argument is supplied, so the
# fitted model's own covariance estimate is used.
coeftest(x = linear_model)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 46.6037120 1.4727232 31.645 < 2.2e-16 ***
## DATE 0.0036719 0.0001225 29.974 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
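There is strong evidence of a linear relationship between the DATE variable and the response variable IPG2211A2N. Because coeftest() is called here without a robust covariance estimator, it reproduces the t-tests from summary(); these assume independent residuals, which may not hold for a monthly series.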
# Series with the fitted linear trend overlaid in red, without a confidence
# band.
ts_data |>
  ggplot(aes(x = DATE, y = IPG2211A2N)) +
  geom_line() +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  ggtitle("Time Series Plot with Linear Regression Line") +
  xlab("Date") +
  ylab("IPG2211A2N") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.2
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble 3.2.1 ✔ feasts 0.3.1
## ✔ tidyr 1.3.0 ✔ fable 0.3.3
## ✔ tsibbledata 0.4.1 ✔ fabletools 0.3.4
## Warning: package 'tibble' was built under R version 4.3.2
## Warning: package 'tidyr' was built under R version 4.3.2
## Warning: package 'tsibbledata' was built under R version 4.3.2
## Warning: package 'feasts' was built under R version 4.3.2
## Warning: package 'fabletools' was built under R version 4.3.2
## Warning: package 'fable' was built under R version 4.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ zoo::index() masks tsibble::index()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ lubridate::interval() masks tsibble::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(fable)
# Seasonal decomposition and autocorrelation on a base `ts` object.
# NOTE: from here on `ts_data` is a monthly `ts`, replacing the tsibble that
# was stored under the same name earlier in the script.

# Read the observation dates without re-assigning the column: as.Date()
# returns Date input unchanged and parses "%m/%d/%Y" strings otherwise.
obs_dates <- as.Date(data$DATE, format = "%m/%d/%Y")
stopifnot(!anyNA(obs_dates))

# ts() trusts row order, so put the values in chronological order and take
# the start period from the earliest date instead of hard-coding it.
chronological <- order(obs_dates)
first_obs <- obs_dates[chronological[1L]]

# Create a ts object (12 observations per year)
ts_data <- ts(
  data$IPG2211A2N[chronological],
  frequency = 12,
  start = c(
    as.integer(format(first_obs, "%Y")),
    as.integer(format(first_obs, "%m"))
  )
)

# Decompose the time series to detect seasonality; "periodic" makes the
# seasonal pattern identical in every year
stl_result <- stl(ts_data, s.window = "periodic")

# Plot the seasonal component
plot(stl_result$time.series[, "seasonal"], main = "Seasonal Component")

# Plot ACF
acf(ts_data)