This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see <http://rmarkdown.rstudio.com>.
When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Load libraries
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(tsibble)
## Warning: package 'tsibble' was built under R version 4.3.2
##
## Attaching package: 'tsibble'
## The following object is masked from 'package:lubridate':
##
## interval
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.2
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(modelr)
## Warning: package 'modelr' was built under R version 4.3.2
library(feasts)
## Warning: package 'feasts' was built under R version 4.3.2
## Loading required package: fabletools
## Warning: package 'fabletools' was built under R version 4.3.2
library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.2
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble 3.2.1 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.0 ✔ fable 0.3.3
## Warning: package 'tsibbledata' was built under R version 4.3.2
## Warning: package 'fable' was built under R version 4.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
# Read in data
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere
bike_data <- read.csv("D:/dataset/db1bike.csv")
# Convert Date column to Date type. as.Date() yields NA (without an error) for
# any value that does not match the format, so stop early if parsing failed.
bike_data$Date <- as.Date(bike_data$Date, format = "%d-%m-%Y")
stopifnot(!anyNA(bike_data$Date))
# Collect the rows whose Date occurs more than once
duplicates <- bike_data %>%
  dplyr::select(Date, Rented_Bike_Count) %>%
  duplicates(index = Date)
# Check if there are any duplicates
nrow(duplicates) > 0
## [1] TRUE
# Collapse to one row per Date.
# Keeping only the first row of each date would silently throw away the counts
# of every other record on that day, so first replace each count with the
# total for its date, then keep one row per date. All other columns keep the
# value from the first row of the date.
# NOTE(review): assumes repeated dates are sub-daily records (e.g. hourly)
# whose counts add up to a daily total; confirm against the data set.
bike_data$Rented_Bike_Count <- ave(
  bike_data$Rented_Bike_Count,
  bike_data$Date,
  FUN = sum
)
bike_data <- bike_data[!duplicated(bike_data$Date), ]
# Create tsibble
bike_tsibble <- bike_data %>%
  dplyr::select(Date, Rented_Bike_Count) %>%
  as_tsibble(index = Date)
# Confirm that every Date now appears exactly once (required for the index)
anyDuplicated(bike_data$Date) == 0
## [1] TRUE
# Line chart of the complete series
bike_tsibble %>%
  ggplot(aes(x = Date, y = Rented_Bike_Count)) +
  geom_line()
# Line chart restricted to the observations dated in 2017
bike_2017 <- dplyr::filter(bike_tsibble, lubridate::year(Date) == 2017)
ggplot(bike_2017, aes(x = Date, y = Rented_Bike_Count)) +
  geom_line()
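# Linear trend (model fit not shown in this document): a slope coefficient of
# 0.619 indicates an upward trend, but the trend explains only about 3% of the
# variance in Rented_Bike_Count.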
# Seasonal plot (no decomposition model is fitted in this section)
# Rebuild the tsibble from every column of bike_data, indexed by Date
bike_tsibble <- bike_data %>%
  as_tsibble(index = Date)
# Draw the seasonal plot once, naming the plotted variable explicitly instead
# of relying on gg_season() to pick one automatically
gg_season(bike_tsibble, Rented_Bike_Count)
# Strong seasonal component
# Lag plot: the series plotted against its own lagged values, at gg_lag()'s
# default set of lags. This is a scatterplot diagnostic, not an ACF.
gg_lag(bike_tsibble, Rented_Bike_Count, geom = "point")
# ACF plot: autocorrelation estimates computed by feasts::ACF()
bike_tsibble %>%
  ACF(Rented_Bike_Count) %>%
  autoplot()
# PACF plot: partial autocorrelations computed by feasts::PACF().
# gg_lag() has no `type` argument, so it cannot be used to draw a PACF.
bike_tsibble %>%
  PACF(Rented_Bike_Count) %>%
  autoplot()
# ACF() and PACF() are called with their default maximum lag, so these plots
# describe short-range autocorrelation only.
# NOTE(review): a seasonal component at lag 365 days cannot be read from these
# plots; checking it needs lag_max of at least 365 and a series spanning
# several years. Confirm before asserting annual seasonality.