R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document is generated that includes both the content and the output of any embedded R code chunks. You can embed an R code chunk like this:

# Load libraries
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(tsibble)
## Warning: package 'tsibble' was built under R version 4.3.2
## 
## Attaching package: 'tsibble'
## The following object is masked from 'package:lubridate':
## 
##     interval
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.2
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(modelr)
## Warning: package 'modelr' was built under R version 4.3.2
library(feasts)
## Warning: package 'feasts' was built under R version 4.3.2
## Loading required package: fabletools
## Warning: package 'fabletools' was built under R version 4.3.2
library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.2
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble      3.2.1     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.0     ✔ fable       0.3.3
## Warning: package 'tsibbledata' was built under R version 4.3.2
## Warning: package 'fable' was built under R version 4.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
# Import the raw bike rental records from the CSV export
bike_data <- read.csv('D:/dataset/db1bike.csv')

# Parse the day-month-year text in the Date column into Date objects
bike_data$Date <- as.Date(bike_data$Date, format = "%d-%m-%Y")

# Collect the rows whose Date value occurs more than once
duplicates <- duplicates(dplyr::select(bike_data, Date, Rented_Bike_Count))
## Using `Date` as index variable.
# TRUE means at least one Date is repeated in the raw data
nrow(duplicates) > 0
## [1] TRUE
# Keep only the first row seen for each Date so the index is unique.
# NOTE(review): every later row sharing a Date is discarded, not aggregated;
# confirm that dropping those observations is intended.
bike_data <- subset(bike_data, !duplicated(Date))

# Build the time series table: Date is the index, the count is the measurement
bike_tsibble <- as_tsibble(
  dplyr::select(bike_data, Date, Rented_Bike_Count),
  index = Date
)
# Sanity check: no fully identical rows remain after de-duplication
nrow(bike_data) == nrow(distinct(bike_data))
## [1] TRUE
# Line chart of rented bike counts over the whole series
bike_tsibble %>%
  ggplot(aes(x = Date, y = Rented_Bike_Count)) +
  geom_line()

# Same chart, restricted to observations whose Date falls in 2017
ggplot(
  filter(bike_tsibble, year(Date) == 2017),
  aes(x = Date, y = Rented_Bike_Count)
) +
  geom_line()

# Coefficient of 0.619 indicates upward trend
# But trend only explains 3% of variance
# NOTE(review): the fit that produced these two figures is not shown in this
# section; confirm they still match the current data.

# Seasonal analysis ----
# These two calls repeat packages the document has already attached;
# they are kept so this section can also be run on its own.
library(tibble)
library(tsibble)

# Rebuild the tsibble from every column of bike_data, indexed by Date
bike_tsibble <- bike_data %>%
  as_tsibble(index = Date)

# Seasonal plot of the rental counts. The plotted variable is named
# explicitly so the result does not depend on automatic column selection.
# Strong seasonal component
gg_season(bike_tsibble, y = Rented_Bike_Count)

# Lag plot: the series plotted against its own lagged values
gg_lag(bike_tsibble, y = Rented_Bike_Count, geom = "point")

# ACF plot: autocorrelation at each lag, drawn as a correlogram
bike_tsibble %>%
  ACF(Rented_Bike_Count) %>%
  autoplot()

# PACF plot: partial autocorrelation at each lag.
# gg_lag() has no `type` argument (it was passed on to geom_point() and
# ignored), so the partial autocorrelations are computed with PACF() instead.
bike_tsibble %>%
  PACF(Rented_Bike_Count) %>%
  autoplot()

# NOTE(review): the earlier conclusion here, that the ACF and PACF plots
# confirm a strong seasonal component at lag 365 days, was drawn from lag
# scatterplots rather than correlograms. Re-check it against the ACF/PACF
# plots above; pass `lag_max` to ACF()/PACF() if longer lags must be shown.