# Load required packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(pacman)
# Install and load additional packages
p_load(tidyverse, lubridate, readxl, highcharter, tidyquant, timetk, tibbletime, scales, reshape2, TTR, readr)
# Set the path for the data file
path_bike_orderlines <- "bike_orderlines.rds"
# Import the data
bike_orderlines_tbl <- read_rds(path_bike_orderlines)
# View the structure of the data
glimpse(bike_orderlines_tbl)
## Rows: 15,644
## Columns: 13
## $ order_date <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 2011-0…
## $ order_id <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7…
## $ order_line <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4, 1…
## $ quantity <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1…
## $ price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ total_price <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ model <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast of the Ea…
## $ category_1 <chr> "Mountain", "Mountain", "Mountain", "Mountain", "Road",…
## $ category_2 <chr> "Over Mountain", "Over Mountain", "Trail", "Over Mounta…
## $ frame_material <chr> "Carbon", "Carbon", "Aluminum", "Carbon", "Carbon", "Ca…
## $ bikeshop_name <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Climbers",…
## $ city <chr> "Ithaca", "Ithaca", "Kansas City", "Kansas City", "Loui…
## $ state <chr> "NY", "NY", "KS", "KS", "KY", "KY", "KY", "KY", "KY", "…
# Check the first few rows of data
head(bike_orderlines_tbl)
## # A tibble: 6 × 13
## order_date order_id order_line quantity price total_price model
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 2011-01-07 00:00:00 1 1 1 6070 6070 Jekyll Car…
## 2 2011-01-07 00:00:00 1 2 1 5970 5970 Trigger Ca…
## 3 2011-01-10 00:00:00 2 1 1 2770 2770 Beast of t…
## 4 2011-01-10 00:00:00 2 2 1 5970 5970 Trigger Ca…
## 5 2011-01-10 00:00:00 3 1 1 10660 10660 Supersix E…
## 6 2011-01-10 00:00:00 3 2 1 3200 3200 Jekyll Car…
## # ℹ 6 more variables: category_1 <chr>, category_2 <chr>, frame_material <chr>,
## # bikeshop_name <chr>, city <chr>, state <chr>
# Rename total_price to Sales, extract month, and calculate total sales per month
monthly_sales <- bike_orderlines_tbl %>%
rename(Sales = total_price) %>% # Rename total_price to Sales
mutate(Month = month(order_date, label = TRUE)) %>% # Extract month as a labeled factor
group_by(Month) %>%
summarise(Sales = sum(Sales, na.rm = TRUE)) %>% # Summarise monthly sales
arrange(desc(Sales)) # Sort by highest sales
# View monthly sales
monthly_sales
## # A tibble: 12 × 2
## Month Sales
## <ord> <dbl>
## 1 Apr 8386170
## 2 May 7935055
## 3 Jun 7813105
## 4 Jul 7602005
## 5 Mar 7282280
## 6 Sep 5556055
## 7 Aug 5346125
## 8 Feb 5343295
## 9 Oct 4394300
## 10 Nov 4169755
## 11 Jan 4089460
## 12 Dec 3114725
# Install and load necessary libraries
# Define the required packages
required_packages <- c(
"tidyverse", "lubridate", "readxl", "highcharter", "tidyquant",
"timetk", "tibbletime", "scales", "quantmod", "PerformanceAnalytics",
"reshape2", "TTR"
)
# Install any missing packages
new_packages <- required_packages[!(required_packages %in% installed.packages()[, "Package"])]
if(length(new_packages)) install.packages(new_packages)
# Load all the packages
lapply(required_packages, library, character.only = TRUE)
## [[1]]
## [1] "reshape2" "scales" "tibbletime"
## [4] "timetk" "PerformanceAnalytics" "quantmod"
## [7] "TTR" "xts" "zoo"
## [10] "tidyquant" "highcharter" "readxl"
## [13] "pacman" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr"
## [19] "readr" "tidyr" "tibble"
## [22] "ggplot2" "tidyverse" "stats"
## [25] "graphics" "grDevices" "utils"
## [28] "datasets" "methods" "base"
##
## [[2]]
## [1] "reshape2" "scales" "tibbletime"
## [4] "timetk" "PerformanceAnalytics" "quantmod"
## [7] "TTR" "xts" "zoo"
## [10] "tidyquant" "highcharter" "readxl"
## [13] "pacman" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr"
## [19] "readr" "tidyr" "tibble"
## [22] "ggplot2" "tidyverse" "stats"
## [25] "graphics" "grDevices" "utils"
## [28] "datasets" "methods" "base"
##
## [[3]]
## [1] "reshape2" "scales" "tibbletime"
## [4] "timetk" "PerformanceAnalytics" "quantmod"
## [7] "TTR" "xts" "zoo"
## [10] "tidyquant" "highcharter" "readxl"
## [13] "pacman" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr"
## [19] "readr" "tidyr" "tibble"
## [22] "ggplot2" "tidyverse" "stats"
## [25] "graphics" "grDevices" "utils"
## [28] "datasets" "methods" "base"
##
## [[4]]
## [1] "reshape2" "scales" "tibbletime"
## [4] "timetk" "PerformanceAnalytics" "quantmod"
## [7] "TTR" "xts" "zoo"
## [10] "tidyquant" "highcharter" "readxl"
## [13] "pacman" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr"
## [19] "readr" "tidyr" "tibble"
## [22] "ggplot2" "tidyverse" "stats"
## [25] "graphics" "grDevices" "utils"
## [28] "datasets" "methods" "base"
##
## [[5]]
## [1] "reshape2" "scales" "tibbletime"
## [4] "timetk" "PerformanceAnalytics" "quantmod"
## [7] "TTR" "xts" "zoo"
## [10] "tidyquant" "highcharter" "readxl"
## [13] "pacman" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr"
## [19] "readr" "tidyr" "tibble"
## [22] "ggplot2" "tidyverse" "stats"
## [25] "graphics" "grDevices" "utils"
## [28] "datasets" "methods" "base"
##
## [[6]]
## [1] "reshape2" "scales" "tibbletime"
## [4] "timetk" "PerformanceAnalytics" "quantmod"
## [7] "TTR" "xts" "zoo"
## [10] "tidyquant" "highcharter" "readxl"
## [13] "pacman" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr"
## [19] "readr" "tidyr" "tibble"
## [22] "ggplot2" "tidyverse" "stats"
## [25] "graphics" "grDevices" "utils"
## [28] "datasets" "methods" "base"
##
## [[7]]
## [1] "reshape2" "scales" "tibbletime"
## [4] "timetk" "PerformanceAnalytics" "quantmod"
## [7] "TTR" "xts" "zoo"
## [10] "tidyquant" "highcharter" "readxl"
## [13] "pacman" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr"
## [19] "readr" "tidyr" "tibble"
## [22] "ggplot2" "tidyverse" "stats"
## [25] "graphics" "grDevices" "utils"
## [28] "datasets" "methods" "base"
##
## [[8]]
## [1] "reshape2" "scales" "tibbletime"
## [4] "timetk" "PerformanceAnalytics" "quantmod"
## [7] "TTR" "xts" "zoo"
## [10] "tidyquant" "highcharter" "readxl"
## [13] "pacman" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr"
## [19] "readr" "tidyr" "tibble"
## [22] "ggplot2" "tidyverse" "stats"
## [25] "graphics" "grDevices" "utils"
## [28] "datasets" "methods" "base"
##
## [[9]]
## [1] "reshape2" "scales" "tibbletime"
## [4] "timetk" "PerformanceAnalytics" "quantmod"
## [7] "TTR" "xts" "zoo"
## [10] "tidyquant" "highcharter" "readxl"
## [13] "pacman" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr"
## [19] "readr" "tidyr" "tibble"
## [22] "ggplot2" "tidyverse" "stats"
## [25] "graphics" "grDevices" "utils"
## [28] "datasets" "methods" "base"
##
## [[10]]
## [1] "reshape2" "scales" "tibbletime"
## [4] "timetk" "PerformanceAnalytics" "quantmod"
## [7] "TTR" "xts" "zoo"
## [10] "tidyquant" "highcharter" "readxl"
## [13] "pacman" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr"
## [19] "readr" "tidyr" "tibble"
## [22] "ggplot2" "tidyverse" "stats"
## [25] "graphics" "grDevices" "utils"
## [28] "datasets" "methods" "base"
##
## [[11]]
## [1] "reshape2" "scales" "tibbletime"
## [4] "timetk" "PerformanceAnalytics" "quantmod"
## [7] "TTR" "xts" "zoo"
## [10] "tidyquant" "highcharter" "readxl"
## [13] "pacman" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr"
## [19] "readr" "tidyr" "tibble"
## [22] "ggplot2" "tidyverse" "stats"
## [25] "graphics" "grDevices" "utils"
## [28] "datasets" "methods" "base"
##
## [[12]]
## [1] "reshape2" "scales" "tibbletime"
## [4] "timetk" "PerformanceAnalytics" "quantmod"
## [7] "TTR" "xts" "zoo"
## [10] "tidyquant" "highcharter" "readxl"
## [13] "pacman" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr"
## [19] "readr" "tidyr" "tibble"
## [22] "ggplot2" "tidyverse" "stats"
## [25] "graphics" "grDevices" "utils"
## [28] "datasets" "methods" "base"
# Install and load necessary libraries
required_packages <- c(
"tidyverse", "lubridate", "readxl", "highcharter", "tidyquant",
"timetk", "tibbletime", "scales", "quantmod", "PerformanceAnalytics",
"reshape2", "TTR"
)
# Install any missing packages
new_packages <- required_packages[!(required_packages %in% installed.packages()[, "Package"])]
if(length(new_packages)) install.packages(new_packages)
# Load all the packages
lapply(required_packages, library, character.only = TRUE)
## [[1]]
## [1] "reshape2" "scales" "tibbletime"
## [4] "timetk" "PerformanceAnalytics" "quantmod"
## [7] "TTR" "xts" "zoo"
## [10] "tidyquant" "highcharter" "readxl"
## [13] "pacman" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr"
## [19] "readr" "tidyr" "tibble"
## [22] "ggplot2" "tidyverse" "stats"
## [25] "graphics" "grDevices" "utils"
## [28] "datasets" "methods" "base"
##
## [[2]]
## [1] "reshape2" "scales" "tibbletime"
## [4] "timetk" "PerformanceAnalytics" "quantmod"
## [7] "TTR" "xts" "zoo"
## [10] "tidyquant" "highcharter" "readxl"
## [13] "pacman" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr"
## [19] "readr" "tidyr" "tibble"
## [22] "ggplot2" "tidyverse" "stats"
## [25] "graphics" "grDevices" "utils"
## [28] "datasets" "methods" "base"
##
## [[3]]
## [1] "reshape2" "scales" "tibbletime"
## [4] "timetk" "PerformanceAnalytics" "quantmod"
## [7] "TTR" "xts" "zoo"
## [10] "tidyquant" "highcharter" "readxl"
## [13] "pacman" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr"
## [19] "readr" "tidyr" "tibble"
## [22] "ggplot2" "tidyverse" "stats"
## [25] "graphics" "grDevices" "utils"
## [28] "datasets" "methods" "base"
##
## [[4]]
## [1] "reshape2" "scales" "tibbletime"
## [4] "timetk" "PerformanceAnalytics" "quantmod"
## [7] "TTR" "xts" "zoo"
## [10] "tidyquant" "highcharter" "readxl"
## [13] "pacman" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr"
## [19] "readr" "tidyr" "tibble"
## [22] "ggplot2" "tidyverse" "stats"
## [25] "graphics" "grDevices" "utils"
## [28] "datasets" "methods" "base"
##
## [[5]]
## [1] "reshape2" "scales" "tibbletime"
## [4] "timetk" "PerformanceAnalytics" "quantmod"
## [7] "TTR" "xts" "zoo"
## [10] "tidyquant" "highcharter" "readxl"
## [13] "pacman" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr"
## [19] "readr" "tidyr" "tibble"
## [22] "ggplot2" "tidyverse" "stats"
## [25] "graphics" "grDevices" "utils"
## [28] "datasets" "methods" "base"
##
## [[6]]
## [1] "reshape2" "scales" "tibbletime"
## [4] "timetk" "PerformanceAnalytics" "quantmod"
## [7] "TTR" "xts" "zoo"
## [10] "tidyquant" "highcharter" "readxl"
## [13] "pacman" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr"
## [19] "readr" "tidyr" "tibble"
## [22] "ggplot2" "tidyverse" "stats"
## [25] "graphics" "grDevices" "utils"
## [28] "datasets" "methods" "base"
##
## [[7]]
## [1] "reshape2" "scales" "tibbletime"
## [4] "timetk" "PerformanceAnalytics" "quantmod"
## [7] "TTR" "xts" "zoo"
## [10] "tidyquant" "highcharter" "readxl"
## [13] "pacman" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr"
## [19] "readr" "tidyr" "tibble"
## [22] "ggplot2" "tidyverse" "stats"
## [25] "graphics" "grDevices" "utils"
## [28] "datasets" "methods" "base"
##
## [[8]]
## [1] "reshape2" "scales" "tibbletime"
## [4] "timetk" "PerformanceAnalytics" "quantmod"
## [7] "TTR" "xts" "zoo"
## [10] "tidyquant" "highcharter" "readxl"
## [13] "pacman" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr"
## [19] "readr" "tidyr" "tibble"
## [22] "ggplot2" "tidyverse" "stats"
## [25] "graphics" "grDevices" "utils"
## [28] "datasets" "methods" "base"
##
## [[9]]
## [1] "reshape2" "scales" "tibbletime"
## [4] "timetk" "PerformanceAnalytics" "quantmod"
## [7] "TTR" "xts" "zoo"
## [10] "tidyquant" "highcharter" "readxl"
## [13] "pacman" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr"
## [19] "readr" "tidyr" "tibble"
## [22] "ggplot2" "tidyverse" "stats"
## [25] "graphics" "grDevices" "utils"
## [28] "datasets" "methods" "base"
##
## [[10]]
## [1] "reshape2" "scales" "tibbletime"
## [4] "timetk" "PerformanceAnalytics" "quantmod"
## [7] "TTR" "xts" "zoo"
## [10] "tidyquant" "highcharter" "readxl"
## [13] "pacman" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr"
## [19] "readr" "tidyr" "tibble"
## [22] "ggplot2" "tidyverse" "stats"
## [25] "graphics" "grDevices" "utils"
## [28] "datasets" "methods" "base"
##
## [[11]]
## [1] "reshape2" "scales" "tibbletime"
## [4] "timetk" "PerformanceAnalytics" "quantmod"
## [7] "TTR" "xts" "zoo"
## [10] "tidyquant" "highcharter" "readxl"
## [13] "pacman" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr"
## [19] "readr" "tidyr" "tibble"
## [22] "ggplot2" "tidyverse" "stats"
## [25] "graphics" "grDevices" "utils"
## [28] "datasets" "methods" "base"
##
## [[12]]
## [1] "reshape2" "scales" "tibbletime"
## [4] "timetk" "PerformanceAnalytics" "quantmod"
## [7] "TTR" "xts" "zoo"
## [10] "tidyquant" "highcharter" "readxl"
## [13] "pacman" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr"
## [19] "readr" "tidyr" "tibble"
## [22] "ggplot2" "tidyverse" "stats"
## [25] "graphics" "grDevices" "utils"
## [28] "datasets" "methods" "base"
# Clear environment
rm(list = ls())
# Load and prepare data from TEJ.csv
# Make sure to convert to UTF-8 if necessary
etf3 <- read_csv("TEJ.csv", col_names = TRUE)
## Rows: 11817 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): CO_ID, CoName, Date
## dbl (1): Close
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
str(etf3)
## spc_tbl_ [11,817 × 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ CO_ID : chr [1:11817] "0050" "0052" "0056" "0050" ...
## $ CoName: chr [1:11817] "Yuanta Taiwan Top50" "FB Technology" "PTD" "Yuanta Taiwan Top50" ...
## $ Date : chr [1:11817] "1/2/2008" "1/2/2008" "1/2/2008" "1/3/2008" ...
## $ Close : num [1:11817] 34.7 22.7 10.2 34.1 21.8 ...
## - attr(*, "spec")=
## .. cols(
## .. CO_ID = col_character(),
## .. CoName = col_character(),
## .. Date = col_character(),
## .. Close = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Drop unnecessary columns and rename remaining ones for clarity
etf3 <- etf3[, -2]
colnames(etf3) <- c('id', 'date', 'close')
# Convert from long to wide format
etf3.wide <- dcast(etf3, date ~ id)
## Using close as value column: use value.var to override.
str(etf3.wide)
## 'data.frame': 3939 obs. of 4 variables:
## $ date: chr "1/10/2008" "1/10/2009" "1/10/2011" "1/10/2012" ...
## $ 0050: num 33.4 19.6 38.8 34.1 38 ...
## $ 0052: num 20.3 11.3 22.7 18.4 21 ...
## $ 0056: num 10.23 5.44 12.09 10.82 11.58 ...
head(etf3.wide)
## date 0050 0052 0056
## 1 1/10/2008 33.4171 20.3423 10.2307
## 2 1/10/2009 19.5773 11.2579 5.4431
## 3 1/10/2011 38.7933 22.6616 12.0925
## 4 1/10/2012 34.1033 18.4469 10.8186
## 5 1/10/2013 37.9878 21.0288 11.5832
## 6 1/10/2014 41.0580 22.8617 11.9792