# Load some relevant libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(pacman)
p_load(tidyverse, lubridate, readxl, highcharter, tidyquant,
timetk, tibbletime, scales)
p_load(quantmod, PerformanceAnalytics) # these two packages cannot be installed!
p_load(reshape2)
p_load(TTR, readr)
path_bike_orderlines <- "bike_orderlines.rds"
bike_orderlines_tbl <- read_rds(path_bike_orderlines)
monthly_sales <- bike_orderlines_tbl %>%
mutate(Month = month(order_date, label = TRUE)) %>%
group_by(Month) %>%
summarise(Sales = sum(total_price)) %>%
arrange(desc(Sales))
# Display results
print(monthly_sales)
## # A tibble: 12 × 2
## Month Sales
## <ord> <dbl>
## 1 Apr 8386170
## 2 May 7935055
## 3 Jun 7813105
## 4 Jul 7602005
## 5 Mar 7282280
## 6 Sep 5556055
## 7 Aug 5346125
## 8 Feb 5343295
## 9 Oct 4394300
## 10 Nov 4169755
## 11 Jan 4089460
## 12 Dec 3114725
# Load necessary libraries
library(tidyverse)
library(lubridate)
library(readr)
# Step 1: Load the dataset
# Ensure the delimiter is correctly specified to split the columns
midterm_data <- read_delim("MidtermDataTEJ.csv", delim = "\t", col_types = cols(
CO_ID = col_character(),
CoName = col_character(),
Date = col_character(),
Close = col_character()
))
# Step 2: Clean the data
# Trim spaces in CO_ID, convert Date column to proper Date format, and clean Close column
midterm_data <- midterm_data %>%
mutate(
CO_ID = stringr::str_trim(CO_ID), # Trim spaces in CO_ID
Date = as.Date(Date, format = "%Y%m%d"), # Convert Date column
Close = as.numeric(stringr::str_trim(Close)) # Remove spaces and convert Close to numeric
)
# Step 3: Filter for specific IDs (0050, 0052, 0056) and relevant date range
midterm_filtered <- midterm_data %>%
filter(
CO_ID %in% c("0050", "0052", "0056"), # Filter for relevant CO_IDs
year(Date) >= 2008 & year(Date) <= 2023 # Restrict to 2008–2023
)
# Step 4: Pivot the data to reshape it
# Create separate columns for each CO_ID with their Close values
midterm_pivot <- midterm_filtered %>%
select(Date, CO_ID, Close) %>% # Keep only Date, CO_ID, and Close columns
pivot_wider(names_from = CO_ID, values_from = Close) %>% # Pivot CO_ID values into columns
arrange(Date) # Sort by Date
# 2008 data
midterm_pivot %>% filter(Date >= "2008-01-02", Date <= "2008-01-09")
## # A tibble: 6 × 4
## Date `0050` `0052` `0056`
## <date> <dbl> <dbl> <dbl>
## 1 2008-01-02 34.7 22.7 10.2
## 2 2008-01-03 34.1 21.8 10.1
## 3 2008-01-04 34.1 21.7 10.1
## 4 2008-01-07 32.6 20.2 9.95
## 5 2008-01-08 32.9 20.2 10.1
## 6 2008-01-09 33.5 20.3 10.2
# 2020 data
midterm_pivot %>% filter(Date >= "2020-04-23", Date <= "2020-04-30")
## # A tibble: 6 × 4
## Date `0050` `0052` `0056`
## <date> <dbl> <dbl> <dbl>
## 1 2020-04-23 70.8 52.6 18.5
## 2 2020-04-24 70.8 52.5 18.5
## 3 2020-04-27 72.2 53.5 18.8
## 4 2020-04-28 72.2 52.9 18.9
## 5 2020-04-29 73.2 53.5 19.0
## 6 2020-04-30 74.8 54.3 19.3
# Step 1: Compute discrete daily returns
daily_returns <- midterm_pivot %>%
arrange(Date) %>% # Ensure data is sorted by date
mutate(
`0050` = (`0050` / lag(`0050`) - 1),
`0052` = (`0052` / lag(`0052`) - 1),
`0056` = (`0056` / lag(`0056`) - 1)
) %>%
# Remove rows with NA in '0050' column (or any column that contains NA)
filter(!is.na(`0050`))
# 2008 data
daily_returns %>% filter(Date >= "2008-01-02", Date <= "2008-01-09")
## # A tibble: 5 × 4
## Date `0050` `0052` `0056`
## <date> <dbl> <dbl> <dbl>
## 1 2008-01-03 -0.0166 -0.0380 -0.0136
## 2 2008-01-04 0 -0.00510 0.00197
## 3 2008-01-07 -0.0457 -0.0692 -0.0157
## 4 2008-01-08 0.00975 0 0.0124
## 5 2008-01-09 0.0184 0.00551 0.00789
# 2020 data
daily_returns %>% filter(Date >= "2020-04-23", Date <= "2020-04-30")
## # A tibble: 6 × 4
## Date `0050` `0052` `0056`
## <date> <dbl> <dbl> <dbl>
## 1 2020-04-23 0.00372 0.00401 0.00419
## 2 2020-04-24 0 -0.00160 -0.000379
## 3 2020-04-27 0.0204 0.0192 0.0175
## 4 2020-04-28 0 -0.0102 0.00448
## 5 2020-04-29 0.0139 0.0111 0.00594
## 6 2020-04-30 0.0215 0.0149 0.0144
weekly_data <- midterm_pivot %>%
mutate(Week = floor_date(Date, "week")) %>% # Assign each row to the start of its week
group_by(Week) %>% # Group by the start of the week
summarise(
`0050` = last(`0050`), # Get the last closing price for `0050` in each week
`0052` = last(`0052`), # Get the last closing price for `0052` in each week
`0056` = last(`0056`) # Get the last closing price for `0056` in each week
) %>%
ungroup() # Remove the groupings after summarising
# Step 2: View the weekly data
print(weekly_data)
## # A tibble: 823 × 4
## Week `0050` `0052` `0056`
## <date> <dbl> <dbl> <dbl>
## 1 2007-12-30 34.1 21.7 10.1
## 2 2008-01-06 33.3 20.4 10.3
## 3 2008-01-13 33.8 20.0 9.95
## 4 2008-01-20 32.6 19.8 9.46
## 5 2008-01-27 32.6 19.7 9.16
## 6 2008-02-10 33.1 20.1 9.34
## 7 2008-02-17 33.9 20.5 9.46
## 8 2008-02-24 35.1 21.4 9.83
## 9 2008-03-02 35.4 21.6 10.1
## 10 2008-03-09 33.7 21.0 9.79
## # ℹ 813 more rows
# Step 1: Compute weekly returns for each stock
weekly_returns <- weekly_data %>%
mutate(
`X0050` = (`0050` / lag(`0050`) - 1), # Compute the weekly return for `0050`
`X0052` = (`0052` / lag(`0052`) - 1), # Compute the weekly return for `0052`
`X0056` = (`0056` / lag(`0056`) - 1) # Compute the weekly return for `0056`
) %>%
select(Week, `X0050`, `X0052`, `X0056`) # Select only the Week and return columns
# Step 2: Remove the first row where returns are NA (since no previous week exists)
weekly_returns_clean <- weekly_returns %>%
filter(!is.na(`X0050`))
# Step 3: View the cleaned weekly returns
print(weekly_returns_clean)
## # A tibble: 822 × 4
## Week X0050 X0052 X0056
## <date> <dbl> <dbl> <dbl>
## 1 2008-01-06 -0.0245 -0.0618 0.0137
## 2 2008-01-13 0.0147 -0.0194 -0.0291
## 3 2008-01-20 -0.0359 -0.0106 -0.0499
## 4 2008-01-27 0 -0.00676 -0.0315
## 5 2008-02-10 0.0160 0.0210 0.0195
## 6 2008-02-17 0.0262 0.0194 0.0128
## 7 2008-02-24 0.0349 0.0463 0.0399
## 8 2008-03-02 0.00904 0.0104 0.0222
## 9 2008-03-09 -0.0480 -0.0309 -0.0265
## 10 2008-03-16 0.0530 0.00293 0.0146
## # ℹ 812 more rows
# Convert daily data to monthly data (end of the month)
monthly_data <- midterm_pivot %>%
mutate(EndOfMonth = ceiling_date(Date, "month") - days(1)) %>% # Get the last day of the month
group_by(EndOfMonth) %>% # Group by the end of the month
summarise(
`0050` = last(`0050`, na.rm = TRUE), # Get the last closing price for `0050`
`0052` = last(`0052`, na.rm = TRUE), # Get the last closing price for `0052`
`0056` = last(`0056`, na.rm = TRUE) # Get the last closing price for `0056`
) %>%
ungroup() # Remove groupings
# View the monthly data
print(monthly_data)
## # A tibble: 192 × 4
## EndOfMonth `0050` `0052` `0056`
## <date> <dbl> <dbl> <dbl>
## 1 2008-01-31 31.7 19.5 9.03
## 2 2008-02-29 35.1 21.4 9.83
## 3 2008-03-31 34.9 21.1 10.1
## 4 2008-04-30 36.9 22.4 10.4
## 5 2008-05-31 35.9 21.9 10.2
## 6 2008-06-30 31.7 19.4 9.10
## 7 2008-07-31 30.2 18.2 8.53
## 8 2008-08-31 31.0 19.5 8.70
## 9 2008-09-30 25.7 15.7 6.75
## 10 2008-10-31 22.3 13.1 5.82
## # ℹ 182 more rows
# Compute monthly returns for each stock
monthly_returns <- monthly_data %>%
mutate(
`R_0050` = (`0050` / lag(`0050`) - 1), # Return for `0050`
`R_0052` = (`0052` / lag(`0052`) - 1), # Return for `0052`
`R_0056` = (`0056` / lag(`0056`) - 1) # Return for `0056`
) %>%
select(EndOfMonth, `R_0050`, `R_0052`, `R_0056`) # Keep only the return columns
# Display the computed monthly returns
print(monthly_returns)
## # A tibble: 192 × 4
## EndOfMonth R_0050 R_0052 R_0056
## <date> <dbl> <dbl> <dbl>
## 1 2008-01-31 NA NA NA
## 2 2008-02-29 0.107 0.0975 0.0889
## 3 2008-03-31 -0.00657 -0.0156 0.0303
## 4 2008-04-30 0.0571 0.0635 0.0314
## 5 2008-05-31 -0.0266 -0.0236 -0.0217
## 6 2008-06-30 -0.116 -0.113 -0.110
## 7 2008-07-31 -0.0473 -0.0621 -0.0624
## 8 2008-08-31 0.0239 0.0738 0.0200
## 9 2008-09-30 -0.169 -0.196 -0.224
## 10 2008-10-31 -0.132 -0.165 -0.138
## # ℹ 182 more rows