# Load some relevant libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(pacman)
p_load(tidyverse, lubridate, readxl, highcharter, tidyquant,
       timetk, tibbletime, scales)
p_load(quantmod, PerformanceAnalytics) # these two packages cannot be installed!
p_load(reshape2)
p_load(TTR, readr)
path_bike_orderlines <- "bike_orderlines.rds"
bike_orderlines_tbl <- read_rds(path_bike_orderlines)
monthly_sales <- bike_orderlines_tbl %>%
  mutate(Month = month(order_date, label = TRUE)) %>%
  group_by(Month) %>%
  summarise(Sales = sum(total_price)) %>%
  arrange(desc(Sales))

# Display results
print(monthly_sales)
## # A tibble: 12 × 2
##    Month   Sales
##    <ord>   <dbl>
##  1 Apr   8386170
##  2 May   7935055
##  3 Jun   7813105
##  4 Jul   7602005
##  5 Mar   7282280
##  6 Sep   5556055
##  7 Aug   5346125
##  8 Feb   5343295
##  9 Oct   4394300
## 10 Nov   4169755
## 11 Jan   4089460
## 12 Dec   3114725
# Load necessary libraries
library(tidyverse)
library(lubridate)
library(readr)

# Step 1: Load the dataset
# Ensure the delimiter is correctly specified to split the columns
midterm_data <- read_delim("MidtermDataTEJ.csv", delim = "\t", col_types = cols(
  CO_ID = col_character(),
  CoName = col_character(),
  Date = col_character(),
  Close = col_character()
))

# Step 2: Clean the data
# Trim spaces in CO_ID, convert Date column to proper Date format, and clean Close column
midterm_data <- midterm_data %>%
  mutate(
    CO_ID = stringr::str_trim(CO_ID),                  # Trim spaces in CO_ID
    Date = as.Date(Date, format = "%Y%m%d"),           # Convert Date column
    Close = as.numeric(stringr::str_trim(Close))       # Remove spaces and convert Close to numeric
  )

# Step 3: Filter for specific IDs (0050, 0052, 0056) and relevant date range
midterm_filtered <- midterm_data %>%
  filter(
    CO_ID %in% c("0050", "0052", "0056"),              # Filter for relevant CO_IDs
    year(Date) >= 2008 & year(Date) <= 2023           # Restrict to 2008–2023
  )

# Step 4: Pivot the data to reshape it
# Create separate columns for each CO_ID with their Close values
midterm_pivot <- midterm_filtered %>%
  select(Date, CO_ID, Close) %>%                      # Keep only Date, CO_ID, and Close columns
  pivot_wider(names_from = CO_ID, values_from = Close) %>%  # Pivot CO_ID values into columns
  arrange(Date)                                       # Sort by Date


# 2008 data
midterm_pivot %>% filter(Date >= "2008-01-02", Date <= "2008-01-09")
## # A tibble: 6 × 4
##   Date       `0050` `0052` `0056`
##   <date>      <dbl>  <dbl>  <dbl>
## 1 2008-01-02   34.7   22.7  10.2 
## 2 2008-01-03   34.1   21.8  10.1 
## 3 2008-01-04   34.1   21.7  10.1 
## 4 2008-01-07   32.6   20.2   9.95
## 5 2008-01-08   32.9   20.2  10.1 
## 6 2008-01-09   33.5   20.3  10.2
# 2020 data
midterm_pivot %>% filter(Date >= "2020-04-23", Date <= "2020-04-30")
## # A tibble: 6 × 4
##   Date       `0050` `0052` `0056`
##   <date>      <dbl>  <dbl>  <dbl>
## 1 2020-04-23   70.8   52.6   18.5
## 2 2020-04-24   70.8   52.5   18.5
## 3 2020-04-27   72.2   53.5   18.8
## 4 2020-04-28   72.2   52.9   18.9
## 5 2020-04-29   73.2   53.5   19.0
## 6 2020-04-30   74.8   54.3   19.3
# Step 1: Compute discrete daily returns
daily_returns <- midterm_pivot %>%
  arrange(Date) %>% # Ensure data is sorted by date
  mutate(
    `0050` = (`0050` / lag(`0050`) - 1),
    `0052` = (`0052` / lag(`0052`) - 1),
    `0056` = (`0056` / lag(`0056`) - 1)
  ) %>%
  # Remove rows with NA in '0050' column (or any column that contains NA)
  filter(!is.na(`0050`))

# 2008 data
daily_returns %>% filter(Date >= "2008-01-02", Date <= "2008-01-09")
## # A tibble: 5 × 4
##   Date         `0050`   `0052`   `0056`
##   <date>        <dbl>    <dbl>    <dbl>
## 1 2008-01-03 -0.0166  -0.0380  -0.0136 
## 2 2008-01-04  0       -0.00510  0.00197
## 3 2008-01-07 -0.0457  -0.0692  -0.0157 
## 4 2008-01-08  0.00975  0        0.0124 
## 5 2008-01-09  0.0184   0.00551  0.00789
# 2020 data
daily_returns %>% filter(Date >= "2020-04-23", Date <= "2020-04-30")
## # A tibble: 6 × 4
##   Date        `0050`   `0052`    `0056`
##   <date>       <dbl>    <dbl>     <dbl>
## 1 2020-04-23 0.00372  0.00401  0.00419 
## 2 2020-04-24 0       -0.00160 -0.000379
## 3 2020-04-27 0.0204   0.0192   0.0175  
## 4 2020-04-28 0       -0.0102   0.00448 
## 5 2020-04-29 0.0139   0.0111   0.00594 
## 6 2020-04-30 0.0215   0.0149   0.0144
weekly_data <- midterm_pivot %>%
  mutate(Week = floor_date(Date, "week")) %>%  # Assign each row to the start of its week
  group_by(Week) %>%                          # Group by the start of the week
  summarise(
    `0050` = last(`0050`),                    # Get the last closing price for `0050` in each week
    `0052` = last(`0052`),                    # Get the last closing price for `0052` in each week
    `0056` = last(`0056`)                     # Get the last closing price for `0056` in each week
  ) %>%
  ungroup()                                   # Remove the groupings after summarising

# Step 2: View the weekly data
print(weekly_data)
## # A tibble: 823 × 4
##    Week       `0050` `0052` `0056`
##    <date>      <dbl>  <dbl>  <dbl>
##  1 2007-12-30   34.1   21.7  10.1 
##  2 2008-01-06   33.3   20.4  10.3 
##  3 2008-01-13   33.8   20.0   9.95
##  4 2008-01-20   32.6   19.8   9.46
##  5 2008-01-27   32.6   19.7   9.16
##  6 2008-02-10   33.1   20.1   9.34
##  7 2008-02-17   33.9   20.5   9.46
##  8 2008-02-24   35.1   21.4   9.83
##  9 2008-03-02   35.4   21.6  10.1 
## 10 2008-03-09   33.7   21.0   9.79
## # ℹ 813 more rows
# Step 1: Compute weekly returns for each stock
weekly_returns <- weekly_data %>%
  mutate(
    `X0050` = (`0050` / lag(`0050`) - 1),  # Compute the weekly return for `0050`
    `X0052` = (`0052` / lag(`0052`) - 1),  # Compute the weekly return for `0052`
    `X0056` = (`0056` / lag(`0056`) - 1)   # Compute the weekly return for `0056`
  ) %>%
  select(Week, `X0050`, `X0052`, `X0056`)  # Select only the Week and return columns

# Step 2: Remove the first row where returns are NA (since no previous week exists)
weekly_returns_clean <- weekly_returns %>%
  filter(!is.na(`X0050`))

# Step 3: View the cleaned weekly returns
print(weekly_returns_clean)
## # A tibble: 822 × 4
##    Week          X0050    X0052   X0056
##    <date>        <dbl>    <dbl>   <dbl>
##  1 2008-01-06 -0.0245  -0.0618   0.0137
##  2 2008-01-13  0.0147  -0.0194  -0.0291
##  3 2008-01-20 -0.0359  -0.0106  -0.0499
##  4 2008-01-27  0       -0.00676 -0.0315
##  5 2008-02-10  0.0160   0.0210   0.0195
##  6 2008-02-17  0.0262   0.0194   0.0128
##  7 2008-02-24  0.0349   0.0463   0.0399
##  8 2008-03-02  0.00904  0.0104   0.0222
##  9 2008-03-09 -0.0480  -0.0309  -0.0265
## 10 2008-03-16  0.0530   0.00293  0.0146
## # ℹ 812 more rows
# Convert daily data to monthly data (end of the month)
monthly_data <- midterm_pivot %>%
  mutate(EndOfMonth = ceiling_date(Date, "month") - days(1)) %>%  # Get the last day of the month
  group_by(EndOfMonth) %>%                                       # Group by the end of the month
  summarise(
    `0050` = last(`0050`, na.rm = TRUE),                         # Get the last closing price for `0050`
    `0052` = last(`0052`, na.rm = TRUE),                         # Get the last closing price for `0052`
    `0056` = last(`0056`, na.rm = TRUE)                          # Get the last closing price for `0056`
  ) %>%
  ungroup()                                                      # Remove groupings

# View the monthly data
print(monthly_data)
## # A tibble: 192 × 4
##    EndOfMonth `0050` `0052` `0056`
##    <date>      <dbl>  <dbl>  <dbl>
##  1 2008-01-31   31.7   19.5   9.03
##  2 2008-02-29   35.1   21.4   9.83
##  3 2008-03-31   34.9   21.1  10.1 
##  4 2008-04-30   36.9   22.4  10.4 
##  5 2008-05-31   35.9   21.9  10.2 
##  6 2008-06-30   31.7   19.4   9.10
##  7 2008-07-31   30.2   18.2   8.53
##  8 2008-08-31   31.0   19.5   8.70
##  9 2008-09-30   25.7   15.7   6.75
## 10 2008-10-31   22.3   13.1   5.82
## # ℹ 182 more rows
# Compute monthly returns for each stock
monthly_returns <- monthly_data %>%
  mutate(
    `R_0050` = (`0050` / lag(`0050`) - 1),  # Return for `0050`
    `R_0052` = (`0052` / lag(`0052`) - 1),  # Return for `0052`
    `R_0056` = (`0056` / lag(`0056`) - 1)   # Return for `0056`
  ) %>%
  select(EndOfMonth, `R_0050`, `R_0052`, `R_0056`)  # Keep only the return columns

# Display the computed monthly returns
print(monthly_returns)
## # A tibble: 192 × 4
##    EndOfMonth   R_0050  R_0052  R_0056
##    <date>        <dbl>   <dbl>   <dbl>
##  1 2008-01-31 NA       NA      NA     
##  2 2008-02-29  0.107    0.0975  0.0889
##  3 2008-03-31 -0.00657 -0.0156  0.0303
##  4 2008-04-30  0.0571   0.0635  0.0314
##  5 2008-05-31 -0.0266  -0.0236 -0.0217
##  6 2008-06-30 -0.116   -0.113  -0.110 
##  7 2008-07-31 -0.0473  -0.0621 -0.0624
##  8 2008-08-31  0.0239   0.0738  0.0200
##  9 2008-09-30 -0.169   -0.196  -0.224 
## 10 2008-10-31 -0.132   -0.165  -0.138 
## # ℹ 182 more rows