library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(pacman)
p_load(tidyverse, lubridate, readxl, highcharter, tidyquant,
       timetk, tibbletime, scales)
p_load(quantmod, PerformanceAnalytics) # these two packages cannot be installed!
p_load(reshape2)
p_load(TTR, readr)
path_bike_orderlines <- "bike_orderlines (2).rds"
bike_orderlines_tbl <- read_rds(path_bike_orderlines)
monthly_sales <- bike_orderlines_tbl %>%
  select(order_date, total_price) %>%
  mutate(month = order_date %>% floor_date("month") %>% month(label = TRUE)) %>%
  group_by(month) %>%
  summarise(sales = sum(total_price)) %>%
  ungroup() %>%
  mutate(sales = scales::dollar(sales)) %>%
  arrange(month) %>%
  
  rename(
    "Month" = month,
    "Sales" = sales
  )
print(monthly_sales)
## # A tibble: 12 × 2
##    Month Sales     
##    <ord> <chr>     
##  1 Jan   $4,089,460
##  2 Feb   $5,343,295
##  3 Mar   $7,282,280
##  4 Apr   $8,386,170
##  5 May   $7,935,055
##  6 Jun   $7,813,105
##  7 Jul   $7,602,005
##  8 Aug   $5,346,125
##  9 Sep   $5,556,055
## 10 Oct   $4,394,300
## 11 Nov   $4,169,755
## 12 Dec   $3,114,725
midterm_data <- read_delim("MidtermDataTEJ.csv", delim = "\t")  # For tab-delimited files
## Rows: 11817 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): CO_ID, CoName, Close
## dbl (1): Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Step 1: Summarize duplicates to ensure uniqueness
midterm_data_clean <- midterm_data %>%
  mutate(
    Close = as.numeric(str_trim(Close)),  # Clean and convert Close to numeric
    Date = ymd(Date)                     # Convert Date to proper format
  ) %>%
  group_by(Date, CO_ID) %>%              # Group by Date and CO_ID
  summarise(Close = mean(Close, na.rm = TRUE), .groups = "drop")  # Resolve duplicates by taking the mean

# Step 2: Pivot data to wide format
midterm_data_wide <- midterm_data_clean %>%
  pivot_wider(names_from = CO_ID, values_from = Close) %>%  # Pivot to wide format
  arrange(Date)  # Arrange by Date

# Print the resulting dataset
print(midterm_data_wide)
## # A tibble: 3,939 × 4
##    Date       `0050   ` `0052   ` `0056   `
##    <date>         <dbl>     <dbl>     <dbl>
##  1 2008-01-02      34.7      22.7     10.2 
##  2 2008-01-03      34.1      21.8     10.1 
##  3 2008-01-04      34.1      21.7     10.1 
##  4 2008-01-07      32.6      20.2      9.95
##  5 2008-01-08      32.9      20.2     10.1 
##  6 2008-01-09      33.5      20.3     10.2 
##  7 2008-01-10      33.4      20.3     10.2 
##  8 2008-01-11      33.3      20.4     10.3 
##  9 2008-01-14      33.9      20.3     10.3 
## 10 2008-01-15      35.2      20.6     10.4 
## # ℹ 3,929 more rows
midterm_data_wide %>%
  filter(year(Date) == 2023)
## # A tibble: 239 × 4
##    Date       `0050   ` `0052   ` `0056   `
##    <date>         <dbl>     <dbl>     <dbl>
##  1 2023-01-03      104.      86.2      21.8
##  2 2023-01-04      104.      85.8      21.8
##  3 2023-01-05      104.      86.5      21.8
##  4 2023-01-06      105.      87.0      21.9
##  5 2023-01-09      109.      90.5      22.2
##  6 2023-01-10      109.      91.2      22.3
##  7 2023-01-11      109.      91.0      22.3
##  8 2023-01-12      109.      91.0      22.2
##  9 2023-01-13      110.      92.4      22.2
## 10 2023-01-16      111.      93.0      22.2
## # ℹ 229 more rows
daily_returns <- midterm_data_wide %>%
  mutate(across(where(is.numeric), 
                ~round((. - lag(.)) / lag(.), 8),
                .names = "ret_{col}")) %>%
  select(Date, starts_with("ret"))
print(daily_returns, n = 10)
## # A tibble: 3,939 × 4
##    Date       `ret_0050   ` `ret_0052   ` `ret_0056   `
##    <date>             <dbl>         <dbl>         <dbl>
##  1 2008-01-02      NA            NA            NA      
##  2 2008-01-03      -0.0166       -0.0380       -0.0136 
##  3 2008-01-04       0            -0.00510       0.00197
##  4 2008-01-07      -0.0457       -0.0692       -0.0157 
##  5 2008-01-08       0.00975       0             0.0124 
##  6 2008-01-09       0.0184        0.00551       0.00789
##  7 2008-01-10      -0.00172       0             0.00743
##  8 2008-01-11      -0.00432       0.00246       0.00194
##  9 2008-01-14       0.0199       -0.00246       0.00388
## 10 2008-01-15       0.0383        0.0110        0.0124 
## # ℹ 3,929 more rows
weekly_prices <- midterm_data_wide %>%
  mutate(week = floor_date(Date, unit = "week")) %>%
  group_by(week) %>%
  slice_tail(n = 1) %>%
  ungroup() %>%
  select(-Date) %>%
  rename(Date = week) %>%
  select(Date, everything())  # This puts Date first

# Show 2008 period
weekly_prices %>%
  filter(Date >= as.Date("2008-01-04"), 
         Date <= as.Date("2008-02-15"))
## # A tibble: 5 × 4
##   Date       `0050   ` `0052   ` `0056   `
##   <date>         <dbl>     <dbl>     <dbl>
## 1 2008-01-06      33.3      20.4     10.3 
## 2 2008-01-13      33.8      20.0      9.95
## 3 2008-01-20      32.6      19.8      9.46
## 4 2008-01-27      32.6      19.7      9.16
## 5 2008-02-10      33.1      20.1      9.34
# Show 2020 period
weekly_prices %>%
  filter(Date >= as.Date("2020-03-27"), 
         Date <= as.Date("2020-04-24"))
## # A tibble: 4 × 4
##   Date       `0050   ` `0052   ` `0056   `
##   <date>         <dbl>     <dbl>     <dbl>
## 1 2020-03-29      66.4      47.8      17.2
## 2 2020-04-05      69.4      49.8      18.1
## 3 2020-04-12      72.8      53.6      18.6
## 4 2020-04-19      70.8      52.5      18.5
# Step 1: Convert daily prices to weekly (using last price of each week)
weekly_prices <- midterm_data_wide %>%
  mutate(week = floor_date(Date, unit = "week")) %>%  # Create week grouping
  group_by(week) %>%
  slice_tail(n = 1) %>%  # Take the last price of each week
  ungroup() %>%
  select(-Date) %>%
  rename(Date = week)

# Step 2: Calculate weekly returns
weekly_returns <- weekly_prices %>%
  mutate(across(
    where(is.numeric),
    ~(. - lag(.)) / lag(.),  # Calculate returns
    .names = "{.col}"
  )) %>%
  select(Date, everything()) %>%
  # Round to 8 decimal places as shown in the image
  mutate(across(where(is.numeric), ~round(., 8)))

# To display specific time periods as shown in the image:
# For 2008 period
weekly_returns %>%
  filter(Date >= as.Date("2008-01-04"), 
         Date <= as.Date("2008-02-15")) %>%
  print(n = 10)
## # A tibble: 5 × 4
##   Date       `0050   ` `0052   ` `0056   `
##   <date>         <dbl>     <dbl>     <dbl>
## 1 2008-01-06   -0.0245  -0.0618     0.0137
## 2 2008-01-13    0.0147  -0.0194    -0.0291
## 3 2008-01-20   -0.0359  -0.0106    -0.0499
## 4 2008-01-27    0       -0.00676   -0.0315
## 5 2008-02-10    0.0160   0.0210     0.0195
# For 2020 period
weekly_returns %>%
  filter(Date >= as.Date("2020-03-27"), 
         Date <= as.Date("2020-04-24")) %>%
  print(n = 10)
## # A tibble: 4 × 4
##   Date       `0050   ` `0052   ` `0056   `
##   <date>         <dbl>     <dbl>     <dbl>
## 1 2020-03-29  -0.00914   -0.0143   0.00699
## 2 2020-04-05   0.0448     0.0417   0.0493 
## 3 2020-04-12   0.0498     0.0752   0.0299 
## 4 2020-04-19  -0.0282    -0.0203  -0.00641
monthly_prices <- midterm_data_wide %>%
  mutate(month = ceiling_date(Date, unit = "month") - days(1)) %>%  # Assign end-of-month dates
  group_by(month) %>%
  slice_tail(n = 1) %>%                                            # Get the last row for each month
  ungroup() %>%
  select(-Date) %>%                                                # Drop the original Date column
  rename(Date = month) %>%                                         # Rename 'month' to 'Date'
  select(Date, everything())                                       # Put Date as the first column

# Show 2008 period
monthly_prices %>%
  filter(Date >= as.Date("2008-01-31"), 
         Date <= as.Date("2008-06-30"))
## # A tibble: 6 × 4
##   Date       `0050   ` `0052   ` `0056   `
##   <date>         <dbl>     <dbl>     <dbl>
## 1 2008-01-31      31.7      19.5      9.03
## 2 2008-02-29      35.1      21.4      9.83
## 3 2008-03-31      34.9      21.1     10.1 
## 4 2008-04-30      36.9      22.4     10.4 
## 5 2008-05-31      35.9      21.9     10.2 
## 6 2008-06-30      31.7      19.4      9.10
# Show 2020 period
monthly_prices %>%
  filter(Date >= as.Date("2019-11-30"), 
         Date <= as.Date("2020-04-30"))
## # A tibble: 6 × 4
##   Date       `0050   ` `0052   ` `0056   `
##   <date>         <dbl>     <dbl>     <dbl>
## 1 2019-11-30      77.5      54.3      19.5
## 2 2019-12-31      82.2      58.8      20.3
## 3 2020-01-31      78.7      56.7      19.6
## 4 2020-02-29      77.6      55.3      19.6
## 5 2020-03-31      66.6      47.5      17.2
## 6 2020-04-30      74.8      54.3      19.3
# Compute monthly returns
monthly_returns <- monthly_prices %>%
  mutate(across(-Date, ~ (. - lag(.)) / lag(.), .names = "return_{.col}")) %>%
  select(Date, starts_with("return"))

# Show 2008 period
monthly_returns %>%
  filter(Date >= as.Date("2008-02-29"), 
         Date <= as.Date("2008-07-31"))
## # A tibble: 6 × 4
##   Date       `return_0050   ` `return_0052   ` `return_0056   `
##   <date>                <dbl>            <dbl>            <dbl>
## 1 2008-02-29          0.107             0.0975           0.0889
## 2 2008-03-31         -0.00657          -0.0156           0.0303
## 3 2008-04-30          0.0571            0.0635           0.0314
## 4 2008-05-31         -0.0266           -0.0236          -0.0217
## 5 2008-06-30         -0.116            -0.113           -0.110 
## 6 2008-07-31         -0.0473           -0.0621          -0.0624
# Show 2020 period
monthly_returns %>%
  filter(Date >= as.Date("2019-11-30"), 
         Date <= as.Date("2020-04-30"))
## # A tibble: 6 × 4
##   Date       `return_0050   ` `return_0052   ` `return_0056   `
##   <date>                <dbl>            <dbl>            <dbl>
## 1 2019-11-30           0.0155           0.0191         0.00616 
## 2 2019-12-31           0.0596           0.0825         0.0440  
## 3 2020-01-31          -0.0421          -0.0346        -0.0362  
## 4 2020-02-29          -0.0145          -0.0258        -0.000714
## 5 2020-03-31          -0.141           -0.140         -0.122   
## 6 2020-04-30           0.123            0.143          0.121