midterm

# Load some relevant libraries
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(lubridate)
library(pacman)
p_load(tidyverse, lubridate, readxl, highcharter, tidyquant,
timetk, tibbletime, scales)
p_load(quantmod, PerformanceAnalytics) # these two packages cannot be installed!
p_load(reshape2)
p_load(TTR, readr)

path_bike_orderlines <- "bike_orderlines.rds"
bike_orderlines_tbl <- read_rds(path_bike_orderlines)

# Which month has the highest bike sales?
colnames(bike_orderlines_tbl)

##  [1] "order_date"     "order_id"       "order_line"     "quantity"      
##  [5] "price"          "total_price"    "model"          "category_1"    
##  [9] "category_2"     "frame_material" "bikeshop_name"  "city"          
## [13] "state"

glimpse(bike_orderlines_tbl)

## Rows: 15,644
## Columns: 13
## $ order_date     <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 2011-0…
## $ order_id       <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7…
## $ order_line     <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4, 1…
## $ quantity       <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1…
## $ price          <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ total_price    <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ model          <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast of the Ea…
## $ category_1     <chr> "Mountain", "Mountain", "Mountain", "Mountain", "Road",…
## $ category_2     <chr> "Over Mountain", "Over Mountain", "Trail", "Over Mounta…
## $ frame_material <chr> "Carbon", "Carbon", "Aluminum", "Carbon", "Carbon", "Ca…
## $ bikeshop_name  <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Climbers",…
## $ city           <chr> "Ithaca", "Ithaca", "Kansas City", "Kansas City", "Loui…
## $ state          <chr> "NY", "NY", "KS", "KS", "KY", "KY", "KY", "KY", "KY", "…

bike_orderlines_tbl <- bike_orderlines_tbl %>%
    mutate(sales = price * quantity)  # Replace 'price' and 'quantity' with correct column names
bike_sales_by_month <- bike_orderlines_tbl %>%
    mutate(Month = month(order_date, label = TRUE, abbr = FALSE)) %>%
    group_by(Month) %>%
    summarise(Sales = sum(sales, na.rm = TRUE)) %>%
    arrange(Month) %>%
    mutate(Sales = scales::dollar(Sales))

print(bike_sales_by_month)

## # A tibble: 12 × 2
##    Month     Sales     
##    <ord>     <chr>     
##  1 January   $4,089,460
##  2 February  $5,343,295
##  3 March     $7,282,280
##  4 April     $8,386,170
##  5 May       $7,935,055
##  6 June      $7,813,105
##  7 July      $7,602,005
##  8 August    $5,346,125
##  9 September $5,556,055
## 10 October   $4,394,300
## 11 November  $4,169,755
## 12 December  $3,114,725

#2. Download daily closing prices of ETF0050, 0052 and 0056 from TEJ database in AU library from 2008 to 2023.
#2.1 Following outputs are based on data from 2008-2020. You need to update the outputs based on data 2008-2023.

library(quantmod)

# For example, download the data for ETF0050 from 2008 to 2023
getSymbols("0050.TW", src = "yahoo", from = "2008-01-01", to = "2023-12-31")

## Warning: 0050.TW contains missing values. Some functions will not work if
## objects contain missing values in the middle of the series. Consider using
## na.omit(), na.approx(), na.fill(), etc to remove or replace them.

## [1] "0050.TW"

getSymbols("0052.TW", src = "yahoo", from = "2008-01-01", to = "2023-12-31")

## Warning: 0052.TW contains missing values. Some functions will not work if
## objects contain missing values in the middle of the series. Consider using
## na.omit(), na.approx(), na.fill(), etc to remove or replace them.

## [1] "0052.TW"

getSymbols("0056.TW", src = "yahoo", from = "2008-01-01", to = "2023-12-31")

## Warning: 0056.TW contains missing values. Some functions will not work if
## objects contain missing values in the middle of the series. Consider using
## na.omit(), na.approx(), na.fill(), etc to remove or replace them.

## [1] "0056.TW"

# Extract closing prices
ETF_0050 <- Cl(`0050.TW`)
ETF_0052 <- Cl(`0052.TW`)
ETF_0056 <- Cl(`0056.TW`)

# Combine them into one data frame
ETF_data <- data.frame(
  date = index(ETF_0050),
  `0050` = coredata(ETF_0050),
  `0052` = coredata(ETF_0052),
  `0056` = coredata(ETF_0056)
)
# Convert date to proper format
ETF_data$date <- as.Date(ETF_data$date)

# View the first few rows
head(ETF_data)

##         date X0050.TW.Close X0052.TW.Close X0056.TW.Close
## 1 2008-01-02          60.01          40.14          25.70
## 2 2008-01-03          58.89          39.10          25.23
## 3 2008-01-04          59.01          38.64          25.51
## 4 2008-01-07          56.39          36.16          24.85
## 5 2008-01-08          56.98          35.82          25.38
## 6 2008-01-09          58.20          36.69          25.83

midterm

2025-01-05