Financial Database Midterm

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.0     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.1.8
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors

library(lubridate)


bike_orderline_table <- readRDS("C:\\Users\\emman\\Downloads\\bike_orderlines(1).rds")
bike_orderline_table

## # A tibble: 15,644 × 13
##    order_date          order_id order_line quantity price total_…¹ model categ…²
##    <dttm>                 <dbl>      <dbl>    <dbl> <dbl>    <dbl> <chr> <chr>  
##  1 2011-01-07 00:00:00        1          1        1  6070     6070 Jeky… Mounta…
##  2 2011-01-07 00:00:00        1          2        1  5970     5970 Trig… Mounta…
##  3 2011-01-10 00:00:00        2          1        1  2770     2770 Beas… Mounta…
##  4 2011-01-10 00:00:00        2          2        1  5970     5970 Trig… Mounta…
##  5 2011-01-10 00:00:00        3          1        1 10660    10660 Supe… Road   
##  6 2011-01-10 00:00:00        3          2        1  3200     3200 Jeky… Mounta…
##  7 2011-01-10 00:00:00        3          3        1 12790    12790 Supe… Road   
##  8 2011-01-10 00:00:00        3          4        1  5330     5330 Supe… Road   
##  9 2011-01-10 00:00:00        3          5        1  1570     1570 Syna… Road   
## 10 2011-01-11 00:00:00        4          1        1  4800     4800 Syna… Road   
## # … with 15,634 more rows, 5 more variables: category_2 <chr>,
## #   frame_material <chr>, bikeshop_name <chr>, city <chr>, state <chr>, and
## #   abbreviated variable names ¹total_price, ²category_1

#Question 1
bike_orderline_table <- bike_orderline_table %>%
  mutate(model = case_when(
    model == "CAAD Disk Ultegra" ~ "CAAD12 Disc Ultegra",
    model == "Syapse Carbon Tiagra" ~ "Synapse Carbon Tiagra",
    model == "Supersix Evo Hi-Mod Utegra" ~ "Supersix Evo Hi-Mod Ultegra",
    TRUE ~ model
  ))
glimpse(bike_orderline_table)

## Rows: 15,644
## Columns: 13
## $ order_date     <dttm> 2011-01-07, 2011-01-07, 2011-01-10, 2011-01-10, 2011-0…
## $ order_id       <dbl> 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7…
## $ order_line     <dbl> 1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 1, 2, 3, 4, 1, 2, 3, 4, 1…
## $ quantity       <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1…
## $ price          <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ total_price    <dbl> 6070, 5970, 2770, 5970, 10660, 3200, 12790, 5330, 1570,…
## $ model          <chr> "Jekyll Carbon 2", "Trigger Carbon 2", "Beast of the Ea…
## $ category_1     <chr> "Mountain", "Mountain", "Mountain", "Mountain", "Road",…
## $ category_2     <chr> "Over Mountain", "Over Mountain", "Trail", "Over Mounta…
## $ frame_material <chr> "Carbon", "Carbon", "Aluminum", "Carbon", "Carbon", "Ca…
## $ bikeshop_name  <chr> "Ithaca Mountain Climbers", "Ithaca Mountain Climbers",…
## $ city           <chr> "Ithaca", "Ithaca", "Kansas City", "Kansas City", "Loui…
## $ state          <chr> "NY", "NY", "KS", "KS", "KY", "KY", "KY", "KY", "KY", "…

#Question 2
bike_orderline_table <- bike_orderline_table %>%
  mutate(order_date = as.Date(order_date),
         month = month(order_date, label = TRUE, abbr = FALSE))

monthly_sales <- bike_orderline_table %>%
  group_by(month) %>%
  summarize(total_sales = sum(total_price)) %>%
  mutate(total_sales = total_sales %>% scales::dollar())

month_with_highest_sales <- monthly_sales %>%
  filter(total_sales == max(total_sales)) %>%
  select(month, total_sales)
  


result <- paste("Month with the highest sales:", month_with_highest_sales$month, "with total sales:", month_with_highest_sales$total_sales)
result

## [1] "Month with the highest sales: April with total sales: $8,386,170"

#Question 3
bike_orderline_table %>%
  select(model, total_price) %>%
  mutate(black = model %>% str_detect("Black Inc")) %>%
  group_by(black) %>%
  summarize(median = median(total_price)) %>%
  ungroup() %>%
  mutate(median = median %>% scales::dollar()) %>%
  rename('Black Inc' = black,
         'Median Orderline' = median)

## # A tibble: 2 × 2
##   `Black Inc` `Median Orderline`
##   <lgl>       <chr>             
## 1 FALSE       $2,880            
## 2 TRUE        $12,250

bike_orderline_table %>%
  select(model, total_price) %>%
  mutate(ultegra = model %>% str_detect("Ultegra")) %>%
  group_by(ultegra) %>%
  summarize(median = median(total_price)) %>%
  ungroup() %>%
  mutate(median = median %>% scales::dollar()) %>%
  rename('Ultegra' = ultegra,
         'Median Orderline' = median)

## # A tibble: 2 × 2
##   Ultegra `Median Orderline`
##   <lgl>   <chr>             
## 1 FALSE   $3,200            
## 2 TRUE    $3,200

bike_orderline_table %>%
  select(model, total_price) %>%
  mutate(disc = model %>% str_detect("Disc")) %>%
  group_by(disc) %>%
  summarize(median = median(total_price)) %>%
  ungroup() %>%
  mutate(median = median %>% scales::dollar()) %>%
  rename('Disc' = disc,
         'Median Orderline' = median)

## # A tibble: 2 × 2
##   Disc  `Median Orderline`
##   <lgl> <chr>             
## 1 FALSE $3,200            
## 2 TRUE  $2,660

#Question 4

bike_orderline_table %>%
  select(model, category_1, category_2, price) %>%
  separate(model,
           into = str_c("model_", 1:7),
           sep = " ",
           extra = 'drop') %>%
  mutate(model_base = case_when(
    str_detect(str_to_lower(model_1), "supersix") ~ str_c(model_1, model_2, sep=" "),
    str_detect(str_to_lower(model_1), "fat") ~ str_c(model_1, model_2, sep=" "),
    str_detect(str_to_lower(model_1), "bad") ~ str_c(model_1, model_2, sep=" "),
    str_detect(str_to_lower(model_1), "beast") ~ str_c(model_1, model_2, model_3, model_4, sep=" "),
    str_detect(str_to_lower(model_2), "29") ~ str_c(model_1, model_2, sep=" "),
    TRUE ~ model_1)
  ) %>%
  mutate(model_1 = model_1 %>% str_trim()) %>%
  select(category_1, category_2, model_base, price) %>%
  group_by(category_1, category_2,model_base) %>%
  summarize(mean = round(mean(price),0) %>% scales::dollar(),
            min = min(price) %>% scales::dollar(),
            max = max(price) %>% scales::dollar()) %>%
  ungroup() %>%
  arrange(desc(mean)) %>%
  rename(
    `Category 1` = category_1,
    `Category 2` = category_2,
    `Model Base` = model_base,
    `Mean Price` = mean,
    `Min Price` = min, 
    `Max Price` = max
  )

## Warning: Expected 7 pieces. Missing pieces filled with `NA` in 15644 rows [1, 2, 3, 4,
## 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

## `summarise()` has grouped output by 'category_1', 'category_2'. You can
## override using the `.groups` argument.

## # A tibble: 19 × 6
##    `Category 1` `Category 2`       `Model Base`      Mean Pric…¹ Min P…² Max P…³
##    <chr>        <chr>              <chr>             <chr>       <chr>   <chr>  
##  1 Mountain     Cross Country Race Scalpel-Si        $6,695      $3,200  $12,790
##  2 Mountain     Sport              Catalyst          $541        $415    $705   
##  3 Mountain     Over Mountain      Jekyll            $5,042      $3,200  $7,990 
##  4 Road         Elite Road         Supersix Evo      $4,978      $1,840  $12,790
##  5 Mountain     Over Mountain      Trigger           $4,970      $3,200  $8,200 
##  6 Mountain     Trail              Habit             $4,611      $1,950  $12,250
##  7 Mountain     Cross Country Race F-Si              $4,504      $1,840  $11,190
##  8 Mountain     Cross Country Race Scalpel 29        $4,499      $3,200  $6,390 
##  9 Mountain     Fat Bike           Fat CAAD1         $3,730      $3,730  $3,730 
## 10 Road         Triathalon         Slice             $3,527      $1,950  $7,000 
## 11 Road         Endurance Road     Synapse           $3,080      $870    $9,590 
## 12 Road         Elite Road         CAAD12            $2,978      $1,680  $5,860 
## 13 Mountain     Trail              Bad Habit         $2,954      $2,660  $3,200 
## 14 Road         Elite Road         CAAD              $2,660      $2,660  $2,660 
## 15 Road         Cyclocross         SuperX            $2,339      $1,750  $3,500 
## 16 Mountain     Trail              Beast of the East $2,194      $1,620  $2,770 
## 17 Mountain     Fat Bike           Fat CAAD2         $2,130      $2,130  $2,130 
## 18 Mountain     Sport              Trail             $1,153      $815    $1,520 
## 19 Road         Elite Road         CAAD8             $1,136      $815    $1,410 
## # … with abbreviated variable names ¹`Mean Price`, ²`Min Price`, ³`Max Price`

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)

##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

Financial Database Midterm - Emmanuel

Emmanuel Ferdinand Anggawirja

2023-11-07

R Markdown

Including Plots