Introduction

This homework analyzes two datasets:

  1. Stock Data Analysis: Weekly closing prices for five US listed companies (Amazon, Apple, Facebook, Google, Microsoft) in 2019. The data is reshaped from wide format to long format for easier analysis.

  2. Bike Sales Analysis: Sales data for bike products, including revenue trends over time and revenue breakdown by product categories.

Load Libraries

library(tidyverse)
library(dplyr)
library(ggplot2)
library(lubridate)

Part 1: Reshape Data

Use the stock_df.csv to answer the following questions:

  • This data is the weekly closing prices for five US listed companies in 2019.
  • Import data stock_df.csv and reshape it from wide to long format (stock_df_long)
stock_df <- read_csv("stock_df.csv")
stock_df
## # A tibble: 5 × 106
##   company   `2019_week1` `2019_week2` `2019_week3` `2019_week4` `2019_week5`
##   <chr>            <dbl>        <dbl>        <dbl>        <dbl>        <dbl>
## 1 Amazon          1848.        1641.        1696.        1671.        1626. 
## 2 Apple             73.4         38.1         39.2         39.4         41.6
## 3 Facebook         205.         144.         150.         149.         166. 
## 4 Google          1337.        1057.        1098.        1091.        1111. 
## 5 Microsoft        158.         103.         108.         107.         103. 
## # ℹ 100 more variables: `2019_week6` <dbl>, `2019_week7` <dbl>,
## #   `2019_week8` <dbl>, `2019_week9` <dbl>, `2019_week10` <dbl>,
## #   `2019_week11` <dbl>, `2019_week12` <dbl>, `2019_week13` <dbl>,
## #   `2019_week14` <dbl>, `2019_week15` <dbl>, `2019_week16` <dbl>,
## #   `2019_week17` <dbl>, `2019_week18` <dbl>, `2019_week19` <dbl>,
## #   `2019_week20` <dbl>, `2019_week21` <dbl>, `2019_week22` <dbl>,
## #   `2019_week23` <dbl>, `2019_week24` <dbl>, `2019_week25` <dbl>, …
stock_df_long <- stock_df %>%
  pivot_longer(
    cols = -company,
    names_to = c("year", "week"),
    names_sep = "_week",
    values_to = "price"
  ) %>%
  mutate(
    year = as.integer(year),
    week = as.integer(week)
  )

stock_df_long
## # A tibble: 525 × 4
##    company  year  week price
##    <chr>   <int> <int> <dbl>
##  1 Amazon   2019     1 1848.
##  2 Amazon   2019     2 1641.
##  3 Amazon   2019     3 1696.
##  4 Amazon   2019     4 1671.
##  5 Amazon   2019     5 1626.
##  6 Amazon   2019     6 1588.
##  7 Amazon   2019     7 1608.
##  8 Amazon   2019     8 1632.
##  9 Amazon   2019     9 1672.
## 10 Amazon   2019    10 1621.
## # ℹ 515 more rows

Part 2: Bike Sales Analysis

Import and wrangle bike data

bike_orderlines <- read_csv("bike_orderlines.csv")

bike_orderlines_wrangled_tbl <- bike_orderlines %>%
  select(order_date, category_1, category_2, total_price) %>%
  mutate(order_date = ymd(order_date))

head(bike_orderlines_wrangled_tbl)
## # A tibble: 6 × 4
##   order_date category_1 category_2    total_price
##   <date>     <chr>      <chr>               <dbl>
## 1 2011-01-07 Mountain   Over Mountain        6070
## 2 2011-01-07 Mountain   Over Mountain        5970
## 3 2011-01-10 Mountain   Trail                2770
## 4 2011-01-10 Mountain   Over Mountain        5970
## 5 2011-01-10 Road       Elite Road          10660
## 6 2011-01-10 Mountain   Over Mountain        3200

Generate total sales by year

bike_sales_y <- bike_orderlines_wrangled_tbl %>%
  mutate(year = year(order_date)) %>% 
  group_by(year) %>% 
  summarise(sales = sum(total_price)) %>% 
  ungroup()

bike_sales_y
## # A tibble: 5 × 2
##    year    sales
##   <dbl>    <dbl>
## 1  2011 11292885
## 2  2012 12163075
## 3  2013 16480775
## 4  2014 13924085
## 5  2015 17171510

Plot: Revenue by Year

bike_sales_y %>% 
  ggplot(aes(x = year, y = sales, color = sales)) +
  geom_point(size = 5) + 
  geom_line(linewidth = 2) + 
  geom_smooth(method = "lm", formula = 'y ~ x', se = FALSE) +
  expand_limits(y = c(0, 20e6)) + 
  scale_colour_continuous(low = "red", high = "black", 
                          labels = scales::dollar_format(scale = 1/1e6, suffix = "M")) +
  scale_y_continuous(labels = scales::dollar_format(scale = 1/1e6, suffix = "M")) + 
  labs(
    title = "Revenue",
    subtitle = "Sales are trending up and to the right!",
    x = "year", 
    y = "Sales (Millions)",
    color = "Rev ($M)",
    caption = "Total sales from 2011 to 2015"
  )
## Warning: The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

Generate total sales by category 2

revenue_by_category2_tbl <- bike_orderlines_wrangled_tbl %>% 
  select(category_2, total_price) %>% 
  group_by(category_2) %>% 
  summarise(revenue = sum(total_price)) %>% 
  ungroup()

revenue_by_category2_tbl
## # A tibble: 9 × 2
##   category_2          revenue
##   <chr>                 <dbl>
## 1 Cross Country Race 19224630
## 2 Cyclocross          2108120
## 3 Elite Road         15334665
## 4 Endurance Road     10381060
## 5 Fat Bike            1052620
## 6 Over Mountain       7571270
## 7 Sport               1932755
## 8 Trail               9373460
## 9 Triathalon          4053750

Plot: Revenue by Category

revenue_by_category2_tbl %>% 
  mutate(category_2 = category_2 %>% as_factor() %>% fct_reorder(desc(revenue))) %>% 
  ggplot(aes(category_2, revenue)) +
  geom_col(fill = "blue") +
  coord_flip() +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Revenue by Bike Category",
    x = "category_2",
    y = "revenue"
  ) +
  theme_minimal()

Conclusion

The analysis shows: