This homework analyzes two datasets:
Stock Data Analysis: Weekly closing prices for five US listed companies (Amazon, Apple, Facebook, Google, Microsoft) in 2019. The data is reshaped from wide format to long format for easier analysis.
Bike Sales Analysis: Sales data for bike products, including revenue trends over time and revenue breakdown by product categories.
library(tidyverse)
library(dplyr)
library(ggplot2)
library(lubridate)
stock_df <- read_csv("stock_df.csv")
stock_df
## # A tibble: 5 × 106
## company `2019_week1` `2019_week2` `2019_week3` `2019_week4` `2019_week5`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Amazon 1848. 1641. 1696. 1671. 1626.
## 2 Apple 73.4 38.1 39.2 39.4 41.6
## 3 Facebook 205. 144. 150. 149. 166.
## 4 Google 1337. 1057. 1098. 1091. 1111.
## 5 Microsoft 158. 103. 108. 107. 103.
## # ℹ 100 more variables: `2019_week6` <dbl>, `2019_week7` <dbl>,
## # `2019_week8` <dbl>, `2019_week9` <dbl>, `2019_week10` <dbl>,
## # `2019_week11` <dbl>, `2019_week12` <dbl>, `2019_week13` <dbl>,
## # `2019_week14` <dbl>, `2019_week15` <dbl>, `2019_week16` <dbl>,
## # `2019_week17` <dbl>, `2019_week18` <dbl>, `2019_week19` <dbl>,
## # `2019_week20` <dbl>, `2019_week21` <dbl>, `2019_week22` <dbl>,
## # `2019_week23` <dbl>, `2019_week24` <dbl>, `2019_week25` <dbl>, …
stock_df_long <- stock_df %>%
pivot_longer(
cols = -company,
names_to = c("year", "week"),
names_sep = "_week",
values_to = "price"
) %>%
mutate(
year = as.integer(year),
week = as.integer(week)
)
stock_df_long
## # A tibble: 525 × 4
## company year week price
## <chr> <int> <int> <dbl>
## 1 Amazon 2019 1 1848.
## 2 Amazon 2019 2 1641.
## 3 Amazon 2019 3 1696.
## 4 Amazon 2019 4 1671.
## 5 Amazon 2019 5 1626.
## 6 Amazon 2019 6 1588.
## 7 Amazon 2019 7 1608.
## 8 Amazon 2019 8 1632.
## 9 Amazon 2019 9 1672.
## 10 Amazon 2019 10 1621.
## # ℹ 515 more rows
bike_orderlines <- read_csv("bike_orderlines.csv")
bike_orderlines_wrangled_tbl <- bike_orderlines %>%
select(order_date, category_1, category_2, total_price) %>%
mutate(order_date = ymd(order_date))
head(bike_orderlines_wrangled_tbl)
## # A tibble: 6 × 4
## order_date category_1 category_2 total_price
## <date> <chr> <chr> <dbl>
## 1 2011-01-07 Mountain Over Mountain 6070
## 2 2011-01-07 Mountain Over Mountain 5970
## 3 2011-01-10 Mountain Trail 2770
## 4 2011-01-10 Mountain Over Mountain 5970
## 5 2011-01-10 Road Elite Road 10660
## 6 2011-01-10 Mountain Over Mountain 3200
bike_sales_y <- bike_orderlines_wrangled_tbl %>%
mutate(year = year(order_date)) %>%
group_by(year) %>%
summarise(sales = sum(total_price)) %>%
ungroup()
bike_sales_y
## # A tibble: 5 × 2
## year sales
## <dbl> <dbl>
## 1 2011 11292885
## 2 2012 12163075
## 3 2013 16480775
## 4 2014 13924085
## 5 2015 17171510
bike_sales_y %>%
ggplot(aes(x = year, y = sales, color = sales)) +
geom_point(size = 5) +
geom_line(linewidth = 2) +
geom_smooth(method = "lm", formula = 'y ~ x', se = FALSE) +
expand_limits(y = c(0, 20e6)) +
scale_colour_continuous(low = "red", high = "black",
labels = scales::dollar_format(scale = 1/1e6, suffix = "M")) +
scale_y_continuous(labels = scales::dollar_format(scale = 1/1e6, suffix = "M")) +
labs(
title = "Revenue",
subtitle = "Sales are trending up and to the right!",
x = "year",
y = "Sales (Millions)",
color = "Rev ($M)",
caption = "Total sales from 2011 to 2015"
)
## Warning: The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
revenue_by_category2_tbl <- bike_orderlines_wrangled_tbl %>%
select(category_2, total_price) %>%
group_by(category_2) %>%
summarise(revenue = sum(total_price)) %>%
ungroup()
revenue_by_category2_tbl
## # A tibble: 9 × 2
## category_2 revenue
## <chr> <dbl>
## 1 Cross Country Race 19224630
## 2 Cyclocross 2108120
## 3 Elite Road 15334665
## 4 Endurance Road 10381060
## 5 Fat Bike 1052620
## 6 Over Mountain 7571270
## 7 Sport 1932755
## 8 Trail 9373460
## 9 Triathalon 4053750
revenue_by_category2_tbl %>%
mutate(category_2 = category_2 %>% as_factor() %>% fct_reorder(desc(revenue))) %>%
ggplot(aes(category_2, revenue)) +
geom_col(fill = "blue") +
coord_flip() +
scale_y_continuous(labels = scales::comma) +
labs(
title = "Revenue by Bike Category",
x = "category_2",
y = "revenue"
) +
theme_minimal()
The analysis shows: