options(repos = c(CRAN = "https://cran.rstudio.com"))
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(ggplot2)
library(dplyr)
library(lubridate)
library(stringr)
# Install viridis only when it is missing, so re-running the script does not
# download and reinstall the package (or require network access) every time.
# Uses the CRAN mirror configured via options(repos = ...).
if (!requireNamespace("viridis", quietly = TRUE)) {
  install.packages("viridis")
}
# Monthly turkey sales.
# Keeps products whose category mentions "turkey" (case-insensitive), joins
# them to `transactions_sample`, totals `sales_value` per calendar month, and
# draws the monthly totals as a line with point markers.
products %>%
  filter(str_detect(product_category, regex("TURKEY", ignore_case = TRUE))) %>%
  inner_join(transactions_sample, by = "product_id") %>%
  # floor_date() maps each transaction date to the first day of its month.
  mutate(month = floor_date(as.Date(transaction_timestamp), "month")) %>%
  group_by(month) %>%
  summarize(total_sales = sum(sales_value, na.rm = TRUE)) %>%
  ggplot(aes(x = month, y = total_sales)) +
  # Line thickness is set with `linewidth`; using `size` for lines has been
  # deprecated since ggplot2 3.4.0 and emits a lifecycle warning.
  geom_line(color = "brown", linewidth = 1) +
  geom_point(color = "red", size = 2) +
  scale_y_continuous("Total Sales for Turkey", labels = scales::dollar) +
  scale_x_date("Month") +
  ggtitle(
    "Monthly Turkey Sales",
    subtitle = "Sales Dollars spent on Turkey throughout 2017"
  )

# Total vegetable spending by marital status.
# Keeps products whose category mentions "vegetable" (case-insensitive), joins
# them to `transactions_sample` and then to `demographics`, and sums
# `sales_value` within each `marital_status` value.
# The inner join on `household_id` drops transactions from households that
# have no row in `demographics`.
# NOTE(review): the plotted value is a sum, not a mean, so the title and
# subtitle say "total" to match the y axis. If a per-household average is what
# is wanted, divide by n_distinct(household_id) within each group instead.
products %>%
  filter(str_detect(product_category, regex("VEGETABLE", ignore_case = TRUE))) %>%
  inner_join(transactions_sample, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  group_by(marital_status) %>%
  summarize(total_spent = sum(sales_value, na.rm = TRUE)) %>%
  ggplot(aes(x = marital_status, y = total_spent)) +
  geom_col(fill = "orange") +
  scale_y_continuous("Total Money Spent on Vegetables", labels = scales::dollar) +
  scale_x_discrete("Marital Status") +
  ggtitle(
    "Total Vegetable Spending by Marital Status",
    subtitle = "Comparison of total vegetable spending between married and unmarried households in 2017"
  )

library(viridis)
## Loading required package: viridisLite

# Quarterly vitamin spending.
# Keeps products whose category mentions "vitamins" (case-insensitive), joins
# them to `transactions_sample`, derives the year and quarter of each
# transaction, and totals `sales_value` per (year, quarter).
vitamin_data <- products %>%
  filter(str_detect(product_category, regex("VITAMINS", ignore_case = TRUE))) %>%
  inner_join(transactions_sample, by = "product_id") %>%
  mutate(
    month = floor_date(as.Date(transaction_timestamp), "month"),
    year = year(month),
    quarter = quarter(month)
  ) %>%
  group_by(year, quarter) %>%
  # Drop all grouping: by default the result stays grouped by `year`, which
  # would make the share computed below a within-year share instead of a share
  # of the overall total (and prints a grouping message).
  summarize(total_spent = sum(sales_value, na.rm = TRUE), .groups = "drop")
head(vitamin_data)
## # A tibble: 4 × 3
## year quarter total_spent
## <dbl> <int> <dbl>
## 1 2017 1 230.
## 2 2017 2 114.
## 3 2017 3 154.
## 4 2017 4 109.

# Add each quarter's share of total vitamin spending (in percent) and a
# display label. paste0() avoids the stray spaces that paste()'s default
# separator inserts, giving "Q1 - 2017" rather than "Q 1 - 2017".
vitamin_data <- vitamin_data %>%
  mutate(
    percentage = total_spent / sum(total_spent) * 100,
    quarter_label = paste0("Q", quarter, " - ", year)
  )
# Pie chart of vitamin spending share per quarter: a single stacked bar of
# `percentage` values wrapped onto polar coordinates, one slice per
# `quarter_label`, coloured with the viridis "C" palette.
vitamin_data %>%
  ggplot(aes(x = "", y = percentage, fill = factor(quarter_label))) +
  # geom_col() plots the y values as given, i.e. a bar with stat = "identity".
  geom_col(width = 1) +
  scale_fill_viridis(discrete = TRUE, option = "C") +
  coord_polar(theta = "y") +
  labs(
    title = "Vitamin Spending by Quarter",
    fill = "Quarter"
  ) +
  # theme_void() first, then the legend override, so the override is kept.
  theme_void() +
  theme(legend.position = "bottom")
