options(repos = c(CRAN = "https://cran.rstudio.com"))
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(ggplot2)
library(dplyr)
library(lubridate)
library(stringr)
# Install viridis only when it is not already available, so re-running the
# script does not re-download the package (or fail when offline) every time.
if (!requireNamespace("viridis", quietly = TRUE)) {
  install.packages("viridis")
}
# Monthly turkey sales: keep products whose category matches "TURKEY"
# (case-insensitive), attach the sampled transactions for those products, and
# total the sales value per calendar month before plotting the trend.
products %>%
  filter(str_detect(product_category, regex("TURKEY", ignore_case = TRUE))) %>%
  inner_join(transactions_sample, by = "product_id") %>%
  # Truncate each transaction date to the first day of its month.
  mutate(month = floor_date(as.Date(transaction_timestamp), "month")) %>%
  group_by(month) %>%
  summarize(total_sales = sum(sales_value, na.rm = TRUE)) %>%
  ggplot(aes(x = month, y = total_sales)) +
  # Line thickness is set with `linewidth`; using `size` for lines has been
  # deprecated since ggplot2 3.4.0 and emits a warning.
  geom_line(color = "brown", linewidth = 1) +
  geom_point(color = "red", size = 2) +
  scale_y_continuous("Total Sales for Turkey", labels = scales::dollar) +
  scale_x_date("Month") +
  ggtitle("Monthly Turkey Sales",
          subtitle = "Sales Dollars spent on Turkey throughout 2017")

# Vegetable spending by marital status: keep products whose category matches
# "VEGETABLE" (case-insensitive), attach sampled transactions and household
# demographics, then sum the sales value within each marital-status group.
products %>%
  filter(str_detect(product_category, regex("VEGETABLE", ignore_case = TRUE))) %>%
  inner_join(transactions_sample, by = "product_id") %>%
  # The inner join keeps only transactions from households that have a
  # demographics record.
  inner_join(demographics, by = "household_id") %>%
  group_by(marital_status) %>%
  summarize(total_spent = sum(sales_value, na.rm = TRUE)) %>%
  ggplot(aes(x = marital_status, y = total_spent)) +
  geom_col(fill = "orange") +
  scale_y_continuous("Total Money Spent on Vegetables", labels = scales::dollar) +
  scale_x_discrete("Marital Status") +
  # The bars show group totals (sum of sales_value), not per-household
  # averages, so the title and subtitle say "total" to match the y-axis.
  ggtitle("Total Vegetable Spending by Marital Status",
          subtitle = "Comparison of total vegetable spending between married and unmarried households in 2017")

library(viridis)
## Loading required package: viridisLite
# Quarterly vitamin spending. Each step is named inside `local()` so the
# intermediates do not leak into the global environment; only the final
# summary is bound to `vitamin_data`.
vitamin_data <- local({
  # Products whose category matches "VITAMINS" (case-insensitive).
  vitamin_products <- filter(
    products,
    str_detect(product_category, regex("VITAMINS", ignore_case = TRUE))
  )

  # Sampled transactions for those products.
  vitamin_sales <- inner_join(
    vitamin_products,
    transactions_sample,
    by = "product_id"
  )

  # Derive the month start, then the year and quarter it falls in.
  dated_sales <- mutate(
    vitamin_sales,
    month = floor_date(as.Date(transaction_timestamp), "month"),
    year = year(month),
    quarter = quarter(month)
  )

  # Total spend per (year, quarter); the result stays grouped by `year`.
  summarise(
    group_by(dated_sales, year, quarter),
    total_spent = sum(sales_value, na.rm = TRUE)
  )
})
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
head(vitamin_data)
## # A tibble: 4 × 3
## # Groups:   year [1]
##    year quarter total_spent
##   <dbl>   <int>       <dbl>
## 1  2017       1        230.
## 2  2017       2        114.
## 3  2017       3        154.
## 4  2017       4        109.
# Add each quarter's share of total vitamin spending and a display label.
vitamin_data <- vitamin_data %>%
  # The summary above is still grouped by `year`; drop the grouping so that
  # `sum(total_spent)` is the grand total and the shares add up to 100% across
  # the whole chart rather than within each year.
  ungroup() %>%
  mutate(percentage = total_spent / sum(total_spent) * 100,
         # paste0() avoids the stray space that paste() puts after "Q".
         quarter_label = paste0("Q", quarter, " - ", year))


# Pie chart of vitamin spending: a single stacked bar of the quarterly
# percentages, wrapped into a circle by mapping y onto the polar angle.
vitamin_data %>%
  ggplot(aes(x = "", y = percentage, fill = factor(quarter_label))) +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  scale_fill_viridis(option = "C", discrete = TRUE) +
  ggtitle("Vitamin Spending by Quarter") +
  labs(fill = "Quarter") +
  # Strip axes and gridlines, then place the legend under the pie.
  theme_void() +
  theme(legend.position = "bottom")