#Remember to install packages before loading them with library()

library(tidyverse) ## A set of tools for Data manipulation and visualization
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate) ## for date time manipulation
library(scales) ## Formatting numbers and values
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
#library(hrbrthemes)# For changing ggplot theme
library(extrafont) # More font options
## Warning: package 'extrafont' was built under R version 4.5.2
## Registering fonts with R

# Q1 - view data
# NOTE(review): the path below is absolute and specific to one Windows
# account, so the script only runs on that machine as written.
sales <- read.csv(
  file = "C:/Users/Nicop/Downloads/RStudio Projects/sales.csv"
)

# Print every column with its type and first few values
sales %>% glimpse()
## Rows: 1,000
## Columns: 17
## $ Invoice.ID              <chr> "750-67-8428", "226-31-3081", "631-41-3108", "…
## $ Branch                  <chr> "A", "C", "A", "A", "A", "C", "A", "C", "A", "…
## $ City                    <chr> "Yangon", "Naypyitaw", "Yangon", "Yangon", "Ya…
## $ Customer.type           <chr> "Member", "Normal", "Normal", "Member", "Norma…
## $ Gender                  <chr> "Female", "Female", "Male", "Male", "Male", "M…
## $ Product.line            <chr> "Health and beauty", "Electronic accessories",…
## $ Unit.price              <dbl> 74.69, 15.28, 46.33, 58.22, 86.31, 85.39, 68.8…
## $ Quantity                <int> 7, 5, 7, 8, 7, 7, 6, 10, 2, 3, 4, 4, 5, 10, 10…
## $ Tax.5.                  <dbl> 26.1415, 3.8200, 16.2155, 23.2880, 30.2085, 29…
## $ Total                   <dbl> 548.9715, 80.2200, 340.5255, 489.0480, 634.378…
## $ Date                    <chr> "1/5/2019", "3/8/2019", "3/3/2019", "1/27/2019…
## $ Time                    <chr> "13:08", "10:29", "13:23", "20:33", "10:37", "…
## $ Payment                 <chr> "Ewallet", "Cash", "Credit card", "Ewallet", "…
## $ cogs                    <dbl> 522.83, 76.40, 324.31, 465.76, 604.17, 597.73,…
## $ gross.margin.percentage <dbl> 4.761905, 4.761905, 4.761905, 4.761905, 4.7619…
## $ gross.income            <dbl> 26.1415, 3.8200, 16.2155, 23.2880, 30.2085, 29…
## $ Rating                  <dbl> 9.1, 9.6, 7.4, 8.4, 5.3, 4.1, 5.8, 8.0, 7.2, 5…
# Derive the time-based helper columns in a single mutate() call.
# Later arguments can use columns created by earlier ones, so Weekday can be
# computed from Date2 in the same step.
#   Hour    - integer hour taken from the first two characters of Time
#   Date2   - Date parsed from the month/day/year text in Date
#   Weekday - abbreviated day name of Date2 (e.g. "Sat", "Fri")
sales <- mutate(
  sales,
  Hour = as.integer(substr(Time, 1, 2)),
  Date2 = mdy(Date),
  Weekday = weekdays(Date2, abbreviate = TRUE)
)

# Confirm the three new columns were appended
sales %>% glimpse()
## Rows: 1,000
## Columns: 20
## $ Invoice.ID              <chr> "750-67-8428", "226-31-3081", "631-41-3108", "…
## $ Branch                  <chr> "A", "C", "A", "A", "A", "C", "A", "C", "A", "…
## $ City                    <chr> "Yangon", "Naypyitaw", "Yangon", "Yangon", "Ya…
## $ Customer.type           <chr> "Member", "Normal", "Normal", "Member", "Norma…
## $ Gender                  <chr> "Female", "Female", "Male", "Male", "Male", "M…
## $ Product.line            <chr> "Health and beauty", "Electronic accessories",…
## $ Unit.price              <dbl> 74.69, 15.28, 46.33, 58.22, 86.31, 85.39, 68.8…
## $ Quantity                <int> 7, 5, 7, 8, 7, 7, 6, 10, 2, 3, 4, 4, 5, 10, 10…
## $ Tax.5.                  <dbl> 26.1415, 3.8200, 16.2155, 23.2880, 30.2085, 29…
## $ Total                   <dbl> 548.9715, 80.2200, 340.5255, 489.0480, 634.378…
## $ Date                    <chr> "1/5/2019", "3/8/2019", "3/3/2019", "1/27/2019…
## $ Time                    <chr> "13:08", "10:29", "13:23", "20:33", "10:37", "…
## $ Payment                 <chr> "Ewallet", "Cash", "Credit card", "Ewallet", "…
## $ cogs                    <dbl> 522.83, 76.40, 324.31, 465.76, 604.17, 597.73,…
## $ gross.margin.percentage <dbl> 4.761905, 4.761905, 4.761905, 4.761905, 4.7619…
## $ gross.income            <dbl> 26.1415, 3.8200, 16.2155, 23.2880, 30.2085, 29…
## $ Rating                  <dbl> 9.1, 9.6, 7.4, 8.4, 5.3, 4.1, 5.8, 8.0, 7.2, 5…
## $ Hour                    <int> 13, 10, 13, 20, 10, 18, 14, 11, 17, 13, 18, 17…
## $ Date2                   <date> 2019-01-05, 2019-03-08, 2019-03-03, 2019-01-2…
## $ Weekday                 <chr> "Sat", "Fri", "Sun", "Sun", "Fri", "Mon", "Mon…
# Fixed fill colour for each abbreviated weekday name.
rainbow_colors <- setNames(
  c(
    "#FF4D4D", "#FFA64D", "#FFD24D", "#57D957",
    "#4D79FF", "#B266FF", "#FF66A3"
  ),
  c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
)

# Total sales per weekday, ranked from the largest total to the smallest.
# The last step turns Weekday into a factor whose levels follow that ranking,
# so the plot keeps the ranked order of the bars.
sales_by_day <- sales %>%
  group_by(Weekday) %>%
  summarise(TotalSales = sum(Total, na.rm = TRUE)) %>%
  arrange(desc(TotalSales)) %>%
  mutate(Weekday = factor(Weekday, levels = Weekday))

# Horizontal bar chart: one bar per weekday, coloured from rainbow_colors,
# with the formatted total printed in white near the end of each bar.
sales_by_day %>%
  ggplot(aes(x = Weekday, y = TotalSales, fill = Weekday)) +
  geom_col(width = 0.5) +
  geom_text(
    aes(label = comma(TotalSales)),
    hjust = 1.1,
    color = "white",
    size = 4,
    fontface = "bold"
  ) +
  # After the flip the y aesthetic (TotalSales) is drawn along the horizontal
  # axis, which is why the axis label below is set through `y`.
  coord_flip() +
  scale_fill_manual(values = rainbow_colors) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 18, hjust = 0.5),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_blank(),
    axis.text.x = element_text(size = 11, color = "gray40"),
    axis.text.y = element_text(size = 12, face = "bold"),
    panel.grid.major.x = element_line(color = "gray85", linewidth = 0.3),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  labs(
    title = "Total Sales by Day of Week",
    y = "Total Sales"
  )

# --- Create Hour + Weekday ---
# Weekday is built from the numeric day of the week (1 = Sunday because
# week_start = 7) and labelled explicitly. Asking wday() for text labels and
# then re-levelling against English names would turn every value into NA when
# the session's locale is not English; explicit labels avoid that and already
# give an ordered Sun-Sat factor, so no second factor() call is needed.
sales <- sales %>%
  mutate(
    Hour = as.integer(substr(Time, 1, 2)),
    Date_mdy = mdy(Date),
    Weekday = factor(
      wday(Date_mdy, week_start = 7),
      levels = 1:7,
      labels = c("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"),
      ordered = TRUE
    )
  )

# --- Sum total sales by weekday + hour ---
# na.rm = TRUE matches the weekday totals computed earlier in the script, so
# one missing Total cannot turn a whole weekday/hour bar into NA.
sales_summary <- sales %>%
  group_by(Weekday, Hour) %>%
  summarise(TotalSales = sum(Total, na.rm = TRUE), .groups = "drop")

# Only label some hours so they are not on top of each other
hours_to_show <- seq(0, 23, by = 4)   # 0, 4, 8, 12, 16, 20

# --- Final bar chart ---
# One panel per weekday (Sun-Sat), one horizontal bar per hour. Each panel
# gets its own x range (scales = "free_x").
ggplot(sales_summary,
       aes(x = TotalSales, y = factor(Hour))) +
  geom_col(fill = "steelblue", width = 0.7) +
  facet_wrap(~ Weekday, ncol = 3, scales = "free_x") +
  scale_x_continuous(breaks = pretty_breaks(n = 4)) +
  # Hours are a discrete axis here, so the breaks are given as strings
  scale_y_discrete(breaks = as.character(hours_to_show)) +
  labs(
    title = "Total Sales Breakdown by Weekday and Time",
    x = "Total Sales",
    y = "Hour of the Day"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    legend.position = "none",
    strip.text = element_text(size = 13, face = "bold")
  )

# Add the parsed date and the first day of its month, used to bucket sales
# into calendar months.
sales <- sales %>%
  mutate(
    Date_mdy = mdy(Date),
    Month = floor_date(Date_mdy, unit = "month")
  )

# Total sales per month and gender. na.rm = TRUE matches the weekday totals
# computed earlier in the script.
sales_gender <- sales %>%
  group_by(Month, Gender) %>%
  summarise(TotalSales = sum(Total, na.rm = TRUE), .groups = "drop")

# One line per gender across the months, with a point at each month.
# Line thickness is set with `linewidth`; using `size` for lines has been
# deprecated since ggplot2 3.4.0 and raised a warning on every render.
ggplot(sales_gender, aes(x = Month, y = TotalSales, color = Gender)) +
  geom_line(linewidth = 1.3) +
  geom_point(size = 2.5) +
  scale_x_date(date_labels = "%b", date_breaks = "1 month") +
  theme_minimal(base_size = 14)