#Remember to install packages before loading them with library()

library(tidyverse) ## A set of tools for Data manipulation and visualization
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate) ## for date time manipulation
library(scales) ## Formatting numbers and values
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
#library(hrbrthemes)# For changing ggplot theme
library(extrafont) # More font options
## Registering fonts with R

#Q1 - view data

# Read the sales data

sales <- read.csv("sales.csv", stringsAsFactors = FALSE)

# Extract Hours from Time column

sales$Hours <- substr(sales$Time, 1, 2) %>% as.integer()

# Convert Date column and add Weekday

sales$date <- mdy(sales$Date)
sales$Weekday <- weekdays(sales$date)

# Save updated data (optional)

write.csv(sales, "sales_updated.csv", row.names = FALSE)

# Quick view

head(sales)
##    Invoice.ID Branch      City Customer.type Gender           Product.line
## 1 750-67-8428      A    Yangon        Member Female      Health and beauty
## 2 226-31-3081      C Naypyitaw        Normal Female Electronic accessories
## 3 631-41-3108      A    Yangon        Normal   Male     Home and lifestyle
## 4 123-19-1176      A    Yangon        Member   Male      Health and beauty
## 5 373-73-7910      A    Yangon        Normal   Male      Sports and travel
## 6 699-14-3026      C Naypyitaw        Normal   Male Electronic accessories
##   Unit.price Quantity  Tax.5.    Total      Date  Time     Payment   cogs
## 1      74.69        7 26.1415 548.9715  1/5/2019 13:08     Ewallet 522.83
## 2      15.28        5  3.8200  80.2200  3/8/2019 10:29        Cash  76.40
## 3      46.33        7 16.2155 340.5255  3/3/2019 13:23 Credit card 324.31
## 4      58.22        8 23.2880 489.0480 1/27/2019 20:33     Ewallet 465.76
## 5      86.31        7 30.2085 634.3785  2/8/2019 10:37     Ewallet 604.17
## 6      85.39        7 29.8865 627.6165 3/25/2019 18:30     Ewallet 597.73
##   gross.margin.percentage gross.income Rating Hours       date  Weekday
## 1                4.761905      26.1415    9.1    13 2019-01-05 Saturday
## 2                4.761905       3.8200    9.6    10 2019-03-08   Friday
## 3                4.761905      16.2155    7.4    13 2019-03-03   Sunday
## 4                4.761905      23.2880    8.4    20 2019-01-27   Sunday
## 5                4.761905      30.2085    5.3    10 2019-02-08   Friday
## 6                4.761905      29.8865    4.1    18 2019-03-25   Monday
str(sales$Hours)
##  int [1:1000] 13 10 13 20 10 18 14 11 17 13 ...
library(tidyverse)
library(lubridate)

# Read file from local directory
sales <- read.csv("sales.csv", stringsAsFactors = FALSE)

# --- Extract Hours using substr() and convert to integer ---
sales$Hours <- substr(sales$Time, 1, 2) %>% 
  as.integer()

# --- Convert Date column using mdy ---
sales$date <- mdy(sales$Date)

# --- Add weekday ---
sales$Weekday <- weekdays(sales$date)

# --- Save updated file ---
write.csv(sales, "sales_updated.csv", row.names = FALSE)

head(sales)
##    Invoice.ID Branch      City Customer.type Gender           Product.line
## 1 750-67-8428      A    Yangon        Member Female      Health and beauty
## 2 226-31-3081      C Naypyitaw        Normal Female Electronic accessories
## 3 631-41-3108      A    Yangon        Normal   Male     Home and lifestyle
## 4 123-19-1176      A    Yangon        Member   Male      Health and beauty
## 5 373-73-7910      A    Yangon        Normal   Male      Sports and travel
## 6 699-14-3026      C Naypyitaw        Normal   Male Electronic accessories
##   Unit.price Quantity  Tax.5.    Total      Date  Time     Payment   cogs
## 1      74.69        7 26.1415 548.9715  1/5/2019 13:08     Ewallet 522.83
## 2      15.28        5  3.8200  80.2200  3/8/2019 10:29        Cash  76.40
## 3      46.33        7 16.2155 340.5255  3/3/2019 13:23 Credit card 324.31
## 4      58.22        8 23.2880 489.0480 1/27/2019 20:33     Ewallet 465.76
## 5      86.31        7 30.2085 634.3785  2/8/2019 10:37     Ewallet 604.17
## 6      85.39        7 29.8865 627.6165 3/25/2019 18:30     Ewallet 597.73
##   gross.margin.percentage gross.income Rating Hours       date  Weekday
## 1                4.761905      26.1415    9.1    13 2019-01-05 Saturday
## 2                4.761905       3.8200    9.6    10 2019-03-08   Friday
## 3                4.761905      16.2155    7.4    13 2019-03-03   Sunday
## 4                4.761905      23.2880    8.4    20 2019-01-27   Sunday
## 5                4.761905      30.2085    5.3    10 2019-02-08   Friday
## 6                4.761905      29.8865    4.1    18 2019-03-25   Monday
str(sales$Hours)  # confirms integer type
##  int [1:1000] 13 10 13 20 10 18 14 11 17 13 ...
# --- Calculate total sales by weekday ---

weekday_sales <- sales %>%
group_by(Weekday) %>%
summarise(Total_Sales = sum(Total, na.rm = TRUE)) %>%

# Create formatted label for plotting

mutate(Sales_Label = dollar(round(Total_Sales, 2)))

weekday_sales
## # A tibble: 7 × 3
##   Weekday   Total_Sales Sales_Label
##   <chr>           <dbl> <chr>      
## 1 Friday         43926. $43,926.34 
## 2 Monday         37899. $37,899.08 
## 3 Saturday       56121. $56,120.81 
## 4 Sunday         44458. $44,457.89 
## 5 Thursday       45349. $45,349.25 
## 6 Tuesday        51482. $51,482.25 
## 7 Wednesday      43731. $43,731.14
# Reorder Weekday by Total_Sales
weekday_sales <- weekday_sales %>%
  mutate(Weekday = fct_reorder(Weekday, Total_Sales))

# --- Horizontal bar chart with labels inside ---
ggplot(weekday_sales, aes(x = Weekday, y = Total_Sales, fill = Weekday)) +
  geom_col() +
  coord_flip() +
  
  # Labels inside bars
  geom_text(aes(label = Sales_Label),
            hjust = 1.1,
            color = "black",
            size = 4) +
  
  # Different color per weekday
  scale_fill_brewer(palette = "Set3") +
  
  # Dollar formatting on axis
  scale_y_continuous(labels = dollar) +
  
  labs(
    title = "Total Sales by Weekday",
    x = "Weekday",
    y = "Total Sales"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

# --- Total sales by weekday and hour ---

weekday_hour_sales <- sales %>%
group_by(Weekday, Hours) %>%
summarise(Total_Sales = sum(Total, na.rm = TRUE)) %>%
ungroup() %>%
mutate(Sales_Label = dollar(round(Total_Sales, 2)))
## `summarise()` has grouped output by 'Weekday'. You can override using the
## `.groups` argument.
daily_totals <- weekday_hour_sales %>%
group_by(Weekday) %>%
summarise(Day_Total = sum(Total_Sales)) %>%
mutate(Day_Label = dollar(round(Day_Total, 2)))

# Order weekdays by total sales

weekday_order <- daily_totals %>% arrange(Day_Total) %>% pull(Weekday)
weekday_hour_sales$Weekday <- factor(weekday_hour_sales$Weekday, levels = weekday_order)
daily_totals$Weekday <- factor(daily_totals$Weekday, levels = weekday_order)
# --- Stacked bar chart: Total Sales by Weekday and Hour ---

ggplot() +
geom_col(data = weekday_hour_sales,
aes(x = Weekday, y = Total_Sales, fill = factor(Hours)),
position = "stack") +
geom_text(data = weekday_hour_sales,
aes(x = Weekday, y = Total_Sales, label = Sales_Label),
position = position_stack(vjust = 0.5),
size = 3, color = "white") +
geom_text(data = daily_totals,
aes(x = Weekday, y = Day_Total, label = Day_Label),
inherit.aes = FALSE,
vjust = -0.5, size = 3, color = "black") +
scale_fill_viridis_d(name = "Hour") +
scale_y_continuous(labels = dollar) +
labs(title = "Total Sales by Weekday and Hour",
x = "Weekday",
y = "Total Sales") +
theme_minimal() +
theme(legend.position = "right")