#Remember to install packages before loading them with library()

library(tidyverse) ## A set of tools for Data manipulation and visualization
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate) ## for date time manipulation
library(scales) ## Formatting numbers and values
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
#library(hrbrthemes)# For changing ggplot theme
library(extrafont) # More font options
## Warning: package 'extrafont' was built under R version 4.5.2
## Registering fonts with R

# Q1 - view data ----

# Load the raw sales export with base R.
sales <- read.csv("sales.csv")

# View() opens a spreadsheet-style viewer, which is only useful in an
# interactive session. Skip it when the script is run with Rscript or knitted
# so the rest of the analysis still runs.
if (interactive()) {
  View(sales)
}

# Compact overview of every column and its type.
glimpse(sales)
## Rows: 1,000
## Columns: 17
## $ Invoice.ID              <chr> "750-67-8428", "226-31-3081", "631-41-3108", "…
## $ Branch                  <chr> "A", "C", "A", "A", "A", "C", "A", "C", "A", "…
## $ City                    <chr> "Yangon", "Naypyitaw", "Yangon", "Yangon", "Ya…
## $ Customer.type           <chr> "Member", "Normal", "Normal", "Member", "Norma…
## $ Gender                  <chr> "Female", "Female", "Male", "Male", "Male", "M…
## $ Product.line            <chr> "Health and beauty", "Electronic accessories",…
## $ Unit.price              <dbl> 74.69, 15.28, 46.33, 58.22, 86.31, 85.39, 68.8…
## $ Quantity                <int> 7, 5, 7, 8, 7, 7, 6, 10, 2, 3, 4, 4, 5, 10, 10…
## $ Tax.5.                  <dbl> 26.1415, 3.8200, 16.2155, 23.2880, 30.2085, 29…
## $ Total                   <dbl> 548.9715, 80.2200, 340.5255, 489.0480, 634.378…
## $ Date                    <chr> "1/5/2019", "3/8/2019", "3/3/2019", "1/27/2019…
## $ Time                    <chr> "13:08", "10:29", "13:23", "20:33", "10:37", "…
## $ Payment                 <chr> "Ewallet", "Cash", "Credit card", "Ewallet", "…
## $ cogs                    <dbl> 522.83, 76.40, 324.31, 465.76, 604.17, 597.73,…
## $ gross.margin.percentage <dbl> 4.761905, 4.761905, 4.761905, 4.761905, 4.7619…
## $ gross.income            <dbl> 26.1415, 3.8200, 16.2155, 23.2880, 30.2085, 29…
## $ Rating                  <dbl> 9.1, 9.6, 7.4, 8.4, 5.3, 4.1, 5.8, 8.0, 7.2, 5…
library(tidyverse)
library(lubridate)

# Read the same file again with readr; `sales` is a tibble from here on.
sales <- readr::read_csv(file = "sales.csv")
## Rows: 1000 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (8): Invoice.ID, Branch, City, Customer.type, Gender, Product.line, Dat...
## dbl  (8): Unit.price, Quantity, Tax.5., Total, cogs, gross.margin.percentage...
## time (1): Time
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Derive the time-based columns, one step at a time:
#   Hour    - leading two characters of Time (HH:MM), as an integer
#   Date    - month/day/year text parsed into a Date
#   Weekday - abbreviated, labelled day of week taken from the parsed Date
sales <- sales %>%
  mutate(Hour = as.integer(substr(Time, start = 1, stop = 2))) %>%
  mutate(Date = mdy(Date)) %>%
  mutate(Weekday = wday(Date, label = TRUE, abbr = TRUE))

# Peek at the first rows to confirm the new columns.
head(sales)
## # A tibble: 6 × 19
##   Invoice.ID  Branch City  Customer.type Gender Product.line Unit.price Quantity
##   <chr>       <chr>  <chr> <chr>         <chr>  <chr>             <dbl>    <dbl>
## 1 750-67-8428 A      Yang… Member        Female Health and …       74.7        7
## 2 226-31-3081 C      Nayp… Normal        Female Electronic …       15.3        5
## 3 631-41-3108 A      Yang… Normal        Male   Home and li…       46.3        7
## 4 123-19-1176 A      Yang… Member        Male   Health and …       58.2        8
## 5 373-73-7910 A      Yang… Normal        Male   Sports and …       86.3        7
## 6 699-14-3026 C      Nayp… Normal        Male   Electronic …       85.4        7
## # ℹ 11 more variables: Tax.5. <dbl>, Total <dbl>, Date <date>, Time <time>,
## #   Payment <chr>, cogs <dbl>, gross.margin.percentage <dbl>,
## #   gross.income <dbl>, Rating <dbl>, Hour <int>, Weekday <ord>
library(dplyr)
library(ggplot2)
library(forcats)

# Total sales per weekday, largest first.
# `na.rm = TRUE` keeps a missing Total from turning a whole day's sum into NA.
sales_by_day <- sales %>%
  group_by(Weekday) %>%
  summarise(TotalSales = sum(Total, na.rm = TRUE)) %>%
  arrange(desc(TotalSales))

# Horizontal bars; fct_reorder() sorts the y-axis by TotalSales so the
# best-selling day sits at the top.
ggplot(
  sales_by_day,
  aes(y = fct_reorder(Weekday, TotalSales), x = TotalSales, fill = Weekday)
) +
  geom_col() +
  geom_text(
    aes(label = round(TotalSales, 1)),
    hjust = 1.1,       # values above 1 pull the label inside the bar end
    color = "grey20",  # dark text: the Set3 fills are pale, white washes out
    size = 4
  ) +
  scale_fill_brewer(palette = "Set3") +
  labs(
    title = "Total Sales by Day of Week",
    x = "Total Sales",
    y = "Day of Week"
  ) +
  theme_minimal() +
  # Bars are already labelled on the axis, so the fill legend is redundant.
  theme(legend.position = "none")

library(dplyr)
library(tidyr)
library(ggplot2)

# Total sales for every Weekday x Hour combination.
# `na.rm = TRUE` matches the by-weekday summary, so a missing Total cannot
# turn an hour's sum into NA; `.groups = "drop"` returns an ungrouped result.
sales_hour_day <- sales %>%
  group_by(Weekday, Hour) %>%
  summarise(TotalSales = sum(Total, na.rm = TRUE), .groups = "drop")

# One panel per weekday; hourly totals drawn as a line with point markers.
ggplot(
  sales_hour_day,
  aes(x = Hour, y = TotalSales, group = Weekday, color = Weekday)
) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  facet_wrap(~ Weekday, ncol = 3) +
  # Dark2 rather than Set3: pastel lines are hard to see on a white panel.
  scale_color_brewer(palette = "Dark2") +
  labs(
    title = "Sales by Hour Across Days of the Week",
    x = "Hour of Day",
    y = "Total Sales"
  ) +
  theme_minimal() +
  theme(
    # Panel strips already name the weekday, so the colour legend is dropped.
    legend.position = "none",
    strip.text = element_text(size = 12, face = "bold")
  )