#Remember to install packages before loading them with library()

library(tidyverse) ## A set of tools for Data manipulation and visualization
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate) ## for date time manipulation
library(scales) ## Formatting numbers and values
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
#library(hrbrthemes)# For changing ggplot theme
library(extrafont) # More font options
## Registering fonts with R

#Q1 - view data

# Read the raw sales export and print a compact, column-by-column overview.
sales <- read.csv(file = "~/Desktop/sales.csv")
glimpse(sales)
## Rows: 1,000
## Columns: 17
## $ Invoice.ID              <chr> "750-67-8428", "226-31-3081", "631-41-3108", "…
## $ Branch                  <chr> "A", "C", "A", "A", "A", "C", "A", "C", "A", "…
## $ City                    <chr> "Yangon", "Naypyitaw", "Yangon", "Yangon", "Ya…
## $ Customer.type           <chr> "Member", "Normal", "Normal", "Member", "Norma…
## $ Gender                  <chr> "Female", "Female", "Male", "Male", "Male", "M…
## $ Product.line            <chr> "Health and beauty", "Electronic accessories",…
## $ Unit.price              <dbl> 74.69, 15.28, 46.33, 58.22, 86.31, 85.39, 68.8…
## $ Quantity                <int> 7, 5, 7, 8, 7, 7, 6, 10, 2, 3, 4, 4, 5, 10, 10…
## $ Tax.5.                  <dbl> 26.1415, 3.8200, 16.2155, 23.2880, 30.2085, 29…
## $ Total                   <dbl> 548.9715, 80.2200, 340.5255, 489.0480, 634.378…
## $ Date                    <chr> "1/5/2019", "3/8/2019", "3/3/2019", "1/27/2019…
## $ Time                    <chr> "13:08", "10:29", "13:23", "20:33", "10:37", "…
## $ Payment                 <chr> "Ewallet", "Cash", "Credit card", "Ewallet", "…
## $ cogs                    <dbl> 522.83, 76.40, 324.31, 465.76, 604.17, 597.73,…
## $ gross.margin.percentage <dbl> 4.761905, 4.761905, 4.761905, 4.761905, 4.7619…
## $ gross.income            <dbl> 26.1415, 3.8200, 16.2155, 23.2880, 30.2085, 29…
## $ Rating                  <dbl> 9.1, 9.6, 7.4, 8.4, 5.3, 4.1, 5.8, 8.0, 7.2, 5…
# Q1 - view data

# Reload the raw data so this step starts from the original columns.
sales <- read.csv("~/Desktop/sales.csv")

# Derive the three helper columns used by the later summaries and charts.
sales <- sales %>%
  mutate(
    # Hour of the sale as an integer: the text before the first ":" in
    # `Time`. Splitting on the colon (instead of taking the first two
    # characters) also handles non-zero-padded times such as "9:05".
    hours_in_digit = as.integer(sub(":.*$", "", Time)),

    # `Date` holds month/day/year text, so parse it with mdy().
    date = mdy(Date),

    # Abbreviated weekday label (Mon, Sat, ...).
    weekday = wday(date, label = TRUE, abbr = TRUE)
  )
glimpse(sales)
## Rows: 1,000
## Columns: 20
## $ Invoice.ID              <chr> "750-67-8428", "226-31-3081", "631-41-3108", "…
## $ Branch                  <chr> "A", "C", "A", "A", "A", "C", "A", "C", "A", "…
## $ City                    <chr> "Yangon", "Naypyitaw", "Yangon", "Yangon", "Ya…
## $ Customer.type           <chr> "Member", "Normal", "Normal", "Member", "Norma…
## $ Gender                  <chr> "Female", "Female", "Male", "Male", "Male", "M…
## $ Product.line            <chr> "Health and beauty", "Electronic accessories",…
## $ Unit.price              <dbl> 74.69, 15.28, 46.33, 58.22, 86.31, 85.39, 68.8…
## $ Quantity                <int> 7, 5, 7, 8, 7, 7, 6, 10, 2, 3, 4, 4, 5, 10, 10…
## $ Tax.5.                  <dbl> 26.1415, 3.8200, 16.2155, 23.2880, 30.2085, 29…
## $ Total                   <dbl> 548.9715, 80.2200, 340.5255, 489.0480, 634.378…
## $ Date                    <chr> "1/5/2019", "3/8/2019", "3/3/2019", "1/27/2019…
## $ Time                    <chr> "13:08", "10:29", "13:23", "20:33", "10:37", "…
## $ Payment                 <chr> "Ewallet", "Cash", "Credit card", "Ewallet", "…
## $ cogs                    <dbl> 522.83, 76.40, 324.31, 465.76, 604.17, 597.73,…
## $ gross.margin.percentage <dbl> 4.761905, 4.761905, 4.761905, 4.761905, 4.7619…
## $ gross.income            <dbl> 26.1415, 3.8200, 16.2155, 23.2880, 30.2085, 29…
## $ Rating                  <dbl> 9.1, 9.6, 7.4, 8.4, 5.3, 4.1, 5.8, 8.0, 7.2, 5…
## $ hours_in_digit          <int> 13, 10, 13, 20, 10, 18, 14, 11, 17, 13, 18, 17…
## $ date                    <date> 2019-01-05, 2019-03-08, 2019-03-03, 2019-01-2…
## $ weekday                 <ord> Sat, Fri, Sun, Sun, Fri, Mon, Mon, Sun, Thu, W…
# Q1 - view data

sales <- read.csv(file = "~/Desktop/sales.csv")

# Confirm the exact column names before referring to them in later steps.
print("Column names in your dataset:")
## [1] "Column names in your dataset:"
names(sales)
##  [1] "Invoice.ID"              "Branch"                 
##  [3] "City"                    "Customer.type"          
##  [5] "Gender"                  "Product.line"           
##  [7] "Unit.price"              "Quantity"               
##  [9] "Tax.5."                  "Total"                  
## [11] "Date"                    "Time"                   
## [13] "Payment"                 "cogs"                   
## [15] "gross.margin.percentage" "gross.income"           
## [17] "Rating"
# Preview the first six records.
print("First few rows:")
## [1] "First few rows:"
head(sales, n = 6)
##    Invoice.ID Branch      City Customer.type Gender           Product.line
## 1 750-67-8428      A    Yangon        Member Female      Health and beauty
## 2 226-31-3081      C Naypyitaw        Normal Female Electronic accessories
## 3 631-41-3108      A    Yangon        Normal   Male     Home and lifestyle
## 4 123-19-1176      A    Yangon        Member   Male      Health and beauty
## 5 373-73-7910      A    Yangon        Normal   Male      Sports and travel
## 6 699-14-3026      C Naypyitaw        Normal   Male Electronic accessories
##   Unit.price Quantity  Tax.5.    Total      Date  Time     Payment   cogs
## 1      74.69        7 26.1415 548.9715  1/5/2019 13:08     Ewallet 522.83
## 2      15.28        5  3.8200  80.2200  3/8/2019 10:29        Cash  76.40
## 3      46.33        7 16.2155 340.5255  3/3/2019 13:23 Credit card 324.31
## 4      58.22        8 23.2880 489.0480 1/27/2019 20:33     Ewallet 465.76
## 5      86.31        7 30.2085 634.3785  2/8/2019 10:37     Ewallet 604.17
## 6      85.39        7 29.8865 627.6165 3/25/2019 18:30     Ewallet 597.73
##   gross.margin.percentage gross.income Rating
## 1                4.761905      26.1415    9.1
## 2                4.761905       3.8200    9.6
## 3                4.761905      16.2155    7.4
## 4                4.761905      23.2880    8.4
## 5                4.761905      30.2085    5.3
## 6                4.761905      29.8865    4.1
# List every column whose name mentions a time or an hour.
print("Checking for time-related columns:")
## [1] "Checking for time-related columns:"
colnames(sales)[
  grepl("time|Time|hour|Hour", colnames(sales), ignore.case = TRUE)
]
## [1] "Time"
# Create a horizontal bar graph of total sales by weekday

# Parse the text in `Date`, then label each sale with its abbreviated weekday.
sales <- sales %>%
  mutate(date_formatted = mdy(Date)) %>%
  mutate(weekday = wday(date_formatted, label = TRUE, abbr = TRUE))

# Sanity check: number of sales recorded on each weekday.
print("Weekday values created:")
## [1] "Weekday values created:"
table(sales[["weekday"]])
## 
## Sun Mon Tue Wed Thu Fri Sat 
## 133 125 158 143 138 139 164
# Sum the `Total` column for every weekday and sort ascending, so the
# weekday with the lowest total comes first.
weekday_sales <- sales %>%
  group_by(weekday) %>%
  summarise(total_sales = sum(Total, na.rm = TRUE), .groups = "drop") %>%
  arrange(total_sales)

# Show the summary table.
print("Sales by weekday:")
## [1] "Sales by weekday:"
weekday_sales
## # A tibble: 7 × 2
##   weekday total_sales
##   <ord>         <dbl>
## 1 Mon          37899.
## 2 Wed          43731.
## 3 Fri          43926.
## 4 Sun          44458.
## 5 Thu          45349.
## 6 Tue          51482.
## 7 Sat          56121.
# Horizontal bar chart of total sales per weekday, one fill colour per day.
library(RColorBrewer)

ggplot(
  weekday_sales,
  aes(x = total_sales, y = reorder(weekday, total_sales), fill = weekday)
) +
  # geom_col() uses the summarised value itself as the bar length.
  geom_col(width = 0.7) +
  # Dollar-formatted value printed just past the end of each bar.
  geom_text(
    aes(label = scales::dollar(total_sales, accuracy = 1)),
    hjust = -0.1,
    size = 3.5,
    fontface = "bold"
  ) +
  scale_fill_brewer(palette = "Set3") +
  # No padding on the left; 10% extra room on the right for the labels.
  scale_x_continuous(
    labels = scales::dollar,
    expand = expansion(mult = c(0, 0.1))
  ) +
  labs(
    title = "Total Sales by Weekday",
    x = "Total Sales ($)",
    y = "Weekday",
    caption = "Data: Sales Dataset"
  ) +
  theme_minimal() +
  theme(
    # The fill colour only repeats the y-axis labels, so hide the legend.
    legend.position = "none",
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    axis.text = element_text(size = 11),
    axis.title = element_text(size = 12, face = "bold"),
    plot.title = element_text(
      hjust = 0.5, face = "bold", size = 16, color = "darkblue"
    )
  )

# Alternative view: weekday totals based on the `gross.income` column
# instead of `Total`.
weekday_income <- sales %>%
  group_by(weekday) %>%
  summarise(
    total_income = sum(gross.income, na.rm = TRUE)
  ) %>%
  arrange(total_income)

# Horizontal bars sorted by income, each labelled with its dollar value.
ggplot(
  weekday_income,
  aes(x = total_income, y = reorder(weekday, total_income), fill = weekday)
) +
  # The white bar outline is sized with `linewidth`; passing `size` here is
  # ignored by ggplot2 with an "unknown parameters" warning.
  geom_col(width = 0.7, color = "white", linewidth = 0.5) +
  geom_text(
    aes(label = scales::dollar(total_income, accuracy = 1)),
    hjust = -0.1, size = 3.5, fontface = "bold"
  ) +
  scale_fill_viridis_d(option = "D") +
  labs(
    title = "Gross Income by Weekday",
    x = "Gross Income ($)",
    y = "Weekday",
    caption = "Data: Sales Dataset"
  ) +
  # No padding on the left; 10% extra room on the right for the labels.
  scale_x_continuous(
    labels = scales::dollar,
    expand = expansion(mult = c(0, 0.1))
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(
      hjust = 0.5, face = "bold", size = 16, color = "darkgreen"
    ),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 11),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    # The fill colour only repeats the y-axis labels, so hide the legend.
    legend.position = "none"
  )

#Sales By Hour Across Days of Week

library(tidyverse)
library(lubridate)
library(scales)

# 1. Read the raw sales export
sales <- read.csv(file = "~/Desktop/sales.csv")

# 2. Print a heading, then a compact overview of every column
cat("=== Data Structure ===\n")
## === Data Structure ===
glimpse(sales)
## Rows: 1,000
## Columns: 17
## $ Invoice.ID              <chr> "750-67-8428", "226-31-3081", "631-41-3108", "…
## $ Branch                  <chr> "A", "C", "A", "A", "A", "C", "A", "C", "A", "…
## $ City                    <chr> "Yangon", "Naypyitaw", "Yangon", "Yangon", "Ya…
## $ Customer.type           <chr> "Member", "Normal", "Normal", "Member", "Norma…
## $ Gender                  <chr> "Female", "Female", "Male", "Male", "Male", "M…
## $ Product.line            <chr> "Health and beauty", "Electronic accessories",…
## $ Unit.price              <dbl> 74.69, 15.28, 46.33, 58.22, 86.31, 85.39, 68.8…
## $ Quantity                <int> 7, 5, 7, 8, 7, 7, 6, 10, 2, 3, 4, 4, 5, 10, 10…
## $ Tax.5.                  <dbl> 26.1415, 3.8200, 16.2155, 23.2880, 30.2085, 29…
## $ Total                   <dbl> 548.9715, 80.2200, 340.5255, 489.0480, 634.378…
## $ Date                    <chr> "1/5/2019", "3/8/2019", "3/3/2019", "1/27/2019…
## $ Time                    <chr> "13:08", "10:29", "13:23", "20:33", "10:37", "…
## $ Payment                 <chr> "Ewallet", "Cash", "Credit card", "Ewallet", "…
## $ cogs                    <dbl> 522.83, 76.40, 324.31, 465.76, 604.17, 597.73,…
## $ gross.margin.percentage <dbl> 4.761905, 4.761905, 4.761905, 4.761905, 4.7619…
## $ gross.income            <dbl> 26.1415, 3.8200, 16.2155, 23.2880, 30.2085, 29…
## $ Rating                  <dbl> 9.1, 9.6, 7.4, 8.4, 5.3, 4.1, 5.8, 8.0, 7.2, 5…
cat("\n")
# 3. Add the three required columns
sales <- sales %>%
  mutate(
    # Hour of the sale as an integer: the text before the first ":" in
    # `Time`. Splitting on the colon (instead of taking the first two
    # characters) also handles non-zero-padded times such as "9:05".
    hours_in_digit = as.integer(sub(":.*$", "", Time)),
    # `Date` holds month/day/year text, so parse it with mdy().
    date = mdy(Date),
    # week_start = 1 puts Monday first in the weekday ordering.
    weekday = wday(date, label = TRUE, abbr = TRUE, week_start = 1)
  )

# Preview the source columns next to the derived ones.
cat("=== New Columns Added ===\n")
## === New Columns Added ===
head(sales %>% select(Date, Time, hours_in_digit, date, weekday))
##        Date  Time hours_in_digit       date weekday
## 1  1/5/2019 13:08             13 2019-01-05     Sat
## 2  3/8/2019 10:29             10 2019-03-08     Fri
## 3  3/3/2019 13:23             13 2019-03-03     Sun
## 4 1/27/2019 20:33             20 2019-01-27     Sun
## 5  2/8/2019 10:37             10 2019-02-08     Fri
## 6 3/25/2019 18:30             18 2019-03-25     Mon
cat("\n")
# 4. Total sales for every weekday / hour combination
sales_hour_day <- sales %>%
  group_by(weekday, hours_in_digit) %>%
  summarise(TotalSales = sum(Total, na.rm = TRUE), .groups = "drop")

# Preview the first six rows of the summary.
cat("=== Sales Summary by Hour and Weekday ===\n")
## === Sales Summary by Hour and Weekday ===
head(sales_hour_day, n = 6)
## # A tibble: 6 × 3
##   weekday hours_in_digit TotalSales
##   <ord>            <int>      <dbl>
## 1 Mon                 10      3738.
## 2 Mon                 11      2873.
## 3 Mon                 12      4726.
## 4 Mon                 13      3759.
## 5 Mon                 14      2004.
## 6 Mon                 15      5141.
cat("\n")
# 5. SALES BY HOUR ACROSS DAYS OF WEEK CHART
cat("=== Creating Chart ===\n")
## === Creating Chart ===
# One panel per weekday: hourly totals drawn as a line with point markers.
ggplot(sales_hour_day, aes(x = hours_in_digit, y = TotalSales)) +
  geom_line(linewidth = 1, color = "steelblue") +
  geom_point(size = 2, color = "darkred") +
  facet_wrap(~ weekday, ncol = 4) +
  # Tick every two hours; the 9-21 limits leave a margin around the data.
  scale_x_continuous(breaks = seq(10, 20, by = 2), limits = c(9, 21)) +
  # Dollar tick labels, with extra headroom above the tallest point.
  scale_y_continuous(
    labels = dollar_format(),
    expand = expansion(mult = c(0.05, 0.15))
  ) +
  labs(
    title = "Sales by Hour Across Days of the Week",
    x = "Hour of Day",
    y = "Total Sales ($)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text.x = element_text(angle = 0, hjust = 0.5, size = 10),
    axis.text.y = element_text(size = 9),
    strip.text = element_text(size = 11, face = "bold"),
    strip.background = element_rect(fill = "lightgray", color = "gray"),
    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5),
    panel.grid.minor = element_blank(),
    panel.spacing = unit(2, "lines"),
    legend.position = "none"
  )

library(tidyverse)
library(lubridate)
library(scales)

# Load the data
sales <- read.csv("~/Desktop/sales.csv")

# Add the three required columns
sales <- sales %>%
  mutate(
    # Hour of the sale as an integer: the text before the first ":" in
    # `Time`. Splitting on the colon (instead of taking the first two
    # characters) also handles non-zero-padded times such as "9:05".
    hours_in_digit = as.integer(sub(":.*$", "", Time)),
    # `Date` holds month/day/year text, so parse it with mdy().
    date = mdy(Date),
    # week_start = 1 puts Monday first in the weekday ordering.
    weekday = wday(date, label = TRUE, abbr = TRUE, week_start = 1)
  )

# Total sales for every weekday / hour combination, returned ungrouped.
sales_hour_day <- sales %>%
  group_by(weekday, hours_in_digit) %>%
  summarise(TotalSales = sum(Total, na.rm = TRUE), .groups = "drop")

# Faceted line chart: hourly sales totals, one coloured panel per weekday.
ggplot(sales_hour_day, aes(x = hours_in_digit, y = TotalSales)) +
  # Colorful lines for each panel
  geom_line(aes(color = weekday), linewidth = 1.2) +
  geom_point(aes(color = weekday), size = 2.5) +

  # Facet by weekday. axes = "all_x" draws the x-axis (ticks and labels)
  # under every panel, not only those on the outer margin; theme settings
  # alone cannot add axes to interior panels.
  facet_wrap(~ weekday, ncol = 4, axes = "all_x") +

  # Custom color palette; the facet strips already name each weekday, so
  # the colour guide is suppressed.
  scale_color_brewer(palette = "Set2", guide = "none") +

  # Same x-axis breaks and limits in every panel
  scale_x_continuous(
    breaks = c(10, 12, 14, 16, 18, 20),
    labels = c("10", "12", "14", "16", "18", "20"),  # Explicit labels
    limits = c(9, 21),
    name = "Hour of Day"
  ) +

  # Same y-axis in every panel, with dollar-formatted tick labels
  scale_y_continuous(
    labels = dollar_format(),
    breaks = scales::pretty_breaks(n = 6),
    name = "Total Sales ($)"
  ) +

  # Labels
  labs(
    title = "Sales by Hour Across Days of the Week"
  ) +

  # Consistent theme
  theme_minimal() +
  theme(
    # No legend
    legend.position = "none",

    # Panel headers
    strip.text = element_text(size = 11, face = "bold", color = "black"),
    strip.background = element_rect(fill = "lightgray", color = "gray"),

    # X-axis tick labels (styling only; which panels get an axis is
    # controlled by facet_wrap() above)
    axis.text.x = element_text(
      size = 10,
      color = "black",
      angle = 0,
      hjust = 0.5,
      vjust = 0.5
    ),

    # Y-axis tick labels
    axis.text.y = element_text(
      size = 9,
      color = "black"
    ),

    # Axis titles, pushed slightly away from the tick labels
    axis.title.x = element_text(
      size = 12,
      face = "bold",
      margin = margin(t = 10)
    ),
    axis.title.y = element_text(
      size = 12,
      face = "bold",
      margin = margin(r = 10)
    ),

    # Plot title
    plot.title = element_text(
      hjust = 0.5,
      face = "bold",
      size = 16,
      margin = margin(b = 15)
    ),

    # Panel spacing
    panel.spacing = unit(1.5, "lines"),

    # Grid lines
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "gray90", linewidth = 0.5),

    # Panel borders
    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
  )