#Remember to install packages before loading them with library()
library(tidyverse) ## A set of tools for Data manipulation and visualization
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate) ## for date time manipulation
library(scales) ## Formatting numbers and values
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
#library(hrbrthemes)# For changing ggplot theme
library(extrafont) # More font options
## Registering fonts with R
#Q1 - view data
# Read the sales data
sales <- read.csv("sales.csv", stringsAsFactors = FALSE)
# Extract Hours from Time column
sales$Hours <- substr(sales$Time, 1, 2) %>% as.integer()
# Convert Date column and add Weekday
sales$date <- mdy(sales$Date)
sales$Weekday <- weekdays(sales$date)
# Save updated data (optional)
write.csv(sales, "sales_updated.csv", row.names = FALSE)
# Quick view
head(sales)
## Invoice.ID Branch City Customer.type Gender Product.line
## 1 750-67-8428 A Yangon Member Female Health and beauty
## 2 226-31-3081 C Naypyitaw Normal Female Electronic accessories
## 3 631-41-3108 A Yangon Normal Male Home and lifestyle
## 4 123-19-1176 A Yangon Member Male Health and beauty
## 5 373-73-7910 A Yangon Normal Male Sports and travel
## 6 699-14-3026 C Naypyitaw Normal Male Electronic accessories
## Unit.price Quantity Tax.5. Total Date Time Payment cogs
## 1 74.69 7 26.1415 548.9715 1/5/2019 13:08 Ewallet 522.83
## 2 15.28 5 3.8200 80.2200 3/8/2019 10:29 Cash 76.40
## 3 46.33 7 16.2155 340.5255 3/3/2019 13:23 Credit card 324.31
## 4 58.22 8 23.2880 489.0480 1/27/2019 20:33 Ewallet 465.76
## 5 86.31 7 30.2085 634.3785 2/8/2019 10:37 Ewallet 604.17
## 6 85.39 7 29.8865 627.6165 3/25/2019 18:30 Ewallet 597.73
## gross.margin.percentage gross.income Rating Hours date Weekday
## 1 4.761905 26.1415 9.1 13 2019-01-05 Saturday
## 2 4.761905 3.8200 9.6 10 2019-03-08 Friday
## 3 4.761905 16.2155 7.4 13 2019-03-03 Sunday
## 4 4.761905 23.2880 8.4 20 2019-01-27 Sunday
## 5 4.761905 30.2085 5.3 10 2019-02-08 Friday
## 6 4.761905 29.8865 4.1 18 2019-03-25 Monday
str(sales$Hours)
## int [1:1000] 13 10 13 20 10 18 14 11 17 13 ...
library(tidyverse)
library(lubridate)
# Read file from local directory
sales <- read.csv("sales.csv", stringsAsFactors = FALSE)
# --- Extract Hours using substr() and convert to integer ---
sales$Hours <- substr(sales$Time, 1, 2) %>%
as.integer()
# --- Convert Date column using mdy ---
sales$date <- mdy(sales$Date)
# --- Add weekday ---
sales$Weekday <- weekdays(sales$date)
# --- Save updated file ---
write.csv(sales, "sales_updated.csv", row.names = FALSE)
head(sales)
## Invoice.ID Branch City Customer.type Gender Product.line
## 1 750-67-8428 A Yangon Member Female Health and beauty
## 2 226-31-3081 C Naypyitaw Normal Female Electronic accessories
## 3 631-41-3108 A Yangon Normal Male Home and lifestyle
## 4 123-19-1176 A Yangon Member Male Health and beauty
## 5 373-73-7910 A Yangon Normal Male Sports and travel
## 6 699-14-3026 C Naypyitaw Normal Male Electronic accessories
## Unit.price Quantity Tax.5. Total Date Time Payment cogs
## 1 74.69 7 26.1415 548.9715 1/5/2019 13:08 Ewallet 522.83
## 2 15.28 5 3.8200 80.2200 3/8/2019 10:29 Cash 76.40
## 3 46.33 7 16.2155 340.5255 3/3/2019 13:23 Credit card 324.31
## 4 58.22 8 23.2880 489.0480 1/27/2019 20:33 Ewallet 465.76
## 5 86.31 7 30.2085 634.3785 2/8/2019 10:37 Ewallet 604.17
## 6 85.39 7 29.8865 627.6165 3/25/2019 18:30 Ewallet 597.73
## gross.margin.percentage gross.income Rating Hours date Weekday
## 1 4.761905 26.1415 9.1 13 2019-01-05 Saturday
## 2 4.761905 3.8200 9.6 10 2019-03-08 Friday
## 3 4.761905 16.2155 7.4 13 2019-03-03 Sunday
## 4 4.761905 23.2880 8.4 20 2019-01-27 Sunday
## 5 4.761905 30.2085 5.3 10 2019-02-08 Friday
## 6 4.761905 29.8865 4.1 18 2019-03-25 Monday
str(sales$Hours) # confirms integer type
## int [1:1000] 13 10 13 20 10 18 14 11 17 13 ...
# --- Calculate total sales by weekday ---
weekday_sales <- sales %>%
group_by(Weekday) %>%
summarise(Total_Sales = sum(Total, na.rm = TRUE)) %>%
# Create formatted label for plotting
mutate(Sales_Label = dollar(round(Total_Sales, 2)))
weekday_sales
## # A tibble: 7 × 3
## Weekday Total_Sales Sales_Label
## <chr> <dbl> <chr>
## 1 Friday 43926. $43,926.34
## 2 Monday 37899. $37,899.08
## 3 Saturday 56121. $56,120.81
## 4 Sunday 44458. $44,457.89
## 5 Thursday 45349. $45,349.25
## 6 Tuesday 51482. $51,482.25
## 7 Wednesday 43731. $43,731.14
# Reorder Weekday by Total_Sales
weekday_sales <- weekday_sales %>%
mutate(Weekday = fct_reorder(Weekday, Total_Sales))
# --- Horizontal bar chart with labels inside ---
ggplot(weekday_sales, aes(x = Weekday, y = Total_Sales, fill = Weekday)) +
geom_col() +
coord_flip() +
# Labels inside bars
geom_text(aes(label = Sales_Label),
hjust = 1.1,
color = "black",
size = 4) +
# Different color per weekday
scale_fill_brewer(palette = "Set3") +
# Dollar formatting on axis
scale_y_continuous(labels = dollar) +
labs(
title = "Total Sales by Weekday",
x = "Weekday",
y = "Total Sales"
) +
theme_minimal() +
theme(legend.position = "none")
# --- Total sales by weekday and hour ---
weekday_hour_sales <- sales %>%
group_by(Weekday, Hours) %>%
summarise(Total_Sales = sum(Total, na.rm = TRUE)) %>%
ungroup() %>%
mutate(Sales_Label = dollar(round(Total_Sales, 2)))
## `summarise()` has grouped output by 'Weekday'. You can override using the
## `.groups` argument.
daily_totals <- weekday_hour_sales %>%
group_by(Weekday) %>%
summarise(Day_Total = sum(Total_Sales)) %>%
mutate(Day_Label = dollar(round(Day_Total, 2)))
# Order weekdays by total sales
weekday_order <- daily_totals %>% arrange(Day_Total) %>% pull(Weekday)
weekday_hour_sales$Weekday <- factor(weekday_hour_sales$Weekday, levels = weekday_order)
daily_totals$Weekday <- factor(daily_totals$Weekday, levels = weekday_order)
# --- Stacked bar chart: Total Sales by Weekday and Hour ---
ggplot() +
geom_col(data = weekday_hour_sales,
aes(x = Weekday, y = Total_Sales, fill = factor(Hours)),
position = "stack") +
geom_text(data = weekday_hour_sales,
aes(x = Weekday, y = Total_Sales, label = Sales_Label),
position = position_stack(vjust = 0.5),
size = 3, color = "white") +
geom_text(data = daily_totals,
aes(x = Weekday, y = Day_Total, label = Day_Label),
inherit.aes = FALSE,
vjust = -0.5, size = 3, color = "black") +
scale_fill_viridis_d(name = "Hour") +
scale_y_continuous(labels = dollar) +
labs(title = "Total Sales by Weekday and Hour",
x = "Weekday",
y = "Total Sales") +
theme_minimal() +
theme(legend.position = "right")