#Remember to install packages before loading them with library()
library(tidyverse) ## A set of tools for Data manipulation and visualization
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate) ## for date time manipulation
library(scales) ## Formatting numbers and values
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
#library(hrbrthemes)# For changing ggplot theme
library(extrafont) # More font options
## Registering fonts with R
# Q1 - load the raw sales export and inspect its structure
# glimpse() prints one line per column: name, type and the first few values.
sales <- read.csv(file = "~/Desktop/sales.csv", header = TRUE)
glimpse(sales)
## Rows: 1,000
## Columns: 17
## $ Invoice.ID <chr> "750-67-8428", "226-31-3081", "631-41-3108", "…
## $ Branch <chr> "A", "C", "A", "A", "A", "C", "A", "C", "A", "…
## $ City <chr> "Yangon", "Naypyitaw", "Yangon", "Yangon", "Ya…
## $ Customer.type <chr> "Member", "Normal", "Normal", "Member", "Norma…
## $ Gender <chr> "Female", "Female", "Male", "Male", "Male", "M…
## $ Product.line <chr> "Health and beauty", "Electronic accessories",…
## $ Unit.price <dbl> 74.69, 15.28, 46.33, 58.22, 86.31, 85.39, 68.8…
## $ Quantity <int> 7, 5, 7, 8, 7, 7, 6, 10, 2, 3, 4, 4, 5, 10, 10…
## $ Tax.5. <dbl> 26.1415, 3.8200, 16.2155, 23.2880, 30.2085, 29…
## $ Total <dbl> 548.9715, 80.2200, 340.5255, 489.0480, 634.378…
## $ Date <chr> "1/5/2019", "3/8/2019", "3/3/2019", "1/27/2019…
## $ Time <chr> "13:08", "10:29", "13:23", "20:33", "10:37", "…
## $ Payment <chr> "Ewallet", "Cash", "Credit card", "Ewallet", "…
## $ cogs <dbl> 522.83, 76.40, 324.31, 465.76, 604.17, 597.73,…
## $ gross.margin.percentage <dbl> 4.761905, 4.761905, 4.761905, 4.761905, 4.7619…
## $ gross.income <dbl> 26.1415, 3.8200, 16.2155, 23.2880, 30.2085, 29…
## $ Rating <dbl> 9.1, 9.6, 7.4, 8.4, 5.3, 4.1, 5.8, 8.0, 7.2, 5…
# Q1 - view data
# Reload the raw export so the derived columns are built from an unmodified
# copy of the CSV.
sales <- read.csv("~/Desktop/sales.csv")

# Add the three requested columns:
#   hours_in_digit - hour of the sale as an integer
#   date           - `Date` (month/day/year text) parsed into a Date
#   weekday        - abbreviated weekday label (Sun, Mon, ...)
sales <- sales %>%
  mutate(
    # Keep everything before the first ":" instead of a fixed two-character
    # slice, so an unpadded time such as "9:05" gives 9 rather than NA.
    hours_in_digit = as.integer(sub(":.*$", "", Time)),
    date = mdy(Date),
    weekday = wday(date, label = TRUE, abbr = TRUE)
  )
glimpse(sales)
## Rows: 1,000
## Columns: 20
## $ Invoice.ID <chr> "750-67-8428", "226-31-3081", "631-41-3108", "…
## $ Branch <chr> "A", "C", "A", "A", "A", "C", "A", "C", "A", "…
## $ City <chr> "Yangon", "Naypyitaw", "Yangon", "Yangon", "Ya…
## $ Customer.type <chr> "Member", "Normal", "Normal", "Member", "Norma…
## $ Gender <chr> "Female", "Female", "Male", "Male", "Male", "M…
## $ Product.line <chr> "Health and beauty", "Electronic accessories",…
## $ Unit.price <dbl> 74.69, 15.28, 46.33, 58.22, 86.31, 85.39, 68.8…
## $ Quantity <int> 7, 5, 7, 8, 7, 7, 6, 10, 2, 3, 4, 4, 5, 10, 10…
## $ Tax.5. <dbl> 26.1415, 3.8200, 16.2155, 23.2880, 30.2085, 29…
## $ Total <dbl> 548.9715, 80.2200, 340.5255, 489.0480, 634.378…
## $ Date <chr> "1/5/2019", "3/8/2019", "3/3/2019", "1/27/2019…
## $ Time <chr> "13:08", "10:29", "13:23", "20:33", "10:37", "…
## $ Payment <chr> "Ewallet", "Cash", "Credit card", "Ewallet", "…
## $ cogs <dbl> 522.83, 76.40, 324.31, 465.76, 604.17, 597.73,…
## $ gross.margin.percentage <dbl> 4.761905, 4.761905, 4.761905, 4.761905, 4.7619…
## $ gross.income <dbl> 26.1415, 3.8200, 16.2155, 23.2880, 30.2085, 29…
## $ Rating <dbl> 9.1, 9.6, 7.4, 8.4, 5.3, 4.1, 5.8, 8.0, 7.2, 5…
## $ hours_in_digit <int> 13, 10, 13, 20, 10, 18, 14, 11, 17, 13, 18, 17…
## $ date <date> 2019-01-05, 2019-03-08, 2019-03-03, 2019-01-2…
## $ weekday <ord> Sat, Fri, Sun, Sun, Fri, Mon, Mon, Sun, Thu, W…
# Q1 - view data
# Start again from the raw CSV and inspect it before deriving anything.
sales <- read.csv(file = "~/Desktop/sales.csv")
# Column names exactly as they appear after import
print("Column names in your dataset:")
## [1] "Column names in your dataset:"
names(sales)
## [1] "Invoice.ID" "Branch"
## [3] "City" "Customer.type"
## [5] "Gender" "Product.line"
## [7] "Unit.price" "Quantity"
## [9] "Tax.5." "Total"
## [11] "Date" "Time"
## [13] "Payment" "cogs"
## [15] "gross.margin.percentage" "gross.income"
## [17] "Rating"
# First six rows of the data frame
print("First few rows:")
## [1] "First few rows:"
head(sales, n = 6L)
## Invoice.ID Branch City Customer.type Gender Product.line
## 1 750-67-8428 A Yangon Member Female Health and beauty
## 2 226-31-3081 C Naypyitaw Normal Female Electronic accessories
## 3 631-41-3108 A Yangon Normal Male Home and lifestyle
## 4 123-19-1176 A Yangon Member Male Health and beauty
## 5 373-73-7910 A Yangon Normal Male Sports and travel
## 6 699-14-3026 C Naypyitaw Normal Male Electronic accessories
## Unit.price Quantity Tax.5. Total Date Time Payment cogs
## 1 74.69 7 26.1415 548.9715 1/5/2019 13:08 Ewallet 522.83
## 2 15.28 5 3.8200 80.2200 3/8/2019 10:29 Cash 76.40
## 3 46.33 7 16.2155 340.5255 3/3/2019 13:23 Credit card 324.31
## 4 58.22 8 23.2880 489.0480 1/27/2019 20:33 Ewallet 465.76
## 5 86.31 7 30.2085 634.3785 2/8/2019 10:37 Ewallet 604.17
## 6 85.39 7 29.8865 627.6165 3/25/2019 18:30 Ewallet 597.73
## gross.margin.percentage gross.income Rating
## 1 4.761905 26.1415 9.1
## 2 4.761905 3.8200 9.6
## 3 4.761905 16.2155 7.4
## 4 4.761905 23.2880 8.4
## 5 4.761905 30.2085 5.3
## 6 4.761905 29.8865 4.1
# List every column whose name mentions "time" or "hour" (case-insensitive)
print("Checking for time-related columns:")
## [1] "Checking for time-related columns:"
grep(
  pattern = "time|Time|hour|Hour",
  x = colnames(sales),
  ignore.case = TRUE,
  value = TRUE
)
## [1] "Time"
# Data for the horizontal bar graph of total sales by weekday.
# The CSV column is `Date` (capital D); parse it from month/day/year text and
# label each sale with its abbreviated weekday.
sales <- sales %>%
  mutate(date_formatted = mdy(Date)) %>%
  mutate(weekday = wday(date_formatted, label = TRUE, abbr = TRUE))
# Sanity check: number of sales recorded on each weekday
print("Weekday values created:")
## [1] "Weekday values created:"
table(sales[["weekday"]])
##
## Sun Mon Tue Wed Thu Fri Sat
## 133 125 158 143 138 139 164
# Sum the `Total` column per weekday, smallest total first
weekday_sales <- arrange(
  summarise(
    group_by(sales, weekday),
    total_sales = sum(Total, na.rm = TRUE),
    .groups = "drop"
  ),
  total_sales
)
# Show the summary table
print("Sales by weekday:")
## [1] "Sales by weekday:"
weekday_sales
## # A tibble: 7 × 2
## weekday total_sales
## <ord> <dbl>
## 1 Mon 37899.
## 2 Wed 43731.
## 3 Fri 43926.
## 4 Sun 44458.
## 5 Thu 45349.
## 6 Tue 51482.
## 7 Sat 56121.
# Horizontal bar chart of total sales per weekday.
# Bars are sorted by their total, filled from the Brewer "Set3" palette, and
# each carries a whole-dollar label.
library(RColorBrewer)
weekday_sales %>%
  ggplot(
    aes(
      x = total_sales,
      y = reorder(weekday, total_sales),
      fill = weekday
    )
  ) +
  geom_col(width = 0.7) +
  # A negative hjust places each label just past the end of its bar
  geom_text(
    aes(label = scales::dollar(total_sales, accuracy = 1)),
    hjust = -0.1,
    size = 3.5,
    fontface = "bold"
  ) +
  scale_fill_brewer(palette = "Set3") +
  # No padding at zero; 10% extra room on the right for the bar labels
  scale_x_continuous(
    labels = scales::dollar,
    expand = expansion(mult = c(0, 0.1))
  ) +
  labs(
    title = "Total Sales by Weekday",
    x = "Total Sales ($)",
    y = "Weekday",
    caption = "Data: Sales Dataset"
  ) +
  theme_minimal() +
  theme(
    # The y axis already names every bar, so the fill legend is hidden
    legend.position = "none",
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    axis.text = element_text(size = 11),
    axis.title = element_text(size = 12, face = "bold"),
    plot.title = element_text(
      hjust = 0.5,
      face = "bold",
      size = 16,
      color = "darkblue"
    )
  )
# Alternative view: gross income (instead of Total) by weekday.
# Sum `gross.income` per weekday, smallest total first.
weekday_income <- sales %>%
  group_by(weekday) %>%
  summarise(
    total_income = sum(gross.income, na.rm = TRUE)
  ) %>%
  arrange(total_income)

# Horizontal bar chart of gross income per weekday, bars sorted by income and
# filled from the viridis "D" palette.
ggplot(
  weekday_income,
  aes(x = total_income, y = reorder(weekday, total_income), fill = weekday)
) +
  # Outline width is set with `linewidth`; current ggplot2 ignores `size` on
  # bars and emits an "unknown parameters" warning.
  geom_col(width = 0.7, color = "white", linewidth = 0.5) +
  # A negative hjust places each label just past the end of its bar
  geom_text(
    aes(label = scales::dollar(total_income, accuracy = 1)),
    hjust = -0.1, size = 3.5, fontface = "bold"
  ) +
  scale_fill_viridis_d(option = "D") +
  labs(
    title = "Gross Income by Weekday",
    x = "Gross Income ($)",
    y = "Weekday",
    caption = "Data: Sales Dataset"
  ) +
  # No padding at zero; 10% extra room on the right for the bar labels
  scale_x_continuous(
    labels = scales::dollar,
    expand = expansion(mult = c(0, 0.1))
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(
      hjust = 0.5, face = "bold", size = 16, color = "darkgreen"
    ),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 11),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    legend.position = "none"
  )
# Sales By Hour Across Days of Week ----
library(tidyverse)
library(lubridate)
library(scales)
# 1. Read the raw sales export again so this section starts from clean data
sales <- read.csv(file = "~/Desktop/sales.csv", header = TRUE)
# 2. Print a section banner followed by a column-by-column overview
cat("=== Data Structure ===\n", sep = "")
## === Data Structure ===
glimpse(sales)
## Rows: 1,000
## Columns: 17
## $ Invoice.ID <chr> "750-67-8428", "226-31-3081", "631-41-3108", "…
## $ Branch <chr> "A", "C", "A", "A", "A", "C", "A", "C", "A", "…
## $ City <chr> "Yangon", "Naypyitaw", "Yangon", "Yangon", "Ya…
## $ Customer.type <chr> "Member", "Normal", "Normal", "Member", "Norma…
## $ Gender <chr> "Female", "Female", "Male", "Male", "Male", "M…
## $ Product.line <chr> "Health and beauty", "Electronic accessories",…
## $ Unit.price <dbl> 74.69, 15.28, 46.33, 58.22, 86.31, 85.39, 68.8…
## $ Quantity <int> 7, 5, 7, 8, 7, 7, 6, 10, 2, 3, 4, 4, 5, 10, 10…
## $ Tax.5. <dbl> 26.1415, 3.8200, 16.2155, 23.2880, 30.2085, 29…
## $ Total <dbl> 548.9715, 80.2200, 340.5255, 489.0480, 634.378…
## $ Date <chr> "1/5/2019", "3/8/2019", "3/3/2019", "1/27/2019…
## $ Time <chr> "13:08", "10:29", "13:23", "20:33", "10:37", "…
## $ Payment <chr> "Ewallet", "Cash", "Credit card", "Ewallet", "…
## $ cogs <dbl> 522.83, 76.40, 324.31, 465.76, 604.17, 597.73,…
## $ gross.margin.percentage <dbl> 4.761905, 4.761905, 4.761905, 4.761905, 4.7619…
## $ gross.income <dbl> 26.1415, 3.8200, 16.2155, 23.2880, 30.2085, 29…
## $ Rating <dbl> 9.1, 9.6, 7.4, 8.4, 5.3, 4.1, 5.8, 8.0, 7.2, 5…
# Blank line to separate this section's console output from the next
cat("\n", sep = "")
# 3. Add the three required columns:
#   hours_in_digit - hour of the sale as an integer
#   date           - `Date` (month/day/year text) parsed into a Date
#   weekday        - abbreviated weekday label, Monday first
sales <- sales %>%
  mutate(
    # Keep everything before the first ":" instead of a fixed two-character
    # slice, so an unpadded time such as "9:05" gives 9 rather than NA.
    hours_in_digit = as.integer(sub(":.*$", "", Time)),
    date = mdy(Date),
    # week_start = 1 makes Monday the first level of the weekday factor
    weekday = wday(date, label = TRUE, abbr = TRUE, week_start = 1)
  )
cat("=== New Columns Added ===\n")
## === New Columns Added ===
# Show the source columns next to the columns derived from them
head(sales %>% select(Date, Time, hours_in_digit, date, weekday))
## Date Time hours_in_digit date weekday
## 1 1/5/2019 13:08 13 2019-01-05 Sat
## 2 3/8/2019 10:29 10 2019-03-08 Fri
## 3 3/3/2019 13:23 13 2019-03-03 Sun
## 4 1/27/2019 20:33 20 2019-01-27 Sun
## 5 2/8/2019 10:37 10 2019-02-08 Fri
## 6 3/25/2019 18:30 18 2019-03-25 Mon
cat("\n")
# 4. Total sales for every weekday / hour combination.
# `.groups = "drop"` returns an ungrouped table.
sales_hour_day <- summarise(
  group_by(sales, weekday, hours_in_digit),
  TotalSales = sum(Total, na.rm = TRUE),
  .groups = "drop"
)
cat("=== Sales Summary by Hour and Weekday ===\n", sep = "")
## === Sales Summary by Hour and Weekday ===
head(sales_hour_day, n = 6L)
## # A tibble: 6 × 3
## weekday hours_in_digit TotalSales
## <ord> <int> <dbl>
## 1 Mon 10 3738.
## 2 Mon 11 2873.
## 3 Mon 12 4726.
## 4 Mon 13 3759.
## 5 Mon 14 2004.
## 6 Mon 15 5141.
cat("\n", sep = "")
# 5. Sales by hour across days of the week
# One panel per weekday, each showing total sales against hour of day as a
# line with a marker at every hour.
cat("=== Creating Chart ===\n", sep = "")
## === Creating Chart ===
sales_hour_day %>%
  ggplot(aes(x = hours_in_digit, y = TotalSales)) +
  geom_line(color = "steelblue", linewidth = 1) +
  geom_point(color = "darkred", size = 2) +
  facet_wrap(vars(weekday), ncol = 4) +
  # Tick every two hours from 10 to 20, with the axis padded out to 9-21
  scale_x_continuous(
    name = "Hour of Day",
    breaks = seq(10, 20, by = 2),
    limits = c(9, 21)
  ) +
  # Dollar-formatted y axis with a little extra headroom above the peaks
  scale_y_continuous(
    name = "Total Sales ($)",
    labels = label_dollar(),
    expand = expansion(mult = c(0.05, 0.15))
  ) +
  labs(title = "Sales by Hour Across Days of the Week") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text.x = element_text(angle = 0, hjust = 0.5, size = 10),
    axis.text.y = element_text(size = 9),
    strip.text = element_text(size = 11, face = "bold"),
    strip.background = element_rect(fill = "lightgray", color = "gray"),
    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5),
    panel.grid.minor = element_blank(),
    panel.spacing = unit(2, "lines"),
    legend.position = "none"
  )
library(tidyverse)
library(lubridate)
library(scales)
# Load the data
sales <- read.csv("~/Desktop/sales.csv")
# Add the three required columns:
#   hours_in_digit - hour of the sale as an integer
#   date           - `Date` (month/day/year text) parsed into a Date
#   weekday        - abbreviated weekday label, Monday first
sales <- sales %>%
  mutate(
    # Keep everything before the first ":" instead of a fixed two-character
    # slice, so an unpadded time such as "9:05" gives 9 rather than NA.
    hours_in_digit = as.integer(sub(":.*$", "", Time)),
    date = mdy(Date),
    # week_start = 1 makes Monday the first level of the weekday factor
    weekday = wday(date, label = TRUE, abbr = TRUE, week_start = 1)
  )
# Total sales for every weekday / hour combination, returned ungrouped
sales_hour_day <- sales %>%
  group_by(weekday, hours_in_digit) %>%
  summarise(
    TotalSales = sum(Total, na.rm = TRUE),
    .groups = "drop"
  )
# Faceted line chart: total sales per hour of day, one panel per weekday.
ggplot(sales_hour_day, aes(x = hours_in_digit, y = TotalSales)) +
  # One colour per weekday; the facet strips already name the day, so the
  # colour legend is suppressed.
  geom_line(aes(color = weekday), linewidth = 1.2) +
  geom_point(aes(color = weekday), size = 2.5) +
  # axes = "all_x" draws the hour axis under every panel, not only under the
  # panels on the outer edge of the layout. Theme settings alone cannot add
  # the interior axes. Requires ggplot2 >= 3.5.0.
  facet_wrap(~ weekday, ncol = 4, axes = "all_x") +
  scale_color_brewer(palette = "Set2", guide = "none") +
  # Shared x scale: a tick every two hours from 10 to 20, padded to 9-21.
  # The default labels for these breaks are already "10", "12", ...
  scale_x_continuous(
    breaks = c(10, 12, 14, 16, 18, 20),
    limits = c(9, 21),
    name = "Hour of Day"
  ) +
  # Shared y scale with dollar labels and roughly six breaks
  scale_y_continuous(
    labels = dollar_format(),
    breaks = scales::pretty_breaks(n = 6),
    name = "Total Sales ($)"
  ) +
  labs(
    title = "Sales by Hour Across Days of the Week"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    # Facet strip (panel header) styling
    strip.text = element_text(size = 11, face = "bold", color = "black"),
    strip.background = element_rect(fill = "lightgray", color = "gray"),
    # Axis tick labels
    axis.text.x = element_text(
      size = 10,
      color = "black",
      angle = 0,
      hjust = 0.5,
      vjust = 0.5
    ),
    axis.text.y = element_text(
      size = 9,
      color = "black"
    ),
    # Axis titles, with a margin separating them from the tick labels
    axis.title.x = element_text(
      size = 12,
      face = "bold",
      margin = margin(t = 10)
    ),
    axis.title.y = element_text(
      size = 12,
      face = "bold",
      margin = margin(r = 10)
    ),
    # Centred plot title with space below it
    plot.title = element_text(
      hjust = 0.5,
      face = "bold",
      size = 16,
      margin = margin(b = 15)
    ),
    panel.spacing = unit(1.5, "lines"),
    # Light major grid only
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "gray90", linewidth = 0.5),
    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
  )