#Remember to install packages before loading them with library()
library(tidyverse) ## A set of tools for Data manipulation and visualization
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate) ## for date time manipulation
library(scales) ## Formatting numbers and values
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
#library(hrbrthemes)# For changing ggplot theme
library(extrafont) # More font options
## Registering fonts with R
# Q1 - load the raw sales CSV into `data` and preview its first six rows
data <- read.csv(file = "~/Downloads/sales.csv")
head(data, n = 6L)
## Invoice.ID Branch City Customer.type Gender Product.line
## 1 750-67-8428 A Yangon Member Female Health and beauty
## 2 226-31-3081 C Naypyitaw Normal Female Electronic accessories
## 3 631-41-3108 A Yangon Normal Male Home and lifestyle
## 4 123-19-1176 A Yangon Member Male Health and beauty
## 5 373-73-7910 A Yangon Normal Male Sports and travel
## 6 699-14-3026 C Naypyitaw Normal Male Electronic accessories
## Unit.price Quantity Tax.5. Total Date Time Payment cogs
## 1 74.69 7 26.1415 548.9715 1/5/2019 13:08 Ewallet 522.83
## 2 15.28 5 3.8200 80.2200 3/8/2019 10:29 Cash 76.40
## 3 46.33 7 16.2155 340.5255 3/3/2019 13:23 Credit card 324.31
## 4 58.22 8 23.2880 489.0480 1/27/2019 20:33 Ewallet 465.76
## 5 86.31 7 30.2085 634.3785 2/8/2019 10:37 Ewallet 604.17
## 6 85.39 7 29.8865 627.6165 3/25/2019 18:30 Ewallet 597.73
## gross.margin.percentage gross.income Rating
## 1 4.761905 26.1415 9.1
## 2 4.761905 3.8200 9.6
## 3 4.761905 16.2155 7.4
## 4 4.761905 23.2880 8.4
## 5 4.761905 30.2085 5.3
## 6 4.761905 29.8865 4.1
# List the column names of the sales data frame
colnames(data)
## [1] "Invoice.ID" "Branch"
## [3] "City" "Customer.type"
## [5] "Gender" "Product.line"
## [7] "Unit.price" "Quantity"
## [9] "Tax.5." "Total"
## [11] "Date" "Time"
## [13] "Payment" "cogs"
## [15] "gross.margin.percentage" "gross.income"
## [17] "Rating"
library(tidyverse)
library(lubridate)
# Reload the raw CSV so that Date is still "m/d/yyyy" text when mdy() parses it
data <- read.csv("~/Downloads/sales.csv")
# 1. Add Hour column: keep only the leading one or two digits of Time.
#    This handles zero-padded ("13:08", "09:05") and unpadded ("9:05") times
#    alike; a fixed two-character substr() would turn "9:05" into "9:" and
#    then into NA.
data$Hour <- as.integer(
  sub("^[[:space:]]*([0-9]{1,2}).*$", "\\1", data$Time)
)
# 2. Convert Date column from month/day/year text to Date using mdy()
data$Date <- mdy(data$Date)
# 3. Add Weekday column with abbreviated day labels (Sun, Mon, Tue, ...)
data$Weekday <- wday(data$Date, label = TRUE, abbr = TRUE)
# Preview the data with the three derived/converted columns
head(data)
## Invoice.ID Branch City Customer.type Gender Product.line
## 1 750-67-8428 A Yangon Member Female Health and beauty
## 2 226-31-3081 C Naypyitaw Normal Female Electronic accessories
## 3 631-41-3108 A Yangon Normal Male Home and lifestyle
## 4 123-19-1176 A Yangon Member Male Health and beauty
## 5 373-73-7910 A Yangon Normal Male Sports and travel
## 6 699-14-3026 C Naypyitaw Normal Male Electronic accessories
## Unit.price Quantity Tax.5. Total Date Time Payment cogs
## 1 74.69 7 26.1415 548.9715 2019-01-05 13:08 Ewallet 522.83
## 2 15.28 5 3.8200 80.2200 2019-03-08 10:29 Cash 76.40
## 3 46.33 7 16.2155 340.5255 2019-03-03 13:23 Credit card 324.31
## 4 58.22 8 23.2880 489.0480 2019-01-27 20:33 Ewallet 465.76
## 5 86.31 7 30.2085 634.3785 2019-02-08 10:37 Ewallet 604.17
## 6 85.39 7 29.8865 627.6165 2019-03-25 18:30 Ewallet 597.73
## gross.margin.percentage gross.income Rating Hour Weekday
## 1 4.761905 26.1415 9.1 13 Sat
## 2 4.761905 3.8200 9.6 10 Fri
## 3 4.761905 16.2155 7.4 13 Sun
## 4 4.761905 23.2880 8.4 20 Sun
## 5 4.761905 30.2085 5.3 10 Fri
## 6 4.761905 29.8865 4.1 18 Mon
library(tidyverse)
library(lubridate)
# Total of the `Total` column for each weekday:
# one row per Weekday, with the sum stored in TotalSales
sales_by_weekday <- data |>
  group_by(Weekday) |>
  summarise(
    TotalSales = sum(Total),
    .groups = "drop"
  )
# Horizontal bar chart of total sales per weekday; the rounded total is
# printed in white, right-aligned just inside the end of each bar
ggplot(
  data = sales_by_weekday,
  mapping = aes(x = Weekday, y = TotalSales)
) +
  geom_col(fill = "tomato") +
  geom_text(
    mapping = aes(label = round(TotalSales, digits = 0)),
    colour = "white",
    size = 4,
    hjust = 1.1
  ) +
  coord_flip() + # flip axes so the bars run horizontally
  ggtitle("Total Sales by Weekday") +
  xlab("Weekday") +
  ylab("Total Sales") +
  theme_minimal()
library(tidyverse)
library(lubridate)
# Total of the `Total` column for each hour of the day:
# one row per Hour (ascending), with the sum stored in TotalSales
sales_by_hour <- data |>
  group_by(Hour) |>
  summarise(
    TotalSales = sum(Total),
    .groups = "drop"
  ) |>
  arrange(Hour)
# Horizontal bar chart of total sales per hour; Hour is converted to a factor
# so each hour gets its own discrete bar, and the rounded total is printed in
# white, right-aligned just inside the end of each bar
ggplot(
  data = sales_by_hour,
  mapping = aes(x = as.factor(Hour), y = TotalSales)
) +
  geom_col(fill = "steelblue") +
  geom_text(
    mapping = aes(label = round(TotalSales, digits = 0)),
    colour = "white",
    size = 4,
    hjust = 1.1
  ) +
  coord_flip() + # flip axes so the bars run horizontally
  ggtitle("Total Sales by Hour of Day") +
  xlab("Hour") +
  ylab("Total Sales") +
  theme_minimal()