#Remember to install packages before loading them with library()
library(tidyverse) ## A set of tools for Data manipulation and visualization
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate) ## for date time manipulation
library(scales) ## Formatting numbers and values
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
#library(hrbrthemes)# For changing ggplot theme
library(extrafont) # More font options
## Registering fonts with R
# Q1 - load the raw sales records from disk and inspect their structure
sales <- read.csv(file = "sales.csv")
sales %>% glimpse()
## Rows: 1,000
## Columns: 17
## $ Invoice.ID <chr> "750-67-8428", "226-31-3081", "631-41-3108", "…
## $ Branch <chr> "A", "C", "A", "A", "A", "C", "A", "C", "A", "…
## $ City <chr> "Yangon", "Naypyitaw", "Yangon", "Yangon", "Ya…
## $ Customer.type <chr> "Member", "Normal", "Normal", "Member", "Norma…
## $ Gender <chr> "Female", "Female", "Male", "Male", "Male", "M…
## $ Product.line <chr> "Health and beauty", "Electronic accessories",…
## $ Unit.price <dbl> 74.69, 15.28, 46.33, 58.22, 86.31, 85.39, 68.8…
## $ Quantity <int> 7, 5, 7, 8, 7, 7, 6, 10, 2, 3, 4, 4, 5, 10, 10…
## $ Tax.5. <dbl> 26.1415, 3.8200, 16.2155, 23.2880, 30.2085, 29…
## $ Total <dbl> 548.9715, 80.2200, 340.5255, 489.0480, 634.378…
## $ Date <chr> "1/5/2019", "3/8/2019", "3/3/2019", "1/27/2019…
## $ Time <chr> "13:08", "10:29", "13:23", "20:33", "10:37", "…
## $ Payment <chr> "Ewallet", "Cash", "Credit card", "Ewallet", "…
## $ cogs <dbl> 522.83, 76.40, 324.31, 465.76, 604.17, 597.73,…
## $ gross.margin.percentage <dbl> 4.761905, 4.761905, 4.761905, 4.761905, 4.7619…
## $ gross.income <dbl> 26.1415, 3.8200, 16.2155, 23.2880, 30.2085, 29…
## $ Rating <dbl> 9.1, 9.6, 7.4, 8.4, 5.3, 4.1, 5.8, 8.0, 7.2, 5…
# Add 3 new columns: time (hour of the day), date (parsed from Date) and weekday
# Derive three helper columns from the raw Date/Time strings:
#   time    - hour of the day as an integer (the part of Time before ":")
#   date    - Date parsed from the month/day/year strings in Date
#   weekday - labelled day of the week for each date
sales <- sales %>%
  mutate(
    # Drop everything from the first ":" onward instead of taking a fixed
    # two-character prefix, so a non-padded hour such as "9:05" yields 9
    # rather than NA. Zero-padded "HH:MM" values give the same result as before.
    time = as.integer(sub(":.*$", "", Time)),
    date = mdy(Date),
    weekday = wday(date, label = TRUE)
  )
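# Learn about ungroup(): summarise() removes only the last grouping level, so
# the result below is still grouped by City. To get an ungrouped result, either
# append `%>% ungroup()` or use
# `summarise(Total_Sales = sum(Total), .groups = "drop")`.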
# Total sales for every City/weekday pair. No `.groups` is given on purpose:
# summarise() then drops only the last grouping level (weekday), so df2 stays
# grouped by City for the comparison that follows.
df2 <- summarise(
  group_by(sales, City, weekday),
  Total_Sales = sum(Total)
)
## `summarise()` has grouped output by 'City'. You can override using the
## `.groups` argument.
# Peek at the first rows of the summary
df2 %>% head()
## # A tibble: 6 × 3
## # Groups: City [1]
## City weekday Total_Sales
## <chr> <ord> <dbl>
## 1 Mandalay Sun 10415.
## 2 Mandalay Mon 12735.
## 3 Mandalay Tue 18859.
## 4 Mandalay Wed 12708.
## 5 Mandalay Thu 15779.
## 6 Mandalay Fri 14418.
# df2 is still grouped by City, so sum() runs per City: each percent is the
# weekday's share of that City's total sales.
mutate(df2, percent = Total_Sales / sum(Total_Sales))
## # A tibble: 21 × 4
## # Groups: City [3]
## City weekday Total_Sales percent
## <chr> <ord> <dbl> <dbl>
## 1 Mandalay Sun 10415. 0.0981
## 2 Mandalay Mon 12735. 0.120
## 3 Mandalay Tue 18859. 0.178
## 4 Mandalay Wed 12708. 0.120
## 5 Mandalay Thu 15779. 0.149
## 6 Mandalay Fri 14418. 0.136
## 7 Mandalay Sat 21284. 0.200
## 8 Naypyitaw Sun 17036. 0.154
## 9 Naypyitaw Mon 10925. 0.0988
## 10 Naypyitaw Tue 17668. 0.160
## # ℹ 11 more rows
# After removing the City grouping, sum() covers every row: each percent is
# now the share of total sales across all cities.
mutate(
  ungroup(df2),
  percent = Total_Sales / sum(Total_Sales)
)
## # A tibble: 21 × 4
## City weekday Total_Sales percent
## <chr> <ord> <dbl> <dbl>
## 1 Mandalay Sun 10415. 0.0322
## 2 Mandalay Mon 12735. 0.0394
## 3 Mandalay Tue 18859. 0.0584
## 4 Mandalay Wed 12708. 0.0393
## 5 Mandalay Thu 15779. 0.0489
## 6 Mandalay Fri 14418. 0.0446
## 7 Mandalay Sat 21284. 0.0659
## 8 Naypyitaw Sun 17036. 0.0527
## 9 Naypyitaw Mon 10925. 0.0338
## 10 Naypyitaw Tue 17668. 0.0547
## # ℹ 11 more rows
# Q3 - total sales per weekday, drawn as a horizontal bar chart ordered by
# sales amount
sales_by_day <- sales %>%
  group_by(weekday) %>%
  summarise(Total_Sales = sum(Total)) %>%
  ungroup()

ggplot(
  data = sales_by_day,
  mapping = aes(
    x = reorder(weekday, Total_Sales),
    y = Total_Sales,
    fill = weekday
  )
) +
  geom_col(color = "black", show.legend = FALSE) +
  # Print the formatted total just inside the end of each bar
  geom_text(
    mapping = aes(label = comma(Total_Sales)),
    color = "black",
    size = 3,
    hjust = 1
  ) +
  scale_fill_brewer(palette = "Paired") +
  coord_flip() +
  theme_classic() +
  labs(
    title = "Total Sales breakdown by Weekday",
    x = "Day of the week",
    y = "Total sales"
  )
# Q4 - create and visualize a summary by weekday and time
# Total sales for every weekday/hour combination. `.groups = "drop"` returns
# an ungrouped tibble directly, so no trailing ungroup() is needed and
# summarise() no longer prints its "grouped output" message.
sales_by_day_hour <- sales %>%
  group_by(weekday, time) %>%
  summarise(Total_Sales = sum(Total), .groups = "drop")

# One panel per weekday; coord_flip() puts the hour on the vertical axis.
sales_by_day_hour %>%
  ggplot(aes(time, Total_Sales, fill = weekday)) +
  geom_col(show.legend = FALSE, color = "black") +
  scale_fill_brewer(palette = "Paired") +
  # time holds whole-number hours; request one tick per hour so the axis does
  # not fall back to default continuous breaks, which can land between hours.
  scale_x_continuous(breaks = breaks_width(1)) +
  facet_wrap(~weekday, scales = "free_y") +
  coord_flip() +
  theme_classic() +
  labs(
    title = "Total Sales breakdown by Weekday and Time",
    x = "Hour of the day",
    y = "Total sales"
  )