# Remember to install the packages (e.g. install.packages("tidyverse")) before loading them with library()
library(tidyverse) ## A set of tools for Data manipulation and visualization
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate) ## for date time manipulation
library(scales) ## Formatting numbers and values
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
# library(hrbrthemes) # For changing the ggplot theme (currently disabled)
library(extrafont) # More font options
## Warning: package 'extrafont' was built under R version 4.5.2
## Registering fonts with R
# Q1 - view data ----
# Load the raw sales records. The glimpse() output shows dotted column names
# (e.g. Product.line) and the code further down relies on them, so read.csv()
# is kept rather than swapping in a reader that may name columns differently.
sales <- read.csv("sales.csv")

# View() opens a spreadsheet-style viewer and needs an interactive session.
# Guard it so the script still runs end to end under Rscript or when knitting.
if (interactive()) {
  View(sales)
}

# Compact overview of column names, types and the first few values.
glimpse(sales)
## Rows: 1,000
## Columns: 17
## $ Invoice.ID <chr> "750-67-8428", "226-31-3081", "631-41-3108", "…
## $ Branch <chr> "A", "C", "A", "A", "A", "C", "A", "C", "A", "…
## $ City <chr> "Yangon", "Naypyitaw", "Yangon", "Yangon", "Ya…
## $ Customer.type <chr> "Member", "Normal", "Normal", "Member", "Norma…
## $ Gender <chr> "Female", "Female", "Male", "Male", "Male", "M…
## $ Product.line <chr> "Health and beauty", "Electronic accessories",…
## $ Unit.price <dbl> 74.69, 15.28, 46.33, 58.22, 86.31, 85.39, 68.8…
## $ Quantity <int> 7, 5, 7, 8, 7, 7, 6, 10, 2, 3, 4, 4, 5, 10, 10…
## $ Tax.5. <dbl> 26.1415, 3.8200, 16.2155, 23.2880, 30.2085, 29…
## $ Total <dbl> 548.9715, 80.2200, 340.5255, 489.0480, 634.378…
## $ Date <chr> "1/5/2019", "3/8/2019", "3/3/2019", "1/27/2019…
## $ Time <chr> "13:08", "10:29", "13:23", "20:33", "10:37", "…
## $ Payment <chr> "Ewallet", "Cash", "Credit card", "Ewallet", "…
## $ cogs <dbl> 522.83, 76.40, 324.31, 465.76, 604.17, 597.73,…
## $ gross.margin.percentage <dbl> 4.761905, 4.761905, 4.761905, 4.761905, 4.7619…
## $ gross.income <dbl> 26.1415, 3.8200, 16.2155, 23.2880, 30.2085, 29…
## $ Rating <dbl> 9.1, 9.6, 7.4, 8.4, 5.3, 4.1, 5.8, 8.0, 7.2, 5…
library(tidyverse)
# Average of the Rating column for each product line (ungrouped result).
rating_summary <- sales %>%
  group_by(Product.line) %>%
  summarise(mean_rating = mean(Rating), .groups = "drop")

# Horizontal bars, one per product line, ordered by their average rating.
# The same average drives the fill gradient (light blue = low, purple = high).
ggplot(
  data = rating_summary,
  mapping = aes(
    x = mean_rating,
    y = reorder(Product.line, mean_rating),
    fill = mean_rating
  )
) +
  geom_col() +
  scale_fill_gradient(low = "#6EC1E4", high = "#7B2CBF") +
  labs(
    title = "Average Rating by Product Line",
    x = "Average Rating",
    y = "Product Line",
    fill = "Rating"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold", size = 15),
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold")
  )
library(tidyverse)
# Average of the Rating column for each product line (ungrouped result).
rating_summary <- sales %>%
  group_by(Product.line) %>%
  summarise(mean_rating = mean(Rating), .groups = "drop")

# Refined horizontal bar chart: narrower bars with a white outline, a softer
# blue-to-violet fill gradient, and a decluttered grid.
ggplot(
  data = rating_summary,
  mapping = aes(
    x = mean_rating,
    y = reorder(Product.line, mean_rating),
    fill = mean_rating
  )
) +
  # width < 1 leaves gaps between bars; the white border separates them further
  geom_col(width = 0.55, color = "white", linewidth = 0.8) +
  scale_fill_gradient(low = "#93C6E7", high = "#6247AA") +
  labs(
    title = "Average Rating by Product Line",
    x = "Average Rating",
    y = "Product Line",
    fill = "Rating"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    axis.title = element_text(face = "bold"),
    # bars already mark each category, so horizontal grid lines are dropped
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "right"
  )
library(tidyverse)
# Calculate mean rating by product line
rating_summary <- sales %>%
  group_by(Product.line) %>%
  summarize(mean_rating = mean(Rating), .groups = "drop")

# Horizontal bar chart zoomed in on the region where the bars end, so small
# differences between product lines become visible.
#
# The zoom window is derived from the summarised means instead of being
# hard-coded, so the bar ends are always inside the visible range.
# NOTE(review): the previous fixed window c(7.5, 10) only shows bars when some
# mean exceeds 7.5; the follow-up chart re-zooms to 5-7.5, which suggests the
# means sit below that and this panel rendered empty - confirm against data.
#
# Keep in mind that a bar chart without a zero baseline exaggerates
# differences; the x-axis label flags the zoom for that reason.
ggplot(
  rating_summary,
  aes(
    x = mean_rating,
    y = reorder(Product.line, mean_rating),
    fill = mean_rating
  )
) +
  geom_col(width = 0.55, color = "white", linewidth = 0.8) +
  scale_fill_gradient(low = "#93C6E7", high = "#6247AA") +
  # coord_cartesian() zooms the view without discarding data, so bars that
  # start at 0 are clipped rather than removed. More room is left on the low
  # side (-1) than the high side (+0.5) so every bar keeps a visible body.
  coord_cartesian(
    xlim = range(rating_summary$mean_rating, na.rm = TRUE) + c(-1, 0.5)
  ) +
  labs(
    title = "Average Rating by Product Line",
    x = "Average Rating (Zoomed In)",
    y = "Product Line",
    fill = "Rating"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    axis.title = element_text(face = "bold"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank()
  )
library(tidyverse)
# Average of the Rating column for each product line (ungrouped result).
rating_summary <- sales %>%
  group_by(Product.line) %>%
  summarise(mean_rating = mean(Rating), .groups = "drop")

# Same narrow, outlined horizontal bars as before, with the visible x-range
# restricted to 5-7.5.
ggplot(
  data = rating_summary,
  mapping = aes(
    x = mean_rating,
    y = reorder(Product.line, mean_rating),
    fill = mean_rating
  )
) +
  geom_col(width = 0.55, color = "white", linewidth = 0.8) +
  scale_fill_gradient(low = "#93C6E7", high = "#6247AA") +
  # coord_cartesian() zooms the view only; the underlying bars are not dropped
  coord_cartesian(xlim = c(5, 7.5)) +
  labs(
    title = "Average Rating by Product Line (Zoomed 5–7.5)",
    x = "Average Rating",
    y = "Product Line",
    fill = "Rating"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    axis.title = element_text(face = "bold"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank()
  )