#LBB 2 - Data Visualization
#data input from https://www.kaggle.com/aungpyaeap/supermarket-sales
# Load the supermarket sales export. `stringsAsFactors = FALSE` is spelled out
# so text columns are read as character regardless of the R version's default.
sales <- read.csv("supermarket_sales - Sheet1.csv", stringsAsFactors = FALSE)
# Show the column names and types of the loaded data frame.
str(sales)
## 'data.frame': 1000 obs. of 17 variables:
## $ Invoice.ID : chr "750-67-8428" "226-31-3081" "631-41-3108" "123-19-1176" ...
## $ Branch : chr "A" "C" "A" "A" ...
## $ City : chr "Yangon" "Naypyitaw" "Yangon" "Yangon" ...
## $ Customer.type : chr "Member" "Normal" "Normal" "Member" ...
## $ Gender : chr "Female" "Female" "Male" "Male" ...
## $ Product.line : chr "Health and beauty" "Electronic accessories" "Home and lifestyle" "Health and beauty" ...
## $ Unit.price : num 74.7 15.3 46.3 58.2 86.3 ...
## $ Quantity : int 7 5 7 8 7 7 6 10 2 3 ...
## $ Tax.5. : num 26.14 3.82 16.22 23.29 30.21 ...
## $ Total : num 549 80.2 340.5 489 634.4 ...
## $ Date : chr "1/5/2019" "3/8/2019" "3/3/2019" "1/27/2019" ...
## $ Time : chr "13:08" "10:29" "13:23" "20:33" ...
## $ Payment : chr "Ewallet" "Cash" "Credit card" "Ewallet" ...
## $ cogs : num 522.8 76.4 324.3 465.8 604.2 ...
## $ gross.margin.percentage: num 4.76 4.76 4.76 4.76 4.76 ...
## $ gross.income : num 26.14 3.82 16.22 23.29 30.21 ...
## $ Rating : num 9.1 9.6 7.4 8.4 5.3 4.1 5.8 8 7.2 5.9 ...
library(dplyr)
library(lubridate)
# Store the categorical columns as factors and parse the month/day/year
# date strings into Date values in a single pass.
sales <- sales %>%
  mutate(
    across(
      all_of(c(
        "Branch", "City", "Customer.type",
        "Gender", "Product.line", "Payment"
      )),
      as.factor
    ),
    Date = mdy(Date)
  )
# Number of missing values in each column.
colSums(is.na(sales))
## Invoice.ID Branch City
## 0 0 0
## Customer.type Gender Product.line
## 0 0 0
## Unit.price Quantity Tax.5.
## 0 0 0
## Total Date Time
## 0 0 0
## Payment cogs gross.margin.percentage
## 0 0 0
## gross.income Rating
## 0 0
# Print per-column summaries of the sales data frame.
summary(sales)
## Invoice.ID Branch City Customer.type Gender
## Length:1000 A:340 Mandalay :332 Member:501 Female:501
## Class :character B:332 Naypyitaw:328 Normal:499 Male :499
## Mode :character C:328 Yangon :340
##
##
##
## Product.line Unit.price Quantity Tax.5.
## Electronic accessories:170 Min. :10.08 Min. : 1.00 Min. : 0.5085
## Fashion accessories :178 1st Qu.:32.88 1st Qu.: 3.00 1st Qu.: 5.9249
## Food and beverages :174 Median :55.23 Median : 5.00 Median :12.0880
## Health and beauty :152 Mean :55.67 Mean : 5.51 Mean :15.3794
## Home and lifestyle :160 3rd Qu.:77.94 3rd Qu.: 8.00 3rd Qu.:22.4453
## Sports and travel :166 Max. :99.96 Max. :10.00 Max. :49.6500
## Total Date Time Payment
## Min. : 10.68 Min. :2019-01-01 Length:1000 Cash :344
## 1st Qu.: 124.42 1st Qu.:2019-01-24 Class :character Credit card:311
## Median : 253.85 Median :2019-02-13 Mode :character Ewallet :345
## Mean : 322.97 Mean :2019-02-14
## 3rd Qu.: 471.35 3rd Qu.:2019-03-08
## Max. :1042.65 Max. :2019-03-30
## cogs gross.margin.percentage gross.income Rating
## Min. : 10.17 Min. :4.762 Min. : 0.5085 Min. : 4.000
## 1st Qu.:118.50 1st Qu.:4.762 1st Qu.: 5.9249 1st Qu.: 5.500
## Median :241.76 Median :4.762 Median :12.0880 Median : 7.000
## Mean :307.59 Mean :4.762 Mean :15.3794 Mean : 6.973
## 3rd Qu.:448.90 3rd Qu.:4.762 3rd Qu.:22.4453 3rd Qu.: 8.500
## Max. :993.00 Max. :4.762 Max. :49.6500 Max. :10.000
Summary:
1. Branch A, B, dan C masing-masing berada di kota Yangon, Mandalay, dan Naypyitaw.
2. Terdapat 6 kategori produk yang dijual.
3. Rata-rata Gross Income adalah sebesar 15.3794.
4. Total Sales terendah adalah sebesar 10.68 dengan Gross Income sebesar 0.5085.
5. Total Sales tertinggi adalah sebesar 1042.65 dengan Gross Income sebesar 49.6500.
6. Data observasi diambil pada rentang waktu 1 Januari 2019 hingga 30 Maret 2019 (Q1 2019).
7. Rata-rata Rating untuk seluruh kota adalah 6.973.
1. Gross Income Harian setiap kota selama Q1
# Use lubridate to derive the full month name from each transaction date.
sales$Month <- month(sales$Date, label = TRUE, abbr = FALSE)
# Total gross income per city per day. `.groups = "drop"` returns an
# ungrouped result, so summarise() no longer reports grouped output.
data_dailyP <- sales %>%
  group_by(City, Date) %>%
  summarise(Gross.income = sum(gross.income), .groups = "drop")
library(ggplot2)
# Line chart of daily gross income with one coloured line per city.
plot_dailyP <- data_dailyP %>%
  # Map y to the plotted value directly; the earlier base mapping of
  # y = City was always overridden by the line layer.
  ggplot(aes(x = Date, y = Gross.income, colour = City)) +
  geom_line() +
  labs(
    title = "Daily Gross Income of All City YTD Q1 2019",
    x = "Month",
    y = "Gross Income"
  ) +
  theme(
    legend.title = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 14),
    plot.title.position = "panel",
    plot.background = element_rect(fill = "moccasin"),
    legend.position = "top",
    panel.grid = element_blank()
  ) +
  # Lines are styled through the colour aesthetic, so the palette has to be
  # set on the colour scale (a fill scale has no effect on this plot).
  # Dark2 is used because pale qualitative palettes are hard to read as
  # thin lines.
  scale_colour_brewer(palette = "Dark2")
plot_dailyP
2. Gross Income setiap kota berdasarkan Product Line
# Bar chart of total gross income per city, stacked by product line
# (stacking is geom_col()'s default position).
plot_product_proportion <- sales %>%
  group_by(City, Product.line) %>%
  # Drop the grouping once the totals are computed.
  summarise(Gross.income = sum(gross.income), .groups = "drop") %>%
  ggplot(aes(x = Gross.income, y = City, fill = Product.line)) +
  # The layer inherits x / y / fill from ggplot(); no need to repeat them.
  geom_col(width = 0.6, colour = "black") +
  labs(
    title = "Gross Income based on Product Line",
    x = "Gross Income",
    y = "City"
  ) +
  theme(
    legend.title = element_blank(),
    plot.title = element_text(hjust = 0.5),
    panel.background = element_rect(fill = "grey"),
    plot.background = element_rect(fill = "moccasin"),
    legend.position = "top",
    panel.grid = element_blank()
  ) +
  scale_fill_brewer(palette = "Set3")
plot_product_proportion
3. Gross Income setiap kota berdasarkan Tipe Payment
# Bar chart of total gross income per city, stacked by payment method
# (stacking is geom_col()'s default position).
plot_paymn <- sales %>%
  group_by(Payment, City) %>%
  # Drop the grouping once the totals are computed.
  summarise(Gross.income = sum(gross.income), .groups = "drop") %>%
  ggplot(aes(x = Gross.income, y = City, fill = Payment)) +
  # The layer inherits x / y / fill from ggplot(); no need to repeat them.
  geom_col(width = 0.6, colour = "black") +
  labs(
    title = "Customer Behavior in Payment",
    x = "Gross Income",
    y = "City"
  ) +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5, size = 20),
    panel.background = element_rect(fill = "grey"),
    plot.background = element_rect(fill = "moccasin"),
    panel.grid = element_blank()
  ) +
  scale_fill_brewer(palette = "Spectral")
plot_paymn
4. Kota dengan rata-rata Rating tertinggi
# Vertical bar chart of the mean rating per city.
plot_Rat <- sales %>%
  group_by(City) %>%
  summarise(Mean.rating = mean(Rating)) %>%
  # City goes on x and the mean on y directly. The earlier version mapped
  # them the other way round and then undid that with coord_flip(), which
  # only works when geom_col() auto-detects a horizontal orientation.
  ggplot(aes(x = City, y = Mean.rating, fill = City)) +
  geom_col(width = 0.5, colour = "black") +
  scale_fill_brewer(palette = "Pastel1") +
  labs(
    title = "Rating per City",
    x = "City",
    y = "Rating"
  ) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5),
    panel.background = element_rect(fill = "grey"),
    plot.background = element_rect(fill = "moccasin"),
    panel.grid = element_blank()
  )
plot_Rat
5. Rata-rata Rating yang diperoleh setiap kota berdasarkan Gender Customer
# Vertical bar chart of the mean rating per city, with one dodged bar
# per customer gender.
plot_gR <- sales %>%
  group_by(City, Gender) %>%
  # Drop the grouping once the means are computed.
  summarise(Mean.rating = mean(Rating), .groups = "drop") %>%
  # City goes on x and the mean on y directly, instead of mapping them the
  # other way round and reversing that with coord_flip().
  ggplot(aes(x = City, y = Mean.rating, fill = Gender)) +
  geom_col(position = "dodge", width = 0.7, colour = "black") +
  labs(
    title = "Mean Rating Given per City",
    subtitle = "Based on Customer Gender",
    x = "City",
    y = "Rating"
  ) +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5, size = 20),
    plot.subtitle = element_text(hjust = 0.5, size = 12),
    plot.background = element_rect(fill = "moccasin"),
    panel.background = element_rect(fill = "grey"),
    panel.grid = element_blank()
  )
plot_gR
6. Rata-rata Rating setiap kota berdasarkan Product Line
# Vertical bar chart of the mean rating per city, with one dodged bar
# per product line.
plot_plR <- sales %>%
  group_by(City, Product.line) %>%
  # Drop the grouping once the means are computed.
  summarise(Mean.rating = mean(Rating), .groups = "drop") %>%
  # City goes on x and the mean on y directly, instead of mapping them the
  # other way round and reversing that with coord_flip().
  ggplot(aes(x = City, y = Mean.rating, fill = Product.line)) +
  geom_col(position = "dodge", width = 0.7, colour = "black") +
  labs(
    title = "Rating by Product Line purchased per City",
    x = "City",
    y = "Rating"
  ) +
  theme(
    legend.position = "top",
    legend.title = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 20),
    panel.background = element_rect(fill = "grey"),
    plot.background = element_rect(fill = "moccasin"),
    panel.grid = element_blank()
  ) +
  scale_fill_brewer(palette = "Set3")
plot_plR