#LBB 2 - Data Visualization

Read and Inspect Data

# Load the supermarket sales dataset
# (source: https://www.kaggle.com/aungpyaeap/supermarket-sales)
# and print the type of every column.
sales <- read.csv(file = "supermarket_sales - Sheet1.csv")
str(object = sales)
## 'data.frame':    1000 obs. of  17 variables:
##  $ Invoice.ID             : chr  "750-67-8428" "226-31-3081" "631-41-3108" "123-19-1176" ...
##  $ Branch                 : chr  "A" "C" "A" "A" ...
##  $ City                   : chr  "Yangon" "Naypyitaw" "Yangon" "Yangon" ...
##  $ Customer.type          : chr  "Member" "Normal" "Normal" "Member" ...
##  $ Gender                 : chr  "Female" "Female" "Male" "Male" ...
##  $ Product.line           : chr  "Health and beauty" "Electronic accessories" "Home and lifestyle" "Health and beauty" ...
##  $ Unit.price             : num  74.7 15.3 46.3 58.2 86.3 ...
##  $ Quantity               : int  7 5 7 8 7 7 6 10 2 3 ...
##  $ Tax.5.                 : num  26.14 3.82 16.22 23.29 30.21 ...
##  $ Total                  : num  549 80.2 340.5 489 634.4 ...
##  $ Date                   : chr  "1/5/2019" "3/8/2019" "3/3/2019" "1/27/2019" ...
##  $ Time                   : chr  "13:08" "10:29" "13:23" "20:33" ...
##  $ Payment                : chr  "Ewallet" "Cash" "Credit card" "Ewallet" ...
##  $ cogs                   : num  522.8 76.4 324.3 465.8 604.2 ...
##  $ gross.margin.percentage: num  4.76 4.76 4.76 4.76 4.76 ...
##  $ gross.income           : num  26.14 3.82 16.22 23.29 30.21 ...
##  $ Rating                 : num  9.1 9.6 7.4 8.4 5.3 4.1 5.8 8 7.2 5.9 ...

Data Inspection and Missing-Value Treatment

library(dplyr)
library(lubridate)

# Convert the categorical text columns to factors and parse the
# month/day/year strings in `Date` into Date values.
sales <- sales %>%
  mutate(
    across(
      c(Branch, City, Customer.type, Gender, Product.line, Payment),
      as.factor
    ),
    Date = mdy(Date)
  )

# Number of missing values in each column.
colSums(is.na(sales))
##              Invoice.ID                  Branch                    City 
##                       0                       0                       0 
##           Customer.type                  Gender            Product.line 
##                       0                       0                       0 
##              Unit.price                Quantity                  Tax.5. 
##                       0                       0                       0 
##                   Total                    Date                    Time 
##                       0                       0                       0 
##                 Payment                    cogs gross.margin.percentage 
##                       0                       0                       0 
##            gross.income                  Rating 
##                       0                       0

Data Explanation

# Per-column overview: level counts for the factor columns and
# min / quartiles / mean / max for the numeric and date columns.
summary(sales)
##   Invoice.ID        Branch         City     Customer.type    Gender   
##  Length:1000        A:340   Mandalay :332   Member:501    Female:501  
##  Class :character   B:332   Naypyitaw:328   Normal:499    Male  :499  
##  Mode  :character   C:328   Yangon   :340                             
##                                                                       
##                                                                       
##                                                                       
##                  Product.line   Unit.price       Quantity         Tax.5.       
##  Electronic accessories:170   Min.   :10.08   Min.   : 1.00   Min.   : 0.5085  
##  Fashion accessories   :178   1st Qu.:32.88   1st Qu.: 3.00   1st Qu.: 5.9249  
##  Food and beverages    :174   Median :55.23   Median : 5.00   Median :12.0880  
##  Health and beauty     :152   Mean   :55.67   Mean   : 5.51   Mean   :15.3794  
##  Home and lifestyle    :160   3rd Qu.:77.94   3rd Qu.: 8.00   3rd Qu.:22.4453  
##  Sports and travel     :166   Max.   :99.96   Max.   :10.00   Max.   :49.6500  
##      Total              Date                Time                  Payment   
##  Min.   :  10.68   Min.   :2019-01-01   Length:1000        Cash       :344  
##  1st Qu.: 124.42   1st Qu.:2019-01-24   Class :character   Credit card:311  
##  Median : 253.85   Median :2019-02-13   Mode  :character   Ewallet    :345  
##  Mean   : 322.97   Mean   :2019-02-14                                       
##  3rd Qu.: 471.35   3rd Qu.:2019-03-08                                       
##  Max.   :1042.65   Max.   :2019-03-30                                       
##       cogs        gross.margin.percentage  gross.income         Rating      
##  Min.   : 10.17   Min.   :4.762           Min.   : 0.5085   Min.   : 4.000  
##  1st Qu.:118.50   1st Qu.:4.762           1st Qu.: 5.9249   1st Qu.: 5.500  
##  Median :241.76   Median :4.762           Median :12.0880   Median : 7.000  
##  Mean   :307.59   Mean   :4.762           Mean   :15.3794   Mean   : 6.973  
##  3rd Qu.:448.90   3rd Qu.:4.762           3rd Qu.:22.4453   3rd Qu.: 8.500  
##  Max.   :993.00   Max.   :4.762           Max.   :49.6500   Max.   :10.000

Summary:
1. Branch A, B, dan C masing-masing berlokasi di kota Yangon, Mandalay, dan Naypyitaw.
2. Terdapat 6 kategori produk yang dijual.
3. Rata-rata Gross Income adalah sebesar 15.3794.
4. Total Sales terendah adalah sebesar 10.68 dengan Gross Income sebesar 0.5085.
5. Total Sales tertinggi adalah sebesar 1042.65 dengan Gross Income sebesar 49.6500.
6. Data observasi diambil pada rentang waktu 1 Januari 2019 hingga 30 Maret 2019 (Q1 2019).
7. Rata-rata Rating untuk seluruh kota adalah 6.973.

Data Pre-Processing and Business Question

1. Gross Income Harian setiap kota selama Q1

# Use lubridate to derive the full month name of each sale date.
# (The column is stored on `sales`; the daily plot below does not use it.)
sales$Month <- month(sales$Date, label = TRUE, abbr = FALSE)

# Total gross income per city per day.
# `.groups = "drop_last"` is what summarise() does by default here (the result
# stays grouped by City); stating it explicitly silences the grouping message.
data_dailyP <- sales %>%
  group_by(City, Date) %>%
  summarise(Gross.income = sum(gross.income), .groups = "drop_last")

library(ggplot2)

# One line per city showing daily gross income over the observed period.
# The y aesthetic is the summed income (not City), and City is mapped to
# colour once, at the plot level.
plot_dailyP <- data_dailyP %>%
  ggplot(aes(x = Date, y = Gross.income, colour = City)) +
  geom_line() +
  # geom_line() only uses the colour aesthetic, so a *colour* scale is needed
  # for the Brewer palette to take effect (a fill scale is silently ignored).
  # "Dark2" is used because the pale "Set3" colours are hard to read as thin
  # lines on a light panel.
  scale_colour_brewer(palette = "Dark2") +
  labs(title = "Daily Gross Income of All City YTD Q1 2019",
       x = "Month",
       y = "Gross Income") +
  theme(legend.title = element_blank(),
        plot.title = element_text(hjust = 0.5, size = 14),
        plot.title.position = "panel",
        plot.background = element_rect(fill = "moccasin"),
        legend.position = "top",
        panel.grid = element_blank())
plot_dailyP

2. Gross Income setiap kota berdasarkan Product Line

# Stacked bars: total gross income per city, split by product line.
plot_product_proportion <- sales %>%
  group_by(City, Product.line) %>%
  summarise(Gross.income = sum(gross.income)) %>%
  ggplot(mapping = aes(x = Gross.income, y = City, fill = Product.line)) +
  geom_col(colour = "black", width = 0.6) +
  scale_fill_brewer(palette = "Set3") +
  labs(
    title = "Gross Income based on Product Line",
    x = "Gross Income",
    y = "City"
  ) +
  theme(
    legend.position = "top",
    legend.title = element_blank(),
    plot.title = element_text(hjust = 0.5),
    plot.background = element_rect(fill = "moccasin"),
    panel.background = element_rect(fill = "grey"),
    panel.grid = element_blank()
  )

plot_product_proportion

3. Gross Income setiap kota berdasarkan Tipe Payment

# Stacked bars: total gross income per city, split by payment method.
plot_paymn <- sales %>%
  group_by(Payment, City) %>%
  summarise(Gross.income = sum(gross.income)) %>%
  ggplot(mapping = aes(x = Gross.income, y = City, fill = Payment)) +
  geom_col(colour = "black", width = 0.6) +
  scale_fill_brewer(palette = "Spectral") +
  labs(
    title = "Customer Behavior in Payment",
    x = "Gross Income",
    y = "City"
  ) +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5, size = 20),
    plot.background = element_rect(fill = "moccasin"),
    panel.background = element_rect(fill = "grey"),
    panel.grid = element_blank()
  )
plot_paymn

4. City dengan rata-rata Rating tertinggi

# Mean customer rating per city, one bar per city.
# The bars are built with the rating on x and then flipped, so cities end up
# on the horizontal axis and the rating on the vertical axis.
plot_Rat <- sales %>%
  group_by(City) %>%
  summarise(Mean.rating = mean(Rating)) %>%
  ggplot(mapping = aes(x = Mean.rating, y = City, fill = City)) +
  geom_col(colour = "black", width = 0.5) +
  coord_flip() +
  scale_fill_brewer(palette = "Pastel1") +
  labs(
    title = "Rating per City",
    x = "Rating",
    y = "City"
  ) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5),
    plot.background = element_rect(fill = "moccasin"),
    panel.background = element_rect(fill = "grey"),
    panel.grid = element_blank()
  )
plot_Rat

5. Rata-rata Rating yang diperoleh setiap kota berdasarkan Gender Customer

# Mean customer rating per city, with side-by-side bars for each gender.
# Rating is mapped to x and the coordinates are flipped afterwards, so the
# rating is read on the vertical axis.
plot_gR <- sales %>%
  group_by(City, Gender) %>%
  summarise(Mean.rating = mean(Rating)) %>%
  ggplot(mapping = aes(x = Mean.rating, y = City, fill = Gender)) +
  geom_col(position = "dodge", colour = "black", width = 0.7) +
  coord_flip() +
  labs(
    title = "Mean Rating Given per City",
    subtitle = "Based on Customer Gender",
    x = "Rating",
    y = "City"
  ) +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5, size = 20),
    plot.subtitle = element_text(hjust = 0.5, size = 12),
    panel.background = element_rect(fill = "grey"),
    plot.background = element_rect(fill = "moccasin"),
    panel.grid = element_blank()
  )
plot_gR

6. Rata-rata Rating setiap kota berdasarkan Product Line

# Mean customer rating per city, with side-by-side bars for each product line.
# Rating is mapped to x and the coordinates are flipped afterwards, so the
# rating is read on the vertical axis.
plot_plR <- sales %>%
  group_by(City, Product.line) %>%
  summarise(Mean.rating = mean(Rating)) %>%
  ggplot(mapping = aes(x = Mean.rating, y = City, fill = Product.line)) +
  geom_col(position = "dodge", colour = "black", width = 0.7) +
  coord_flip() +
  scale_fill_brewer(palette = "Set3") +
  labs(
    title = "Rating by Product Line purchased per City",
    x = "Rating",
    y = "City"
  ) +
  theme(
    legend.title = element_blank(),
    legend.position = "top",
    plot.title = element_text(hjust = 0.5, size = 20),
    plot.background = element_rect(fill = "moccasin"),
    panel.background = element_rect(fill = "grey"),
    panel.grid = element_blank()
  )
plot_plR