Pendahuluan

Analisis ini menggunakan dataset Sample Superstore untuk melihat pola penjualan berdasarkan tahun, region, dan kategori produk. Visualisasi dibuat menggunakan R Markdown agar hasil analisis dapat ditampilkan dalam bentuk dokumen HTML.

Import Library

library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(readr)
## Warning: package 'readr' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.4.3
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

Import Data

data_superstore <- read_excel("C:/Users/fafa/Downloads/Sample_Superstore.xlsx")

head(data_superstore)
## # A tibble: 6 × 21
##   `Row ID` `Order ID`     `Order Date` `Ship Date` `Ship Mode`    `Customer ID`
##   <chr>    <chr>          <chr>        <chr>       <chr>          <chr>        
## 1 1        CA-2016-152156 11/8/2016    11/11/2016  Second Class   CG-12520     
## 2 2        CA-2016-152156 11/8/2016    11/11/2016  Second Class   CG-12520     
## 3 3        CA-2016-138688 6/12/2016    6/16/2016   Second Class   DV-13045     
## 4 4        US-2015-108966 10/11/2015   10/18/2015  Standard Class SO-20335     
## 5 5        US-2015-108966 10/11/2015   10/18/2015  Standard Class SO-20335     
## 6 6        CA-2014-115812 6/9/2014     6/14/2014   Standard Class BH-11710     
## # ℹ 15 more variables: `Customer Name` <chr>, Segment <chr>, Country <chr>,
## #   City <chr>, State <chr>, `Postal Code` <chr>, Region <chr>,
## #   `Product ID` <chr>, Category <chr>, `Sub-Category` <chr>,
## #   `Product Name` <chr>, Sales <chr>, Quantity <chr>, Discount <chr>,
## #   Profit <chr>
str(data_superstore)
## tibble [9,994 × 21] (S3: tbl_df/tbl/data.frame)
##  $ Row ID       : chr [1:9994] "1" "2" "3" "4" ...
##  $ Order ID     : chr [1:9994] "CA-2016-152156" "CA-2016-152156" "CA-2016-138688" "US-2015-108966" ...
##  $ Order Date   : chr [1:9994] "11/8/2016" "11/8/2016" "6/12/2016" "10/11/2015" ...
##  $ Ship Date    : chr [1:9994] "11/11/2016" "11/11/2016" "6/16/2016" "10/18/2015" ...
##  $ Ship Mode    : chr [1:9994] "Second Class" "Second Class" "Second Class" "Standard Class" ...
##  $ Customer ID  : chr [1:9994] "CG-12520" "CG-12520" "DV-13045" "SO-20335" ...
##  $ Customer Name: chr [1:9994] "Claire Gute" "Claire Gute" "Darrin Van Huff" "Sean O'Donnell" ...
##  $ Segment      : chr [1:9994] "Consumer" "Consumer" "Corporate" "Consumer" ...
##  $ Country      : chr [1:9994] "United States" "United States" "United States" "United States" ...
##  $ City         : chr [1:9994] "Henderson" "Henderson" "Los Angeles" "Fort Lauderdale" ...
##  $ State        : chr [1:9994] "Kentucky" "Kentucky" "California" "Florida" ...
##  $ Postal Code  : chr [1:9994] "42420" "42420" "90036" "33311" ...
##  $ Region       : chr [1:9994] "South" "South" "West" "South" ...
##  $ Product ID   : chr [1:9994] "FUR-BO-10001798" "FUR-CH-10000454" "OFF-LA-10000240" "FUR-TA-10000577" ...
##  $ Category     : chr [1:9994] "Furniture" "Furniture" "Office Supplies" "Furniture" ...
##  $ Sub-Category : chr [1:9994] "Bookcases" "Chairs" "Labels" "Tables" ...
##  $ Product Name : chr [1:9994] "Bush Somerset Collection Bookcase" "Hon Deluxe Fabric Upholstered Stacking Chairs, Rounded Back" "Self-Adhesive Address Labels for Typewriters by Universal" "Bretford CR4500 Series Slim Rectangular Table" ...
##  $ Sales        : chr [1:9994] "261.96" "731.94" "14.62" "957.5775" ...
##  $ Quantity     : chr [1:9994] "2" "3" "2" "5" ...
##  $ Discount     : chr [1:9994] "0" "0" "0" "0.45" ...
##  $ Profit       : chr [1:9994] "41.9136" "219.582" "6.8714" "-383.031" ...
names(data_superstore)
##  [1] "Row ID"        "Order ID"      "Order Date"    "Ship Date"    
##  [5] "Ship Mode"     "Customer ID"   "Customer Name" "Segment"      
##  [9] "Country"       "City"          "State"         "Postal Code"  
## [13] "Region"        "Product ID"    "Category"      "Sub-Category" 
## [17] "Product Name"  "Sales"         "Quantity"      "Discount"     
## [21] "Profit"

Preprocessing Data

data_superstore$Sales <- parse_number(as.character(data_superstore$Sales))
data_superstore$Profit <- parse_number(as.character(data_superstore$Profit))

data_superstore$`Order Date` <- parse_date_time(data_superstore$`Order Date`,
                                                orders = c("mdy", "dmy", "ymd"))

data_superstore$Year <- year(data_superstore$`Order Date`)

head(data_superstore)
## # A tibble: 6 × 22
##   `Row ID` `Order ID`  `Order Date`        `Ship Date` `Ship Mode` `Customer ID`
##   <chr>    <chr>       <dttm>              <chr>       <chr>       <chr>        
## 1 1        CA-2016-15… 2016-11-08 00:00:00 11/11/2016  Second Cla… CG-12520     
## 2 2        CA-2016-15… 2016-11-08 00:00:00 11/11/2016  Second Cla… CG-12520     
## 3 3        CA-2016-13… 2016-06-12 00:00:00 6/16/2016   Second Cla… DV-13045     
## 4 4        US-2015-10… 2015-10-11 00:00:00 10/18/2015  Standard C… SO-20335     
## 5 5        US-2015-10… 2015-10-11 00:00:00 10/18/2015  Standard C… SO-20335     
## 6 6        CA-2014-11… 2014-06-09 00:00:00 6/14/2014   Standard C… BH-11710     
## # ℹ 16 more variables: `Customer Name` <chr>, Segment <chr>, Country <chr>,
## #   City <chr>, State <chr>, `Postal Code` <chr>, Region <chr>,
## #   `Product ID` <chr>, Category <chr>, `Sub-Category` <chr>,
## #   `Product Name` <chr>, Sales <dbl>, Quantity <chr>, Discount <chr>,
## #   Profit <dbl>, Year <dbl>
str(data_superstore)
## tibble [9,994 × 22] (S3: tbl_df/tbl/data.frame)
##  $ Row ID       : chr [1:9994] "1" "2" "3" "4" ...
##  $ Order ID     : chr [1:9994] "CA-2016-152156" "CA-2016-152156" "CA-2016-138688" "US-2015-108966" ...
##  $ Order Date   : POSIXct[1:9994], format: "2016-11-08" "2016-11-08" ...
##  $ Ship Date    : chr [1:9994] "11/11/2016" "11/11/2016" "6/16/2016" "10/18/2015" ...
##  $ Ship Mode    : chr [1:9994] "Second Class" "Second Class" "Second Class" "Standard Class" ...
##  $ Customer ID  : chr [1:9994] "CG-12520" "CG-12520" "DV-13045" "SO-20335" ...
##  $ Customer Name: chr [1:9994] "Claire Gute" "Claire Gute" "Darrin Van Huff" "Sean O'Donnell" ...
##  $ Segment      : chr [1:9994] "Consumer" "Consumer" "Corporate" "Consumer" ...
##  $ Country      : chr [1:9994] "United States" "United States" "United States" "United States" ...
##  $ City         : chr [1:9994] "Henderson" "Henderson" "Los Angeles" "Fort Lauderdale" ...
##  $ State        : chr [1:9994] "Kentucky" "Kentucky" "California" "Florida" ...
##  $ Postal Code  : chr [1:9994] "42420" "42420" "90036" "33311" ...
##  $ Region       : chr [1:9994] "South" "South" "West" "South" ...
##  $ Product ID   : chr [1:9994] "FUR-BO-10001798" "FUR-CH-10000454" "OFF-LA-10000240" "FUR-TA-10000577" ...
##  $ Category     : chr [1:9994] "Furniture" "Furniture" "Office Supplies" "Furniture" ...
##  $ Sub-Category : chr [1:9994] "Bookcases" "Chairs" "Labels" "Tables" ...
##  $ Product Name : chr [1:9994] "Bush Somerset Collection Bookcase" "Hon Deluxe Fabric Upholstered Stacking Chairs, Rounded Back" "Self-Adhesive Address Labels for Typewriters by Universal" "Bretford CR4500 Series Slim Rectangular Table" ...
##  $ Sales        : num [1:9994] 262 731.9 14.6 957.6 22.4 ...
##  $ Quantity     : chr [1:9994] "2" "3" "2" "5" ...
##  $ Discount     : chr [1:9994] "0" "0" "0" "0.45" ...
##  $ Profit       : num [1:9994] 41.91 219.58 6.87 -383.03 2.52 ...
##  $ Year         : num [1:9994] 2016 2016 2016 2015 2015 ...

Visualisasi 1: Trend Sales by Region

sales_year <- data_superstore %>%
  group_by(Year, Region) %>%
  summarise(total_sales = sum(Sales, na.rm = TRUE),
            .groups = "drop")

ggplot(sales_year,
       aes(x = Year,
           y = total_sales,
           color = Region,
           group = Region)) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  labs(title = "Trend Sales by Region",
       x = "Tahun",
       y = "Total Sales",
       color = "Region") +
  theme_minimal()

Visualisasi 2: Sales per Category

sales_category <- data_superstore %>%
  group_by(Category) %>%
  summarise(total_sales = sum(Sales, na.rm = TRUE),
            .groups = "drop")

ggplot(sales_category,
       aes(x = reorder(Category, total_sales),
           y = total_sales,
           fill = Category)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(title = "Sales per Category",
       x = "Category",
       y = "Total Sales") +
  theme_minimal()

Visualisasi 3: Profit per Sub-Category

profit_subcategory <- data_superstore %>%
  group_by(`Sub-Category`) %>%
  summarise(total_profit = sum(Profit, na.rm = TRUE),
            .groups = "drop")

ggplot(profit_subcategory,
       aes(x = reorder(`Sub-Category`, total_profit),
           y = total_profit,
           fill = total_profit)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(title = "Profit per Sub-Category",
       x = "Sub-Category",
       y = "Total Profit") +
  theme_minimal()

Interpretasi

Berdasarkan grafik trend sales by region, penjualan pada beberapa region cenderung mengalami peningkatan dari tahun ke tahun. Region tertentu memiliki total penjualan yang lebih tinggi dibandingkan region lainnya, sehingga dapat menjadi wilayah yang memberikan kontribusi besar terhadap total sales.

Pada grafik sales per category, kategori Technology memiliki total sales paling tinggi dibandingkan kategori Furniture dan Office Supplies. Hal ini menunjukkan bahwa produk teknologi menjadi kategori yang paling dominan dalam penjualan.

Pada grafik profit per sub-category, terlihat bahwa beberapa sub-category memberikan profit yang tinggi, sedangkan beberapa sub-category lainnya memiliki profit rendah bahkan negatif. Hal ini menunjukkan bahwa tidak semua produk dengan penjualan tinggi selalu memberikan keuntungan yang besar.

Kesimpulan

Secara keseluruhan, hasil visualisasi menunjukkan bahwa penjualan pada Superstore berbeda-beda pada setiap region, category, maupun sub-category. Analisis ini dapat digunakan untuk mengetahui wilayah dan jenis produk yang memiliki penjualan terbaik, serta membantu mengidentifikasi produk yang performanya masih kurang dan perlu diperhatikan lebih lanjut.