knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

1. Tujuan

Bagaimana tren penjualan bulanan di 4 Regional Market dan bagaimana karakteristik Top 5 State dengan total penjualan tertinggi?

Analisis ini bertujuan untuk mengidentifikasi wilayah dengan performa terbaik, pola tren penjualan dari waktu ke waktu, serta hubungan antara penjualan dan keuntungan antar wilayah.

2. Persiapan Data

2.1 Load library

library(readxl)
library(dplyr)
library(ggplot2)
library(scales)
library(lubridate)

2.2 Import dataset

coffee <- read_excel("C:/Users/Asus/Downloads/STAT/SMT 4/SIM/1. Tugas SIM 2025B - Coffee Chain Datasets.xlsx", sheet = "data")

# Konversi kolom Date
coffee$Date <- as.Date(coffee$Date)
coffee$Year  <- year(coffee$Date)
coffee$Month <- month(coffee$Date)
coffee$YearMonth <- floor_date(coffee$Date, "month")

print(coffee)
## # A tibble: 4,248 × 23
##    `Area Code` Date       Market  `Market Size` Product           `Product Line`
##          <dbl> <date>     <chr>   <chr>         <chr>             <chr>         
##  1         719 2012-01-01 Central Major Market  Amaretto          Beans         
##  2         970 2012-01-01 Central Major Market  Colombian         Beans         
##  3         970 2012-01-01 Central Major Market  Decaf Irish Cream Beans         
##  4         303 2012-01-01 Central Major Market  Green Tea         Leaves        
##  5         303 2012-01-01 Central Major Market  Caffe Mocha       Beans         
##  6         720 2012-01-01 Central Major Market  Decaf Espresso    Beans         
##  7         970 2012-01-01 Central Major Market  Chamomile         Leaves        
##  8         719 2012-01-01 Central Major Market  Lemon             Leaves        
##  9         970 2012-01-01 Central Major Market  Mint              Leaves        
## 10         719 2012-01-01 Central Major Market  Darjeeling        Leaves        
## # ℹ 4,238 more rows
## # ℹ 17 more variables: `Product Type` <chr>, State <chr>, Type <chr>,
## #   `Budget COGS` <dbl>, `Budget Margin` <dbl>, `Budget Profit` <dbl>,
## #   `Budget Sales` <dbl>, COGS <dbl>, Inventory <dbl>, Margin <dbl>,
## #   Marketing <dbl>, Profit <dbl>, Sales <dbl>, `Total Expenses` <dbl>,
## #   Year <dbl>, Month <dbl>, YearMonth <date>

2.3 Deskripsi data

glimpse(coffee)
## Rows: 4,248
## Columns: 23
## $ `Area Code`      <dbl> 719, 970, 970, 303, 303, 720, 970, 719, 970, 719, 303…
## $ Date             <date> 2012-01-01, 2012-01-01, 2012-01-01, 2012-01-01, 2012…
## $ Market           <chr> "Central", "Central", "Central", "Central", "Central"…
## $ `Market Size`    <chr> "Major Market", "Major Market", "Major Market", "Majo…
## $ Product          <chr> "Amaretto", "Colombian", "Decaf Irish Cream", "Green …
## $ `Product Line`   <chr> "Beans", "Beans", "Beans", "Leaves", "Beans", "Beans"…
## $ `Product Type`   <chr> "Coffee", "Coffee", "Coffee", "Tea", "Espresso", "Esp…
## $ State            <chr> "Colorado", "Colorado", "Colorado", "Colorado", "Colo…
## $ Type             <chr> "Regular", "Regular", "Decaf", "Regular", "Regular", …
## $ `Budget COGS`    <dbl> 90, 80, 100, 30, 60, 80, 140, 50, 50, 40, 50, 150, 10…
## $ `Budget Margin`  <dbl> 130, 110, 140, 50, 90, 130, 160, 80, 70, 70, 70, 210,…
## $ `Budget Profit`  <dbl> 100, 80, 110, 30, 70, 80, 110, 20, 40, 20, 40, 130, 1…
## $ `Budget Sales`   <dbl> 220, 190, 240, 80, 150, 210, 300, 130, 120, 110, 120,…
## $ COGS             <dbl> 89, 83, 95, 44, 54, 72, 170, 63, 60, 58, 64, 144, 95,…
## $ Inventory        <dbl> 777, 623, 821, 623, 456, 558, 1091, 435, 336, 338, 96…
## $ Margin           <dbl> 130, 107, 139, 56, 80, 108, 171, 87, 80, 72, 76, 201,…
## $ Marketing        <dbl> 24, 27, 26, 14, 15, 23, 47, 57, 19, 22, 19, 47, 30, 7…
## $ Profit           <dbl> 94, 68, 101, 30, 54, 53, 99, 0, 33, 17, 36, 111, 87, …
## $ Sales            <dbl> 219, 190, 234, 100, 134, 180, 341, 150, 140, 130, 140…
## $ `Total Expenses` <dbl> 36, 39, 38, 26, 26, 55, 72, 87, 47, 55, 40, 90, 52, 1…
## $ Year             <dbl> 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012,…
## $ Month            <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ YearMonth        <date> 2012-01-01, 2012-01-01, 2012-01-01, 2012-01-01, 2012…
summary(coffee[, c("Sales", "Profit", "COGS", "Marketing")])
##      Sales         Profit            COGS          Marketing     
##  Min.   : 17   Min.   :-638.0   Min.   :  0.00   Min.   :  0.00  
##  1st Qu.:100   1st Qu.:  17.0   1st Qu.: 43.00   1st Qu.: 13.00  
##  Median :138   Median :  40.0   Median : 60.00   Median : 22.00  
##  Mean   :193   Mean   :  61.1   Mean   : 84.43   Mean   : 31.19  
##  3rd Qu.:230   3rd Qu.:  92.0   3rd Qu.:100.00   3rd Qu.: 39.00  
##  Max.   :912   Max.   : 778.0   Max.   :364.00   Max.   :156.00

Variabel utama yang digunakan dalam analisis ini adalah Sales, Profit, Market (regional), dan State.

3. Analisis dan Visualisasi

3.1 Total penjualan per regional market

market_sales <- coffee %>%
  group_by(Market) %>%
  summarise(
    Total_Sales  = sum(Sales),
    Total_Profit = sum(Profit),
    Profit_Margin = round(sum(Profit) / sum(Sales) * 100, 1)
  ) %>%
  arrange(desc(Total_Sales))

market_sales
## # A tibble: 4 × 4
##   Market  Total_Sales Total_Profit Profit_Margin
##   <chr>         <dbl>        <dbl>         <dbl>
## 1 West         272264        73996          27.2
## 2 Central      265045        93852          35.4
## 3 East         178576        59217          33.2
## 4 South        103926        32478          31.3
ggplot(market_sales, aes(x = reorder(Market, Total_Sales), y = Total_Sales, fill = Market)) +
  geom_col(width = 0.6, show.legend = FALSE) +
  geom_text(aes(label = comma(Total_Sales)), hjust = -0.1, size = 3.8) +
  coord_flip() +
  scale_y_continuous(labels = comma, expand = expansion(mult = c(0, 0.15))) +
  scale_fill_manual(values = c("West" = "#6F4E37", "Central" = "#C4813A",
                                "East"  = "#A0522D", "South"   = "#D2B48C")) +
  labs(
    title    = "Total Penjualan per Regional Market (2012–2013)",
    subtitle = "West dan Central mendominasi penjualan coffee chain",
    x = "Regional Market",
    y = "Total Sales"
  ) +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold"))

Interpretasi: Wilayah West memimpin dengan total penjualan tertinggi sebesar 272.264, diikuti Central (265.045). Kedua wilayah ini jauh melampaui East dan South yang menunjukkan konsentrasi pasar yang kuat di bagian barat dan tengah Amerika Serikat.

3.2 Tren Penjualan Bulanan per Regional Market

monthly_trend <- coffee %>%
  group_by(YearMonth, Market) %>%
  summarise(Total_Sales = sum(Sales), .groups = "drop")
ggplot(monthly_trend, aes(x = YearMonth, y = Total_Sales, color = Market, group = Market)) +
  geom_line(linewidth = 1) +
  geom_point(size = 1.5) +
  scale_x_date(date_labels = "%b %Y", date_breaks = "3 months") +
  scale_y_continuous(labels = comma) +
  scale_color_manual(values = c("West" = "#6F4E37", "Central" = "#C4813A",
                                 "East"  = "#A0522D", "South"   = "#D2B48C")) +
  labs(
    title    = "Tren Penjualan Bulanan per Regional Market (2012–2013)",
    subtitle = "Pola fluktuasi penjualan selama 24 bulan",
    x = "Bulan",
    y = "Total Sales",
    color = "Market"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title   = element_text(face = "bold"),
    axis.text.x  = element_text(angle = 45, hjust = 1),
    legend.position = "top"
  )

Interpretasi: Tren penjualan keempat wilayah menunjukkan pola yang relatif stabil sepanjang 2012–2013. West dan Central secara konsisten berada di posisi teratas setiap bulannya dan tidak tampak adanya lonjakan musiman yang ekstrem, namun terdapat sedikit fluktuasi yang perlu diperhatikan terutama pada pertengahan tahun.

3.3 Top 5 State dengan Penjualan Tertinggi

top5_state <- coffee %>%
  group_by(State) %>%
  summarise(
    Total_Sales  = sum(Sales),
    Total_Profit = sum(Profit),
    Profit_Margin = round(sum(Profit) / sum(Sales) * 100, 1)
  ) %>%
  arrange(desc(Total_Sales)) %>%
  slice_head(n = 5)

top5_state
## # A tibble: 5 × 4
##   State      Total_Sales Total_Profit Profit_Margin
##   <chr>            <dbl>        <dbl>         <dbl>
## 1 California       96892        31785          32.8
## 2 New York         70852        20096          28.4
## 3 Illinois         69883        30821          44.1
## 4 Nevada           60159        10616          17.6
## 5 Iowa             54750        22212          40.6
ggplot(top5_state, aes(x = reorder(State, Total_Sales), y = Total_Sales, fill = State)) +
  geom_col(width = 0.6, show.legend = FALSE) +
  geom_text(aes(label = comma(Total_Sales)), hjust = -0.1, size = 3.8) +
  coord_flip() +
  scale_y_continuous(labels = comma, expand = expansion(mult = c(0, 0.18))) +
  scale_fill_brewer(palette = "YlOrBr") +
  labs(
    title    = "Top 5 State dengan Penjualan Tertinggi",
    subtitle = "California memimpin jauh di atas negara bagian lainnya",
    x = "State",
    y = "Total Sales"
  ) +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold"))

Interpretasi: California menduduki posisi pertama dengan total penjualan 96.892, hampir 37% lebih tinggi dari New York di posisi kedua (70.852). Lalu diikuti Illinois, Nevada, dan Iowa yang melengkapi 5 besar. Kelima negara bagian ini berkontribusi signifikan terhadap total penjualan keseluruhan.

3.4 Hubungan Sales dan Profit per Market

market_detail <- coffee %>%
  group_by(Market, State) %>%
  summarise(
    Total_Sales  = sum(Sales),
    Total_Profit = sum(Profit),
    .groups = "drop"
  )

ggplot(market_detail, aes(x = Total_Sales, y = Total_Profit, color = Market)) +
  geom_point(size = 3, alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 0.8) +
  scale_x_continuous(labels = comma) +
  scale_y_continuous(labels = comma) +
  scale_color_manual(values = c("West" = "#6F4E37", "Central" = "#C4813A",
                                 "East"  = "#A0522D", "South"   = "#D2B48C")) +
  labs(
    title    = "Hubungan antara Total Sales dan Total Profit per State",
    subtitle = "Dikelompokkan berdasarkan Regional Market",
    x = "Total Sales",
    y = "Total Profit",
    color = "Market"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = "top"
  )

cor_result <- cor.test(coffee$Sales, coffee$Profit)
cat("Korelasi Sales vs Profit:", round(cor_result$estimate, 3),
    "\np-value:", round(cor_result$p.value, 5))
## Korelasi Sales vs Profit: 0.797 
## p-value: 0

Interpretasi: Terdapat korelasi positif yang kuat antara penjualan dan keuntungan (r ≈ 0.9, p < 0.05). Artinya, semakin tinggi penjualan di suatu wilayah, maka semakin tinggi pula keuntungan yang dihasilkan. Hal ini konsisten di semua regional market.

4. Kesimpulan

Berdasarkan analisis tren penjualan Coffee Chain Dataset 2012–2013, diperoleh beberapa temuan utama: 1. West dan Central adalah dua regional market dengan penjualan tertinggi yang secara konsisten mendominasi setiap bulannya. 2. Tren penjualan bulanan relatif stabil sepanjang periode pengamatan tanpa lonjakan musiman yang signifikan, menunjukkan permintaan yang stabil. 3. California adalah negara bagian dengan penjualan tertinggi (96.892), diikuti New York dan Illinois yang mencerminkan konsentrasi pasar di kota-kota besar. 4. Terdapat korelasi positif yang sangat kuat antara penjualan dan profit, mengindikasikan adanya efisiensi operasional yang baik di seluruh wilayah.