knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
Bagaimana tren penjualan bulanan di 4 Regional Market dan bagaimana karakteristik Top 5 State dengan total penjualan tertinggi?
Analisis ini bertujuan untuk mengidentifikasi wilayah dengan performa terbaik, pola tren penjualan dari waktu ke waktu, serta hubungan antara penjualan dan keuntungan antar wilayah.
library(readxl)
library(dplyr)
library(ggplot2)
library(scales)
library(lubridate)
coffee <- read_excel("C:/Users/Asus/Downloads/STAT/SMT 4/SIM/1. Tugas SIM 2025B - Coffee Chain Datasets.xlsx", sheet = "data")
# Konversi kolom Date
coffee$Date <- as.Date(coffee$Date)
coffee$Year <- year(coffee$Date)
coffee$Month <- month(coffee$Date)
coffee$YearMonth <- floor_date(coffee$Date, "month")
print(coffee)
## # A tibble: 4,248 × 23
## `Area Code` Date Market `Market Size` Product `Product Line`
## <dbl> <date> <chr> <chr> <chr> <chr>
## 1 719 2012-01-01 Central Major Market Amaretto Beans
## 2 970 2012-01-01 Central Major Market Colombian Beans
## 3 970 2012-01-01 Central Major Market Decaf Irish Cream Beans
## 4 303 2012-01-01 Central Major Market Green Tea Leaves
## 5 303 2012-01-01 Central Major Market Caffe Mocha Beans
## 6 720 2012-01-01 Central Major Market Decaf Espresso Beans
## 7 970 2012-01-01 Central Major Market Chamomile Leaves
## 8 719 2012-01-01 Central Major Market Lemon Leaves
## 9 970 2012-01-01 Central Major Market Mint Leaves
## 10 719 2012-01-01 Central Major Market Darjeeling Leaves
## # ℹ 4,238 more rows
## # ℹ 17 more variables: `Product Type` <chr>, State <chr>, Type <chr>,
## # `Budget COGS` <dbl>, `Budget Margin` <dbl>, `Budget Profit` <dbl>,
## # `Budget Sales` <dbl>, COGS <dbl>, Inventory <dbl>, Margin <dbl>,
## # Marketing <dbl>, Profit <dbl>, Sales <dbl>, `Total Expenses` <dbl>,
## # Year <dbl>, Month <dbl>, YearMonth <date>
glimpse(coffee)
## Rows: 4,248
## Columns: 23
## $ `Area Code` <dbl> 719, 970, 970, 303, 303, 720, 970, 719, 970, 719, 303…
## $ Date <date> 2012-01-01, 2012-01-01, 2012-01-01, 2012-01-01, 2012…
## $ Market <chr> "Central", "Central", "Central", "Central", "Central"…
## $ `Market Size` <chr> "Major Market", "Major Market", "Major Market", "Majo…
## $ Product <chr> "Amaretto", "Colombian", "Decaf Irish Cream", "Green …
## $ `Product Line` <chr> "Beans", "Beans", "Beans", "Leaves", "Beans", "Beans"…
## $ `Product Type` <chr> "Coffee", "Coffee", "Coffee", "Tea", "Espresso", "Esp…
## $ State <chr> "Colorado", "Colorado", "Colorado", "Colorado", "Colo…
## $ Type <chr> "Regular", "Regular", "Decaf", "Regular", "Regular", …
## $ `Budget COGS` <dbl> 90, 80, 100, 30, 60, 80, 140, 50, 50, 40, 50, 150, 10…
## $ `Budget Margin` <dbl> 130, 110, 140, 50, 90, 130, 160, 80, 70, 70, 70, 210,…
## $ `Budget Profit` <dbl> 100, 80, 110, 30, 70, 80, 110, 20, 40, 20, 40, 130, 1…
## $ `Budget Sales` <dbl> 220, 190, 240, 80, 150, 210, 300, 130, 120, 110, 120,…
## $ COGS <dbl> 89, 83, 95, 44, 54, 72, 170, 63, 60, 58, 64, 144, 95,…
## $ Inventory <dbl> 777, 623, 821, 623, 456, 558, 1091, 435, 336, 338, 96…
## $ Margin <dbl> 130, 107, 139, 56, 80, 108, 171, 87, 80, 72, 76, 201,…
## $ Marketing <dbl> 24, 27, 26, 14, 15, 23, 47, 57, 19, 22, 19, 47, 30, 7…
## $ Profit <dbl> 94, 68, 101, 30, 54, 53, 99, 0, 33, 17, 36, 111, 87, …
## $ Sales <dbl> 219, 190, 234, 100, 134, 180, 341, 150, 140, 130, 140…
## $ `Total Expenses` <dbl> 36, 39, 38, 26, 26, 55, 72, 87, 47, 55, 40, 90, 52, 1…
## $ Year <dbl> 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012,…
## $ Month <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ YearMonth <date> 2012-01-01, 2012-01-01, 2012-01-01, 2012-01-01, 2012…
summary(coffee[, c("Sales", "Profit", "COGS", "Marketing")])
## Sales Profit COGS Marketing
## Min. : 17 Min. :-638.0 Min. : 0.00 Min. : 0.00
## 1st Qu.:100 1st Qu.: 17.0 1st Qu.: 43.00 1st Qu.: 13.00
## Median :138 Median : 40.0 Median : 60.00 Median : 22.00
## Mean :193 Mean : 61.1 Mean : 84.43 Mean : 31.19
## 3rd Qu.:230 3rd Qu.: 92.0 3rd Qu.:100.00 3rd Qu.: 39.00
## Max. :912 Max. : 778.0 Max. :364.00 Max. :156.00
Variabel utama yang digunakan dalam analisis ini adalah Sales, Profit, Market (regional), dan State.
market_sales <- coffee %>%
group_by(Market) %>%
summarise(
Total_Sales = sum(Sales),
Total_Profit = sum(Profit),
Profit_Margin = round(sum(Profit) / sum(Sales) * 100, 1)
) %>%
arrange(desc(Total_Sales))
market_sales
## # A tibble: 4 × 4
## Market Total_Sales Total_Profit Profit_Margin
## <chr> <dbl> <dbl> <dbl>
## 1 West 272264 73996 27.2
## 2 Central 265045 93852 35.4
## 3 East 178576 59217 33.2
## 4 South 103926 32478 31.3
ggplot(market_sales, aes(x = reorder(Market, Total_Sales), y = Total_Sales, fill = Market)) +
geom_col(width = 0.6, show.legend = FALSE) +
geom_text(aes(label = comma(Total_Sales)), hjust = -0.1, size = 3.8) +
coord_flip() +
scale_y_continuous(labels = comma, expand = expansion(mult = c(0, 0.15))) +
scale_fill_manual(values = c("West" = "#6F4E37", "Central" = "#C4813A",
"East" = "#A0522D", "South" = "#D2B48C")) +
labs(
title = "Total Penjualan per Regional Market (2012–2013)",
subtitle = "West dan Central mendominasi penjualan coffee chain",
x = "Regional Market",
y = "Total Sales"
) +
theme_minimal(base_size = 12) +
theme(plot.title = element_text(face = "bold"))
Interpretasi: Wilayah West memimpin dengan total penjualan tertinggi sebesar 272.264, diikuti Central (265.045). Kedua wilayah ini jauh melampaui East dan South yang menunjukkan konsentrasi pasar yang kuat di bagian barat dan tengah Amerika Serikat.
monthly_trend <- coffee %>%
group_by(YearMonth, Market) %>%
summarise(Total_Sales = sum(Sales), .groups = "drop")
ggplot(monthly_trend, aes(x = YearMonth, y = Total_Sales, color = Market, group = Market)) +
geom_line(linewidth = 1) +
geom_point(size = 1.5) +
scale_x_date(date_labels = "%b %Y", date_breaks = "3 months") +
scale_y_continuous(labels = comma) +
scale_color_manual(values = c("West" = "#6F4E37", "Central" = "#C4813A",
"East" = "#A0522D", "South" = "#D2B48C")) +
labs(
title = "Tren Penjualan Bulanan per Regional Market (2012–2013)",
subtitle = "Pola fluktuasi penjualan selama 24 bulan",
x = "Bulan",
y = "Total Sales",
color = "Market"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold"),
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "top"
)
Interpretasi: Tren penjualan keempat wilayah menunjukkan pola yang relatif stabil sepanjang 2012–2013. West dan Central secara konsisten berada di posisi teratas setiap bulannya dan tidak tampak adanya lonjakan musiman yang ekstrem, namun terdapat sedikit fluktuasi yang perlu diperhatikan terutama pada pertengahan tahun.
top5_state <- coffee %>%
group_by(State) %>%
summarise(
Total_Sales = sum(Sales),
Total_Profit = sum(Profit),
Profit_Margin = round(sum(Profit) / sum(Sales) * 100, 1)
) %>%
arrange(desc(Total_Sales)) %>%
slice_head(n = 5)
top5_state
## # A tibble: 5 × 4
## State Total_Sales Total_Profit Profit_Margin
## <chr> <dbl> <dbl> <dbl>
## 1 California 96892 31785 32.8
## 2 New York 70852 20096 28.4
## 3 Illinois 69883 30821 44.1
## 4 Nevada 60159 10616 17.6
## 5 Iowa 54750 22212 40.6
ggplot(top5_state, aes(x = reorder(State, Total_Sales), y = Total_Sales, fill = State)) +
geom_col(width = 0.6, show.legend = FALSE) +
geom_text(aes(label = comma(Total_Sales)), hjust = -0.1, size = 3.8) +
coord_flip() +
scale_y_continuous(labels = comma, expand = expansion(mult = c(0, 0.18))) +
scale_fill_brewer(palette = "YlOrBr") +
labs(
title = "Top 5 State dengan Penjualan Tertinggi",
subtitle = "California memimpin jauh di atas negara bagian lainnya",
x = "State",
y = "Total Sales"
) +
theme_minimal(base_size = 12) +
theme(plot.title = element_text(face = "bold"))
Interpretasi: California menduduki posisi pertama dengan total penjualan 96.892, hampir 37% lebih tinggi dari New York di posisi kedua (70.852). Lalu diikuti Illinois, Nevada, dan Iowa yang melengkapi 5 besar. Kelima negara bagian ini berkontribusi signifikan terhadap total penjualan keseluruhan.
market_detail <- coffee %>%
group_by(Market, State) %>%
summarise(
Total_Sales = sum(Sales),
Total_Profit = sum(Profit),
.groups = "drop"
)
ggplot(market_detail, aes(x = Total_Sales, y = Total_Profit, color = Market)) +
geom_point(size = 3, alpha = 0.7) +
geom_smooth(method = "lm", se = FALSE, linewidth = 0.8) +
scale_x_continuous(labels = comma) +
scale_y_continuous(labels = comma) +
scale_color_manual(values = c("West" = "#6F4E37", "Central" = "#C4813A",
"East" = "#A0522D", "South" = "#D2B48C")) +
labs(
title = "Hubungan antara Total Sales dan Total Profit per State",
subtitle = "Dikelompokkan berdasarkan Regional Market",
x = "Total Sales",
y = "Total Profit",
color = "Market"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold"),
legend.position = "top"
)
cor_result <- cor.test(coffee$Sales, coffee$Profit)
cat("Korelasi Sales vs Profit:", round(cor_result$estimate, 3),
"\np-value:", round(cor_result$p.value, 5))
## Korelasi Sales vs Profit: 0.797
## p-value: 0
Interpretasi: Terdapat korelasi positif yang kuat antara penjualan dan keuntungan (r ≈ 0.9, p < 0.05). Artinya, semakin tinggi penjualan di suatu wilayah, maka semakin tinggi pula keuntungan yang dihasilkan. Hal ini konsisten di semua regional market.
Berdasarkan analisis tren penjualan Coffee Chain Dataset 2012–2013, diperoleh beberapa temuan utama: 1. West dan Central adalah dua regional market dengan penjualan tertinggi yang secara konsisten mendominasi setiap bulannya. 2. Tren penjualan bulanan relatif stabil sepanjang periode pengamatan tanpa lonjakan musiman yang signifikan, menunjukkan permintaan yang stabil. 3. California adalah negara bagian dengan penjualan tertinggi (96.892), diikuti New York dan Illinois yang mencerminkan konsentrasi pasar di kota-kota besar. 4. Terdapat korelasi positif yang sangat kuat antara penjualan dan profit, mengindikasikan adanya efisiensi operasional yang baik di seluruh wilayah.