library(DBI)
library(RSQLite)
## Warning: package 'RSQLite' was built under R version 4.5.3
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
#Import File Database
getwd()
## [1] "C:/Users/ARIMBY/Contacts"
#Menyambungkan Database
dw <- dbConnect(SQLite(), "enterprise_dw.db")
dbListTables(dw)
## [1] "dim_brand" "dim_category" "dim_customer" "dim_date"
## [5] "dim_employee" "dim_product" "dim_store" "dim_subcategory"
## [9] "fact_hr_payroll" "fact_inventory" "fact_sales"
dim_product <- dbGetQuery(dw, "
SELECT
p.product_id,
p.product_name,
b.brand_name,
sc.subcategory_name AS sub_category,
c.category_name AS category,
p.unit_price,
p.unit_cost
FROM dim_product p
JOIN dim_subcategory sc ON p.subcategory_id = sc.subcategory_id
JOIN dim_category c ON sc.category_id = c.category_id
JOIN dim_brand b ON p.brand_id = b.brand_id
WHERE p.is_active = 1
AND c.category_name = 'Fashion'
")
head(dim_product)
## product_id product_name brand_name sub_category category unit_price
## 1 P101 Kaos Uniqlo Basic Uniqlo Pakaian Pria Fashion 199000
## 2 P102 Dress H&M Summer H&M Pakaian Wanita Fashion 499000
## unit_cost
## 1 120000
## 2 300000
#Tambah dim_customer
dim_product <- dbGetQuery(dw, "
SELECT
p.product_id,
p.product_name,
b.brand_name,
b.country AS brand_country,
sc.subcategory_name AS sub_category,
c.category_name AS category,
c.department,
p.unit_price,
p.unit_cost,
p.launch_date
FROM dim_product p
JOIN dim_subcategory sc ON p.subcategory_id = sc.subcategory_id
JOIN dim_category c ON sc.category_id = c.category_id
JOIN dim_brand b ON p.brand_id = b.brand_id
WHERE p.is_active = 1
AND c.category_name = 'Fashion'
")
head(dim_product)
## product_id product_name brand_name brand_country sub_category category
## 1 P101 Kaos Uniqlo Basic Uniqlo Jepang Pakaian Pria Fashion
## 2 P102 Dress H&M Summer H&M Swedia Pakaian Wanita Fashion
## department unit_price unit_cost launch_date
## 1 Retail 199000 120000 2023-01-01
## 2 Retail 499000 300000 2023-03-01
dim_customer <- dbGetQuery(dw, "
SELECT
customer_id,
customer_name,
gender,
city,
province,
region,
segment,
join_date
FROM dim_customer
WHERE is_active = 1
")
head(dim_customer)
## customer_id customer_name gender city province
## 1 C0001 Dewi Hidayat L Semarang Jawa Tengah
## 2 C0002 Hanif Permana P Pontianak Kalimantan Barat
## 3 C0004 Kirana Wardana P Denpasar Bali
## 4 C0005 Luthfi Hidayat P Banjarmasin Kalimantan Selatan
## 5 C0006 Andi Anggraini P Yogyakarta DI Yogyakarta
## 6 C0007 Gilang Kurniawan L Balikpapan Kalimantan Timur
## region segment join_date
## 1 Jawa Gold 2024-06-26
## 2 Kalimantan Silver 2023-06-23
## 3 Bali & Nusa Tenggara Regular 2023-03-25
## 4 Kalimantan Silver 2021-06-06
## 5 Jawa Regular 2024-08-13
## 6 Kalimantan Silver 2023-01-15
#Ambil fact_sales (Fashion)
fact_sales <- dbGetQuery(dw, "
SELECT
sale_id,
date_key AS sale_date,
customer_id,
product_id,
store_id,
quantity,
unit_price,
discount_pct,
total_amount,
channel,
payment_method
FROM fact_sales
WHERE business_unit = 'Retail Fashion'
AND status = 'COMPLETED'
AND date_key >= '2024-01-01'
AND date_key <= '2025-12-31'
")
fact_sales <- fact_sales %>%
filter(
customer_id %in% dim_customer$customer_id,
product_id %in% dim_product$product_id
)
head(fact_sales)
## sale_id sale_date customer_id product_id store_id quantity unit_price
## 1 S005001 2025-06-12 C0068 P102 S90 1 499000
## 2 S005005 2024-10-31 C0065 P102 S90 1 499000
## 3 S005006 2025-09-10 C0114 P102 S90 1 499000
## 4 S005009 2024-08-01 C0068 P102 S90 1 499000
## 5 S005016 2025-04-12 C0044 P101 S90 3 199000
## 6 S005018 2025-04-11 C0248 P102 S90 1 499000
## discount_pct total_amount channel payment_method
## 1 0 499000 Online Debit Card
## 2 10 449100 Offline Transfer Bank
## 3 0 499000 Offline Cash
## 4 0 499000 Online Transfer Bank
## 5 0 597000 Offline Debit Card
## 6 30 349300 Offline Credit Card
dim_product <- dim_product %>%
mutate(
unit_margin = unit_price - unit_cost,
margin_pct = round(unit_margin / unit_price * 100, 2),
price_tier = case_when(
unit_price < 1000000 ~ "Low",
unit_price < 5000000 ~ "Mid",
unit_price < 10000000 ~ "High",
TRUE ~ "Premium"
)
)
head(dim_product)
## product_id product_name brand_name brand_country sub_category category
## 1 P101 Kaos Uniqlo Basic Uniqlo Jepang Pakaian Pria Fashion
## 2 P102 Dress H&M Summer H&M Swedia Pakaian Wanita Fashion
## department unit_price unit_cost launch_date unit_margin margin_pct price_tier
## 1 Retail 199000 120000 2023-01-01 79000 39.70 Low
## 2 Retail 499000 300000 2023-03-01 199000 39.88 Low
fact_sales <- fact_sales %>%
mutate(
sale_date = as.Date(sale_date),
year = year(sale_date),
quarter = quarter(sale_date),
month = month(sale_date)
) %>%
left_join(dim_product %>% select(product_id, unit_cost), by = "product_id") %>%
mutate(
line_cost = quantity * unit_cost,
gross_profit = total_amount - line_cost
) %>%
select(-unit_cost)
head(fact_sales)
## sale_id sale_date customer_id product_id store_id quantity unit_price
## 1 S005001 2025-06-12 C0068 P102 S90 1 499000
## 2 S005005 2024-10-31 C0065 P102 S90 1 499000
## 3 S005006 2025-09-10 C0114 P102 S90 1 499000
## 4 S005009 2024-08-01 C0068 P102 S90 1 499000
## 5 S005016 2025-04-12 C0044 P101 S90 3 199000
## 6 S005018 2025-04-11 C0248 P102 S90 1 499000
## discount_pct total_amount channel payment_method year quarter month line_cost
## 1 0 499000 Online Debit Card 2025 2 6 300000
## 2 10 449100 Offline Transfer Bank 2024 4 10 300000
## 3 0 499000 Offline Cash 2025 3 9 300000
## 4 0 499000 Online Transfer Bank 2024 3 8 300000
## 5 0 597000 Offline Debit Card 2025 2 4 360000
## 6 30 349300 Offline Credit Card 2025 2 4 300000
## gross_profit
## 1 199000
## 2 149100
## 3 199000
## 4 199000
## 5 237000
## 6 49300
cat("Jumlah customer aktif:", nrow(dim_customer), "\n")
## Jumlah customer aktif: 228
cat("Jumlah produk Fashion aktif:", nrow(dim_product), "\n")
## Jumlah produk Fashion aktif: 2
cat("Jumlah transaksi Fashion:", nrow(fact_sales), "\n")
## Jumlah transaksi Fashion: 586
sum(is.na(fact_sales$total_amount))
## [1] 0
sum(fact_sales$quantity <= 0)
## [1] 0
sum(duplicated(fact_sales$sale_id))
## [1] 0
#Filter data Fashion
fact_sales <- fact_sales %>%
filter(product_id %in% dim_product$product_id)
nrow(fact_sales)
## [1] 586
#Transformasi
fact_sales <- fact_sales %>%
mutate(
sale_date = as.Date(sale_date),
year = year(sale_date),
month = month(sale_date)
)
head(fact_sales)
## sale_id sale_date customer_id product_id store_id quantity unit_price
## 1 S005001 2025-06-12 C0068 P102 S90 1 499000
## 2 S005005 2024-10-31 C0065 P102 S90 1 499000
## 3 S005006 2025-09-10 C0114 P102 S90 1 499000
## 4 S005009 2024-08-01 C0068 P102 S90 1 499000
## 5 S005016 2025-04-12 C0044 P101 S90 3 199000
## 6 S005018 2025-04-11 C0248 P102 S90 1 499000
## discount_pct total_amount channel payment_method year quarter month line_cost
## 1 0 499000 Online Debit Card 2025 2 6 300000
## 2 10 449100 Offline Transfer Bank 2024 4 10 300000
## 3 0 499000 Offline Cash 2025 3 9 300000
## 4 0 499000 Online Transfer Bank 2024 3 8 300000
## 5 0 597000 Offline Debit Card 2025 2 4 360000
## 6 30 349300 Offline Credit Card 2025 2 4 300000
## gross_profit
## 1 199000
## 2 149100
## 3 199000
## 4 199000
## 5 237000
## 6 49300
#Join untuk analisis
data_fashion <- fact_sales %>%
left_join(dim_product, by = "product_id") %>%
left_join(dim_customer, by = "customer_id")
head(data_fashion)
## sale_id sale_date customer_id product_id store_id quantity unit_price.x
## 1 S005001 2025-06-12 C0068 P102 S90 1 499000
## 2 S005005 2024-10-31 C0065 P102 S90 1 499000
## 3 S005006 2025-09-10 C0114 P102 S90 1 499000
## 4 S005009 2024-08-01 C0068 P102 S90 1 499000
## 5 S005016 2025-04-12 C0044 P101 S90 3 199000
## 6 S005018 2025-04-11 C0248 P102 S90 1 499000
## discount_pct total_amount channel payment_method year quarter month line_cost
## 1 0 499000 Online Debit Card 2025 2 6 300000
## 2 10 449100 Offline Transfer Bank 2024 4 10 300000
## 3 0 499000 Offline Cash 2025 3 9 300000
## 4 0 499000 Online Transfer Bank 2024 3 8 300000
## 5 0 597000 Offline Debit Card 2025 2 4 360000
## 6 30 349300 Offline Credit Card 2025 2 4 300000
## gross_profit product_name brand_name brand_country sub_category
## 1 199000 Dress H&M Summer H&M Swedia Pakaian Wanita
## 2 149100 Dress H&M Summer H&M Swedia Pakaian Wanita
## 3 199000 Dress H&M Summer H&M Swedia Pakaian Wanita
## 4 199000 Dress H&M Summer H&M Swedia Pakaian Wanita
## 5 237000 Kaos Uniqlo Basic Uniqlo Jepang Pakaian Pria
## 6 49300 Dress H&M Summer H&M Swedia Pakaian Wanita
## category department unit_price.y unit_cost launch_date unit_margin margin_pct
## 1 Fashion Retail 499000 300000 2023-03-01 199000 39.88
## 2 Fashion Retail 499000 300000 2023-03-01 199000 39.88
## 3 Fashion Retail 499000 300000 2023-03-01 199000 39.88
## 4 Fashion Retail 499000 300000 2023-03-01 199000 39.88
## 5 Fashion Retail 199000 120000 2023-01-01 79000 39.70
## 6 Fashion Retail 499000 300000 2023-03-01 199000 39.88
## price_tier customer_name gender city province region
## 1 Low Kirana Santoso P Balikpapan Kalimantan Timur Kalimantan
## 2 Low Kirana Maharani P Banjarmasin Kalimantan Selatan Kalimantan
## 3 Low Hanif Wardana L Yogyakarta DI Yogyakarta Jawa
## 4 Low Kirana Santoso P Balikpapan Kalimantan Timur Kalimantan
## 5 Low Dewi Nugroho L Surabaya Jawa Timur Jawa
## 6 Low Kirana Setiawan P Medan Sumatera Utara Sumatera
## segment join_date
## 1 Platinum 2023-12-08
## 2 Platinum 2022-04-22
## 3 Silver 2022-01-02
## 4 Platinum 2023-12-08
## 5 Silver 2022-12-31
## 6 Silver 2021-05-18
#Eksplorasi sederhana
data_fashion %>%
group_by(sub_category) %>%
summarise(
total_sales = sum(total_amount, na.rm = TRUE),
total_qty = sum(quantity, na.rm = TRUE)
) %>%
arrange(desc(total_sales))
## # A tibble: 2 × 3
## sub_category total_sales total_qty
## <chr> <dbl> <int>
## 1 Pakaian Wanita 192963300 428
## 2 Pakaian Pria 67242100 374