library(DBI)
library(RSQLite)
## Warning: package 'RSQLite' was built under R version 4.5.3
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

#Import File Database

getwd()
## [1] "C:/Users/ARIMBY/Contacts"

#Menyambungkan Database

dw <- dbConnect(SQLite(), "enterprise_dw.db")

dbListTables(dw)
##  [1] "dim_brand"       "dim_category"    "dim_customer"    "dim_date"       
##  [5] "dim_employee"    "dim_product"     "dim_store"       "dim_subcategory"
##  [9] "fact_hr_payroll" "fact_inventory"  "fact_sales"
dim_product <- dbGetQuery(dw, "
  SELECT 
    p.product_id,
    p.product_name,
    b.brand_name,
    sc.subcategory_name AS sub_category,
    c.category_name AS category,
    p.unit_price,
    p.unit_cost
  FROM dim_product p
  JOIN dim_subcategory sc ON p.subcategory_id = sc.subcategory_id
  JOIN dim_category c ON sc.category_id = c.category_id
  JOIN dim_brand b ON p.brand_id = b.brand_id
  WHERE p.is_active = 1
    AND c.category_name = 'Fashion'
")

head(dim_product)
##   product_id      product_name brand_name   sub_category category unit_price
## 1       P101 Kaos Uniqlo Basic     Uniqlo   Pakaian Pria  Fashion     199000
## 2       P102  Dress H&M Summer        H&M Pakaian Wanita  Fashion     499000
##   unit_cost
## 1    120000
## 2    300000

#Tambah dim_customer

dim_product <- dbGetQuery(dw, "
  SELECT 
    p.product_id,
    p.product_name,
    b.brand_name,
    b.country AS brand_country,
    sc.subcategory_name AS sub_category,
    c.category_name AS category,
    c.department,
    p.unit_price,
    p.unit_cost,
    p.launch_date
  FROM dim_product p
  JOIN dim_subcategory sc ON p.subcategory_id = sc.subcategory_id
  JOIN dim_category c ON sc.category_id = c.category_id
  JOIN dim_brand b ON p.brand_id = b.brand_id
  WHERE p.is_active = 1
    AND c.category_name = 'Fashion'
")

head(dim_product)
##   product_id      product_name brand_name brand_country   sub_category category
## 1       P101 Kaos Uniqlo Basic     Uniqlo        Jepang   Pakaian Pria  Fashion
## 2       P102  Dress H&M Summer        H&M        Swedia Pakaian Wanita  Fashion
##   department unit_price unit_cost launch_date
## 1     Retail     199000    120000  2023-01-01
## 2     Retail     499000    300000  2023-03-01

Extract Dim Customer

dim_customer <- dbGetQuery(dw, "
  SELECT 
    customer_id,
    customer_name,
    gender,
    city,
    province,
    region,
    segment,
    join_date
  FROM dim_customer
  WHERE is_active = 1
")

head(dim_customer)
##   customer_id    customer_name gender        city           province
## 1       C0001     Dewi Hidayat      L    Semarang        Jawa Tengah
## 2       C0002    Hanif Permana      P   Pontianak   Kalimantan Barat
## 3       C0004   Kirana Wardana      P    Denpasar               Bali
## 4       C0005   Luthfi Hidayat      P Banjarmasin Kalimantan Selatan
## 5       C0006   Andi Anggraini      P  Yogyakarta      DI Yogyakarta
## 6       C0007 Gilang Kurniawan      L  Balikpapan   Kalimantan Timur
##                 region segment  join_date
## 1                 Jawa    Gold 2024-06-26
## 2           Kalimantan  Silver 2023-06-23
## 3 Bali & Nusa Tenggara Regular 2023-03-25
## 4           Kalimantan  Silver 2021-06-06
## 5                 Jawa Regular 2024-08-13
## 6           Kalimantan  Silver 2023-01-15

#Ambil fact_sales (Fashion)

fact_sales <- dbGetQuery(dw, "
  SELECT 
    sale_id,
    date_key AS sale_date,
    customer_id,
    product_id,
    store_id,
    quantity,
    unit_price,
    discount_pct,
    total_amount,
      channel,
    payment_method
  FROM fact_sales
  WHERE business_unit = 'Retail Fashion'
    AND status = 'COMPLETED'
    AND date_key >= '2024-01-01'
    AND date_key <= '2025-12-31'
")

fact_sales <- fact_sales %>%
  filter(
     customer_id %in% dim_customer$customer_id,
    product_id %in% dim_product$product_id
  )

head(fact_sales)
##   sale_id  sale_date customer_id product_id store_id quantity unit_price
## 1 S005001 2025-06-12       C0068       P102      S90        1     499000
## 2 S005005 2024-10-31       C0065       P102      S90        1     499000
## 3 S005006 2025-09-10       C0114       P102      S90        1     499000
## 4 S005009 2024-08-01       C0068       P102      S90        1     499000
## 5 S005016 2025-04-12       C0044       P101      S90        3     199000
## 6 S005018 2025-04-11       C0248       P102      S90        1     499000
##   discount_pct total_amount channel payment_method
## 1            0       499000  Online     Debit Card
## 2           10       449100 Offline  Transfer Bank
## 3            0       499000 Offline           Cash
## 4            0       499000  Online  Transfer Bank
## 5            0       597000 Offline     Debit Card
## 6           30       349300 Offline    Credit Card

Transformasi Dim Product

dim_product <- dim_product %>%
  mutate(
    unit_margin = unit_price - unit_cost,
    margin_pct = round(unit_margin / unit_price * 100, 2),
    price_tier = case_when(
      unit_price < 1000000 ~ "Low",
      unit_price < 5000000 ~ "Mid",
      unit_price < 10000000 ~ "High",
      TRUE ~ "Premium"
    )
  )

head(dim_product)
##   product_id      product_name brand_name brand_country   sub_category category
## 1       P101 Kaos Uniqlo Basic     Uniqlo        Jepang   Pakaian Pria  Fashion
## 2       P102  Dress H&M Summer        H&M        Swedia Pakaian Wanita  Fashion
##   department unit_price unit_cost launch_date unit_margin margin_pct price_tier
## 1     Retail     199000    120000  2023-01-01       79000      39.70        Low
## 2     Retail     499000    300000  2023-03-01      199000      39.88        Low

Transformasi Fact Sales

fact_sales <- fact_sales %>%
  mutate(
    sale_date = as.Date(sale_date),
    year = year(sale_date),
    quarter = quarter(sale_date),
    month = month(sale_date)
  ) %>%
  left_join(dim_product %>% select(product_id, unit_cost), by = "product_id") %>%
  mutate(
    line_cost = quantity * unit_cost,
    gross_profit = total_amount - line_cost
  ) %>%
  select(-unit_cost)

head(fact_sales)
##   sale_id  sale_date customer_id product_id store_id quantity unit_price
## 1 S005001 2025-06-12       C0068       P102      S90        1     499000
## 2 S005005 2024-10-31       C0065       P102      S90        1     499000
## 3 S005006 2025-09-10       C0114       P102      S90        1     499000
## 4 S005009 2024-08-01       C0068       P102      S90        1     499000
## 5 S005016 2025-04-12       C0044       P101      S90        3     199000
## 6 S005018 2025-04-11       C0248       P102      S90        1     499000
##   discount_pct total_amount channel payment_method year quarter month line_cost
## 1            0       499000  Online     Debit Card 2025       2     6    300000
## 2           10       449100 Offline  Transfer Bank 2024       4    10    300000
## 3            0       499000 Offline           Cash 2025       3     9    300000
## 4            0       499000  Online  Transfer Bank 2024       3     8    300000
## 5            0       597000 Offline     Debit Card 2025       2     4    360000
## 6           30       349300 Offline    Credit Card 2025       2     4    300000
##   gross_profit
## 1       199000
## 2       149100
## 3       199000
## 4       199000
## 5       237000
## 6        49300

Validasi Data

cat("Jumlah customer aktif:", nrow(dim_customer), "\n")
## Jumlah customer aktif: 228
cat("Jumlah produk Fashion aktif:", nrow(dim_product), "\n")
## Jumlah produk Fashion aktif: 2
cat("Jumlah transaksi Fashion:", nrow(fact_sales), "\n")
## Jumlah transaksi Fashion: 586
sum(is.na(fact_sales$total_amount))
## [1] 0
sum(fact_sales$quantity <= 0)
## [1] 0
sum(duplicated(fact_sales$sale_id))
## [1] 0

#Filter data Fashion

fact_sales <- fact_sales %>%
  filter(product_id %in% dim_product$product_id)

nrow(fact_sales)
## [1] 586

#Transformasi

fact_sales <- fact_sales %>%
  mutate(
    sale_date = as.Date(sale_date),
    year = year(sale_date),
    month = month(sale_date)
  )

head(fact_sales)
##   sale_id  sale_date customer_id product_id store_id quantity unit_price
## 1 S005001 2025-06-12       C0068       P102      S90        1     499000
## 2 S005005 2024-10-31       C0065       P102      S90        1     499000
## 3 S005006 2025-09-10       C0114       P102      S90        1     499000
## 4 S005009 2024-08-01       C0068       P102      S90        1     499000
## 5 S005016 2025-04-12       C0044       P101      S90        3     199000
## 6 S005018 2025-04-11       C0248       P102      S90        1     499000
##   discount_pct total_amount channel payment_method year quarter month line_cost
## 1            0       499000  Online     Debit Card 2025       2     6    300000
## 2           10       449100 Offline  Transfer Bank 2024       4    10    300000
## 3            0       499000 Offline           Cash 2025       3     9    300000
## 4            0       499000  Online  Transfer Bank 2024       3     8    300000
## 5            0       597000 Offline     Debit Card 2025       2     4    360000
## 6           30       349300 Offline    Credit Card 2025       2     4    300000
##   gross_profit
## 1       199000
## 2       149100
## 3       199000
## 4       199000
## 5       237000
## 6        49300

#Join untuk analisis

data_fashion <- fact_sales %>%
  left_join(dim_product, by = "product_id") %>%
  left_join(dim_customer, by = "customer_id")

head(data_fashion)
##   sale_id  sale_date customer_id product_id store_id quantity unit_price.x
## 1 S005001 2025-06-12       C0068       P102      S90        1       499000
## 2 S005005 2024-10-31       C0065       P102      S90        1       499000
## 3 S005006 2025-09-10       C0114       P102      S90        1       499000
## 4 S005009 2024-08-01       C0068       P102      S90        1       499000
## 5 S005016 2025-04-12       C0044       P101      S90        3       199000
## 6 S005018 2025-04-11       C0248       P102      S90        1       499000
##   discount_pct total_amount channel payment_method year quarter month line_cost
## 1            0       499000  Online     Debit Card 2025       2     6    300000
## 2           10       449100 Offline  Transfer Bank 2024       4    10    300000
## 3            0       499000 Offline           Cash 2025       3     9    300000
## 4            0       499000  Online  Transfer Bank 2024       3     8    300000
## 5            0       597000 Offline     Debit Card 2025       2     4    360000
## 6           30       349300 Offline    Credit Card 2025       2     4    300000
##   gross_profit      product_name brand_name brand_country   sub_category
## 1       199000  Dress H&M Summer        H&M        Swedia Pakaian Wanita
## 2       149100  Dress H&M Summer        H&M        Swedia Pakaian Wanita
## 3       199000  Dress H&M Summer        H&M        Swedia Pakaian Wanita
## 4       199000  Dress H&M Summer        H&M        Swedia Pakaian Wanita
## 5       237000 Kaos Uniqlo Basic     Uniqlo        Jepang   Pakaian Pria
## 6        49300  Dress H&M Summer        H&M        Swedia Pakaian Wanita
##   category department unit_price.y unit_cost launch_date unit_margin margin_pct
## 1  Fashion     Retail       499000    300000  2023-03-01      199000      39.88
## 2  Fashion     Retail       499000    300000  2023-03-01      199000      39.88
## 3  Fashion     Retail       499000    300000  2023-03-01      199000      39.88
## 4  Fashion     Retail       499000    300000  2023-03-01      199000      39.88
## 5  Fashion     Retail       199000    120000  2023-01-01       79000      39.70
## 6  Fashion     Retail       499000    300000  2023-03-01      199000      39.88
##   price_tier   customer_name gender        city           province     region
## 1        Low  Kirana Santoso      P  Balikpapan   Kalimantan Timur Kalimantan
## 2        Low Kirana Maharani      P Banjarmasin Kalimantan Selatan Kalimantan
## 3        Low   Hanif Wardana      L  Yogyakarta      DI Yogyakarta       Jawa
## 4        Low  Kirana Santoso      P  Balikpapan   Kalimantan Timur Kalimantan
## 5        Low    Dewi Nugroho      L    Surabaya         Jawa Timur       Jawa
## 6        Low Kirana Setiawan      P       Medan     Sumatera Utara   Sumatera
##    segment  join_date
## 1 Platinum 2023-12-08
## 2 Platinum 2022-04-22
## 3   Silver 2022-01-02
## 4 Platinum 2023-12-08
## 5   Silver 2022-12-31
## 6   Silver 2021-05-18

#Eksplorasi sederhana

data_fashion %>%
  group_by(sub_category) %>%
  summarise(
    total_sales = sum(total_amount, na.rm = TRUE),
    total_qty = sum(quantity, na.rm = TRUE)
  ) %>%
  arrange(desc(total_sales))
## # A tibble: 2 × 3
##   sub_category   total_sales total_qty
##   <chr>                <dbl>     <int>
## 1 Pakaian Wanita   192963300       428
## 2 Pakaian Pria      67242100       374