Tujuan: Mengambil dan menggabungkan data dari berbagai sumber menggunakan Python
import pandas as pd
import numpy as np
import json
import xml.etree.ElementTree as ET
import os
print('Semua library berhasil diimport')## Semua library berhasil diimport
# ============================================================
# SECTION A — TUGAS 1, 2, 3: Baca file, looping, cek struktur
# ============================================================
def read_file(filepath):
ext = os.path.splitext(filepath)[1].lower()
if ext == '.csv':
return pd.read_csv(filepath)
elif ext == '.xlsx':
return pd.read_excel(filepath)
elif ext == '.json':
with open(filepath, 'r') as f:
data = json.load(f)
return pd.DataFrame(data)
elif ext == '.txt':
return pd.read_csv(filepath, sep='|')
elif ext == '.xml':
tree = ET.parse(filepath)
root = tree.getroot()
records = []
for record in root:
row = {child.tag: child.text for child in record}
records.append(row)
return pd.DataFrame(records)
else:
raise ValueError(f'Format file tidak dikenal: {ext}')
file_list = ['ecommerce.csv', 'ecommerce.xlsx', 'ecommerce.json', 'ecommerce.txt', 'ecommerce.xml']
reference_columns = None
dataframes = {}
summary_list = []
for filename in file_list:
df = read_file(filename)
dataframes[filename] = df
if reference_columns is None:
reference_columns = list(df.columns)
status = 'Referensi Kolom'
else:
if list(df.columns) == reference_columns:
status = 'Ready to Merge'
else:
status = 'Need Adjustment'
summary_list.append({
'File': filename,
'Jumlah Baris': df.shape[0],
'Jumlah Kolom': df.shape[1],
'Status': status
})
summary_df = pd.DataFrame(summary_list)Ringkasan pembacaan 5 file dataset:
kable(py$summary_df, align = "c") |>
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "bordered"),
full_width = TRUE) |>
row_spec(0, bold = TRUE, background = "#2c3e50", color = "white")| File | Jumlah Baris | Jumlah Kolom | Status |
|---|---|---|---|
| ecommerce.csv | 2000 | 22 | Referensi Kolom |
| ecommerce.xlsx | 2000 | 22 | Ready to Merge |
| ecommerce.json | 2000 | 22 | Ready to Merge |
| ecommerce.txt | 2000 | 22 | Ready to Merge |
| ecommerce.xml | 2000 | 22 | Ready to Merge |
# ============================================================
# SECTION A — TUGAS 4: Gabungkan semua data
# ============================================================
df_combined = pd.concat(list(dataframes.values()), ignore_index=True)
gabung_info = pd.DataFrame({
'Keterangan': ['Total Baris', 'Total Kolom'],
'Nilai': [df_combined.shape[0], df_combined.shape[1]]
})Hasil penggabungan dataset:
kable(py$gabung_info, align = "lc") |>
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) |>
row_spec(0, bold = TRUE, background = "#2c3e50", color = "white")| Keterangan | Nilai |
|---|---|
| Total Baris | 10000 |
| Total Kolom | 22 |
Preview 5 baris pertama dataset gabungan:
kable(head(py$df_combined, 5)) |>
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "bordered"),
full_width = TRUE, font_size = 12) |>
row_spec(0, bold = TRUE, background = "#2c3e50", color = "white") |>
scroll_box(width = "100%")| order_id | order_date | ship_date | platform | category | product_name | unit_price | quantity | gross_sales | campaign | voucher_code | discount_pct | discount_value | shipping_cost | net_sales | payment_method | customer_segment | region | stock_status | order_status | customer_rating | priority_flag |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ORD00612 | 2024-04-19 | 2024/04/24 | Tokopedia | home living | Table Lamp | 188905 | 4 | 755620 | Flash Sale | DISC10 | 10 | 75562 | 12000 | 680058 | E-Wallet | VIP | Bekasi | Preorder | completed | 5 | Y |
| ORD00112 | 2024/01/24 | 29/01/2024 | TikTok Shop | Electronics | Power Bank | 1476873 | 1 | 1476873 | Normal Day | NONE | 0 | 0 | 18000 | 1476873 | cod | Returning | Makassar | In Stock | completed | 5 | N |
| ORD01186 | 2024-06-12 | 06-19-2024 | Tokopedia | Fashion | Women Dress | 231072 | 2 | 462144 | Mega Campaign | DISC20 | 20 | 92429 | 15000 | 369715 | Virtual Account | Returning | Surabaya | In Stock | delivered | 3 | Yes |
| ORD01511 | 2024/08/07 | 08-14-2024 | Tokopedia | home living | Vacuum Cleaner | 512063 | 3 | 1536189 | Flash Sale | DISC10 | 10 | 153619 | 0 | 1382570 | Transfer Bank | New | Yogyakarta | In Stock | DELIVERED | 4 | No |
| ORD00772 | 2024-12-08 | NA | Tokopedia | Beauty | Body Lotion | 221586 | 2 | 443172 | Payday Sale | DISC15 | 15 | Rp 66.476 | 0 | 376696 | COD | Returning | Surabaya | In Stock | DELIVERED | 5 | normal |
Tujuan: Memahami kondisi dataset hasil penggabungan
# ============================================================
# SECTION B — TUGAS 1: Info dataset
# ============================================================
dtypes_df = pd.DataFrame({
'Kolom': df_combined.dtypes.index,
'Tipe Data': df_combined.dtypes.values.astype(str)
}).reset_index(drop=True)
info_df = pd.DataFrame({
'Keterangan': ['Jumlah Total Baris', 'Jumlah Total Kolom'],
'Nilai': [df_combined.shape[0], df_combined.shape[1]]
})Informasi umum dataset:
kable(py$info_df, align = "lc") |>
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) |>
row_spec(0, bold = TRUE, background = "#2c3e50", color = "white")| Keterangan | Nilai |
|---|---|
| Jumlah Total Baris | 10000 |
| Jumlah Total Kolom | 22 |
Tipe data setiap kolom:
kable(py$dtypes_df, align = "lc") |>
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "bordered"),
full_width = FALSE) |>
row_spec(0, bold = TRUE, background = "#2c3e50", color = "white")| Kolom | Tipe Data |
|---|---|
| order_id | str |
| order_date | str |
| ship_date | str |
| platform | str |
| category | str |
| product_name | str |
| unit_price | object |
| quantity | object |
| gross_sales | object |
| campaign | str |
| voucher_code | str |
| discount_pct | object |
| discount_value | object |
| shipping_cost | object |
| net_sales | object |
| payment_method | str |
| customer_segment | str |
| region | str |
| stock_status | str |
| order_status | str |
| customer_rating | object |
| priority_flag | str |
# ============================================================
# SECTION B — TUGAS 2: Missing Values & Duplicates
# ============================================================
missing = df_combined.isnull().sum()
missing_pct = (missing / len(df_combined) * 100).round(2)
missing_df = pd.DataFrame({
'Kolom': missing.index,
'Jumlah Missing': missing.values,
'Persentase (%)': missing_pct.values
}).reset_index(drop=True)
missing_df = missing_df[missing_df['Jumlah Missing'] > 0]
dup_df = pd.DataFrame({
'Keterangan': ['Jumlah Baris Duplikat'],
'Nilai': [int(df_combined.duplicated().sum())]
})Missing values per kolom:
kable(py$missing_df, align = "lcc") |>
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "bordered"),
full_width = FALSE) |>
row_spec(0, bold = TRUE, background = "#e74c3c", color = "white")| Kolom | Jumlah Missing | Persentase (%) | |
|---|---|---|---|
| 2 | ship_date | 851 | 8.51 |
| 10 | voucher_code | 245 | 2.45 |
| 11 | discount_pct | 345 | 3.45 |
| 15 | payment_method | 175 | 1.75 |
| 20 | customer_rating | 2020 | 20.20 |
| 21 | priority_flag | 940 | 9.40 |
Duplikasi data:
kable(py$dup_df, align = "lc") |>
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) |>
row_spec(0, bold = TRUE, background = "#2c3e50", color = "white")| Keterangan | Nilai |
|---|---|
| Jumlah Baris Duplikat | 3665 |
# ============================================================
# SECTION B — TUGAS 3: Masalah Kualitas Data
# ============================================================
kualitas_df = pd.DataFrame({
'No': [1, 2, 3, 4, 5],
'Masalah': [
'Inkonsistensi Penulisan Platform',
'Inkonsistensi Penulisan Order Status',
'Missing Values di Kolom Penting',
'Tipe Data Tidak Konsisten',
'Duplikasi Data'
],
'Contoh / Keterangan': [
'"shopee", "SHOPEE", " shopee ", "Shopee" -> entitas sama',
'"delivered", "DELIVERED", "Batal", "CANCEL" -> perlu satu standar',
'customer_rating, payment_method, ship_date banyak kosong',
'net_sales bertipe object; format tanggal tidak seragam',
'Data dari 5 file berbeda berpotensi mengandung baris yang sama'
]
})Daftar masalah kualitas data:
kable(py$kualitas_df, align = "clll") |>
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "bordered"),
full_width = TRUE) |>
row_spec(0, bold = TRUE, background = "#e67e22", color = "white")| No | Masalah | Contoh / Keterangan |
|---|---|---|
| 1 | Inkonsistensi Penulisan Platform | “shopee”, “SHOPEE”, ” shopee “,”Shopee” -> entitas sama |
| 2 | Inkonsistensi Penulisan Order Status | “delivered”, “DELIVERED”, “Batal”, “CANCEL” -> perlu satu standar |
| 3 | Missing Values di Kolom Penting | customer_rating, payment_method, ship_date banyak kosong |
| 4 | Tipe Data Tidak Konsisten | net_sales bertipe object; format tanggal tidak seragam |
| 5 | Duplikasi Data | Data dari 5 file berbeda berpotensi mengandung baris yang sama |
Tujuan: Membersihkan data menggunakan logika programming
# ============================================================
# SECTION C — 1. Standardisasi Platform
# ============================================================
platform_before = df_clean['platform'].value_counts().reset_index()
platform_before.columns = ['Platform', 'Jumlah (Sebelum)']
def standardize_platform(val):
if pd.isna(val):
return 'Unknown'
val_clean = str(val).strip().lower()
if 'shopee' in val_clean:
return 'Shopee'
elif 'tokopedia' in val_clean or 'tokped' in val_clean:
return 'Tokopedia'
elif 'tiktok' in val_clean:
return 'TikTok Shop'
elif 'lazada' in val_clean:
return 'Lazada'
elif 'blibli' in val_clean:
return 'Blibli'
else:
return val.strip().title()
df_clean['platform'] = df_clean['platform'].apply(standardize_platform)
platform_after = df_clean['platform'].value_counts().reset_index()
platform_after.columns = ['Platform', 'Jumlah (Setelah)']Platform — Sebelum standardisasi:
kable(py$platform_before, align = "lc") |>
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) |>
row_spec(0, bold = TRUE, background = "#8e44ad", color = "white")| Platform | Jumlah (Sebelum) |
|---|---|
| Shopee | 1895 |
| TikTok Shop | 1840 |
| Blibli | 1820 |
| Tokopedia | 1765 |
| Lazada | 1760 |
| blibli | 95 |
| TOKOPEDIA | 95 |
| tiktok shop | 85 |
| SHOPEE | 75 |
| lazada | 70 |
| shopee | 65 |
| lazada | 60 |
| blibli | 55 |
| LAZADA | 50 |
| BLIBLI | 50 |
| shopee | 45 |
| tokopedia | 40 |
| tiktok shop | 40 |
| Tiktok Shop | 35 |
| TIKTOK SHOP | 30 |
| tokopedia | 30 |
Platform — Setelah standardisasi:
kable(py$platform_after, align = "lc") |>
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) |>
row_spec(0, bold = TRUE, background = "#27ae60", color = "white")| Platform | Jumlah (Setelah) |
|---|---|
| Shopee | 2080 |
| TikTok Shop | 2030 |
| Blibli | 2020 |
| Lazada | 1940 |
| Tokopedia | 1930 |
# ============================================================
# SECTION C — 2. Cleaning net_sales
# ============================================================
def clean_sales(val):
if pd.isna(val):
return 0
val_str = str(val).strip()
if val_str.startswith('Rp'):
val_str = val_str.replace('Rp', '').replace('.', '').replace(',', '').strip()
val_str = ''.join(c for c in val_str if c.isdigit() or c == '-')
try:
val_num = int(val_str)
except:
return 0
if val_num < 0:
return 0
return val_num
df_clean['net_sales'] = df_clean['net_sales'].apply(clean_sales)
desc = df_clean['net_sales'].describe().reset_index()
desc.columns = ['Statistik', 'Nilai']
desc['Nilai'] = desc['Nilai'].round(2)
netsales_info = pd.DataFrame({
'Keterangan': ['Tipe Data Setelah Cleaning', 'Jumlah Nilai Negatif'],
'Nilai': [str(df_clean['net_sales'].dtype), str(int((df_clean['net_sales'] < 0).sum()))]
})Info kolom net_sales setelah cleaning:
kable(py$netsales_info, align = "lc") |>
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) |>
row_spec(0, bold = TRUE, background = "#2c3e50", color = "white")| Keterangan | Nilai |
|---|---|
| Tipe Data Setelah Cleaning | int64 |
| Jumlah Nilai Negatif | 0 |
Statistik deskriptif net_sales:
kable(py$desc, align = "lc") |>
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) |>
row_spec(0, bold = TRUE, background = "#2c3e50", color = "white")| Statistik | Nilai |
|---|---|
| count | 10000 |
| mean | 1377342 |
| std | 2058400 |
| min | 0 |
| 25% | 247700 |
| 50% | 708293 |
| 75% | 1653688 |
| max | 18080224 |
# ============================================================
# SECTION C — 3. Handling Missing Values
# ============================================================
df_clean['payment_method'] = df_clean['payment_method'].apply(
lambda x: 'Unknown' if pd.isna(x) or str(x).strip() == '' else str(x).strip()
)
df_clean['customer_rating'] = pd.to_numeric(df_clean['customer_rating'], errors='coerce')
median_rating = df_clean['customer_rating'].median()
df_clean['customer_rating'] = df_clean['customer_rating'].fillna(median_rating)
mv_result = pd.DataFrame({
'Kolom': ['payment_method', 'customer_rating'],
'Missing Setelah': [
int(df_clean['payment_method'].isnull().sum()),
int(df_clean['customer_rating'].isnull().sum())
],
'Metode Imputasi': ['Diisi "Unknown"', f'Diisi median = {median_rating}']
})Hasil handling missing values:
kable(py$mv_result, align = "lcc") |>
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) |>
row_spec(0, bold = TRUE, background = "#27ae60", color = "white")| Kolom | Missing Setelah | Metode Imputasi |
|---|---|---|
| payment_method | 0 | Diisi “Unknown” |
| customer_rating | 0 | Diisi median = 5.0 |
# ============================================================
# SECTION C — 4. Standardisasi order_status
# ============================================================
status_before = df_clean['order_status'].value_counts().reset_index()
status_before.columns = ['Order Status', 'Jumlah (Sebelum)']
def standardize_status(val):
if pd.isna(val):
return 'Unknown'
val_clean = str(val).strip().lower()
if val_clean in ['delivered', 'completed', 'complete']:
return 'Completed'
elif val_clean in ['cancelled', 'cancel', 'batal']:
return 'Cancelled'
elif val_clean in ['shipped', 'on delivery', 'on_delivery']:
return 'Shipped'
elif val_clean in ['returned', 'retur', 'return']:
return 'Returned'
else:
return val.strip().title()
df_clean['order_status'] = df_clean['order_status'].apply(standardize_status)
status_after = df_clean['order_status'].value_counts().reset_index()
status_after.columns = ['Order Status', 'Jumlah (Setelah)']Order Status — Sebelum standardisasi:
kable(py$status_before, align = "lc") |>
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) |>
row_spec(0, bold = TRUE, background = "#8e44ad", color = "white")| Order Status | Jumlah (Sebelum) |
|---|---|
| DELIVERED | 1980 |
| Delivered | 1940 |
| completed | 1875 |
| delivered | 1855 |
| Shipped | 265 |
| shipped | 235 |
| Cancelled | 230 |
| On Delivery | 220 |
| RETUR | 205 |
| returned | 195 |
| CANCEL | 180 |
| cancelled | 170 |
| Returned | 165 |
| Batal | 130 |
| delivered | 125 |
| completed | 70 |
| COMPLETED | 55 |
| batal | 30 |
| returned | 15 |
| cancelled | 10 |
| on delivery | 10 |
| ON DELIVERY | 10 |
| retur | 10 |
| CANCELLED | 5 |
| BATAL | 5 |
| SHIPPED | 5 |
| shipped | 5 |
Order Status — Setelah standardisasi:
kable(py$status_after, align = "lc") |>
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) |>
row_spec(0, bold = TRUE, background = "#27ae60", color = "white")| Order Status | Jumlah (Setelah) |
|---|---|
| Completed | 7900 |
| Cancelled | 760 |
| Shipped | 750 |
| Returned | 590 |
# ============================================================
# SECTION C — 5. Looping: Cleaning 3 kolom teks + hapus duplikat
# ============================================================
text_columns = ['category', 'product_name', 'region']
loop_summary = []
for col in text_columns:
sebelum = df_clean[col].nunique()
df_clean[col] = df_clean[col].apply(
lambda x: str(x).strip().title() if pd.notna(x) else x
)
sesudah = df_clean[col].nunique()
loop_summary.append({'Kolom': col, 'Unique Sebelum': sebelum, 'Unique Setelah': sesudah})
n_before = len(df_clean)
df_clean = df_clean.drop_duplicates()
n_removed = n_before - len(df_clean)
loop_df = pd.DataFrame(loop_summary)
dup_removed_df = pd.DataFrame({
'Keterangan': ['Baris Sebelum', 'Duplikat Dihapus', 'Baris Setelah'],
'Nilai': [n_before, n_removed, len(df_clean)]
})Hasil cleaning kolom teks (looping):
kable(py$loop_df, align = "lcc") |>
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) |>
row_spec(0, bold = TRUE, background = "#2c3e50", color = "white")| Kolom | Unique Sebelum | Unique Setelah |
|---|---|---|
| category | 30 | 6 |
| product_name | 49 | 25 |
| region | 8 | 8 |
Hasil penghapusan duplikat:
kable(py$dup_removed_df, align = "lc") |>
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) |>
row_spec(0, bold = TRUE, background = "#2c3e50", color = "white")| Keterangan | Nilai |
|---|---|
| Baris Sebelum | 10000 |
| Duplikat Dihapus | 3734 |
| Baris Setelah | 6266 |
Tujuan: Menerapkan logika bisnis menggunakan if / if-else
# ============================================================
# SECTION D — 1. Kolom is_high_value
# ============================================================
df_clean['net_sales'] = pd.to_numeric(df_clean['net_sales'], errors='coerce').fillna(0)
def is_high_value(net_sales):
if net_sales > 1_000_000:
return 'Yes'
else:
return 'No'
df_clean['is_high_value'] = df_clean['net_sales'].apply(is_high_value)
hv_df = df_clean['is_high_value'].value_counts().reset_index()
hv_df.columns = ['is_high_value', 'Jumlah']Distribusi kolom is_high_value:
kable(py$hv_df, align = "lc") |>
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) |>
row_spec(0, bold = TRUE, background = "#2c3e50", color = "white")| is_high_value | Jumlah |
|---|---|
| No | 3889 |
| Yes | 2377 |
# ============================================================
# SECTION D — 2. Kolom order_priority (nested IF)
# ============================================================
def order_priority(net_sales):
if net_sales > 1_000_000:
return 'High'
else:
if net_sales >= 500_000:
return 'Medium'
else:
return 'Low'
df_clean['order_priority'] = df_clean['net_sales'].apply(order_priority)
op_df = df_clean['order_priority'].value_counts().reset_index()
op_df.columns = ['order_priority', 'Jumlah']Distribusi kolom order_priority:
kable(py$op_df, align = "lc") |>
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) |>
row_spec(0, bold = TRUE, background = "#2c3e50", color = "white")| order_priority | Jumlah |
|---|---|
| Low | 2607 |
| High | 2377 |
| Medium | 1282 |
# ============================================================
# SECTION D — 3. Kolom valid_transaction
# ============================================================
def valid_transaction(order_status):
if order_status == 'Cancelled':
return 'Invalid'
else:
return 'Valid'
df_clean['valid_transaction'] = df_clean['order_status'].apply(valid_transaction)
vt_df = df_clean['valid_transaction'].value_counts().reset_index()
vt_df.columns = ['valid_transaction', 'Jumlah']
preview_df = df_clean[['order_id', 'platform', 'net_sales', 'order_status',
'is_high_value', 'order_priority', 'valid_transaction']].head(5).reset_index(drop=True)Distribusi kolom valid_transaction:
kable(py$vt_df, align = "lc") |>
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) |>
row_spec(0, bold = TRUE, background = "#2c3e50", color = "white")| valid_transaction | Jumlah |
|---|---|
| Valid | 5666 |
| Invalid | 600 |
Preview dataset final (5 baris pertama):
kable(py$preview_df) |>
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "bordered"),
full_width = TRUE) |>
row_spec(0, bold = TRUE, background = "#2c3e50", color = "white") |>
scroll_box(width = "100%")| order_id | platform | net_sales | order_status | is_high_value | order_priority | valid_transaction |
|---|---|---|---|---|---|---|
| ORD00612 | Tokopedia | 680058 | Completed | No | Medium | Valid |
| ORD00112 | TikTok Shop | 1476873 | Completed | Yes | High | Valid |
| ORD01186 | Tokopedia | 369715 | Completed | No | Low | Valid |
| ORD01511 | Tokopedia | 1382570 | Completed | Yes | High | Valid |
| ORD00772 | Tokopedia | 376696 | Completed | No | Low | Valid |
# Simpan dataset final ke CSV
df_clean.to_csv('ecommerce_cleaned.csv', index=False)
save_info = pd.DataFrame({
'Keterangan': ['File Output', 'Total Baris', 'Total Kolom'],
'Nilai': ['ecommerce_cleaned.csv', str(df_clean.shape[0]), str(df_clean.shape[1])]
})kable(py$save_info, align = "lc") |>
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) |>
row_spec(0, bold = TRUE, background = "#27ae60", color = "white")| Keterangan | Nilai |
|---|---|
| File Output | ecommerce_cleaned.csv |
| Total Baris | 6266 |
| Total Kolom | 25 |
Tujuan: Menarik insight sederhana dari data
# ============================================================
# SECTION E — Analytical Thinking
# ============================================================
platform_vc = df_clean['platform'].value_counts().reset_index()
platform_vc.columns = ['Platform', 'Jumlah Transaksi']
category_vc = df_clean['category'].value_counts().reset_index()
category_vc.columns = ['Category', 'Jumlah Transaksi']
status_vc = df_clean['order_status'].value_counts().reset_index()
status_vc.columns = ['Order Status', 'Jumlah Transaksi']1. Distribusi transaksi per platform:
kable(py$platform_vc, align = "lc") |>
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) |>
row_spec(0, bold = TRUE, background = "#2980b9", color = "white") |>
row_spec(1, bold = TRUE, background = "#d5e8f5")| Platform | Jumlah Transaksi |
|---|---|
| Shopee | 1309 |
| Blibli | 1269 |
| TikTok Shop | 1265 |
| Tokopedia | 1212 |
| Lazada | 1211 |
2. Distribusi transaksi per kategori:
kable(py$category_vc, align = "lc") |>
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) |>
row_spec(0, bold = TRUE, background = "#2980b9", color = "white") |>
row_spec(1, bold = TRUE, background = "#d5e8f5")| Category | Jumlah Transaksi |
|---|---|
| Fashion | 1302 |
| Sports | 1297 |
| Beauty | 1230 |
| Home Living | 1199 |
| Electronics | 1142 |
| Home_Living | 96 |
3. Distribusi transaksi per status order:
kable(py$status_vc, align = "lc") |>
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) |>
row_spec(0, bold = TRUE, background = "#2980b9", color = "white") |>
row_spec(1, bold = TRUE, background = "#d5e8f5")| Order Status | Jumlah Transaksi |
|---|---|
| Completed | 4858 |
| Cancelled | 600 |
| Shipped | 452 |
| Returned | 356 |