membaca dataset retail online dari Excel,
library(readxl)
online_retail <- read_xlsx("online_retail.xlsx")
head(online_retail)
## # A tibble: 6 × 8
## InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice
## <chr> <chr> <chr> <dbl> <dttm> <dbl>
## 1 536365 85123A WHITE HANGING HEAR… 6 2010-12-01 08:26:00 2.55
## 2 536365 71053 WHITE METAL LANTERN 6 2010-12-01 08:26:00 3.39
## 3 536365 84406B CREAM CUPID HEARTS… 8 2010-12-01 08:26:00 2.75
## 4 536365 84029G KNITTED UNION FLAG… 6 2010-12-01 08:26:00 3.39
## 5 536365 84029E RED WOOLLY HOTTIE … 6 2010-12-01 08:26:00 3.39
## 6 536365 22752 SET 7 BABUSHKA NES… 2 2010-12-01 08:26:00 7.65
## # ℹ 2 more variables: CustomerID <dbl>, Country <chr>
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
memuat data Excel dan cek berapa banyak baris awalnya.
data_raw <- read_xlsx("online_retail.xlsx")
cat("Data berhasil dimuat. Jumlah baris awal:", nrow(data_raw), "\n")
## Data berhasil dimuat. Jumlah baris awal: 541909
membersihkan dataset agar siap dianalisis dengan data yang lebih valid dan konsisten.
online_retail_clean <- data_raw %>%
mutate(TotalPrice = Quantity * UnitPrice) %>%
filter(!is.na(CustomerID)) %>%
filter(!grepl("^C", InvoiceNo)) %>%
filter(Quantity > 0, UnitPrice > 0) %>%
filter(TotalPrice < 1000)
cat("Data setelah cleaning dan engineering. Jumlah baris akhir:", nrow(online_retail_clean), "\n")
## Data setelah cleaning dan engineering. Jumlah baris akhir: 397568
mempersiapkan data untuk analisis hubungan antara jumlah barang dan total harga.
online_retail_clean <- online_retail_clean %>%
mutate(TotalPrice = Quantity * UnitPrice)
data_model <- online_retail_clean %>%
select(X = Quantity, Y = TotalPrice)
cat("\nVariabel X (Independen): Quantity\n")
##
## Variabel X (Independen): Quantity
cat("Variabel Y (Dependen): TotalPrice\n")
## Variabel Y (Dependen): TotalPrice
membuat model regresi linear sederhana dan menampilkan ringkasannya untuk evaluasi.
model_regresi <- lm(Y ~ X, data = data_model)
cat("\n--- RINGKASAN MODEL REGRESI LINEAR ---\n")
##
## --- RINGKASAN MODEL REGRESI LINEAR ---
print(summary(model_regresi))
##
## Call:
## lm(formula = Y ~ X, data = data_model)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2225.85 -10.08 -5.13 2.10 896.34
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.064902 0.057192 193.5 <2e-16 ***
## X 0.755162 0.001566 482.2 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 34.02 on 397566 degrees of freedom
## Multiple R-squared: 0.369, Adjusted R-squared: 0.369
## F-statistic: 2.325e+05 on 1 and 397566 DF, p-value: < 2.2e-16
menunjukkan hubungan antara jumlah barang dan total harga dalam bentuk grafik regresi linear.
plot_regresi <- ggplot(data_model, aes(x = X, y = Y)) +
geom_point(alpha = 0.3, color = "#007BFF") +
geom_smooth(method = "lm", col = "#DC3545", se = TRUE) +
labs(title = "Regresi Linear: Total Harga vs. Kuantitas Barang",
x = "Kuantitas Barang (Quantity) - Variabel X",
y = "Total Harga (TotalPrice) - Variabel Y",
caption = "Data dibatasi TotalPrice < 1000 GBP untuk visualisasi") +
theme_minimal() +
theme(plot.title = element_text(face = "bold"))
print(plot_regresi)
## `geom_smooth()` using formula = 'y ~ x'
mengekstrak hasil model regresi dan menuliskannya sebagai persamaan
matematis yang jelas.
koefisien <- coef(model_regresi)
cat("\nPersamaan Regresi (Y = intercept + slope * X):\n")
##
## Persamaan Regresi (Y = intercept + slope * X):
cat(sprintf("TotalPrice = %.2f + %.2f * Quantity\n", koefisien[1], koefisien[2]))
## TotalPrice = 11.06 + 0.76 * Quantity
menghapus data pelanggan yang tidak lengkap (CustomerID kosong) dan melaporkan hasilnya.
online_retail_clean <- data_raw %>%
filter(!is.na(CustomerID))
cat(" - Aksi 1: Hapus NA di CustomerID. Baris terhapus:", nrow(data_raw) - nrow(online_retail_clean), "\n")
## - Aksi 1: Hapus NA di CustomerID. Baris terhapus: 135080
cat(" - Sisa baris setelah Aksi 1:", nrow(online_retail_clean), "\n\n")
## - Sisa baris setelah Aksi 1: 406829
memberi informasi tentang tipe data hasil perbaikan struktur dataset.
cat("3. Perbaikan Struktur Data (Tipe Data):\n")
## 3. Perbaikan Struktur Data (Tipe Data):
cat(" - InvoiceDate diubah menjadi Tipe Data: ", class(online_retail_clean$InvoiceDate), "\n")
## - InvoiceDate diubah menjadi Tipe Data: POSIXct POSIXt
cat(" - CustomerID diubah menjadi Tipe Data: ", class(online_retail_clean$CustomerID), "\n\n")
## - CustomerID diubah menjadi Tipe Data: numeric
menjaga kualitas data dengan hanya menyimpan transaksi yang benar-benar sahih.
online_retail_clean <- online_retail_clean %>%
filter(Quantity > 0, UnitPrice > 0)
cat(" - Aksi 4: Hapus Quantity <= 0 dan UnitPrice <= 0. Sisa baris:", nrow(online_retail_clean), "\n\n")
## - Aksi 4: Hapus Quantity <= 0 dan UnitPrice <= 0. Sisa baris: 397884
memberikan ringkasan hasil proses cleaning dan memastikan dataset akhir siap digunakan untuk analisis.
online_retail_final <- online_retail_clean %>%
mutate(TotalPrice = Quantity * UnitPrice)
cat("5. Ringkasan Proses Cleaning:\n")
## 5. Ringkasan Proses Cleaning:
cat(" - Total baris awal:", nrow(data_raw), "\n")
## - Total baris awal: 541909
cat(" - Total baris akhir:", nrow(online_retail_final), "\n")
## - Total baris akhir: 397884
cat(" - Struktur Data Akhir:\n")
## - Struktur Data Akhir:
print(glimpse(online_retail))
## Rows: 541,909
## Columns: 8
## $ InvoiceNo <chr> "536365", "536365", "536365", "536365", "536365", "536365"…
## $ StockCode <chr> "85123A", "71053", "84406B", "84029G", "84029E", "22752", …
## $ Description <chr> "WHITE HANGING HEART T-LIGHT HOLDER", "WHITE METAL LANTERN…
## $ Quantity <dbl> 6, 6, 8, 6, 6, 2, 6, 6, 6, 32, 6, 6, 8, 6, 6, 3, 2, 3, 3, …
## $ InvoiceDate <dttm> 2010-12-01 08:26:00, 2010-12-01 08:26:00, 2010-12-01 08:2…
## $ UnitPrice <dbl> 2.55, 3.39, 2.75, 3.39, 3.39, 7.65, 4.25, 1.85, 1.85, 1.69…
## $ CustomerID <dbl> 17850, 17850, 17850, 17850, 17850, 17850, 17850, 17850, 17…
## $ Country <chr> "United Kingdom", "United Kingdom", "United Kingdom", "Uni…
## # A tibble: 541,909 × 8
## InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice
## <chr> <chr> <chr> <dbl> <dttm> <dbl>
## 1 536365 85123A WHITE HANGING HEA… 6 2010-12-01 08:26:00 2.55
## 2 536365 71053 WHITE METAL LANTE… 6 2010-12-01 08:26:00 3.39
## 3 536365 84406B CREAM CUPID HEART… 8 2010-12-01 08:26:00 2.75
## 4 536365 84029G KNITTED UNION FLA… 6 2010-12-01 08:26:00 3.39
## 5 536365 84029E RED WOOLLY HOTTIE… 6 2010-12-01 08:26:00 3.39
## 6 536365 22752 SET 7 BABUSHKA NE… 2 2010-12-01 08:26:00 7.65
## 7 536365 21730 GLASS STAR FROSTE… 6 2010-12-01 08:26:00 4.25
## 8 536366 22633 HAND WARMER UNION… 6 2010-12-01 08:28:00 1.85
## 9 536366 22632 HAND WARMER RED P… 6 2010-12-01 08:28:00 1.85
## 10 536367 84879 ASSORTED COLOUR B… 32 2010-12-01 08:34:00 1.69
## # ℹ 541,899 more rows
## # ℹ 2 more variables: CustomerID <dbl>, Country <chr>
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'tidyr' was built under R version 4.5.2
## Warning: package 'purrr' was built under R version 4.5.2
## Warning: package 'forcats' was built under R version 4.5.2
## Warning: package 'lubridate' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ lubridate 1.9.4 ✔ tibble 3.3.0
## ✔ purrr 1.2.0 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(psych)
## Warning: package 'psych' was built under R version 4.5.2
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.5.2
## corrplot 0.95 loaded
data <- read_excel("online_retail.xlsx")
Overview cepat semua data dan Analisis mendalam khusus angka untuk pemodelan statistik
summary(data)
## InvoiceNo StockCode Description Quantity
## Length:541909 Length:541909 Length:541909 Min. :-80995.000
## Class :character Class :character Class :character 1st Qu.: 1.000
## Mode :character Mode :character Mode :character Median : 3.000
## Mean : 9.552
## 3rd Qu.: 10.000
## Max. : 80995.000
##
## InvoiceDate UnitPrice CustomerID
## Min. :2010-12-01 08:26:00 Min. :-11062.060 Min. :12346
## 1st Qu.:2011-03-28 11:34:00 1st Qu.: 1.250 1st Qu.:13953
## Median :2011-07-19 17:17:00 Median : 2.080 Median :15152
## Mean :2011-07-04 13:34:57 Mean : 4.611 Mean :15288
## 3rd Qu.:2011-10-19 11:27:00 3rd Qu.: 4.130 3rd Qu.:16791
## Max. :2011-12-09 12:50:00 Max. : 38970.000 Max. :18287
## NA's :135080
## Country
## Length:541909
## Class :character
## Mode :character
##
##
##
##
describe(select(data, where(is.numeric)))
## vars n mean sd median trimmed mad min
## Quantity 1 541909 9.55 218.08 3.00 5.20 2.97 -80995.00
## UnitPrice 2 541909 4.61 96.76 2.08 2.60 1.82 -11062.06
## CustomerID 3 406829 15287.69 1713.60 15152.00 15288.25 2195.73 12346.00
## max range skew kurtosis se
## Quantity 80995 161990.00 -0.26 119767.61 0.30
## UnitPrice 38970 50032.06 186.51 59004.96 0.13
## CustomerID 18287 5941.00 0.03 -1.18 2.69
memberikan statistik deskriptif untuk variabel numerik.
# Step-by-step debugging
# 1. Cek apakah dataset ada
if(exists("online_retail")) {
cat("✅ Dataset 'online_retail' ditemukan\n")
} else {
cat("❌ Dataset tidak ditemukan. Load data dulu:\n")
library(readxl)
online_retail <- read_excel("online_retail.xlsx")
}
## ✅ Dataset 'online_retail' ditemukan
# 2. Cek apakah kolom ada
cat("\nKolom dalam dataset:\n")
##
## Kolom dalam dataset:
print(names(online_retail))
## [1] "InvoiceNo" "StockCode" "Description" "Quantity" "InvoiceDate"
## [6] "UnitPrice" "CustomerID" "Country"
# 3. Cek tipe data
cat("\nTipe data masing-masing kolom:\n")
##
## Tipe data masing-masing kolom:
print(sapply(online_retail, class))
## $InvoiceNo
## [1] "character"
##
## $StockCode
## [1] "character"
##
## $Description
## [1] "character"
##
## $Quantity
## [1] "numeric"
##
## $InvoiceDate
## [1] "POSIXct" "POSIXt"
##
## $UnitPrice
## [1] "numeric"
##
## $CustomerID
## [1] "numeric"
##
## $Country
## [1] "character"
# 4. Baru jalankan describe
library(dplyr)
library(psych)
numeric_data <- online_retail %>% select(where(is.numeric))
describe(numeric_data)
## vars n mean sd median trimmed mad min
## Quantity 1 541909 9.55 218.08 3.00 5.20 2.97 -80995.00
## UnitPrice 2 541909 4.61 96.76 2.08 2.60 1.82 -11062.06
## CustomerID 3 406829 15287.69 1713.60 15152.00 15288.25 2195.73 12346.00
## max range skew kurtosis se
## Quantity 80995 161990.00 -0.26 119767.61 0.30
## UnitPrice 38970 50032.06 186.51 59004.96 0.13
## CustomerID 18287 5941.00 0.03 -1.18 2.69
Analisi penjualannya
# Load library
library(dplyr)
library(skimr)
## Warning: package 'skimr' was built under R version 4.5.2
# 1. BUAT KOLOM BARU: TotalSales di dataset online_retail
cat("=== MEMBUAT KOLOM TOTAL SALES ===\n")
## === MEMBUAT KOLOM TOTAL SALES ===
online_retail <- online_retail %>%
mutate(TotalSales = Quantity * UnitPrice)
cat("Kolom baru telah dibuat\n")
## Kolom baru telah dibuat
cat("Kolom dalam dataset:", names(online_retail), "\n\n")
## Kolom dalam dataset: InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice CustomerID Country TotalSales
# 2. RINGKASAN 3 VARIABEL UTAMA
cat("=== RINGKASAN VARIABEL TRANSAKSI ===\n")
## === RINGKASAN VARIABEL TRANSAKSI ===
skim(online_retail[, c("Quantity", "UnitPrice", "TotalSales")])
| Name | online_retail[, c(“Quanti… |
| Number of rows | 541909 |
| Number of columns | 3 |
| _______________________ | |
| Column type frequency: | |
| numeric | 3 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| Quantity | 0 | 1 | 9.55 | 218.08 | -80995.00 | 1.00 | 3.00 | 10.00 | 80995.0 | ▁▁▇▁▁ |
| UnitPrice | 0 | 1 | 4.61 | 96.76 | -11062.06 | 1.25 | 2.08 | 4.13 | 38970.0 | ▁▇▁▁▁ |
| TotalSales | 0 | 1 | 17.99 | 378.81 | -168469.60 | 3.40 | 9.75 | 17.40 | 168469.6 | ▁▁▇▁▁ |
# 3. ANALISIS LEBIH DETAIL
cat("\n=== ANALISIS DETAIL TOTAL SALES ===\n")
##
## === ANALISIS DETAIL TOTAL SALES ===
# Statistik dasar
cat("Statistik TotalSales:\n")
## Statistik TotalSales:
summary_stats <- online_retail %>%
summarise(
Rata_rata = mean(TotalSales, na.rm = TRUE),
Median = median(TotalSales, na.rm = TRUE),
Total_Penjualan = sum(TotalSales, na.rm = TRUE),
Transaksi_Tertinggi = max(TotalSales, na.rm = TRUE),
Jumlah_Transaksi = n(),
Transaksi_Negatif = sum(TotalSales < 0, na.rm = TRUE)
)
print(summary_stats)
## # A tibble: 1 × 6
## Rata_rata Median Total_Penjualan Transaksi_Tertinggi Jumlah_Transaksi
## <dbl> <dbl> <dbl> <dbl> <int>
## 1 18.0 9.75 9747748. 168470. 541909
## # ℹ 1 more variable: Transaksi_Negatif <int>
# 4. CEK DATA PROBLEMATIK
cat("\n=== CEK DATA ANOMALI ===\n")
##
## === CEK DATA ANOMALI ===
# Cek quantity atau price negatif
anomali <- online_retail %>%
filter(Quantity <= 0 | UnitPrice <= 0) %>%
select(Quantity, UnitPrice, TotalSales)
if(nrow(anomali) > 0) {
cat("Ditemukan", nrow(anomali), "transaksi anomali:\n")
print(head(anomali, 5))
if(nrow(anomali) > 5) cat("... dan", nrow(anomali) - 5, "lagi\n")
} else {
cat("Tidak ada transaksi anomali (Quantity/UnitPrice <= 0)\n")
}
## Ditemukan 11805 transaksi anomali:
## # A tibble: 5 × 3
## Quantity UnitPrice TotalSales
## <dbl> <dbl> <dbl>
## 1 -1 27.5 -27.5
## 2 -1 4.65 -4.65
## 3 -12 1.65 -19.8
## 4 -24 0.29 -6.96
## 5 -24 0.29 -6.96
## ... dan 11800 lagi
# 5. VISUALISASI CEPAT (opsional)
cat("\n=== DISTRIBUSI TOTAL SALES ===\n")
##
## === DISTRIBUSI TOTAL SALES ===
# Filter untuk visualisasi yang lebih jelas
sales_for_plot <- online_retail$TotalSales[online_retail$TotalSales > 0 &
online_retail$TotalSales < 1000]
hist(sales_for_plot,
main = "Distribusi Total Sales (< 1000 unit)",
xlab = "Total Sales",
col = "lightblue",
breaks = 30)
Membuat PETA WARNA untuk LIHAT HUBUNGAN ANGKA-ANGKA dalam data.
# Panggil library yang diperlukan
library(dplyr)
library(corrplot)
# Pilih hanya kolom numerik dari data
numeric_data <- select(data, where(is.numeric))
# Hitung matriks korelasi
cor_matrix <- cor(numeric_data, use = "complete.obs")
# Visualisasi korelasi dengan corrplot
corrplot(cor_matrix, method = "circle", type = "upper", tl.cex = 0.8)
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
ringkasan statistik (mean, median, min, max, quartil)
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example: BUAT PLOT/GRAFIK SEDERHANA
Program ini analisis statistik sederhana untuk memahami perilaku pembeli
dari data transaksi retail online.
library(ggplot2)
library(readxl)
# 1. Baca file Excel
online_retail <- read_excel("online_retail.xlsx")
# 2. Buat histogram Quantity
ggplot(online_retail, aes(x = Quantity)) +
geom_histogram(binwidth = 3, fill = "skyblue", color = "black") +
theme_minimal() +
labs(
title = "Distribusi Quantity",
x = "Quantity",
y = "Frekuensi"
)
library(tidyverse)
library(readxl)
menampilkan distribusi jumlah barang per transaksi dalam bentuk histogram berwarna dengan keterangan angka frekuensi langsung di grafik.
# Install & load packages jika belum
# install.packages("ggplot2")
# install.packages("readxl")
# install.packages("dplyr")
library(ggplot2)
library(readxl)
library(dplyr)
# 1. Baca file Excel
online_retail <- read_excel("online_retail.xlsx")
# 2. Filter data agar hanya Quantity positif
online_retail_clean <- online_retail %>%
filter(Quantity > 0)
# 3. Buat histogram Quantity dengan label frekuensi
ggplot(online_retail_clean, aes(x = Quantity)) +
geom_histogram(binwidth = 10, fill = "orange", color = "darkblue") +
geom_text(
stat = "bin",
binwidth = 10,
aes(label = ..count..),
vjust = -0.5,
color = "darkred",
size = 3
) +
theme_light(base_size = 12) +
theme(
panel.background = element_rect(fill = "lightgray"),
plot.background = element_rect(fill = "lightgray"),
panel.grid.major = element_line(color = "white"),
panel.grid.minor = element_line(color = "white")
) +
labs(
title = "Distribusi Quantity (Transaksi Positif)",
subtitle = "Setiap batang menunjukkan jumlah transaksi per interval Quantity",
x = "Quantity",
y = "Frekuensi"
)
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
menampilkan distribusi total penjualan per transaksi dalam bentuk histogram berwarna gradasi agar lebih informatif dan mudah dibaca.
# Install & load packages jika belum
# install.packages("ggplot2")
# install.packages("readxl")
# install.packages("dplyr")
library(ggplot2)
library(readxl)
library(dplyr)
# 1. Baca file Excel
online_retail <- read_excel("online_retail.xlsx")
# 2. Buat kolom TotalSales (Quantity * UnitPrice)
online_retail <- online_retail %>%
mutate(TotalSales = Quantity * UnitPrice)
# 3. Buat histogram TotalSales dengan warna gradasi
ggplot(online_retail, aes(x = TotalSales)) +
geom_histogram(aes(fill = ..count..), binwidth = 100, color = "black") +
scale_fill_gradient(low = "lightgreen", high = "darkgreen") +
theme_classic() +
labs(
title = "Distribusi Total Penjualan (TotalSales)",
x = "Total Penjualan per Transaksi",
y = "Frekuensi",
fill = "Jumlah"
)
menganalisis produk terlaris dan memvisualisasikan hasilnya dalam grafik yang informatif.
# Install & load packages jika belum
# install.packages("tidyverse")
# install.packages("readxl")
library(tidyverse)
library(readxl)
# 1. Baca file Excel
online_retail <- read_excel("online_retail.xlsx")
# 2. Buat kolom TotalSales (Quantity * UnitPrice)
online_retail <- online_retail %>%
mutate(TotalSales = Quantity * UnitPrice)
# 3. Hitung 5 produk terlaris berdasarkan StockCode
produk_terlaris <- online_retail %>%
group_by(StockCode, Description) %>% # gunakan juga nama produk agar lebih informatif
summarise(TotalSales = sum(TotalSales), .groups = "drop") %>%
arrange(desc(TotalSales)) %>%
slice_head(n = 5)
# 4. Buat grafik batang horizontal
ggplot(produk_terlaris, aes(x = reorder(Description, TotalSales), y = TotalSales)) +
geom_bar(stat = "identity", fill = "maroon") +
coord_flip() +
labs(
title = "5 Produk Terlaris Berdasarkan Penjualan",
x = "Nama Produk",
y = "Total Penjualan"
) +
theme_minimal() +
theme(axis.text.y = element_text(size = 10))
menampilkan daftar nama kolom dalam data.
colnames(online_retail)
## [1] "InvoiceNo" "StockCode" "Description" "Quantity" "InvoiceDate"
## [6] "UnitPrice" "CustomerID" "Country" "TotalSales"
menganalisis performa transaksi bulanan, membandingkan dengan target, dan memberi status pencapaian tiap bulan.
# Pastikan package yang dipakai
library(dplyr)
library(tidyr)
# 1) Buat kolom bulan (singkatan Jan, Feb, ... Dec) sebagai faktor berlevel Jan–Dec
online_retail <- online_retail %>%
mutate(
bulan_chr = format(InvoiceDate, "%b"),
bulan = factor(bulan_chr, levels = month.abb)
)
# 2) Hitung jumlah transaksi per bulan, sekaligus lengkapi bulan yang tidak muncul (isi 0)
tab <- online_retail %>%
count(bulan, name = "realisasi") %>%
complete(bulan, fill = list(realisasi = 0)) %>%
arrange(bulan)
# 3) Tambah target (10% di atas rata-rata realisasi bulanan), selisih, dan status
avg_realisasi <- mean(tab$realisasi)
target_val <- round(avg_realisasi * 1.10)
tab <- tab %>%
mutate(
target = target_val,
selisih = realisasi - target,
status = ifelse(realisasi >= target, "Tercapai", "Belum Tercapai")
)
# Lihat hasil
print(tab)
## # A tibble: 12 × 5
## bulan realisasi target selisih status
## <fct> <int> <dbl> <dbl> <chr>
## 1 Jan 35147 49675 -14528 Belum Tercapai
## 2 Feb 27707 49675 -21968 Belum Tercapai
## 3 Mar 36748 49675 -12927 Belum Tercapai
## 4 Apr 29916 49675 -19759 Belum Tercapai
## 5 May 37030 49675 -12645 Belum Tercapai
## 6 Jun 36874 49675 -12801 Belum Tercapai
## 7 Jul 39518 49675 -10157 Belum Tercapai
## 8 Aug 35284 49675 -14391 Belum Tercapai
## 9 Sep 50226 49675 551 Tercapai
## 10 Oct 60742 49675 11067 Tercapai
## 11 Nov 84711 49675 35036 Tercapai
## 12 Dec 68006 49675 18331 Tercapai
menganalisis performa transaksi bulanan dan memvisualisasikan pencapaian target dengan grafik.
library(readxl)
library(ggplot2)
# Baca file Excel
online_retail <- read_excel("online_retail.xlsx")
# Buat kolom bulan
online_retail$bulan <- format(online_retail$InvoiceDate, "%b")
# Hitung jumlah transaksi per bulan
tab <- as.data.frame(table(online_retail$bulan))
names(tab) <- c("bulan", "realisasi")
# Urutkan bulan Jan–Dec
tab$bulan <- factor(tab$bulan, levels = month.abb)
tab <- tab[order(tab$bulan), ]
# Tambah target, selisih, status
tab$target <- round(mean(tab$realisasi) * 1.10)
tab$selisih <- tab$realisasi - tab$target
tab$status <- ifelse(tab$realisasi >= tab$target, "Tercapai", "Belum Tercapai")
# Grafik
ggplot(tab, aes(x = bulan)) +
geom_col(aes(y = target), fill = "lightblue", alpha = 0.6) +
geom_line(aes(y = realisasi, group = 1), color = "darkblue", linewidth = 1) +
geom_point(aes(y = realisasi, color = status), size = 3) +
scale_color_manual(values = c("Tercapai" = "forestgreen", "Belum Tercapai" = "red")) +
theme_minimal()