Bab 1 Assignment 13

# Install DT only when it is missing. requireNamespace() checks availability
# without attaching the package, so library() below is the single place where
# DT is loaded, and it stops with an error if the installation did not succeed.
if (!requireNamespace("DT", quietly = TRUE)) {
  install.packages("DT")
}
library(DT)


# Load the data set and preview its first 10 rows as an interactive table
data <- read.csv("8 Visualisasi Deskriptif – Pemrograman Ilmu Data.csv")
datatable(head(data, 10), options = list(pageLength = 10))

1.1 Combo

1.1.1 Heatmap

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the CSV file
data <- read.csv("8 Visualisasi Deskriptif – Pemrograman Ilmu Data.csv", stringsAsFactors = FALSE)

# Mean of Jumlah_Wilayah_Penjualan for every Kategori_Produk x Wilayah pair.
# `.groups = "drop"` returns an ungrouped result directly, which replaces the
# trailing ungroup() and avoids the "grouped output" message from summarise().
ringkas <- data %>%
  group_by(Kategori_Produk, Wilayah) %>%
  summarise(
    Rata2_Penjualan = mean(Jumlah_Wilayah_Penjualan, na.rm = TRUE),
    .groups = "drop"
  )
# Draw the heatmap: one tile per Kategori_Produk x Wilayah pair, filled by the
# averaged value and annotated with that value rounded to two decimals
ggplot(
  data = ringkas,
  mapping = aes(x = Kategori_Produk, y = Wilayah, fill = Rata2_Penjualan)
) +
  geom_tile(color = "white") +
  # Print the value inside each tile
  geom_text(
    mapping = aes(label = round(Rata2_Penjualan, digits = 2)),
    size = 4
  ) +
  scale_fill_gradient(low = "lightyellow", high = "red") +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 10),
    axis.title = element_text(size = 12),
    plot.title = element_text(hjust = 0.5, size = 16)
  ) +
  labs(
    x = "Product Category",
    y = "Region",
    fill = "Rata-rata Penjualan",
    title = "Heatmap Sales by Category and Region"
  )

1.2 Relationship

1.2.1 Scatter Plot

library(ggplot2)

# Load the data; check.names = FALSE keeps headers such as "Harga satuan"
# exactly as they appear in the file
df <- read.csv(
  file = "8 Visualisasi Deskriptif – Pemrograman Ilmu Data.csv",
  check.names = FALSE
)

# Scatter plot of `Harga satuan` against Total_Harga, points coloured by Wilayah
ggplot(data = df, mapping = aes(x = `Harga satuan`, y = Total_Harga)) +
  geom_point(
    mapping = aes(color = Wilayah),
    shape = 16,
    size = 3,
    alpha = 0.7
  ) +
  theme_minimal() +
  theme(legend.position = "right") +
  labs(
    x = "Harga Satuan",
    y = "Total Harga",
    title = "Scatter Plot: Harga Satuan vs Total Harga"
  )

1.2.2 Bubble Chart

library(ggplot2)
data <- read.csv(file = "8 Visualisasi Deskriptif – Pemrograman Ilmu Data.csv")

# Bubble chart: Kuantitas vs Harga.satuan, bubble size mapped to Total_Harga
# and colour mapped to Kategori_Produk
ggplot(
  data = data,
  mapping = aes(
    x = Kuantitas,
    y = Harga.satuan,
    size = Total_Harga,
    color = Kategori_Produk
  )
) +
  geom_point(alpha = 0.6) +
  scale_size_continuous(range = c(3, 12)) +
  labs(
    x = "Kuantitas",
    y = "Harga Satuan",
    size = "Total Harga",
    color = "Kategori Produk",
    title = "Bubble Chart: Kuantitas vs Harga Satuan per Kategori Produk"
  ) +
  theme_minimal()

1.2.3 Correlation Matrix

library(ggplot2)
library(reshape2)
data <- read.csv("8 Visualisasi Deskriptif – Pemrograman Ilmu Data.csv")

# Keep only the numeric columns (vapply always yields a logical mask)
numeric_data <- data[vapply(data, is.numeric, logical(1))]

# Casewise deletion: keep the rows without any missing value, i.e. the rows
# that cor(..., use = "complete.obs") would use
complete_rows <- numeric_data[complete.cases(numeric_data), , drop = FALSE]

# A column with zero standard deviation has no defined correlation: cor() warns
# ("the standard deviation is zero") and fills its row and column with NA.
# Drop such constant columns before computing the matrix.
has_variance <- vapply(
  complete_rows,
  function(column) isTRUE(sd(column) > 0),
  logical(1)
)

# Correlation matrix (Pearson is the default method)
cor_matrix <- cor(complete_rows[has_variance])
# Reshape the correlation matrix to long format (melting)
cor_data <- melt(cor_matrix)

# Draw the correlation heatmap with a diverging fill centred on 0
ggplot(data = cor_data, mapping = aes(x = Var1, y = Var2, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    name = "Korelasi",
    low = "blue",
    mid = "white",
    high = "red",
    midpoint = 0,
    limit = c(-1, 1)
  ) +
  coord_fixed() +
  labs(
    x = "",
    y = "",
    title = "Correlation Matrix (Heatmap)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))

1.3 Time Series

1.3.1 Line Chart

library(ggplot2)
library(dplyr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(readr)

# Read the data set with readr
data <- read_csv(
  file = "8 Visualisasi Deskriptif – Pemrograman Ilmu Data.csv"
)
## New names:
## • `` -> `...1`
## Rows: 500 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (8): ID_Transaksi, ID_Pelanggan, Kategori_Produk, ID_Produk, Wilayah, ...
## dbl  (15): ...1, Kuantitas, Harga satuan, Diskon, Waktu Pengiriman, Total_Ha...
## lgl   (1): ID_MemilikiPola
## date  (1): Tanggal_Transaksi
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Convert the transaction date column to Date
data$Tanggal_Transaksi <- ymd(data$Tanggal_Transaksi)

# Month bucket (date floored to the month) used for monthly aggregation
data$Bulan <- floor_date(x = data$Tanggal_Transaksi, unit = "month")

# Total sales per month and product category, where each row contributes
# Kuantitas * `Harga satuan`
data_summary <- data %>%
  group_by(Bulan, Kategori_Produk) %>%
  summarise(
    Total_Penjualan = sum(Kuantitas * `Harga satuan`, na.rm = TRUE),
    .groups = "drop"
  )

# Line chart of monthly totals, one line per product category.
# `linewidth` sets the line thickness; using `size` for lines has been
# deprecated since ggplot2 3.4.0 and triggers a lifecycle warning.
ggplot(data_summary, aes(x = Bulan, y = Total_Penjualan, color = Kategori_Produk)) +
  geom_line(linewidth = 1, alpha = 0.85) +
  scale_color_brewer(palette = "Set2") +
  # Label every second month as abbreviated month name plus year
  scale_x_date(date_labels = "%b %Y", date_breaks = "2 months") +
  labs(
    title = "Tren Penjualan Bulanan per Kategori Produk",
    subtitle = "Visualisasi Time Series: Clothing, Electronics, Home, dll",
    x = "Bulan",
    y = "Total Penjualan",
    color = "Kategori Produk"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 12, margin = margin(b = 10)),
    legend.position = "bottom"
  )

1.3.2 Area Chart

library(ggplot2)
library(dplyr)
library(lubridate)
library(readr)

# Read the data set with readr
data <- read_csv(
  file = "8 Visualisasi Deskriptif – Pemrograman Ilmu Data.csv"
)
## New names:
## Rows: 500 Columns: 25
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (8): ID_Transaksi, ID_Pelanggan, Kategori_Produk, ID_Produk, Wilayah, ... dbl
## (15): ...1, Kuantitas, Harga satuan, Diskon, Waktu Pengiriman, Total_Ha... lgl
## (1): ID_MemilikiPola date (1): Tanggal_Transaksi
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Convert the transaction date to Date and derive its month bucket
data$Tanggal_Transaksi <- ymd(data$Tanggal_Transaksi)
data$Bulan <- floor_date(x = data$Tanggal_Transaksi, unit = "month")

# Total sales per month and product category, where each row contributes
# Kuantitas * `Harga satuan`
data_summary <- data %>%
  group_by(Bulan, Kategori_Produk) %>%
  summarise(
    Total_Penjualan = sum(Kuantitas * `Harga satuan`, na.rm = TRUE),
    .groups = "drop"
  )

# Stacked area chart of monthly totals, one band per product category.
# The subtitle spelling is corrected from "Kumlatif" to "Kumulatif".
ggplot(data_summary, aes(x = Bulan, y = Total_Penjualan, fill = Kategori_Produk)) +
  geom_area(position = "stack", alpha = 0.8) +
  scale_fill_brewer(palette = "Set2") +
  # Label every second month as abbreviated month name plus year
  scale_x_date(date_labels = "%b %Y", date_breaks = "2 months") +
  labs(
    title = "Area Chart: Total Penjualan Bulanan per Kategori",
    subtitle = "Visualisasi Kumulatif Penjualan Produk per Bulan",
    x = "Bulan",
    y = "Total Penjualan",
    fill = "Kategori Produk"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 12),
    legend.position = "bottom"
  )