Descriptive Visualizations

Programming Data Science

awokwowk


Dataset

library(readr)

# Read Data_Bisnis.csv with readr and echo the result so the parsed columns
# can be inspected before any chart is built.
Data_Bisnis <- read_csv(file = "Data_Bisnis.csv")

Data_Bisnis

1. Categorical Data

1.1 Bar Chart

# Load required libraries
library(dplyr)        # For data manipulation
library(ggplot2)      # For creating the bar chart
library(RColorBrewer) # For color palette
library(scales)       # For formatting currency labels
library(readr)        # For read csv

# Step 1: Prepare the data
# Sum Total_Price per product category and sort from largest to smallest.
data_bisnis <- read_csv("Data_Bisnis.csv")
sales_summary <- data_bisnis %>%
  group_by(Product_Category) %>%
  summarise(Total_Sales = sum(Total_Price, na.rm = TRUE)) %>%
  arrange(desc(Total_Sales))

# Step 2: Generate a color palette
# brewer.pal() only supports 3 to 9 colours for "Pastel1" (it warns and clamps
# outside that range), which leaves scale_fill_manual() short of values when
# there are more than 9 categories. Start from the full 9-colour palette and
# either take its first n colours or interpolate up to n.
n_categories <- nrow(sales_summary)
base_palette <- brewer.pal(n = 9, name = "Pastel1")
custom_colors <- if (n_categories <= length(base_palette)) {
  base_palette[seq_len(n_categories)]
} else {
  colorRampPalette(base_palette)(n_categories)
}

# Step 3: Create bar chart with value labels
ggplot(sales_summary, aes(x = reorder(Product_Category, -Total_Sales),
                          y = Total_Sales,
                          fill = Product_Category)) +
  geom_col(show.legend = FALSE) +
  # Print the formatted total just above each bar.
  geom_text(aes(label = scales::label_comma(prefix = "Rp ")(Total_Sales)),
            vjust = -0.5, size = 2) +
  scale_fill_manual(values = custom_colors) +
  # Bars start at zero; 10% headroom on top leaves space for the labels.
  scale_y_continuous(labels = scales::label_comma(prefix = "Rp "),
                     expand = expansion(mult = c(0, 0.1))) +
  labs(
    title = "Total Sales by Product Category (2020–2024)",
    subtitle = "Based on Transaction Value",
    x = "Product Category",
    y = "Total Sales",
    caption = "@sssiiipaaa")


1.2 Pie Chart

# Muat pustaka yang diperlukan
library(dplyr)         # Untuk manipulasi data
library(ggplot2)       # Untuk visualisasi data
library(RColorBrewer)  # Untuk palet warna
library(scales)        # Untuk format persentase
library(readr)

# Step 1: Summarise total sales per product category

data_bisnis <- read.csv("Data_Bisnis.csv")
ringkasan_penjualan <- data_bisnis %>%
  group_by(Product_Category) %>%
  summarise(Total_Penjualan = sum(Total_Price, na.rm = TRUE)) %>%
  arrange(desc(Total_Penjualan)) %>%
  mutate(
    Persentase = Total_Penjualan / sum(Total_Penjualan), # share of the grand total
    Label = paste0(Product_Category, "\n",
                   scales::percent(Persentase, accuracy = 1)) # category name over its share
  )

# Step 2: Build the fill palette
# brewer.pal() only supports 3 to 9 colours for "Pastel1"; outside that range
# it warns and clamps, which leaves scale_fill_manual() short of values when
# there are more than 9 categories. Take the first n colours of the full
# palette, or interpolate when more are needed.
jumlah_kategori <- nrow(ringkasan_penjualan)
palet_dasar <- brewer.pal(n = 9, name = "Pastel1")
warna_khusus <- if (jumlah_kategori <= length(palet_dasar)) {
  palet_dasar[seq_len(jumlah_kategori)]
} else {
  colorRampPalette(palet_dasar)(jumlah_kategori)
}

# Step 3: Draw the donut chart
ggplot(ringkasan_penjualan, aes(x = 2, y = Persentase, fill = Product_Category)) +
  geom_col(width = 1, color = "white", show.legend = FALSE) + # donut slices
  coord_polar(theta = "y") +  # switch to a circular layout
  geom_text(aes(label = Label),   # label in the middle of each slice
            position = position_stack(vjust = 0.5),
            size = 2, color = "white", fontface = "bold") +
  scale_fill_manual(values = warna_khusus) +
  xlim(0.5, 2.5) +  # empty inner radius turns the pie into a donut
  labs(
    title = "Distribusi Penjualan per Kategori Produk (2020–2024)",
    subtitle = "Berdasarkan Total Nilai Transaksi",
    caption = "@sssiiipaaaa"
  ) +
  theme_void(base_size = 10) +  # no axes or grid
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5), # centred title
    plot.subtitle = element_text(margin = margin(t = 8, b = 20), hjust = 0.5),
    # hjust = 1 right-aligns the caption; the previous value of 1.5 anchored it
    # beyond the right edge of the plot.
    plot.caption = element_text(margin = margin(t = 15), hjust = 1,
                                color = "gray20", face = "italic")
  )


1.3 Word Cloud

# ==============================
# 1. Instal & Muat Paket yang Diperlukan
# ==============================

library(dplyr)
library(tm)
library(wordcloud)
library(RColorBrewer)

# ==============================
# 2. Read the data and combine the text columns
# ==============================
data_bisnis <- read.csv("Data_Bisnis.csv")

# One string per row: product category, region and sales channel
data_teks <- paste(data_bisnis$Product_Category,
                   data_bisnis$Region,
                   data_bisnis$Sales_Channel,
                   sep = " ")

# ==============================
# 3. Clean and prepare the text
# ==============================
korpus <- VCorpus(VectorSource(data_teks))

korpus_bersih <- korpus %>%
  tm_map(content_transformer(tolower)) %>%       # lower-case everything
  tm_map(removePunctuation) %>%                  # drop punctuation
  tm_map(removeNumbers) %>%                      # drop digits
  tm_map(removeWords, stopwords("english")) %>%  # drop English stop words
  tm_map(stripWhitespace)                        # collapse repeated whitespace

# Drop documents that are empty after cleaning. vapply() guarantees a logical
# vector even for an empty corpus, and trimws() also treats whitespace-only
# documents as empty.
indeks_tidak_kosong <- vapply(
  korpus_bersih,
  function(doc) any(nzchar(trimws(content(doc)))),
  logical(1)
)
korpus_bersih <- korpus_bersih[indeks_tidak_kosong]

# ==============================
# 4. Build the term-document matrix and word frequencies
# ==============================
tdm <- TermDocumentMatrix(korpus_bersih)
m <- as.matrix(tdm)
frekuensi_kata <- sort(rowSums(m), decreasing = TRUE)
df_kata <- data.frame(kata = names(frekuensi_kata), frekuensi = frekuensi_kata)


# ==============================
# 5. Generate the word cloud
# ==============================

set.seed(123)  # fixed seed so the word placement is reproducible
wordcloud(words = df_kata$kata,
          freq = df_kata$frekuensi,
          # scale is c(largest, smallest); the previous c(1, 5) was reversed,
          # which drew the most frequent words at the smallest size.
          scale = c(5, 1),
          min.freq = 1,
          max.words = 100,
          random.order = FALSE,
          rot.per = 0.3,
          colors = brewer.pal(5, "Pastel1"))  # pastel palette


1.4 TreeMap

# ==============================
# 1. Instal & Muat Paket yang Diperlukan
# ==============================

# Muat pustaka
library(treemapify)
library(ggplot2)
library(dplyr)
library(readr)

# ==============================
# 2. Aggregate the data for the treemap
# ==============================
data_bisnis <- read_csv("Data_Bisnis.csv")

# One row per (product category, region) pair holding the summed Total_Price,
# plus a two-line tile label: region name over the rounded total.
data_treemap <- data_bisnis %>%
  rename(Kategori_Produk = Product_Category, Wilayah = Region) %>%
  group_by(Kategori_Produk, Wilayah) %>%
  summarise(Total_Penjualan = sum(Total_Price, na.rm = TRUE), .groups = "drop") %>%
  mutate(label_gabungan = paste0(Wilayah, "\n", round(Total_Penjualan, 0)))

# ==============================
# 3. Draw the static treemap with combined labels
# ==============================

# Pastel fill colours, one per product category
warna_pastel <- c(
  "#FFB3BA", "#FFDFBA", "#FFFFBA", "#BAFFC9", "#BAE1FF",
  "#EAD1DC", "#D5AAFF", "#A0C4FF", "#BDB2FF", "#FFC6FF"
)

ggplot(
  data_treemap,
  aes(area = Total_Penjualan, fill = Kategori_Produk, subgroup = Kategori_Produk)
) +
  # Tiles sized by total sales, with a white border around each category.
  geom_treemap() +
  geom_treemap_subgroup_border(color = "white") +
  # Fixed-size, centred labels that wrap inside their tile.
  geom_treemap_text(
    aes(label = label_gabungan),
    colour = "black",
    place = "centre",
    grow = FALSE,
    reflow = TRUE,
    size = 20 / .pt,
    min.size = 2
  ) +
  scale_fill_manual(values = warna_pastel) +
  labs(
    title = "Peta Pohon Total Penjualan berdasarkan Kategori Produk dan Wilayah",
    fill = "Kategori Produk"
  ) +
  theme_minimal()


2. Numerical Data

2.1 Histogram

# ==============================
# 1. Load Required Libraries
# ==============================
library(ggplot2)
library(dplyr)
library(readr)

# ==============================
# 2. Prepare Data
# ==============================
# Read the CSV and coerce Quantity to numeric in a single pipeline.
data_bisnis <- read.csv("Data_Bisnis.csv") %>%
  mutate(Quantity = as.numeric(Quantity))

# ==============================
# 3. Create Histogram of Quantity with Custom Font Sizes
# ==============================
ggplot(data_bisnis, aes(x = Quantity)) +
  # One bin per unit of Quantity.
  geom_histogram(
    binwidth = 1,
    alpha = 0.7,
    color = "skyblue",
    fill = "lightpink"
  ) +
  labs(
    title = "Histogram of Quantity Distribution",
    x = "Quantity",
    y = "Frequency"
  ) +
  theme_minimal() +
  theme(
    # Bold, larger title; smaller axis titles and tick labels.
    plot.title = element_text(size = 15, face = "bold"),
    axis.title.x = element_text(size = 10),
    axis.title.y = element_text(size = 10),
    axis.text.x = element_text(size = 8),
    axis.text.y = element_text(size = 8)
  )


2.2 Density Plot


# ==============================
# 1. Load Required Libraries
# ==============================
library(ggplot2)
library(dplyr)
library(readr)

# ==============================
# 2. Prepare Data
# ==============================
# Read the CSV, coerce Quantity to numeric and keep only non-missing rows.
data_bisnis <- read.csv("Data_Bisnis.csv") %>%
  mutate(Quantity = as.numeric(Quantity)) %>%
  filter(!is.na(Quantity))

# Mean of Quantity, drawn as a vertical reference line below
mean_quantity <- mean(data_bisnis$Quantity, na.rm = TRUE)

# Peak of the kernel density estimate, used to position the label vertically
density_data <- density(data_bisnis$Quantity)
max_y <- max(density_data$y)

# ==============================
# 3. Create Density Plot with Mean Line and Label
# ==============================
ggplot(data_bisnis, aes(x = Quantity)) +
  geom_density(fill = "skyblue", alpha = 0.6) +
  geom_vline(
    xintercept = mean_quantity,
    color = "lightpink",
    linetype = "dashed",
    linewidth = 1
  ) +
  # Single rotated label next to the mean line, at 80% of the density peak.
  geom_text(
    data = data.frame(x = mean_quantity, y = max_y * 0.8),
    mapping = aes(x = x, y = y),
    label = paste("Mean =", round(mean_quantity, 2)),
    inherit.aes = FALSE,
    angle = 90,
    vjust = -0.5,
    size = 3,
    color = "black",
    fontface = "bold"
  ) +
  labs(
    title = "Density Plot of Quantity with Mean",
    x = "Quantity",
    y = "Density"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 15, face = "bold"),
    axis.title = element_text(size = 10),
    axis.text = element_text(size = 8)
  )

2.3 Boxplot

# ==============================
# 1. Load Libraries
# ==============================
library(ggplot2)
library(dplyr)
library(readr)

# ==============================
# 2. Load and Prepare Data
# ==============================
data_bisnis <- read.csv("Data_Bisnis.csv", stringsAsFactors = FALSE)

# Convert Quantity to numeric and drop rows where it is missing
data_bisnis <- data_bisnis %>%
  mutate(Quantity = as.numeric(Quantity)) %>%
  filter(!is.na(Quantity))

# IQR-based outlier bounds (1.5 * IQR beyond the quartiles)
Q1 <- quantile(data_bisnis$Quantity, 0.25)
Q3 <- quantile(data_bisnis$Quantity, 0.75)
IQR_value <- IQR(data_bisnis$Quantity)
lower_whisker <- Q1 - 1.5 * IQR_value
upper_whisker <- Q3 + 1.5 * IQR_value

# ==============================
# 3. Summarize Statistics
# ==============================
# One-row summary that feeds the text annotations below.
stats <- data_bisnis %>%
  summarise(
    Mean = mean(Quantity),
    Q1 = Q1,
    Median = median(Quantity),
    Q3 = Q3,
    Min = min(Quantity),
    Max = max(Quantity),
    Outliers = sum(Quantity < lower_whisker | Quantity > upper_whisker)
  )

# ==============================
# 4. Basic Boxplot with Jitter and Annotations
# ==============================
ggplot(data_bisnis, aes(x = factor(1), y = Quantity)) +
  # Box only; outliers are drawn by the jitter layer instead
  geom_boxplot(fill = "lightpink", outlier.shape = NA) +

  # Jittered points, outliers in red. height = 0 restricts the jitter to the
  # horizontal direction: the default vertical jitter would move the plotted
  # Quantity values. The seed makes the layout reproducible without touching
  # the global RNG state.
  geom_jitter(aes(color = Quantity < lower_whisker | Quantity > upper_whisker),
              position = position_jitter(width = 0.1, height = 0, seed = 123),
              size = 2, alpha = 0.5) +
  scale_color_manual(values = c("FALSE" = "skyblue", "TRUE" = "red"), guide = "none") +

  # Highlight the maximum point when it is not an outlier
  geom_point(data = data_bisnis %>% filter(Quantity == stats$Max[[1]] & Quantity <= upper_whisker),
             aes(x = factor(1), y = Quantity),
             color = "red", size = 8) +

  # Annotations. annotate() is namespace-qualified, presumably because NLP
  # (attached together with tm earlier in this file) also exports annotate().
  ggplot2::annotate("text", x = 1.2, y = stats$Mean[[1]],
           label = paste("Mean:", round(stats$Mean[[1]], 2)),
           hjust = 0, fontface = "bold", color = "black") +
  ggplot2::annotate("text", x = 1.2, y = stats$Q1[[1]],
           label = paste("Q1:", round(stats$Q1[[1]], 2)),
           hjust = 0, color = "black") +
  ggplot2::annotate("text", x = 1.2, y = stats$Median[[1]],
           label = paste("Median:", round(stats$Median[[1]], 2)),
           hjust = 0, color = "purple") +
  ggplot2::annotate("text", x = 1.2, y = stats$Q3[[1]],
           label = paste("Q3:", round(stats$Q3[[1]], 2)),
           hjust = 0, color = "black") +
  ggplot2::annotate("text", x = 1.2, y = stats$Min[[1]],
           label = paste("Min:", round(stats$Min[[1]], 2)),
           hjust = 0, color = "violet") +
  ggplot2::annotate("text", x = 1.2, y = stats$Max[[1]],
           label = paste("Max:", round(stats$Max[[1]], 2)),
           hjust = 0, color = "violet") +
  # Outlier count, placed 5% above the maximum
  ggplot2::annotate("text", x = 1, y = stats$Max[[1]] + 0.05 * stats$Max[[1]],
           label = paste("Outliers:", stats$Outliers[[1]]),
           color = "red", fontface = "italic", hjust = 0.5) +

  # Plot formatting
  labs(
    title = "Boxplot of Quantity with Jitter and Annotations",
    x = NULL,
    y = "Quantity"
  ) +
  theme_minimal() +
  theme(
    # The single dummy x category carries no information, so hide its axis
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    plot.title = element_text(size = 15, face = "bold"),
    axis.title = element_text(size = 10),
    axis.text = element_text(size = 8)
  )


2.4 Violin Plot

# ==============================
# 1. Load Libraries
# ==============================
library(ggplot2)
library(dplyr)

# ==============================
# 2. Load and Prepare Data
# ==============================
data_bisnis <- read.csv("Data_Bisnis.csv")

# Convert Quantity to numeric and drop rows where it is missing
data_bisnis <- data_bisnis %>%
  mutate(Quantity = as.numeric(Quantity)) %>%
  filter(!is.na(Quantity))

# Quartiles and IQR-based outlier bounds (1.5 * IQR beyond the quartiles)
Q1 <- quantile(data_bisnis$Quantity, 0.25)
Q3 <- quantile(data_bisnis$Quantity, 0.75)
IQR_value <- IQR(data_bisnis$Quantity)
upper_whisker <- Q3 + 1.5 * IQR_value
lower_whisker <- Q1 - 1.5 * IQR_value

# Label each row as "Outlier" or "Normal" for the colour scale
data_bisnis <- data_bisnis %>%
  mutate(
    is_outlier = ifelse(Quantity < lower_whisker | Quantity > upper_whisker, "Outlier", "Normal")
  )

# ==============================
# 3. Summarize Statistics
# ==============================
# One-row summary used as the data source for the text annotations below.
stats <- data_bisnis %>%
  summarise(
    Mean = mean(Quantity),
    Q1 = Q1,
    Median = median(Quantity),
    Q3 = Q3,
    Min = min(Quantity),
    Max = max(Quantity),
    Outliers = sum(is_outlier == "Outlier")
  )

# ==============================
# 4. Create Violin Plot with Colored Jitter and Annotations
# ==============================
ggplot(data_bisnis, aes(x = factor(1), y = Quantity)) +
  geom_violin(fill = "pink", trim = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA, color = "black") +
  # height = 0 restricts the jitter to the horizontal direction: the default
  # vertical jitter would move the plotted Quantity values. The seed makes the
  # layout reproducible without touching the global RNG state.
  geom_jitter(aes(color = is_outlier),
              position = position_jitter(width = 0.1, height = 0, seed = 123),
              alpha = 0.5, size = 2) +
  # Highlight the maximum point when it is not an outlier
  geom_point(data = data_bisnis %>%
               filter(Quantity == stats$Max[[1]] & Quantity <= upper_whisker),
             aes(x = factor(1), y = Quantity),
             color = "red", size = 8) +

  # Annotations via geom_text, each reading from the one-row stats table
  geom_text(data = stats, aes(x = 1.2, y = Mean, label = paste("Mean:", round(Mean, 2))),
            hjust = 0, color = "black", fontface = "bold") +
  geom_text(data = stats, aes(x = 1.2, y = Q1, label = paste("Q1:", round(Q1, 2))),
            hjust = 0, color = "darkblue") +
  geom_text(data = stats, aes(x = 1.2, y = Median, label = paste("Median:", round(Median, 2))),
            hjust = 0, color = "purple") +
  geom_text(data = stats, aes(x = 1.2, y = Q3, label = paste("Q3:", round(Q3, 2))),
            hjust = 0, color = "darkblue") +
  geom_text(data = stats, aes(x = 1.2, y = Min, label = paste("Min:", round(Min, 2))),
            hjust = 0, color = "violet") +
  geom_text(data = stats, aes(x = 1.2, y = Max, label = paste("Max:", round(Max, 2))),
            hjust = 0, color = "violet") +
  # Outlier count, placed 5% above the maximum
  geom_text(data = stats, aes(x = 1, y = Max + 0.05 * Max,
                             label = paste("Outliers:", Outliers)),
            color = "red", fontface = "italic", hjust = 0.5) +

  scale_color_manual(values = c("Normal" = "skyblue", "Outlier" = "red")) +

  labs(
    title = "Violin Plot of Quantity with Outlier Highlighted",
    x = NULL,
    y = "Quantity",
    color = "Point Type"
  ) +
  theme_minimal() +
  theme(
    # The single dummy x category carries no information, so hide its axis
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    plot.title = element_text(size = 20, face = "bold"),
    axis.title = element_text(size = 10),
    axis.text = element_text(size = 10),
    legend.position = "right",
    legend.title = element_text(size = 5),
    legend.text = element_text(size = 8)
  )


3. Combo

3.1 Grouped Bar Chart

# ==============================
# 1. Load Libraries
# ==============================
library(ggplot2)
library(dplyr)
library(readr)

# ==============================
# 2. Load Data
# ==============================
data_bisnis <- read.csv("Data_Bisnis.csv")

# ==============================
# 3. Data Summarization
# ==============================
# Summed Total_Price for every (product category, region) pair.
sales_summary <- data_bisnis %>%
  group_by(Product_Category, Region) %>%
  summarise(Total_Sales = sum(Total_Price, na.rm = TRUE), .groups = "drop")

# ==============================
# 4. Plot Grouped Bar Chart
# ==============================
ggplot(sales_summary, aes(x = Product_Category, y = Total_Sales, fill = Region)) +
  # geom_col() is the idiomatic form of geom_bar(stat = "identity");
  # dodging places the regions side by side within each category.
  geom_col(position = position_dodge()) +
  labs(
    title = "Total Sales by Product Category and Region",
    x = "Product Category",
    # The other charts in this file format monetary values with an "Rp"
    # prefix, so the unit here is aligned with them instead of "USD".
    # NOTE(review): confirm the currency of Total_Price.
    y = "Total Sales (Rp)",
    fill = "Region"
  ) +
  theme_minimal(base_size = 15) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),  # slanted category labels
    plot.title = element_text(face = "bold", hjust = 0.5)
  )


3.2 Ridgeline Plot

# ==============================
# 1. Muat Library
# ==============================
library(ggridges)
library(ggplot2)
library(dplyr)
library(scales)
library(readr)

# ==============================
# 2. Keep Valid Rows
# ==============================
# Drop rows whose Price_per_Unit is NA, Inf or NaN
data_bisnis <- read.csv("Data_Bisnis.csv", stringsAsFactors = FALSE)

data_bisnis_filtered <- data_bisnis %>%
  filter(is.finite(Price_per_Unit))

# ==============================
# 3. Draw the Ridgeline Plot
# ==============================
# One density ridge per region, filled from the pastel Brewer palette. The
# original comment announced scale_fill_brewer() but the scale was never added.
# NOTE(review): "Pastel1" provides at most 9 colours; confirm the number of
# regions stays within that.
ggplot(data_bisnis_filtered, aes(x = Price_per_Unit, y = Region, fill = Region)) +
  geom_density_ridges(alpha = 0.7, scale = 1.2) +
  scale_fill_brewer(palette = "Pastel1") +
  # Currency formatting with "." as thousands and "," as decimal separator
  scale_x_continuous(labels = dollar_format(prefix = "Rp", big.mark = ".", decimal.mark = ",")) +
  labs(
    title = "Distribution of Price per Unit by Region",
    x = "Price per Unit",
    y = "Region"
  ) +
  theme_minimal() +
  theme(legend.position = "none")  # the y axis already names each region


3.3 Boxplot by Category

# ==============================
# 1. Load Required Libraries
# ==============================
library(ggplot2)
library(dplyr)

# ==============================
# 2. Prepare Data
# ==============================
# Read the CSV, coerce Quantity to numeric and drop rows where it is missing.
data_bisnis <- read.csv("Data_Bisnis.csv", stringsAsFactors = FALSE) %>%
  mutate(Quantity = as.numeric(Quantity)) %>%
  filter(!is.na(Quantity))

# ==============================
# 3. Create Boxplot
# ==============================
# One box per product category; outliers are drawn as solid red dots.
ggplot(
  data_bisnis,
  aes(x = Product_Category, y = Quantity, fill = Product_Category)
) +
  geom_boxplot(
    outlier.colour = "red",
    outlier.shape = 16,
    outlier.size = 2
  ) +
  labs(
    title = "Boxplot of Quantity by Product Category",
    x = "Product Category",
    y = "Quantity"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",  # the x axis already names each category
    plot.title = element_text(size = 16, face = "bold"),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12)
  )


3.4 Lollipop Chart

# ==============================
# 1. Load Required Libraries
# ==============================
library(ggplot2)
library(dplyr)
library(readr)

# ==============================
# 2. Prepare Data
# ==============================
data_bisnis <- read.csv("Data_Bisnis.csv", stringsAsFactors = FALSE)

# Summarize total sales by Product_Category and Region
sales_grouped <- data_bisnis %>%
  group_by(Product_Category, Region) %>%
  summarise(Total_Sales = sum(Total_Price, na.rm = TRUE), .groups = "drop")

# ==============================
# 3. Grouped Lollipop Chart
# ==============================
# Stems run from 0 to the total; the dot marks the value, coloured by region.
# Stems inherit the reordered y from the plot mapping and use the same
# reordering for yend, so stems and dots always share one category order.
ggplot(sales_grouped, aes(x = Total_Sales, y = reorder(Product_Category, Total_Sales), color = Region)) +
  # linewidth replaces the deprecated size aesthetic for line geoms
  geom_segment(aes(x = 0, xend = Total_Sales,
                   yend = reorder(Product_Category, Total_Sales)),
               linewidth = 1) +
  geom_point(size = 4) +
  labs(
    title = "Grouped Lollipop Chart",
    x = "Total Sales",
    y = "Product Category"
  ) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    plot.title = element_text(size = 16, face = "bold")
  )

# ==============================
# 4. Faceted Lollipop Chart
# ==============================
# Same data with one panel per region; each panel gets its own x range.
ggplot(sales_grouped, aes(x = Total_Sales, y = reorder(Product_Category, Total_Sales))) +
  geom_segment(aes(x = 0, xend = Total_Sales,
                   yend = reorder(Product_Category, Total_Sales)),
               color = "skyblue", linewidth = 1) +
  geom_point(color = "blue", size = 4) +
  facet_wrap(~ Region, scales = "free_x") +
  labs(
    title = "Faceted Lollipop Chart",
    x = "Total Sales",
    y = "Product Category"
  ) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    plot.title = element_text(size = 16, face = "bold")
  )


3.5 Heatmap

library(ggplot2)
library(dplyr)
library(readr)

# Read the data
data <- read_csv("Data_Bisnis.csv")

# Total sales per (region, product category) cell. .groups = "drop" returns an
# ungrouped result directly, matching the other summaries in this file.
agg_data <- data %>%
  group_by(Region, Product_Category) %>%
  summarise(Total_Sales = sum(Total_Price, na.rm = TRUE), .groups = "drop")

# Upper outlier bound from the IQR rule (Q3 + 1.5 * IQR)
q1 <- quantile(agg_data$Total_Sales, 0.25)
q3 <- quantile(agg_data$Total_Sales, 0.75)
iqr <- q3 - q1
upper_outlier <- q3 + 1.5 * iqr

# Flag the cells above the upper bound
agg_data <- agg_data %>%
  mutate(is_outlier = Total_Sales > upper_outlier)

# Draw the heatmap
ggplot(agg_data, aes(x = Product_Category, y = Region, fill = Total_Sales)) +
  geom_tile(color = "white", linewidth = 0.7) +
  geom_text(aes(label = round(Total_Sales, 0)), color = "black", size = 4) +
  # Red filled circle (shape 16) on outlier cells. It is nudged towards the
  # top of the tile so it no longer sits on top of the value label.
  geom_point(data = agg_data %>% filter(is_outlier),
             aes(x = Product_Category, y = Region),
             color = "red", size = 3, shape = 16,
             position = position_nudge(y = 0.3)) +
  scale_fill_gradient(low = "#E0BBE4", high = "#5D3A9B", name = "Total Sales") +
  labs(
    title = "Heatmap Penjualan dengan Penanda Outlier",
    x = "Kategori Produk",
    y = "Wilayah"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.grid = element_blank()
  )


4. Relationship

4.1 Scatter Plot

library(ggplot2)
library(readr)

# Read the data
data <- read_csv("Data_Bisnis.csv")

# Simple scatter plot: unit price against total price, coloured by category
ggplot(
  data,
  aes(x = Unit_Price, y = Total_Price, color = Product_Category)
) +
  geom_point(shape = 16, alpha = 0.7) +
  labs(
    title = "Hubungan Harga Satuan dan Total Harga",
    x = "Harga Satuan (Unit_Price)",
    y = "Total Harga (Total_Price)",
    color = "Kategori Produk"
  ) +
  theme_minimal()


4.2 Bubble Chart

library(ggplot2)
library(readr)

# Read the data
data <- read_csv("Data_Bisnis.csv")

# Bubble chart: position from the two prices, bubble size from Quantity,
# colour from the product category
ggplot(
  data,
  aes(
    x = Unit_Price,
    y = Total_Price,
    size = Quantity,
    color = Product_Category
  )
) +
  geom_point(alpha = 0.5) +
  scale_size_continuous(range = c(3, 7)) +  # smallest and largest point size
  labs(
    title = "Bubble Chart: Harga Satuan, Total Harga, dan Kuantitas",
    x = "Harga Satuan (Unit_Price)",
    y = "Total Harga (Total_Price)",
    color = "Kategori Produk",
    size = "Jumlah Produk"
  ) +
  theme_minimal()


4.3 Correlation Matrix

library(ggplot2)
library(reshape2)
library(readr)
library(dplyr)

# Read the data from the CSV file
data <- read_csv("Data_Bisnis.csv")

# Keep only the numeric columns of interest
data_numeric <- data %>% select(
  Quantity, Unit_Price, Discount, Delivery_Time,
  Total_Price, Price_per_Unit, Efficiency, Feature_Interaction,
  ID_Length, Discount_Level, Sales_Rank,
  Avg_Quantity_Region, Sum_Sales_Region, Count_Product_Region
)

# Correlation matrix computed on the rows with no missing values
cor_matrix <- cor(data_numeric, use = "complete.obs")

# Reshape to long format: one row per (Var1, Var2, value)
cor_melt <- melt(cor_matrix)

ggplot(data = cor_melt, aes(x = Var1, y = Var2, fill = value)) +
  # Default tile size (1 x 1) keeps neighbouring tiles from overlapping; the
  # previous width of 1.2 made each tile cover part of the next one. The
  # wide-cell look comes from aspect.ratio below.
  geom_tile(color = "white") +
  geom_text(aes(label = round(value, 2)), color = "black", size = 2) +
  scale_fill_gradient2(
    low = "#CBAACB",
    mid = "#FDDDE6",
    high = "#87CEEB",
    # Spelled out as `limits`; `limit` only worked through partial matching.
    midpoint = 0, limits = c(-1, 1),
    name = "Korelasi"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
    panel.grid = element_blank(),
    aspect.ratio = 0.4   # height / width; values below 1 make the cells wider
  ) +
  labs(
    title = "Visualisasi Matriks Korelasi",
    x = "", y = ""
  )


5. Time Series

5.1 Line Chart

# Load Required Libraries
library(dplyr)
library(ggplot2)
library(lubridate)
library(readr)

# Load Data
# ==============================
business_data <- read.csv("Data_Bisnis.csv", stringsAsFactors = FALSE)

# Step 1: Calculate Total Sales and Convert Transaction Date
# NOTE(review): as.Date() without a format expects ISO-style dates
# (e.g. "2024-01-31"); confirm the format used in the CSV.
business_data <- business_data %>%
  mutate(
    Transaction_Date = as.Date(Transaction_Date),
    Total_Sales = Unit_Price * Quantity
  )

# Step 2: Summarize Total Sales by Date and Product Category
sales_by_date <- business_data %>%
  group_by(Transaction_Date, Product_Category) %>%
  summarise(Total_Sales = sum(Total_Sales, na.rm = TRUE), .groups = "drop")

# Step 3: Create a Line Chart with Pastel Colors
# The data holds one row per (date, category), so the category has to be
# mapped to an aesthetic to get one line per category. The previous mapping
# passed `line(Product_Category)` -- a call to stats::line() -- as an unnamed
# aesthetic, which is not a valid grouping.
ggplot(sales_by_date, aes(x = Transaction_Date, y = Total_Sales,
                          color = Product_Category)) +
  # linewidth replaces the deprecated size aesthetic for line geoms
  geom_line(linewidth = 1) +
  # Same pastel palette as the stacked area chart in this file
  scale_color_brewer(palette = "Pastel1") +
  labs(
    title = "Total Sales Over Time",
    x = "Transaction Date",
    y = "Total Sales",
    color = "Category"
  ) +
  theme_minimal(base_size = 10) +
  theme(
    plot.title = element_text(face = "bold", color = "black"),
    axis.title = element_text(color = "black"),
    axis.text = element_text(color = "black")
  )


5.2 Area Chart

library(dplyr)
library(ggplot2)
library(lubridate)

# ==============================
# Load and process data: parse the date and compute per-row total sales
# ==============================
business_data <- read.csv("Data_Bisnis.csv", stringsAsFactors = FALSE) %>%
  mutate(
    Transaction_Date = as.Date(Transaction_Date),
    Total_Sales = Unit_Price * Quantity
  )

# ==============================
# Total sales for every (date, category) pair
# ==============================
sales_by_date_cat <- business_data %>%
  group_by(Transaction_Date, Product_Category) %>%
  summarise(Total_Sales = sum(Total_Sales, na.rm = TRUE), .groups = "drop")

# ==============================
# Stacked area chart, one pastel band per category
# ==============================
ggplot(
  sales_by_date_cat,
  aes(x = Transaction_Date, y = Total_Sales, fill = Product_Category)
) +
  geom_area(alpha = 0.7) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(
    title = "Stacked Area Chart of Total Sales by Category",
    x = "Transaction Date",
    y = "Total Sales",
    fill = "Category"
  ) +
  theme_minimal(base_size = 10)