Data Science Programming

Descriptive Visualization

Logo


1 Combo

1.1 Heatmap

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the business transactions data set.
data <- read.csv("~/Data/Bab8/Data_bisnis.csv")

# Average Total_Price for every Region x Product_Category combination.
# `.groups = "drop"` returns an ungrouped result directly, so no separate
# ungroup() step is needed and summarise() no longer prints its
# "grouped output" message. Missing prices are ignored via na.rm = TRUE.
heatmap_data <- data %>%
  group_by(Region, Product_Category) %>%
  summarise(
    Avg_Total_Price = mean(Total_Price, na.rm = TRUE),
    .groups = "drop"
  )
# Tile heatmap of the average total price: product categories on the x axis,
# regions on the y axis, each tile labelled with its value rounded to one
# decimal place.
ggplot(
  data = heatmap_data,
  mapping = aes(x = Product_Category, y = Region, fill = Avg_Total_Price)
) +
  geom_tile(linewidth = 0.5, color = "white") +
  geom_text(
    mapping = aes(label = round(Avg_Total_Price, digits = 1)),
    size = 3,
    color = "black"
  ) +
  scale_fill_gradientn(
    name = "Avg Total Price",
    colours = c("#ffffd9", "#a1dab4", "#41b6c4", "#2c7fb8", "#253494")
  ) +
  labs(
    x = "Product Category",
    y = "Region",
    title = "Average Total Price by Region and Product Category"
  ) +
  theme_minimal(base_size = 12) +
  # Slant the category labels so they do not overlap.
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

2 Relationship

2.1 Scatter Plot

# ==============================
# 1. Load Required Libraries
# ==============================
library(ggplot2)
library(dplyr)

# ==============================
# 2. Read the business data set
# ==============================
data_bisnis <- read.csv(
  file = "~/Data/Bab8/Data_bisnis.csv",
  stringsAsFactors = FALSE
)

# ==============================
# 3. Scatter plot: Quantity (x) against Total_Price (y)
# ==============================
ggplot(data = data_bisnis, mapping = aes(x = Quantity, y = Total_Price)) +
  # Semi-transparent points so overlapping observations remain visible.
  geom_point(alpha = 0.7, size = 3, color = "blue") +
  theme_minimal(base_size = 14) +
  labs(
    x = "Quantity",
    y = "Total Price",
    title = "Scatter Plot of Quantity vs Total Price"
  )

2.2 Bubble Chart

# ==============================
# 1. Load Required Libraries
# ==============================
library(ggplot2)
library(dplyr)

# ==============================
# 2. Read the business data set
# ==============================
data_bisnis <- read.csv(
  file = "~/Data/Bab8/Data_bisnis.csv",
  stringsAsFactors = FALSE
)

# ==============================
# 3. Bubble chart: Quantity vs Total_Price, point size mapped to Unit_Price
# ==============================
ggplot(
  data = data_bisnis,
  mapping = aes(x = Quantity, y = Total_Price, size = Unit_Price)
) +
  geom_point(alpha = 0.6, color = "skyblue") +
  # Point sizes are scaled into the 3-15 range.
  scale_size(range = c(3, 15)) +
  theme_minimal(base_size = 14) +
  labs(
    x = "Quantity",
    y = "Total Price",
    size = "Unit Price",
    title = "Bubble Chart: Quantity vs Total Price (size = Unit Price)"
  )

2.3 Correlation Matrix

library(ggplot2)
library(dplyr)

# Read the data
data <- read.csv("~/Data/Bab8/Data_bisnis.csv")

# Keep only numeric columns that actually vary. A constant (or all-NA) column
# has zero standard deviation, so cor() would warn ("the standard deviation is
# zero") and fill that variable's row and column with NA, leaving blank tiles
# in the heatmap. isTRUE() guards against sd() returning NA.
numeric_data <- data %>%
  select(where(function(col) {
    is.numeric(col) && isTRUE(sd(col, na.rm = TRUE) > 0)
  }))

# Compute Pearson correlations. Pairwise deletion uses every complete pair of
# observations, so a few missing values do not turn whole entries into NA.
correlation_matrix <- cor(numeric_data, use = "pairwise.complete.obs")

# Reshape to long format (columns Var1, Var2, Freq) for ggplot
correlation_df <- as.data.frame(as.table(correlation_matrix))

# Draw the correlation heatmap with ggplot2. Var1/Var2 hold the variable
# pairs and Freq the correlation coefficient.
ggplot(correlation_df, aes(Var1, Var2, fill = Freq)) +
  geom_tile(color = "white") +
  # Diverging palette centred on 0 and fixed to the full [-1, 1] range.
  # `limits` is spelled out in full instead of relying on partial matching
  # of the abbreviated `limit`.
  scale_fill_gradient2(low = "blue", high = "red", mid = "white",
                       midpoint = 0, limits = c(-1, 1), space = "Lab",
                       name = "Pearson\nCorrelation") +
  theme_minimal() +
  # Square tiles: one unit on x equals one unit on y.
  coord_fixed() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Correlation Matrix (Numerical Features Only)", x = "", y = "")

3 Time Series

3.1 Line Chart

# ==============================
# 1. Load Required Libraries
# ==============================
library(ggplot2)
library(dplyr)

# ==============================
# 2. Read the business data set
# ==============================
data <- read.csv(
  file = "~/Data/Bab8/Data_bisnis.csv",
  stringsAsFactors = FALSE
)

# Convert Transaction_Date from text to Date
data[["Transaction_Date"]] <- as.Date(data[["Transaction_Date"]])

# ==============================
# 3. Total sales per transaction date and product category
# ==============================
sales_time <- data %>%
  group_by(Transaction_Date, Product_Category) %>%
  summarise(
    Total_Sales = sum(Total_Price, na.rm = TRUE),
    .groups = "drop"
  )

# ==============================
# 4. Build the line chart
# ==============================
# One line per product category showing total sales over time.
ggplot(sales_time, aes(x = Transaction_Date, y = Total_Sales, color = Product_Category)) +
  # `linewidth` replaces the `size` aesthetic for lines, which was deprecated
  # in ggplot2 3.4.0.
  geom_line(linewidth = 1.2) +
  labs(
    title = "Total Sales Over Time by Product Category",
    x = "Transaction Date",
    y = "Total Sales",
    color = "Product Category"
  ) +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

3.2 Area Chart

# ==============================
# 1. Load Required Libraries
# ==============================
library(ggplot2)
library(dplyr)

# ==============================
# 2. Read the business data set
# ==============================
data <- read.csv(
  file = "~/Data/Bab8/Data_bisnis.csv",
  stringsAsFactors = FALSE
)

# Convert Transaction_Date from text to Date
data[["Transaction_Date"]] <- as.Date(data[["Transaction_Date"]])

# ==============================
# 3. Total sales per transaction date and product category
# ==============================
sales_time <- data %>%
  group_by(Transaction_Date, Product_Category) %>%
  summarise(
    Total_Sales = sum(Total_Price, na.rm = TRUE),
    .groups = "drop"
  )

# ==============================
# 4. Stacked area chart of total sales over time
# ==============================
ggplot(
  data = sales_time,
  mapping = aes(x = Transaction_Date, y = Total_Sales, fill = Product_Category)
) +
  # Categories are stacked on top of each other; alpha keeps gridlines visible.
  geom_area(position = "stack", alpha = 0.6) +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(hjust = 1, angle = 45)) +
  labs(
    x = "Transaction Date",
    y = "Total Sales",
    fill = "Product Category",
    title = "Area Chart of Total Sales Over Time by Product Category"
  )