Descriptive Visualizations

Data Science Programming

Logo

1. Visualizations Combo

1.1 Heatmap

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(readr)

# Read the data.
# The file is addressed through an explicit path instead of setwd(), so
# loading it does not change the session's working directory.
data_dir <- "C:/Users/M Nabil Pratama/Downloads/dsp"
df <- read_csv(file.path(data_dir, "Data_Business.csv"))
## New names:
## Rows: 500 Columns: 25
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (8): Transaction_ID, Customer_ID, Product_Category, Product_ID, Region... dbl
## (15): ...1, Quantity, Unit_Price, Discount, Delivery_Time, Total_Price,... lgl
## (1): ID_HasPattern date (1): Transaction_Date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Total the sales first: sum Total_Price for every
# Product_Category x Region pair (missing values are ignored) and return
# an ungrouped summary table.
pivot_df <- df |>
  group_by(Product_Category, Region) |>
  summarise(
    Total_Sales = sum(Total_Price, na.rm = TRUE),
    .groups = "drop"
  )

# Build the heatmap: one white-bordered tile per Region / Product_Category
# cell, filled by Total_Sales with the reversed 9-step "BuPu" palette, and
# the rounded total printed on top of each tile.
ggplot(data = pivot_df) +
  aes(x = Region, y = Product_Category, fill = Total_Sales) +
  geom_tile(colour = "white") +
  geom_text(
    aes(label = round(Total_Sales, 0)),
    colour = "black",
    size = 3
  ) +
  scale_fill_gradientn(
    name = "Total Sales (Rp)",
    colours = rev(RColorBrewer::brewer.pal(9, "BuPu"))
  ) +
  theme_minimal(base_size = 12) +
  labs(
    title = "🔥 Heatmap Total Sales by Product Category and Region",
    x = "Region",
    y = "Product Category"
  )

2. Relationship

2.1 Scatter Plot

library(ggplot2)
library(readr)

# Load data.
# The file is addressed through an explicit path instead of setwd(), so
# loading it does not change the session's working directory.
data_dir <- "C:/Users/M Nabil Pratama/Downloads/dsp"
df <- read_csv(file.path(data_dir, "Data_Business.csv"))
## New names:
## Rows: 500 Columns: 25
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (8): Transaction_ID, Customer_ID, Product_Category, Product_ID, Region... dbl
## (15): ...1, Quantity, Unit_Price, Discount, Delivery_Time, Total_Price,... lgl
## (1): ID_HasPattern date (1): Transaction_Date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Scatter plot of Quantity (x) against Total_Price (y): one semi-transparent
# filled circle (shape 21) per row of df, on a minimal theme with dashed
# major grid lines and a bold, left-aligned title.
ggplot(data = df) +
  aes(x = Quantity, y = Total_Price) +
  geom_point(
    shape = 21,
    size = 2.5,
    stroke = 0.3,
    alpha = 0.6,
    colour = "darkorange",
    fill = "orange"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    panel.grid.major = element_line(colour = "grey80", linetype = "dashed"),
    plot.title = element_text(hjust = 0, face = "bold")
  ) +
  labs(
    x = "Jumlah Produk (Quantity)",
    y = "Total Harga (Rp)",
    title = "Visualisasi Relasi Quantity & Total Price"
  )

2.2 Bubble Chart

library(ggplot2)
library(readr)
library(dplyr)

# Load data.
# The file is addressed through an explicit path instead of setwd(), so
# loading it does not change the session's working directory.
data_dir <- "C:/Users/M Nabil Pratama/Downloads/dsp"
df <- read_csv(file.path(data_dir, "Data_Business.csv"))
## New names:
## Rows: 500 Columns: 25
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (8): Transaction_ID, Customer_ID, Product_Category, Product_ID, Region... dbl
## (15): ...1, Quantity, Unit_Price, Discount, Delivery_Time, Total_Price,... lgl
## (1): ID_HasPattern date (1): Transaction_Date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Aggregation: total quantity and total price for every
# Product_Category x Region pair, returned as an ungrouped table.
# na.rm = TRUE keeps a single missing value from turning a whole group's
# total into NA, consistent with the heatmap aggregation of Total_Price.
df_grouped <- df %>%
  group_by(Product_Category, Region) %>%
  summarise(
    Quantity = sum(Quantity, na.rm = TRUE),
    Total_Price = sum(Total_Price, na.rm = TRUE),
    .groups = "drop"
  )

# Bubble visualisation: Product_Category on x, summed Quantity on y,
# bubble area mapped to Total_Price and bubble fill mapped to Region.
# Each bubble is also labelled with its Product_Category.
ggplot(data = df_grouped) +
  aes(x = Product_Category, y = Quantity) +
  geom_point(
    aes(size = Total_Price, fill = Region),
    shape = 21,
    stroke = 0.4,
    alpha = 0.6,
    colour = "grey30"
  ) +
  geom_text(
    aes(label = Product_Category),
    size = 3.5,
    fontface = "bold",
    vjust = 0.5
  ) +
  scale_size_continuous(range = c(5, 20)) +
  scale_fill_brewer(palette = "Pastel1") +
  theme_minimal(base_size = 12) +
  theme(
    panel.grid.major = element_line(colour = "gray80", linetype = "dotted"),
    plot.title = element_text(face = "bold")
  ) +
  labs(
    title = "Creative 4D Bubble Chart:\nCategory vs Quantity (Size = Total Price, Color = Region)",
    x = "Product Category",
    y = "Total Quantity",
    size = "Total Price",
    fill = "Region"
  )

2.3 Correlation Matrix

# Load packages
library(ggplot2)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.4.3
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
# Load data.
# The file is addressed through an explicit path instead of setwd(), so
# loading it does not change the session's working directory.
data_dir <- "C:/Users/M Nabil Pratama/Downloads/dsp"
df <- read.csv(file.path(data_dir, "Data_Business.csv"))

# Keep only the numeric columns, restricted to the rows that are complete
# across all of them (the same rows cor(..., use = "complete.obs") uses).
numeric_df <- df[vapply(df, is.numeric, logical(1))]
numeric_df <- numeric_df[complete.cases(numeric_df), , drop = FALSE]

# Drop columns that are constant on those rows. Their standard deviation is
# zero, so every correlation involving them is NA: cor() warns about it and
# ggplot2 then has to discard the matching points and labels.
is_varying <- vapply(
  numeric_df,
  function(column) length(unique(column)) > 1L,
  logical(1)
)
if (!any(is_varying)) {
  stop("No numeric column with non-zero variance to correlate.", call. = FALSE)
}

# Compute correlation matrix
cor_matrix <- cor(numeric_df[is_varying])

# Melt the matrix into long format (one row per variable pair) for ggplot2
cor_melt <- melt(cor_matrix)
colnames(cor_melt) <- c("Var1", "Var2", "Correlation")

# Bubble plot: fill encodes the signed correlation on a blue-white-red
# diverging scale centred at 0, bubble size encodes its absolute value, and
# the rounded coefficient is printed on each bubble.
ggplot(cor_melt, aes(x = Var1, y = Var2, fill = Correlation, size = abs(Correlation))) +
  geom_point(shape = 21, color = "black", alpha = 0.8) +
  geom_text(aes(label = round(Correlation, 2)), size = 3, color = "black", fontface = "bold") +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0) +
  scale_size(range = c(1, 15)) +
  theme_minimal(base_size = 12) +
  labs(
    title = "4D Correlation Matrix (Size & Color = Strength)",
    x = "Variable",
    y = "Variable",
    fill = "Correlation",
    size = "Abs(Correlation)"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.grid = element_blank()
  )

3. Time Series

3.1 Line Chart

library(ggplot2)
library(dplyr)

# Load data.
# The file is addressed through an explicit path instead of setwd(), so
# loading it does not change the session's working directory.
data_dir <- "C:/Users/M Nabil Pratama/Downloads/dsp"
df <- read.csv(file.path(data_dir, "Data_Business.csv"))

# Convert Transaction_Date to Date class.
# as.Date() is called without a format, so the strings are assumed to be in
# a format it recognises by default (e.g. YYYY-MM-DD) -- TODO confirm.
df$Transaction_Date <- as.Date(df$Transaction_Date)

# Aggregate Total_Price per transaction date.
# na.rm = TRUE keeps a single missing price from turning a whole day's total
# into NA, which would leave a gap in the line chart.
daily_total <- df %>%
  group_by(Transaction_Date) %>%
  summarise(Total_Price = sum(Total_Price, na.rm = TRUE))

# Plot line chart of the daily totals, with a point marker on every date.
# Line thickness is set through `linewidth`: using `size` for lines has been
# deprecated since ggplot2 3.4.0 and raises a lifecycle warning.
ggplot(daily_total, aes(x = Transaction_Date, y = Total_Price)) +
  geom_line(color = "steelblue", linewidth = 1) +
  geom_point(color = "darkblue") +
  labs(title = "Total Harga per Hari",
       x = "Tanggal", y = "Total Harga") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

3.2 Area Chart

# Load libraries
library(ggplot2)
library(dplyr)
library(readr)

# Load data (make sure the file exists in the directory below).
# The file is addressed through an explicit path instead of setwd(), so
# loading it does not change the session's working directory.
data_dir <- "C:/Users/M Nabil Pratama/Downloads/dsp"
df <- read_csv(file.path(data_dir, "Data_Business.csv"))
## New names:
## Rows: 500 Columns: 25
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (8): Transaction_ID, Customer_ID, Product_Category, Product_ID, Region... dbl
## (15): ...1, Quantity, Unit_Price, Discount, Delivery_Time, Total_Price,... lgl
## (1): ID_HasPattern date (1): Transaction_Date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Convert Transaction_Date to Date type.
# The column specification printed by read_csv above already reports this
# column as a date, so this is a safeguard rather than a real conversion.
df$Transaction_Date <- as.Date(df$Transaction_Date)

# Aggregate total price per date, product category and region.
# - na.rm = TRUE keeps a single missing price from turning a group's total
#   into NA.
# - complete() adds an explicit zero row for every observed
#   date x category x region combination that had no transactions. Without
#   those rows the stacked area chart has to interpolate a category's value
#   across dates on which it actually sold nothing.
agg_df <- df %>%
  group_by(Transaction_Date, Product_Category, Region) %>%
  summarise(Total_Price = sum(Total_Price, na.rm = TRUE), .groups = "drop") %>%
  tidyr::complete(
    Transaction_Date, Product_Category, Region,
    fill = list(Total_Price = 0)
  )

# Stacked area chart of Total_Price over Transaction_Date, one coloured band
# per Product_Category, drawn in a separate panel for each Region; every
# panel gets its own y-axis range.
ggplot(data = agg_df) +
  aes(x = Transaction_Date, y = Total_Price, fill = Product_Category) +
  geom_area(position = "stack", alpha = 0.8) +
  facet_wrap(vars(Region), scales = "free_y") +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(hjust = 1, angle = 45)) +
  labs(
    x = "Tanggal",
    y = "Total Harga",
    title = "Area Chart Total Harga per Kategori Produk - Tiap Region"
  )

