Data Science Programming
Descriptive Visualization
1 Combo
1.1 Heatmap
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the business transactions data set.
data <- read.csv("~/Data/Bab8/Data_bisnis.csv")

# Average Total_Price for every Region x Product_Category cell of the heatmap.
# `.groups = "drop"` returns an ungrouped result directly, so no trailing
# ungroup() is needed and summarise() no longer emits its
# "has grouped output by 'Region'" message.
heatmap_data <- data %>%
  group_by(Region, Product_Category) %>%
  summarise(
    Avg_Total_Price = mean(Total_Price, na.rm = TRUE),
    .groups = "drop"
  )
# Heatmap: one tile per Region x Product_Category combination, shaded by the
# average total price and annotated with that value rounded to one decimal.
ggplot(
  data = heatmap_data,
  mapping = aes(x = Product_Category, y = Region, fill = Avg_Total_Price)
) +
  geom_tile(color = "white", linewidth = 0.5) +
  geom_text(
    mapping = aes(label = round(Avg_Total_Price, 1)),
    color = "black",
    size = 3
  ) +
  scale_fill_gradientn(
    name = "Avg Total Price",
    colours = c("#ffffd9", "#a1dab4", "#41b6c4", "#2c7fb8", "#253494")
  ) +
  ggtitle("Average Total Price by Region and Product Category") +
  xlab("Product Category") +
  ylab("Region") +
  theme_minimal(base_size = 12) +
  # Rotate the category labels so they stay legible.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
2 Relationship
2.1 Scatter Plot
# ---- 1. Packages ----
library(ggplot2)
library(dplyr)

# ---- 2. Data ----
# Read the business transactions; text columns stay character vectors.
data_bisnis <- read.csv(
  file = "~/Data/Bab8/Data_bisnis.csv",
  stringsAsFactors = FALSE
)

# ---- 3. Scatter plot ----
# One semi-transparent blue point per row: Quantity against Total_Price.
ggplot(data = data_bisnis, mapping = aes(x = Quantity, y = Total_Price)) +
  geom_point(alpha = 0.7, size = 3, color = "blue") +
  theme_minimal(base_size = 14) +
  labs(
    x = "Quantity",
    y = "Total Price",
    title = "Scatter Plot of Quantity vs Total Price"
  )
2.2 Bubble Chart
# ---- 1. Packages ----
library(ggplot2)
library(dplyr)

# ---- 2. Data ----
# Read the business transactions; text columns stay character vectors.
data_bisnis <- read.csv(
  file = "~/Data/Bab8/Data_bisnis.csv",
  stringsAsFactors = FALSE
)

# ---- 3. Bubble chart ----
# Same axes as the scatter plot, with the point size driven by Unit_Price.
ggplot(
  data = data_bisnis,
  mapping = aes(x = Quantity, y = Total_Price, size = Unit_Price)
) +
  geom_point(alpha = 0.6, color = "skyblue") +
  # Widen the size range so differences between bubbles are easy to see.
  scale_size(range = c(3, 15)) +
  theme_minimal(base_size = 14) +
  labs(
    x = "Quantity",
    y = "Total Price",
    size = "Unit Price",
    title = "Bubble Chart: Quantity vs Total Price (size = Unit Price)"
  )
2.3 Correlation Matrix
library(ggplot2)
library(dplyr)

# Read the data.
data <- read.csv("~/Data/Bab8/Data_bisnis.csv")

# Keep only numeric columns that actually vary. A constant column has a
# standard deviation of zero, for which cor() warns ("the standard deviation
# is zero") and returns NA, leaving blank cells in the heatmap.
numeric_data <- data %>%
  select(where(function(col) {
    is.numeric(col) && isTRUE(sd(col, na.rm = TRUE) > 0)
  }))

# Compute the Pearson correlations. Pairwise deletion keeps a single missing
# value from turning every correlation of that column into NA.
correlation_matrix <- cor(numeric_data, use = "pairwise.complete.obs")

# Reshape to long format for ggplot: as.table() yields the columns
# Var1, Var2 (the two variable names) and Freq (the correlation).
correlation_df <- as.data.frame(as.table(correlation_matrix))

# Draw the heatmap with ggplot2.
ggplot(correlation_df, aes(Var1, Var2, fill = Freq)) +
  geom_tile(color = "white") +
  # Diverging palette centred on 0 and fixed to the full [-1, 1] range.
  # The argument is spelled `limits` rather than relying on partial matching
  # of `limit`.
  scale_fill_gradient2(
    low = "blue", high = "red", mid = "white",
    midpoint = 0, limits = c(-1, 1), space = "Lab",
    name = "Pearson\nCorrelation"
  ) +
  theme_minimal() +
  # Square tiles.
  coord_fixed() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Correlation Matrix (Numerical Features Only)", x = "", y = "")
3 Time Series
3.1 Line Chart
# ==============================
# 1. Load Required Libraries
# ==============================
library(ggplot2)
library(dplyr)
# ==============================
# 2. Load Dataset
# ==============================
data <- read.csv("~/Data/Bab8/Data_bisnis.csv", stringsAsFactors = FALSE)
# Convert Transaction_Date to Date class.
# NOTE(review): no `format` is given, so as.Date() only accepts its default
# ISO-style strings (e.g. YYYY-MM-DD); confirm the CSV stores dates that way.
data$Transaction_Date <- as.Date(data$Transaction_Date)
# ==============================
# 3. Aggregate Total Sales per Date and Product Category
# ==============================
sales_time <- data %>%
  group_by(Transaction_Date, Product_Category) %>%
  summarise(Total_Sales = sum(Total_Price, na.rm = TRUE), .groups = "drop")
# ==============================
# 4. Build the Line Chart
# ==============================
ggplot(
  sales_time,
  aes(x = Transaction_Date, y = Total_Sales, color = Product_Category)
) +
  # `linewidth` replaces `size` for lines, which ggplot2 deprecated in 3.4.0.
  geom_line(linewidth = 1.2) +
  labs(
    title = "Total Sales Over Time by Product Category",
    x = "Transaction Date",
    y = "Total Sales",
    color = "Product Category"
  ) +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
3.2 Area Chart
# ---- 1. Packages ----
library(ggplot2)
library(dplyr)

# ---- 2. Data ----
# Read the business transactions; text columns stay character vectors.
data <- read.csv(
  file = "~/Data/Bab8/Data_bisnis.csv",
  stringsAsFactors = FALSE
)
# Convert the transaction date column to Date class.
data$Transaction_Date <- as.Date(data$Transaction_Date)

# ---- 3. Total sales per date and product category ----
sales_time <- data %>%
  group_by(Transaction_Date, Product_Category) %>%
  summarise(
    Total_Sales = sum(Total_Price, na.rm = TRUE),
    .groups = "drop"
  )

# ---- 4. Area chart ----
# Stacked, semi-transparent areas: one band per product category.
ggplot(
  data = sales_time,
  mapping = aes(x = Transaction_Date, y = Total_Sales, fill = Product_Category)
) +
  geom_area(position = "stack", alpha = 0.6) +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    x = "Transaction Date",
    y = "Total Sales",
    fill = "Product Category",
    title = "Area Chart of Total Sales Over Time by Product Category"
  )