Dokumen ini berisi analisis dashboard penjualan mobil menggunakan R Markdown. Visualisasi yang dibuat meliputi total unit terjual, total revenue, tren penjualan bulanan, revenue berdasarkan region, penjualan berdasarkan merek, body style, transmission, harga rata-rata, hubungan pendapatan pelanggan dengan harga mobil, serta heatmap kombinasi company dan body style.
library(readxl)
library(dplyr)
library(ggplot2)
library(tidyr)
library(knitr)
file_path <- "C:/Users/LENOVO/Downloads/car sales.xlsx"
data_raw <- read_excel(file_path)
clean_col <- function(x) {
x <- trimws(x)
x <- tolower(x)
x <- gsub("\\$", "", x)
x <- gsub("[()]", "", x)
x <- gsub("[^a-z0-9]+", "_", x)
x <- gsub("^_|_$", "", x)
return(x)
}
names(data_raw) <- clean_col(names(data_raw))
names(data_raw)
## [1] "car_id" "date" "customer_name" "gender"
## [5] "annual_income" "dealer_name" "company" "model"
## [9] "engine" "transmission" "color" "price"
## [13] "dealer_no" "body_style" "phone" "dealer_region"
if (!"car_id" %in% names(data_raw)) {
data_raw$car_id <- 1:nrow(data_raw)
}
data <- data_raw
if ("date" %in% names(data)) {
if (inherits(data$date, "Date")) {
data$date <- data$date
} else if (inherits(data$date, "POSIXct") | inherits(data$date, "POSIXt")) {
data$date <- as.Date(data$date)
} else if (is.numeric(data$date)) {
data$date <- as.Date(data$date, origin = "1899-12-30")
} else {
data$date <- as.Date(data$date, tryFormats = c(
"%Y-%m-%d",
"%d/%m/%Y",
"%m/%d/%Y",
"%d-%m-%Y",
"%Y/%m/%d"
))
}
}
data$price <- as.numeric(gsub("[,$ ]", "", as.character(data$price)))
if ("annual_income" %in% names(data)) {
data$annual_income <- as.numeric(gsub("[,$ ]", "", as.character(data$annual_income)))
}
kolom_kategori <- c("gender", "dealer_name", "company", "model", "engine",
"transmission", "color", "body_style", "dealer_region")
for (kolom in kolom_kategori) {
if (kolom %in% names(data)) {
data[[kolom]][is.na(data[[kolom]]) | data[[kolom]] == ""] <- "Unknown"
}
}
data <- data %>%
filter(!is.na(price))
data <- data %>%
mutate(
year = format(date, "%Y"),
month = as.Date(cut(date, breaks = "month")),
price_segment = case_when(
price < 20000 ~ "Low Price",
price < 40000 ~ "Medium Price",
TRUE ~ "High Price"
),
income_segment = case_when(
annual_income < 500000 ~ "Low Income",
annual_income < 1000000 ~ "Middle Income",
TRUE ~ "High Income"
)
)
cat("Jumlah data setelah preprocessing:", nrow(data), "\n")
## Jumlah data setelah preprocessing: 23906
summary_table <- data %>%
summarise(
Total_Data = n(),
Total_Unit_Sold = n_distinct(car_id),
Total_Revenue = sum(price, na.rm = TRUE),
Average_Price = mean(price, na.rm = TRUE),
Minimum_Price = min(price, na.rm = TRUE),
Maximum_Price = max(price, na.rm = TRUE)
)
kable(summary_table)
| Total_Data | Total_Unit_Sold | Total_Revenue | Average_Price | Minimum_Price | Maximum_Price |
|---|---|---|---|---|---|
| 23906 | 23906 | 671525465 | 28090.25 | 1200 | 85800 |
total_unit <- n_distinct(data$car_id)
ggplot() +
annotate(
"text",
x = 1,
y = 1.2,
label = "Total Unit Sold",
size = 8,
fontface = "bold"
) +
annotate(
"text",
x = 1,
y = 0.8,
label = format(total_unit, big.mark = ","),
size = 12,
fontface = "bold"
) +
xlim(0, 2) +
ylim(0, 2) +
theme_void()
total_revenue <- sum(data$price, na.rm = TRUE)
ggplot() +
annotate(
"text",
x = 1,
y = 1.2,
label = "Total Revenue",
size = 8,
fontface = "bold"
) +
annotate(
"text",
x = 1,
y = 0.8,
label = paste0("$", format(round(total_revenue, 0), big.mark = ",")),
size = 10,
fontface = "bold"
) +
xlim(0, 2) +
ylim(0, 2) +
theme_void()
monthly_sales <- data %>%
group_by(month) %>%
summarise(
total_sales = n_distinct(car_id),
.groups = "drop"
)
ggplot(monthly_sales, aes(x = month, y = total_sales)) +
geom_line(linewidth = 1) +
geom_point(size = 2) +
labs(
title = "Monthly Car Sales Trend",
x = "Month",
y = "Total Unit Sold"
) +
theme_minimal()
revenue_region <- data %>%
group_by(dealer_region) %>%
summarise(
total_revenue = sum(price, na.rm = TRUE),
.groups = "drop"
) %>%
arrange(desc(total_revenue))
ggplot(revenue_region, aes(x = reorder(dealer_region, total_revenue), y = total_revenue)) +
geom_col() +
coord_flip() +
labs(
title = "Total Revenue by Dealer Region",
x = "Dealer Region",
y = "Total Revenue"
) +
theme_minimal()
top_company <- data %>%
group_by(company) %>%
summarise(
total_sales = n_distinct(car_id),
.groups = "drop"
) %>%
arrange(desc(total_sales)) %>%
slice_head(n = 10)
ggplot(top_company, aes(x = reorder(company, total_sales), y = total_sales)) +
geom_col() +
coord_flip() +
labs(
title = "Top 10 Car Companies by Sales Volume",
x = "Company",
y = "Total Unit Sold"
) +
theme_minimal()
avg_price_body <- data %>%
group_by(body_style) %>%
summarise(
average_price = mean(price, na.rm = TRUE),
.groups = "drop"
) %>%
arrange(desc(average_price))
ggplot(avg_price_body, aes(x = reorder(body_style, average_price), y = average_price)) +
geom_col() +
coord_flip() +
labs(
title = "Average Price by Body Style",
x = "Body Style",
y = "Average Price"
) +
theme_minimal()
ggplot(data, aes(x = annual_income, y = price, color = gender)) +
geom_point(alpha = 0.5) +
labs(
title = "Relationship between Annual Income and Car Price",
x = "Annual Income",
y = "Car Price",
color = "Gender"
) +
theme_minimal()
top10_company_name <- data %>%
count(company, sort = TRUE) %>%
slice_head(n = 10) %>%
pull(company)
heatmap_data <- data %>%
filter(company %in% top10_company_name) %>%
group_by(company, body_style) %>%
summarise(
total_sales = n_distinct(car_id),
.groups = "drop"
)
ggplot(heatmap_data, aes(x = body_style, y = company, fill = total_sales)) +
geom_tile() +
geom_text(aes(label = total_sales), size = 3) +
labs(
title = "Sales Heatmap by Company and Body Style",
x = "Body Style",
y = "Company",
fill = "Total Sales"
) +
theme_minimal()
Berdasarkan hasil visualisasi, dashboard penjualan mobil dapat digunakan untuk melihat performa penjualan dari berbagai sudut pandang. Analisis tidak hanya menunjukkan jumlah unit yang terjual dan total revenue, tetapi juga memperlihatkan pola penjualan berdasarkan waktu, wilayah dealer, merek mobil, body style, transmission, harga, dan karakteristik pelanggan.
Secara umum, dashboard ini membantu mengidentifikasi wilayah dengan kontribusi revenue terbesar, merek mobil yang paling banyak terjual, body style yang paling diminati, serta pola hubungan antara pendapatan pelanggan dan harga mobil yang dibeli. Informasi ini dapat menjadi dasar dalam pengambilan keputusan bisnis, terutama dalam strategi penjualan, segmentasi pelanggan, dan pengelolaan stok mobil.