Import Data
library(readxl)
df <- read_excel("C:/Users/Gusde/Downloads/car sales.xlsx")
View(df)
names(df) <- c(
"car_id", "date", "customer_name", "gender", "annual_income",
"dealer_name", "company", "model", "engine", "transmission",
"color", "price", "dealer_no", "body_style", "phone", "dealer_region"
)
if (is.numeric(df$date)) {
df$date <- as.Date(df$date, origin = "1899-12-30")
} else {
df$date <- as.Date(df$date)
}
df <- df %>%
mutate(
price = as.numeric(price),
annual_income = as.numeric(annual_income),
month = floor_date(date, "month")
) %>%
filter(!is.na(price))
head(df)
## # A tibble: 6 × 17
## car_id date customer_name gender annual_income dealer_name company model
## <chr> <date> <chr> <chr> <dbl> <chr> <chr> <chr>
## 1 C_CND… 2022-01-02 Geraldine Male 13500 Buddy Stor… Ford Expe…
## 2 C_CND… 2022-01-02 Gia Male 1480000 C & M Moto… Dodge Dura…
## 3 C_CND… 2022-01-02 Gianna Male 1035000 Capitol KIA Cadill… Eldo…
## 4 C_CND… 2022-01-02 Giselle Male 13500 Chrysler o… Toyota Celi…
## 5 C_CND… 2022-01-02 Grace Male 1465000 Chrysler P… Acura TL
## 6 C_CND… 2022-01-02 Guadalupe Male 850000 Classic Ch… Mitsub… Diam…
## # ℹ 9 more variables: engine <chr>, transmission <chr>, color <chr>,
## # price <dbl>, dealer_no <chr>, body_style <chr>, phone <dbl>,
## # dealer_region <chr>, month <date>
Ringkasan Data
summary_data <- df %>%
summarise(
total_transaksi = n(),
total_sales = sum(price, na.rm = TRUE),
rata_rata_harga = mean(price, na.rm = TRUE),
median_harga = median(price, na.rm = TRUE),
jumlah_brand = n_distinct(company),
jumlah_region = n_distinct(dealer_region)
)
summary_data
## # A tibble: 1 × 6
## total_transaksi total_sales rata_rata_harga median_harga jumlah_brand
## <int> <dbl> <dbl> <dbl> <int>
## 1 23906 671525465 28090. 23000 30
## # ℹ 1 more variable: jumlah_region <int>
Visualisasi 1 — Tren Total Penjualan per Bulan
sales_month <- df %>%
group_by(month) %>%
summarise(
total_sales = sum(price, na.rm = TRUE),
jumlah_unit = n(),
.groups = "drop"
)
ggplot(sales_month, aes(x = month, y = total_sales)) +
geom_line(linewidth = 1) +
geom_point(size = 2) +
scale_y_continuous(labels = dollar) +
labs(
title = "Tren Total Penjualan Mobil per Bulan",
x = "Bulan",
y = "Total Sales"
)

Visualisasi 2 — Top 10 Brand dengan Total Sales Tertinggi
top_company <- df %>%
group_by(company) %>%
summarise(
total_sales = sum(price, na.rm = TRUE),
jumlah_unit = n(),
.groups = "drop"
) %>%
arrange(desc(total_sales)) %>%
slice_head(n = 10)
ggplot(top_company, aes(x = reorder(company, total_sales), y = total_sales)) +
geom_col() +
coord_flip() +
scale_y_continuous(labels = dollar) +
labs(
title = "Top 10 Brand Mobil Berdasarkan Total Sales",
x = "Brand Mobil",
y = "Total Sales"
)

Visualisasi 3 — Distribusi Harga Mobil
ggplot(df, aes(x = price)) +
geom_histogram(bins = 30, color = "white") +
scale_x_continuous(labels = dollar) +
labs(
title = "Distribusi Harga Mobil",
x = "Harga Mobil",
y = "Jumlah Transaksi"
)

Visualisasi 4 — Perbandingan Harga Mobil Berdasarkan Body Style
ggplot(df, aes(x = reorder(body_style, price, median), y = price)) +
geom_boxplot(outlier.alpha = 0.3) +
coord_flip() +
scale_y_continuous(labels = dollar) +
labs(
title = "Perbandingan Harga Mobil Berdasarkan Body Style",
x = "Body Style",
y = "Harga Mobil"
)

Visualisasi 5 — Proporsi Body Style Berdasarkan Jenis Transmisi
transmission_body <- df %>%
count(transmission, body_style)
ggplot(transmission_body, aes(x = transmission, y = n, fill = body_style)) +
geom_col(position = "fill") +
scale_y_continuous(labels = percent) +
labs(
title = "Proporsi Body Style Berdasarkan Jenis Transmisi",
x = "Transmisi",
y = "Proporsi",
fill = "Body Style"
)

Visualisasi 6 — Hubungan Annual Income dan Harga Mobil
ggplot(df, aes(x = annual_income, y = price, color = body_style)) +
geom_point(alpha = 0.45) +
scale_x_continuous(labels = dollar) +
scale_y_continuous(labels = dollar) +
labs(
title = "Hubungan Annual Income dan Harga Mobil",
x = "Annual Income",
y = "Harga Mobil",
color = "Body Style"
)
