df <- read_excel("C:/Users/User/Downloads/car sales.xlsx")
df <- df %>%
mutate(
Year = year(Date),
Month = month(Date, label = TRUE, abbr = TRUE),
Month_num = month(Date)
)
str(df)
## tibble [23,906 × 19] (S3: tbl_df/tbl/data.frame)
## $ Car_id : chr [1:23906] "C_CND_000001" "C_CND_000002" "C_CND_000003" "C_CND_000004" ...
## $ Date : POSIXct[1:23906], format: "2022-01-02" "2022-01-02" ...
## $ Customer Name: chr [1:23906] "Geraldine" "Gia" "Gianna" "Giselle" ...
## $ Gender : chr [1:23906] "Male" "Male" "Male" "Male" ...
## $ Annual Income: num [1:23906] 13500 1480000 1035000 13500 1465000 ...
## $ Dealer_Name : chr [1:23906] "Buddy Storbeck's Diesel Service Inc" "C & M Motors Inc" "Capitol KIA" "Chrysler of Tri-Cities" ...
## $ Company : chr [1:23906] "Ford" "Dodge" "Cadillac" "Toyota" ...
## $ Model : chr [1:23906] "Expedition" "Durango" "Eldorado" "Celica" ...
## $ Engine : chr [1:23906] "Double Overhead Camshaft" "Double Overhead Camshaft" "Overhead Camshaft" "Overhead Camshaft" ...
## $ Transmission : chr [1:23906] "Auto" "Auto" "Manual" "Manual" ...
## $ Color : chr [1:23906] "Black" "Black" "Red" "Pale White" ...
## $ Price ($) : num [1:23906] 26000 19000 31500 14000 24500 12000 14000 42000 82000 15000 ...
## $ Dealer_No : chr [1:23906] "06457-3834" "60504-7114" "38701-8047" "99301-3882" ...
## $ Body Style : chr [1:23906] "SUV" "SUV" "Passenger" "SUV" ...
## $ Phone : num [1:23906] 8264678 6848189 7298798 6257557 7081483 ...
## $ Dealer_Region: chr [1:23906] "Middletown" "Aurora" "Greenville" "Pasco" ...
## $ Year : num [1:23906] 2022 2022 2022 2022 2022 ...
## $ Month : Ord.factor w/ 12 levels "Jan"<"Feb"<"Mar"<..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Month_num : num [1:23906] 1 1 1 1 1 1 1 1 1 1 ...
head(df)
## # A tibble: 6 × 19
## Car_id Date `Customer Name` Gender `Annual Income` Dealer_Name
## <chr> <dttm> <chr> <chr> <dbl> <chr>
## 1 C_CND_… 2022-01-02 00:00:00 Geraldine Male 13500 Buddy Stor…
## 2 C_CND_… 2022-01-02 00:00:00 Gia Male 1480000 C & M Moto…
## 3 C_CND_… 2022-01-02 00:00:00 Gianna Male 1035000 Capitol KIA
## 4 C_CND_… 2022-01-02 00:00:00 Giselle Male 13500 Chrysler o…
## 5 C_CND_… 2022-01-02 00:00:00 Grace Male 1465000 Chrysler P…
## 6 C_CND_… 2022-01-02 00:00:00 Guadalupe Male 850000 Classic Ch…
## # ℹ 13 more variables: Company <chr>, Model <chr>, Engine <chr>,
## # Transmission <chr>, Color <chr>, `Price ($)` <dbl>, Dealer_No <chr>,
## # `Body Style` <chr>, Phone <dbl>, Dealer_Region <chr>, Year <dbl>,
## # Month <ord>, Month_num <dbl>
df_tren <- df %>%
group_by(Year, Month_num, Month) %>%
summarise(Total_Revenue = sum(`Price ($)`, na.rm = TRUE),
Jumlah_Transaksi = n(), .groups = "drop")
ggplot(df_tren, aes(x = Month_num, y = Total_Revenue / 1e6,
color = factor(Year), group = factor(Year))) +
geom_line(linewidth = 1.2) +
geom_point(size = 2.5) +
scale_x_continuous(breaks = 1:12,
labels = c("Jan","Feb","Mar","Apr","Mei","Jun",
"Jul","Agt","Sep","Okt","Nov","Des")) +
scale_color_manual(values = c("2022" = "blue", "2023" = "red")) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(
title = "Tren Penjualan Bulanan per Tahun",
subtitle = "Total pendapatan penjualan mobil (Juta USD)",
x = "Bulan",
y = "Revenue (Juta USD)",
color = "Tahun"
)
## Top 10 Perusahaan Berdasarkan Revenue
df_company <- df %>%
group_by(Company) %>%
summarise(Total_Revenue = sum(`Price ($)`, na.rm = TRUE),
Jumlah = n(), .groups = "drop") %>%
arrange(desc(Total_Revenue)) %>%
slice_head(n = 10)
ggplot(df_company, aes(x = reorder(Company, Total_Revenue),
y = Total_Revenue / 1e6, fill = Total_Revenue)) +
geom_col() +
geom_text(aes(label = paste0("$", round(Total_Revenue / 1e6, 1), "M")),
hjust = -0.1, size = 3.2) +
coord_flip() +
scale_fill_gradient(low = "yellow", high = "darkgreen") +
theme_minimal() +
theme(legend.position = "none") +
labs(
title = "Top 10 Perusahaan Berdasarkan Total Revenue",
subtitle = "Periode 2022-2023",
x = "Perusahaan",
y = "Total Revenue (Juta USD)"
)
## Distribusi Penjualan per Body Style
df_body <- df %>%
group_by(`Body Style`) %>%
summarise(Total_Revenue = sum(`Price ($)`, na.rm = TRUE),
Jumlah = n(), .groups = "drop") %>%
mutate(Persen = round(Jumlah / sum(Jumlah) * 100, 1))
ggplot(df_body, aes(x = reorder(`Body Style`, -Total_Revenue),
y = Total_Revenue / 1e6,
fill = `Body Style`)) +
geom_col() +
geom_text(aes(label = paste0(Persen, "%\n(", format(Jumlah, big.mark=","), " unit)")),
vjust = -0.3, size = 3) +
scale_fill_manual(values = c(
"SUV" = "blue",
"Hatchback" = "darkgreen",
"Sedan" = "maroon",
"Passenger" = "orange",
"Hardtop" = "purple"
)) +
theme_minimal() +
theme(legend.position = "none") +
labs(
title = "Distribusi Penjualan per Body Style",
subtitle = "Berdasarkan total revenue dan persentase unit terjual",
x = "Body Style",
y = "Total Revenue (Juta USD)",
)
## Perbandingan Penjualan berdasarkan Gender dan Transmisi
df_gt <- df %>%
group_by(Gender, Transmission) %>%
summarise(Total_Revenue = sum(`Price ($)`, na.rm = TRUE),
Jumlah = n(), .groups = "drop")
ggplot(df_gt, aes(x = Gender, y = Total_Revenue / 1e6,
fill = Transmission)) +
geom_col(position = "dodge") +
geom_text(aes(label = paste0("$", round(Total_Revenue / 1e6, 1), "M")),
position = position_dodge(width = 0.9),
vjust = -0.4, size = 3.5) +
scale_fill_manual(values = c("Auto" = "blue", "Manual" = "red")) +
theme_minimal() +
labs(
title = "Perbandingan Revenue berdasarkan Gender dan Jenis Transmisi",
subtitle = "Auto vs Manual — Pria vs Wanita",
x = "Gender",
y = "Total Revenue (Juta USD)",
fill = "Transmisi"
)
## Boxplot Distribusi Harga per Body Style
ggplot(df, aes(x = reorder(`Body Style`, `Price ($)`, median),
y = `Price ($)` / 1000,
fill = `Body Style`)) +
geom_boxplot(alpha = 0.8, outlier.size = 0.8, outlier.alpha = 0.4) +
scale_fill_manual(values = c(
"SUV" = "blue",
"Hatchback" = "darkgreen",
"Sedan" = "red",
"Passenger" = "orange",
"Hardtop" = "purple"
)) +
theme_minimal() +
theme(legend.position = "none") +
labs(
title = "Distribusi Harga Mobil per Body Style",
subtitle = "Median, kuartil, dan outlier harga (ribu USD)",
x = "Body Style",
y = "Harga (Ribu USD)"
)
## Performa Revenue per Dealer Region
df_region <- df %>%
group_by(Dealer_Region) %>%
summarise(
Total_Revenue = sum(`Price ($)`, na.rm = TRUE),
Jumlah_Transaksi = n(),
Avg_Price = mean(`Price ($)`, na.rm = TRUE),
.groups = "drop"
) %>%
arrange(desc(Total_Revenue)) %>%
mutate(Kategori = ifelse(Total_Revenue > mean(Total_Revenue),
"Di atas rata-rata", "Di bawah rata-rata"))
ggplot(df_region, aes(x = reorder(Dealer_Region, Total_Revenue),
y = Total_Revenue / 1e6,
fill = Kategori)) +
geom_col() +
geom_text(aes(label = paste0("$", round(Total_Revenue / 1e6, 1), "M")),
hjust = -0.1, size = 3.5) +
coord_flip() +
scale_fill_manual(values = c(
"Di atas rata-rata" = "navy",
"Di bawah rata-rata" = "lightblue"
)) +
theme_minimal() +
labs(
title = "Performa Revenue per Dealer Region",
subtitle = "Warna biru tua = di atas rata-rata regional",
x = "Dealer Region",
y = "Total Revenue (Juta USD)",
fill = "Kategori"
)