Persiapan Data

df <- read_excel("C:/Users/User/Downloads/car sales.xlsx")

df <- df %>%
  mutate(
    Year       = year(Date),
    Month      = month(Date, label = TRUE, abbr = TRUE),
    Month_num  = month(Date)
  )

str(df)
## tibble [23,906 × 19] (S3: tbl_df/tbl/data.frame)
##  $ Car_id       : chr [1:23906] "C_CND_000001" "C_CND_000002" "C_CND_000003" "C_CND_000004" ...
##  $ Date         : POSIXct[1:23906], format: "2022-01-02" "2022-01-02" ...
##  $ Customer Name: chr [1:23906] "Geraldine" "Gia" "Gianna" "Giselle" ...
##  $ Gender       : chr [1:23906] "Male" "Male" "Male" "Male" ...
##  $ Annual Income: num [1:23906] 13500 1480000 1035000 13500 1465000 ...
##  $ Dealer_Name  : chr [1:23906] "Buddy Storbeck's Diesel Service Inc" "C & M Motors Inc" "Capitol KIA" "Chrysler of Tri-Cities" ...
##  $ Company      : chr [1:23906] "Ford" "Dodge" "Cadillac" "Toyota" ...
##  $ Model        : chr [1:23906] "Expedition" "Durango" "Eldorado" "Celica" ...
##  $ Engine       : chr [1:23906] "Double Overhead Camshaft" "Double Overhead Camshaft" "Overhead Camshaft" "Overhead Camshaft" ...
##  $ Transmission : chr [1:23906] "Auto" "Auto" "Manual" "Manual" ...
##  $ Color        : chr [1:23906] "Black" "Black" "Red" "Pale White" ...
##  $ Price ($)    : num [1:23906] 26000 19000 31500 14000 24500 12000 14000 42000 82000 15000 ...
##  $ Dealer_No    : chr [1:23906] "06457-3834" "60504-7114" "38701-8047" "99301-3882" ...
##  $ Body Style   : chr [1:23906] "SUV" "SUV" "Passenger" "SUV" ...
##  $ Phone        : num [1:23906] 8264678 6848189 7298798 6257557 7081483 ...
##  $ Dealer_Region: chr [1:23906] "Middletown" "Aurora" "Greenville" "Pasco" ...
##  $ Year         : num [1:23906] 2022 2022 2022 2022 2022 ...
##  $ Month        : Ord.factor w/ 12 levels "Jan"<"Feb"<"Mar"<..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Month_num    : num [1:23906] 1 1 1 1 1 1 1 1 1 1 ...
head(df)
## # A tibble: 6 × 19
##   Car_id  Date                `Customer Name` Gender `Annual Income` Dealer_Name
##   <chr>   <dttm>              <chr>           <chr>            <dbl> <chr>      
## 1 C_CND_… 2022-01-02 00:00:00 Geraldine       Male             13500 Buddy Stor…
## 2 C_CND_… 2022-01-02 00:00:00 Gia             Male           1480000 C & M Moto…
## 3 C_CND_… 2022-01-02 00:00:00 Gianna          Male           1035000 Capitol KIA
## 4 C_CND_… 2022-01-02 00:00:00 Giselle         Male             13500 Chrysler o…
## 5 C_CND_… 2022-01-02 00:00:00 Grace           Male           1465000 Chrysler P…
## 6 C_CND_… 2022-01-02 00:00:00 Guadalupe       Male            850000 Classic Ch…
## # ℹ 13 more variables: Company <chr>, Model <chr>, Engine <chr>,
## #   Transmission <chr>, Color <chr>, `Price ($)` <dbl>, Dealer_No <chr>,
## #   `Body Style` <chr>, Phone <dbl>, Dealer_Region <chr>, Year <dbl>,
## #   Month <ord>, Month_num <dbl>

Tren Penjualan Bulanan per Tahun

df_tren <- df %>%
  group_by(Year, Month_num, Month) %>%
  summarise(Total_Revenue = sum(`Price ($)`, na.rm = TRUE),
            Jumlah_Transaksi = n(), .groups = "drop")

ggplot(df_tren, aes(x = Month_num, y = Total_Revenue / 1e6,
                    color = factor(Year), group = factor(Year))) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2.5) +
  scale_x_continuous(breaks = 1:12,
                     labels = c("Jan","Feb","Mar","Apr","Mei","Jun",
                                "Jul","Agt","Sep","Okt","Nov","Des")) +
  scale_color_manual(values = c("2022" = "blue", "2023" = "red")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title   = "Tren Penjualan Bulanan per Tahun",
    subtitle = "Total pendapatan penjualan mobil (Juta USD)",
    x       = "Bulan",
    y       = "Revenue (Juta USD)",
    color   = "Tahun"
  )

## Top 10 Perusahaan Berdasarkan Revenue

df_company <- df %>%
  group_by(Company) %>%
  summarise(Total_Revenue = sum(`Price ($)`, na.rm = TRUE),
            Jumlah = n(), .groups = "drop") %>%
  arrange(desc(Total_Revenue)) %>%
  slice_head(n = 10)

ggplot(df_company, aes(x = reorder(Company, Total_Revenue),
                       y = Total_Revenue / 1e6, fill = Total_Revenue)) +
  geom_col() +
  geom_text(aes(label = paste0("$", round(Total_Revenue / 1e6, 1), "M")),
            hjust = -0.1, size = 3.2) +
  coord_flip() +
  scale_fill_gradient(low = "yellow", high = "darkgreen") +
  theme_minimal() +
  theme(legend.position = "none") +
  labs(
    title   = "Top 10 Perusahaan Berdasarkan Total Revenue",
    subtitle = "Periode 2022-2023",
    x       = "Perusahaan",
    y       = "Total Revenue (Juta USD)"
  )

## Distribusi Penjualan per Body Style

df_body <- df %>%
  group_by(`Body Style`) %>%
  summarise(Total_Revenue = sum(`Price ($)`, na.rm = TRUE),
            Jumlah = n(), .groups = "drop") %>%
  mutate(Persen = round(Jumlah / sum(Jumlah) * 100, 1))

ggplot(df_body, aes(x = reorder(`Body Style`, -Total_Revenue),
                    y = Total_Revenue / 1e6,
                    fill = `Body Style`)) +
  geom_col() +
  geom_text(aes(label = paste0(Persen, "%\n(", format(Jumlah, big.mark=","), " unit)")),
            vjust = -0.3, size = 3) +
  scale_fill_manual(values = c(
    "SUV"       = "blue",
    "Hatchback" = "darkgreen",
    "Sedan"     = "maroon",
    "Passenger" = "orange",
    "Hardtop"   = "purple"
  )) +
  theme_minimal() +
  theme(legend.position = "none") +
  labs(
    title   = "Distribusi Penjualan per Body Style",
    subtitle = "Berdasarkan total revenue dan persentase unit terjual",
    x       = "Body Style",
    y       = "Total Revenue (Juta USD)",
  )

## Perbandingan Penjualan berdasarkan Gender dan Transmisi

df_gt <- df %>%
  group_by(Gender, Transmission) %>%
  summarise(Total_Revenue = sum(`Price ($)`, na.rm = TRUE),
            Jumlah = n(), .groups = "drop")

ggplot(df_gt, aes(x = Gender, y = Total_Revenue / 1e6,
                  fill = Transmission)) +
  geom_col(position = "dodge") +
  geom_text(aes(label = paste0("$", round(Total_Revenue / 1e6, 1), "M")),
            position = position_dodge(width = 0.9),
            vjust = -0.4, size = 3.5) +
  scale_fill_manual(values = c("Auto" = "blue", "Manual" = "red")) +
  theme_minimal() +
  labs(
    title   = "Perbandingan Revenue berdasarkan Gender dan Jenis Transmisi",
    subtitle = "Auto vs Manual — Pria vs Wanita",
    x       = "Gender",
    y       = "Total Revenue (Juta USD)",
    fill    = "Transmisi"
  )

## Boxplot Distribusi Harga per Body Style

ggplot(df, aes(x = reorder(`Body Style`, `Price ($)`, median),
               y = `Price ($)` / 1000,
               fill = `Body Style`)) +
  geom_boxplot(alpha = 0.8, outlier.size = 0.8, outlier.alpha = 0.4) +
  scale_fill_manual(values = c(
    "SUV"       = "blue",
    "Hatchback" = "darkgreen",
    "Sedan"     = "red",
    "Passenger" = "orange",
    "Hardtop"   = "purple"
  )) +
  theme_minimal() +
  theme(legend.position = "none") +
  labs(
    title   = "Distribusi Harga Mobil per Body Style",
    subtitle = "Median, kuartil, dan outlier harga (ribu USD)",
    x       = "Body Style",
    y       = "Harga (Ribu USD)"
  )

## Performa Revenue per Dealer Region

df_region <- df %>%
  group_by(Dealer_Region) %>%
  summarise(
    Total_Revenue    = sum(`Price ($)`, na.rm = TRUE),
    Jumlah_Transaksi = n(),
    Avg_Price        = mean(`Price ($)`, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(Total_Revenue)) %>%
  mutate(Kategori = ifelse(Total_Revenue > mean(Total_Revenue),
                           "Di atas rata-rata", "Di bawah rata-rata"))

ggplot(df_region, aes(x = reorder(Dealer_Region, Total_Revenue),
                      y = Total_Revenue / 1e6,
                      fill = Kategori)) +
  geom_col() +
  geom_text(aes(label = paste0("$", round(Total_Revenue / 1e6, 1), "M")),
            hjust = -0.1, size = 3.5) +
  coord_flip() +
  scale_fill_manual(values = c(
    "Di atas rata-rata"  = "navy",
    "Di bawah rata-rata" = "lightblue"
  )) +
  theme_minimal() +
  labs(
    title   = "Performa Revenue per Dealer Region",
    subtitle = "Warna biru tua = di atas rata-rata regional",
    x       = "Dealer Region",
    y       = "Total Revenue (Juta USD)",
    fill    = "Kategori"
  )

DashBoard