Import Data

library(readxl)
df <- read_excel("C:/Users/Gusde/Downloads/car sales.xlsx")
View(df)

names(df) <- c(
  "car_id", "date", "customer_name", "gender", "annual_income",
  "dealer_name", "company", "model", "engine", "transmission",
  "color", "price", "dealer_no", "body_style", "phone", "dealer_region"
)

if (is.numeric(df$date)) {
  df$date <- as.Date(df$date, origin = "1899-12-30")
} else {
  df$date <- as.Date(df$date)
}

df <- df %>%
  mutate(
    price = as.numeric(price),
    annual_income = as.numeric(annual_income),
    month = floor_date(date, "month")
  ) %>%
  filter(!is.na(price))

head(df)
## # A tibble: 6 × 17
##   car_id date       customer_name gender annual_income dealer_name company model
##   <chr>  <date>     <chr>         <chr>          <dbl> <chr>       <chr>   <chr>
## 1 C_CND… 2022-01-02 Geraldine     Male           13500 Buddy Stor… Ford    Expe…
## 2 C_CND… 2022-01-02 Gia           Male         1480000 C & M Moto… Dodge   Dura…
## 3 C_CND… 2022-01-02 Gianna        Male         1035000 Capitol KIA Cadill… Eldo…
## 4 C_CND… 2022-01-02 Giselle       Male           13500 Chrysler o… Toyota  Celi…
## 5 C_CND… 2022-01-02 Grace         Male         1465000 Chrysler P… Acura   TL   
## 6 C_CND… 2022-01-02 Guadalupe     Male          850000 Classic Ch… Mitsub… Diam…
## # ℹ 9 more variables: engine <chr>, transmission <chr>, color <chr>,
## #   price <dbl>, dealer_no <chr>, body_style <chr>, phone <dbl>,
## #   dealer_region <chr>, month <date>

Ringkasan Data

summary_data <- df %>%
  summarise(
    total_transaksi = n(),
    total_sales = sum(price, na.rm = TRUE),
    rata_rata_harga = mean(price, na.rm = TRUE),
    median_harga = median(price, na.rm = TRUE),
    jumlah_brand = n_distinct(company),
    jumlah_region = n_distinct(dealer_region)
  )

summary_data
## # A tibble: 1 × 6
##   total_transaksi total_sales rata_rata_harga median_harga jumlah_brand
##             <int>       <dbl>           <dbl>        <dbl>        <int>
## 1           23906   671525465          28090.        23000           30
## # ℹ 1 more variable: jumlah_region <int>

Visualisasi 1 — Tren Total Penjualan per Bulan

sales_month <- df %>%
  group_by(month) %>%
  summarise(
    total_sales = sum(price, na.rm = TRUE),
    jumlah_unit = n(),
    .groups = "drop"
  )

ggplot(sales_month, aes(x = month, y = total_sales)) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  scale_y_continuous(labels = dollar) +
  labs(
    title = "Tren Total Penjualan Mobil per Bulan",
    x = "Bulan",
    y = "Total Sales"
  )

Visualisasi 2 — Top 10 Brand dengan Total Sales Tertinggi

top_company <- df %>%
  group_by(company) %>%
  summarise(
    total_sales = sum(price, na.rm = TRUE),
    jumlah_unit = n(),
    .groups = "drop"
  ) %>%
  arrange(desc(total_sales)) %>%
  slice_head(n = 10)

ggplot(top_company, aes(x = reorder(company, total_sales), y = total_sales)) +
  geom_col() +
  coord_flip() +
  scale_y_continuous(labels = dollar) +
  labs(
    title = "Top 10 Brand Mobil Berdasarkan Total Sales",
    x = "Brand Mobil",
    y = "Total Sales"
  )

Visualisasi 3 — Distribusi Harga Mobil

ggplot(df, aes(x = price)) +
  geom_histogram(bins = 30, color = "white") +
  scale_x_continuous(labels = dollar) +
  labs(
    title = "Distribusi Harga Mobil",
    x = "Harga Mobil",
    y = "Jumlah Transaksi"
  )

Visualisasi 4 — Perbandingan Harga Mobil Berdasarkan Body Style

ggplot(df, aes(x = reorder(body_style, price, median), y = price)) +
  geom_boxplot(outlier.alpha = 0.3) +
  coord_flip() +
  scale_y_continuous(labels = dollar) +
  labs(
    title = "Perbandingan Harga Mobil Berdasarkan Body Style",
    x = "Body Style",
    y = "Harga Mobil"
  )

Visualisasi 5 — Proporsi Body Style Berdasarkan Jenis Transmisi

transmission_body <- df %>%
  count(transmission, body_style)

ggplot(transmission_body, aes(x = transmission, y = n, fill = body_style)) +
  geom_col(position = "fill") +
  scale_y_continuous(labels = percent) +
  labs(
    title = "Proporsi Body Style Berdasarkan Jenis Transmisi",
    x = "Transmisi",
    y = "Proporsi",
    fill = "Body Style"
  )

Visualisasi 6 — Hubungan Annual Income dan Harga Mobil

ggplot(df, aes(x = annual_income, y = price, color = body_style)) +
  geom_point(alpha = 0.45) +
  scale_x_continuous(labels = dollar) +
  scale_y_continuous(labels = dollar) +
  labs(
    title = "Hubungan Annual Income dan Harga Mobil",
    x = "Annual Income",
    y = "Harga Mobil",
    color = "Body Style"
  )