##INPUT DATA

# Preview the first six rows of the raw sales table.
head(datakuis, n = 6L)
## # A tibble: 6 × 16
##   Car_id  Date                `Customer Name` Gender `Annual Income` Dealer_Name
##   <chr>   <dttm>              <chr>           <chr>            <dbl> <chr>      
## 1 C_CND_… 2022-01-02 00:00:00 Geraldine       Male             13500 Buddy Stor…
## 2 C_CND_… 2022-01-02 00:00:00 Gia             Male           1480000 C & M Moto…
## 3 C_CND_… 2022-01-02 00:00:00 Gianna          Male           1035000 Capitol KIA
## 4 C_CND_… 2022-01-02 00:00:00 Giselle         Male             13500 Chrysler o…
## 5 C_CND_… 2022-01-02 00:00:00 Grace           Male           1465000 Chrysler P…
## 6 C_CND_… 2022-01-02 00:00:00 Guadalupe       Male            850000 Classic Ch…
## # ℹ 10 more variables: Company <chr>, Model <chr>, Engine <chr>,
## #   Transmission <chr>, Color <chr>, `Price ($)` <dbl>, Dealer_No <chr>,
## #   `Body Style` <chr>, Phone <dbl>, Dealer_Region <chr>
# Prepare the working table: give the awkward column names syntactic
# snake_case names and derive calendar fields from the sale date.
# Both steps run in a single pipeline on purpose: rebuilding the renamed
# table from `datakuis` would silently drop the derived date columns.
datakuis2 <- datakuis %>%
  rename(
    income = `Annual Income`,
    body_style = `Body Style`,
    price = `Price ($)`
  ) %>%
  mutate(
    # `Date` is already a date-time, so truncate it to the first day of its
    # month rather than re-parsing it with ym(), which expects
    # "year-month" text input.
    Tanggal = floor_date(as_date(Date), unit = "month"),
    Tahun = year(Date),
    Bulan = month(Date, label = TRUE, abbr = TRUE)
  )

# Count missing values per column.
datakuis2 %>%
  is.na() %>%
  colSums()
##        Car_id          Date Customer Name        Gender        income 
##             0             0             0             0             0 
##   Dealer_Name       Company         Model        Engine  Transmission 
##             0             0             0             0             0 
##         Color         price     Dealer_No    body_style         Phone 
##             0             0             0             0             0 
## Dealer_Region 
##             0
# Income summary per gender (mean, standard deviation, minimum, maximum),
# ordered from the highest to the lowest mean income.
statistik_income <- datakuis2 %>%
  group_by(Gender) %>%
  summarise(
    rata2_income = mean(income, na.rm = TRUE),
    sd_income    = sd(income, na.rm = TRUE),
    min_income   = min(income, na.rm = TRUE),
    max_income   = max(income, na.rm = TRUE)
  ) %>%
  arrange(desc(rata2_income))

print(statistik_income)
## # A tibble: 2 × 5
##   Gender rata2_income sd_income min_income max_income
##   <chr>         <dbl>     <dbl>      <dbl>      <dbl>
## 1 Male        851184.   729096.      10080   11200000
## 2 Female      755973.   680366.      13500    6460000

# BOXPLOT PEMASUKAN

# Horizontal boxplot of income per gender. Genders are ordered by their
# median income and outliers are drawn in red.
ggplot(
  data = datakuis2,
  mapping = aes(x = reorder(Gender, income, FUN = median), y = income)
) +
  geom_boxplot(outlier.color = "red", outlier.size = 2) +
  coord_flip() +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Pemasukkan Berdasarkan Gender",
    x = "Gender",
    y = "Pemasukkan"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    axis.text.y = element_text(size = 7)
  )

# CEK OUTLIER DARI PEMASUKAN

# Flag income outliers within each gender using the 1.5 * IQR rule:
# a row is an "Outlier" when its income falls outside
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR] of its own gender group.
outlier_pemasukkan <- datakuis2 %>%
  group_by(Gender) %>%
  mutate(
    Q1 = quantile(income, probs = 0.25, na.rm = TRUE),
    Q3 = quantile(income, probs = 0.75, na.rm = TRUE),
    IQR_income = Q3 - Q1,
    batas_bawah = Q1 - 1.5 * IQR_income,
    batas_atas = Q3 + 1.5 * IQR_income,
    status_outlier = ifelse(
      !(income >= batas_bawah & income <= batas_atas),
      "Outlier",
      "Normal"
    )
  ) %>%
  ungroup()

# Number of income outliers per gender, largest count first.
jumlah_outlier <- outlier_pemasukkan %>%
  filter(status_outlier == "Outlier") %>%
  count(Gender, name = "jumlah_outlier") %>%
  arrange(desc(jumlah_outlier))

print(jumlah_outlier)
## # A tibble: 2 × 2
##   Gender jumlah_outlier
##   <chr>           <int>
## 1 Male              601
## 2 Female            233
# Average car price per body style, one line per gender.
# The data are averaged first so each (body style, gender) pair contributes
# exactly one point, matching the "Rata-Rata" (average) axis label.
# `group = Gender` is required because the x axis is discrete: without it
# ggplot2 groups by every discrete variable, so geom_line() cannot connect
# points across body styles.
# NOTE(review): the y label previously read "Rata-Rata Pemasukan" (average
# income) although the plotted variable is `price`; the label now names the
# price. Confirm that price, not income, is the intended measure.
datakuis2 %>%
  group_by(body_style, Gender) %>%
  summarise(rata2_price = mean(price, na.rm = TRUE), .groups = "drop") %>%
  ggplot(
    aes(x = body_style, y = rata2_price, color = Gender, group = Gender)
  ) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 1.8) +
  scale_color_viridis_d(option = "magma") +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Daya Beli dan Minat Beli Berdasarkan Gender",
    x = "Tipe Kendaraan",
    y = "Rata-Rata Harga",
    color = "Gender"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    legend.position = "bottom"
  )

# Restrict the sales data to the selected manufacturer(s).
perusahaan_pilihan <- "BMW"
data_ts <- filter(datakuis2, Company %in% perusahaan_pilihan)

# Sale price of every car sold over time for the selected manufacturer.
ggplot(
  data = data_ts,
  mapping = aes(x = Date, y = price, color = Company)
) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Nilai Mobil yang Terjual",
    subtitle = "Pada Perusahaan BMW",
    x = "Tanggal",
    y = "Harga Mobil",
    color = "Perusahaan"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold"))

# Restrict the sales data to the selected manufacturer(s).
perusahaan_pilihan2 <- "Toyota"
data_ts2 <- filter(datakuis2, Company %in% perusahaan_pilihan2)

# Sale price of every car sold over time for the selected manufacturer.
ggplot(
  data = data_ts2,
  mapping = aes(x = Date, y = price, color = Company)
) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Nilai Mobil yang Terjual",
    subtitle = "Pada Perusahaan Toyota",
    x = "Tanggal",
    y = "Harga Mobil",
    color = "Perusahaan"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold"))

# Restrict the sales data to the selected manufacturer(s).
perusahaan_pilihan3 <- "Acura"
data_ts3 <- filter(datakuis2, Company %in% perusahaan_pilihan3)

# Sale price of every car sold over time for the selected manufacturer.
ggplot(
  data = data_ts3,
  mapping = aes(x = Date, y = price, color = Company)
) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Nilai Mobil yang Terjual",
    subtitle = "Pada Perusahaan Acura",
    x = "Tanggal",
    y = "Harga Mobil",
    color = "Perusahaan"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold"))