# INPUT DATA ----
# Preview the first six rows of the raw sales data.
head(datakuis, n = 6)
## # A tibble: 6 × 16
## Car_id Date `Customer Name` Gender `Annual Income` Dealer_Name
## <chr> <dttm> <chr> <chr> <dbl> <chr>
## 1 C_CND_… 2022-01-02 00:00:00 Geraldine Male 13500 Buddy Stor…
## 2 C_CND_… 2022-01-02 00:00:00 Gia Male 1480000 C & M Moto…
## 3 C_CND_… 2022-01-02 00:00:00 Gianna Male 1035000 Capitol KIA
## 4 C_CND_… 2022-01-02 00:00:00 Giselle Male 13500 Chrysler o…
## 5 C_CND_… 2022-01-02 00:00:00 Grace Male 1465000 Chrysler P…
## 6 C_CND_… 2022-01-02 00:00:00 Guadalupe Male 850000 Classic Ch…
## # ℹ 10 more variables: Company <chr>, Model <chr>, Engine <chr>,
## # Transmission <chr>, Color <chr>, `Price ($)` <dbl>, Dealer_No <chr>,
## # `Body Style` <chr>, Phone <dbl>, Dealer_Region <chr>
# Derive date helper columns and rename awkward column names in ONE pipeline.
# Previously the rename step restarted from the raw `datakuis`, which silently
# discarded the Tanggal/Tahun/Bulan columns created just before it.
datakuis2 <- datakuis %>%
  mutate(
    # `Date` is already a date-time (<dttm>). ym() parses year-month text, so
    # as_date() is used to keep the calendar date of each sale instead.
    Tanggal = as_date(Date),
    Tahun = year(Date),
    Bulan = month(Date, label = TRUE, abbr = TRUE)
  ) %>%
  rename(
    income = `Annual Income`,
    body_style = `Body Style`,
    price = `Price ($)`
  )
# Check for missing values in every column
colSums(is.na(datakuis2))
# Expected output now that the derived date columns are retained
# (re-run to confirm):
## Car_id Date Customer Name Gender income
## 0 0 0 0 0
## Dealer_Name Company Model Engine Transmission
## 0 0 0 0 0
## Color price Dealer_No body_style Phone
## 0 0 0 0 0
## Dealer_Region Tanggal Tahun Bulan
## 0 0 0 0
# Income summary per gender: mean, standard deviation, minimum and maximum
# (missing values ignored).
statistik_income <- summarise(
  group_by(datakuis2, Gender),
  rata2_income = mean(income, na.rm = TRUE),
  sd_income = sd(income, na.rm = TRUE),
  min_income = min(income, na.rm = TRUE),
  max_income = max(income, na.rm = TRUE)
)
# Highest average income first.
statistik_income <- arrange(statistik_income, desc(rata2_income))
print(statistik_income)
## # A tibble: 2 × 5
## Gender rata2_income sd_income min_income max_income
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Male 851184. 729096. 10080 11200000
## 2 Female 755973. 680366. 13500 6460000
# Horizontal box plot of income per gender, with genders ordered by their
# median income and outliers drawn in red.
ggplot(
  data = datakuis2,
  mapping = aes(x = reorder(Gender, income, FUN = median), y = income)
) +
  geom_boxplot(outlier.colour = "red", outlier.size = 2) +
  scale_y_continuous(labels = comma) +
  coord_flip() +
  labs(
    x = "Gender",
    y = "Pemasukkan",
    title = "Pemasukkan Berdasarkan Gender"
  ) +
  theme_minimal() +
  theme(
    axis.text.y = element_text(size = 7),
    plot.title = element_text(face = "bold")
  )
# Flag income outliers within each gender using the 1.5 * IQR rule:
# a row is "Outlier" when its income lies outside
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR] of its own gender group.
outlier_pemasukkan <- ungroup(
  mutate(
    group_by(datakuis2, Gender),
    Q1 = quantile(income, probs = 0.25, na.rm = TRUE),
    Q3 = quantile(income, probs = 0.75, na.rm = TRUE),
    IQR_income = Q3 - Q1,
    batas_bawah = Q1 - 1.5 * IQR_income,
    batas_atas = Q3 + 1.5 * IQR_income,
    # Inside the fences (inclusive) is "Normal"; everything else is "Outlier".
    status_outlier = ifelse(
      income >= batas_bawah & income <= batas_atas,
      "Normal",
      "Outlier"
    )
  )
)
# Number of income outliers per gender, largest count first.
jumlah_outlier <- outlier_pemasukkan %>%
  filter(status_outlier == "Outlier") %>%
  count(Gender, name = "jumlah_outlier") %>%
  arrange(desc(jumlah_outlier))
print(jumlah_outlier)
## # A tibble: 2 × 2
## Gender jumlah_outlier
## <chr> <int>
## 1 Male 601
## 2 Female 233
# Average car price per body style, one line per gender.
# The raw rows are aggregated first: the axis is labelled as an average, and
# a line over a discrete x-axis needs one value per (body style, gender)
# plus an explicit `group` so that points are connected across body styles.
datakuis2 %>%
  group_by(body_style, Gender) %>%
  summarise(rata2_price = mean(price, na.rm = TRUE), .groups = "drop") %>%
  ggplot(
    aes(x = body_style, y = rata2_price, color = Gender, group = Gender)
  ) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 1.8) +
  scale_color_viridis_d(option = "magma") +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Daya Beli dan Minat Beli Berdasarkan Gender",
    x = "Tipe Kendaraan",
    # The plotted variable is price, not income, so the label says so.
    y = "Rata-Rata Harga",
    color = "Gender"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 11),
    legend.position = "bottom"
  )
#' Plot car sale prices over time for the selected company/companies.
#'
#' Replaces three copy-pasted plot blocks that differed only in the company.
#'
#' @param data_perusahaan Data frame already filtered to the companies of
#'   interest; the columns `Date`, `price` and `Company` are used.
#' @param nama_perusahaan Character vector of company names; used only to
#'   build the subtitle.
#' @return A ggplot object (auto-printed when called at top level).
plot_harga_perusahaan <- function(data_perusahaan, nama_perusahaan) {
  ggplot(data_perusahaan, aes(x = Date, y = price, color = Company)) +
    geom_line(linewidth = 1) +
    geom_point(size = 2) +
    labs(
      title = "Nilai Mobil yang Terjual",
      subtitle = paste("Pada Perusahaan", toString(nama_perusahaan)),
      x = "Tanggal",
      y = "Harga Mobil",
      color = "Perusahaan"
    ) +
    scale_y_continuous(labels = comma) +
    theme_minimal() +
    theme(plot.title = element_text(face = "bold"))
}

# Filter by company: BMW
perusahaan_pilihan <- c("BMW")
data_ts <- datakuis2 %>%
  filter(Company %in% perusahaan_pilihan)
# Time series of car sale prices
plot_harga_perusahaan(data_ts, perusahaan_pilihan)

# Filter by company: Toyota
perusahaan_pilihan2 <- c("Toyota")
data_ts2 <- datakuis2 %>%
  filter(Company %in% perusahaan_pilihan2)
# Time series of car sale prices
plot_harga_perusahaan(data_ts2, perusahaan_pilihan2)

# Filter by company: Acura
perusahaan_pilihan3 <- c("Acura")
data_ts3 <- datakuis2 %>%
  filter(Company %in% perusahaan_pilihan3)
# Time series of car sale prices
plot_harga_perusahaan(data_ts3, perusahaan_pilihan3)