Load libraries
library(ggplot2)
library(readxl)
library(ggthemes)
library(gridExtra)
Import Data
# Import the used-car sales workbook.
# The file is read through an explicit path built with file.path() rather than
# by calling setwd(), so the script no longer changes the session's working
# directory as a side effect. Adjust `data_dir` when running on another machine.
data_dir <- "C:/Users/Dell 7490/Documents/statistika"
data <- read_excel(file.path(data_dir, "used_car_sales.xlsx"))
# Preview the first rows to confirm the import worked.
head(data)
## # A tibble: 6 × 25
## ID `Distributor Name` Location `Car Name` `Manufacturer Name` `Car Type`
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 O2KE17 Carmudi California Fortuner Toyota SUV
## 2 EPMPC8 Carousell Philadelp… Creta Hyundai Hatchback
## 3 SQKXAP Carsome North Car… Scorpio Mahindra SUV
## 4 PWP2QK Trivett North Car… Plato Prazo Convertib…
## 5 FNDDKM Zupps Portland Dzire Maruti Sedan
## 6 I5D584 Zupps Denver Fortuner Toyota SUV
## # ℹ 19 more variables: Color <chr>, Gearbox <chr>, `Number of Seats` <dbl>,
## # `Number of Doors` <dbl>, Energy <chr>, `Manufactured Year` <dbl>,
## # `Price-$` <dbl>, `Mileage-KM` <dbl>, `Engine Power-HP` <dbl>,
## # `Purchased Date` <dttm>, `Car Sale Status` <chr>, `Sold Date` <dttm>,
## # `Purchased Price-$` <dbl>, `Sold Price-$` <dbl>, `Margin-%` <dbl>,
## # `Sales Agent Name` <chr>, `Sales Rating` <dbl>, `Sales Commission-$` <dbl>,
## # Feedback <chr>
# Print the tibble; the recorded output below shows the first 10 of 10,000 rows.
print(data)
## # A tibble: 10,000 × 25
## ID `Distributor Name` Location `Car Name` `Manufacturer Name` `Car Type`
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 O2KE17 Carmudi Californ… Fortuner Toyota SUV
## 2 EPMPC8 Carousell Philadel… Creta Hyundai Hatchback
## 3 SQKXAP Carsome North Ca… Scorpio Mahindra SUV
## 4 PWP2QK Trivett North Ca… Plato Prazo Convertib…
## 5 FNDDKM Zupps Portland Dzire Maruti Sedan
## 6 I5D584 Zupps Denver Fortuner Toyota SUV
## 7 KPRE75 Cars24 New York Thar Mahindra SUV
## 8 8C1O0B Ahg Denver Etriga Maruti Hatchback
## 9 KCVVNQ Olx Detroit Yodha Tata Truck
## 10 HGW6RK Nufor Tennessee Creta Hyundai Hatchback
## # ℹ 9,990 more rows
## # ℹ 19 more variables: Color <chr>, Gearbox <chr>, `Number of Seats` <dbl>,
## # `Number of Doors` <dbl>, Energy <chr>, `Manufactured Year` <dbl>,
## # `Price-$` <dbl>, `Mileage-KM` <dbl>, `Engine Power-HP` <dbl>,
## # `Purchased Date` <dttm>, `Car Sale Status` <chr>, `Sold Date` <dttm>,
## # `Purchased Price-$` <dbl>, `Sold Price-$` <dbl>, `Margin-%` <dbl>,
## # `Sales Agent Name` <chr>, `Sales Rating` <dbl>, …
1. Pie Chart
# Pie chart of the number of records per car model.
# A single stacked bar (constant x) is wrapped into polar coordinates along the
# count axis. The title names the variable actually mapped to `fill`
# (`Car Name`), so the chart is not mislabelled as a location chart.
pie_chart <- ggplot(data, aes(x = " ", fill = `Car Name`)) +
  geom_bar(width = 1) +
  coord_polar("y", start = 0) +
  theme_minimal() +
  labs(title = "Nama Mobil") +
  theme(axis.text.x = element_blank())
pie_chart

2. Bar Chart
# Bar chart: number of records per value of `Location`, one coloured bar each.
bar_chart <- ggplot(
  data = data,
  mapping = aes(x = Location, fill = Location)
) +
  geom_bar() +
  labs(title = "Lokasi Penjualan", x = "Lokasi Penjualan", y = "Frekuensi") +
  theme_minimal()
bar_chart

3. Histogram
# 3. Histogram
# Histogram of `Manufactured Year`.
# The column holds whole years and its recorded range is only 2015-2024, so
# one-year bins are used: each bar is exactly one year. Five-year bins would
# collapse the data into a few bars that cover unequal numbers of years.
histogram <- ggplot(data, aes(x = `Manufactured Year`)) +
  geom_histogram(binwidth = 1, fill = "blue", color = "black", alpha = 0.7) +
  theme_minimal() +
  labs(
    title = "Histogram: Tahun Dibuat Mobil",
    x = "Tahun",
    y = "Jumlah Kendaraan"
  )
histogram

4. Boxplot
# Box plot of the `Price-$` column. The constant "Harga" mapped to `fill`
# produces a single legend entry with that label.
boxplot_data <- ggplot(data = data) +
  geom_boxplot(
    mapping = aes(y = `Price-$`, fill = "Harga"),
    alpha = 0.6
  ) +
  labs(title = "Boxplot: Harga ($)", fill = "Harga") +
  theme_minimal()
boxplot_data

5. Density
# Kernel density estimate of the `Engine Power-HP` column, drawn as a
# semi-transparent filled curve with a constant fill label.
density_plot <- ggplot(
  data = data,
  mapping = aes(x = `Engine Power-HP`, fill = "Engine Power-HP")
) +
  geom_density(alpha = 0.5) +
  labs(title = "Density Plot: Engine Power", x = "Horse Power", y = "Density") +
  theme_minimal()
density_plot

6. Penyebaran Data
# Descriptive statistics for the `Price-$` and `Manufactured Year` columns.
# Missing values are dropped from every summary (na.rm = TRUE).

# 1. Arithmetic mean
mean_price <- mean(data[["Price-$"]], na.rm = TRUE)
mean_year <- mean(data[["Manufactured Year"]], na.rm = TRUE)

# 2. Median
median_price <- median(data[["Price-$"]], na.rm = TRUE)
median_year <- median(data[["Manufactured Year"]], na.rm = TRUE)
# 3. Mode
# Return the most frequent value of a vector.
#
# Arguments:
#   x      A vector (numeric, character, ...).
#   na.rm  If TRUE (the default), missing values are dropped before counting,
#          matching the na.rm = TRUE used by the other summaries in this
#          script. If FALSE, NA is counted like any other value and can be
#          returned as the mode.
#
# Returns:
#   A length-one vector of the same type as `x` holding the most frequent
#   value. On ties, the value that appears first in `x` wins. A zero-length
#   input (or one that is all NA with na.rm = TRUE) yields a zero-length result.
get_mode <- function(x, na.rm = TRUE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  ux <- unique(x)
  # match() maps each element to its index in `ux`; tabulate() counts how often
  # each index occurs, and which.max() picks the first index with the top count.
  ux[which.max(tabulate(match(x, ux)))]
}
# Most frequent value of each column
mode_price <- get_mode(data[["Price-$"]])
mode_year <- get_mode(data[["Manufactured Year"]])

# 4. First quartile (Q1)
q1_price <- quantile(data[["Price-$"]], probs = 0.25, na.rm = TRUE)
q1_year <- quantile(data[["Manufactured Year"]], probs = 0.25, na.rm = TRUE)

# 5. Third quartile (Q3)
q3_price <- quantile(data[["Price-$"]], probs = 0.75, na.rm = TRUE)
q3_year <- quantile(data[["Manufactured Year"]], probs = 0.75, na.rm = TRUE)

# 6. Range, stored as c(minimum, maximum)
range_price <- range(data[["Price-$"]], na.rm = TRUE)
range_year <- range(data[["Manufactured Year"]], na.rm = TRUE)
# Display the results.
# Each cat() call writes one labelled statistic; the `##` line that follows a
# call is the console output recorded when the document was rendered.

# --- Price ($) ---
cat("Statistik Deskriptif untuk Harga ($):\n")
## Statistik Deskriptif untuk Harga ($):
cat("Mean:", mean_price, "\n")
## Mean: 7975.1
cat("Median:", median_price, "\n")
## Median: 7900
cat("Modus:", mode_price, "\n")
## Modus: 7700
cat("Q1:", q1_price, "\n")
## Q1: 7200
cat("Q3:", q3_price, "\n")
## Q3: 8700
# range_price is the two-element result of range(): element 1 is the minimum,
# element 2 the maximum. The trailing "\n\n" leaves a blank line before the
# next section.
cat("Range:", range_price[1], "-", range_price[2], "\n\n")
## Range: 6000 - 10900

# --- Manufactured year ---
cat("Statistik Deskriptif untuk Tahun Pembuatan:\n")
## Statistik Deskriptif untuk Tahun Pembuatan:
cat("Mean:", mean_year, "\n")
## Mean: 2019.525
cat("Median:", median_year, "\n")
## Median: 2020
cat("Modus:", mode_year, "\n")
## Modus: 2024
cat("Q1:", q1_year, "\n")
## Q1: 2017
cat("Q3:", q3_year, "\n")
## Q3: 2022
cat("Range:", range_year[1], "-", range_year[2], "\n")
## Range: 2015 - 2024
# Sample variance of the manufacturing year (missing values dropped)
var_year <- var(data[["Manufactured Year"]], na.rm = TRUE)
print(paste("Varians Manufactured Year:", var_year))
## [1] "Varians Manufactured Year: 8.33299825982598"
# Sample standard deviation of the manufacturing year (missing values dropped)
sd_year <- sd(data[["Manufactured Year"]], na.rm = TRUE)
print(paste("Standar Deviasi Manufactured Year:", sd_year))
## [1] "Standar Deviasi Manufactured Year: 2.88669330893082"
Menampilkan semua grafik dalam satu tampilan
# Lay out the five plots together on one page in a two-column grid.
grid.arrange(
  grobs = list(pie_chart, bar_chart, histogram, boxplot_data, density_plot),
  ncol = 2
)
