library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggthemes)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(readr)
library(DT)
# Load the car price data and keep only the four columns used below.
# NOTE(review): setwd() with an absolute path only works on the machine that
# has this folder -- consider reading the CSV via a project-relative path.
setwd("C:/Besok aja/Semester-2/Probabilitas dan Statistika/Week-3")
df <- read.csv(file = "CarPrice_Assignment.csv")
df_clean <- dplyr::select(df, fueltype, carbody, price, horsepower)
# Interactive table preview, five rows per page.
datatable(data = df_clean, options = list(pageLength = 5))
# Pie chart of the share of cars per fuel type.
# Counts per fuel type, and the same counts as percentages (one decimal).
fuel_table <- table(df_clean$fueltype)
fuel_pct <- round(prop.table(fuel_table) * 100, 1)
# Slice labels: fuel type name on the first line, percentage on the second.
fuel_labels <- paste(names(fuel_table), "\n", fuel_pct, "%")
# One colour vector shared by the slices and the legend keys so the two can
# never disagree.
fuel_colors <- c("green", "purple")
pie(fuel_table,
    labels = fuel_labels,
    main = "Pie Chart: Proporsi Tipe Bahan Bakar",
    col = fuel_colors,
    border = "black")
# Legend entries come from the table itself, so they always follow the same
# order as the slices instead of relying on hard-coded names.
legend("bottomright",
       legend = names(fuel_table),
       fill = fuel_colors,
       title = "Tipe BBM")
Statistik Deskriptif: Mobil bensin = 185 unit (90.2%), diesel = 20 unit (9.8%). Bensin mendominasi pasar.
Statistik Inferensia: Berdasarkan uji proporsi satu sampel, diperoleh p-value < 0.05 yang menunjukkan bahwa proporsi mobil bensin berbeda signifikan dari 50%. Dengan kata lain, proporsi mobil bensin (90.2%) secara statistik lebih besar daripada mobil diesel.
–
# Bar chart of the number of cars per body type; each body type gets its own
# fill colour and every bar has a black outline.
ggplot(data = df_clean, mapping = aes(x = carbody, fill = carbody)) +
  geom_bar(colour = "black") +
  ggtitle("Jumlah Mobil Berdasarkan Tipe Body Mobil") +
  xlab("Tipe Body Mobil") +
  ylab("Jumlah Mobil") +
  theme_minimal()
Statistik Deskriptif: Distribusi jenis body mobil dari yang terbanyak adalah sedan (96 unit), diikuti hatchback (70 unit), wagon (25 unit), hardtop (8 unit), dan convertible (6 unit). Sedan dan hatchback mendominasi pasar dengan total 81% dari seluruh mobil.
Statistik Inferensia: Uji chi-square goodness of fit menghasilkan p-value < 0.05, yang berarti distribusi jenis body mobil tidak merata. Hal ini mengindikasikan adanya preferensi pasar yang signifikan terhadap jenis body mobil tertentu, terutama sedan dan hatchback.
–
# Frequency histogram of horsepower with dashed reference lines at the mean
# (red) and the median (blue).
hist(df_clean$horsepower,
     breaks = 15, # suggested number of cells; hist() may pick a nearby count
     col = "lightblue",
     border = "black",
     main = "Distribusi Horsepower",
     xlab = "Horsepower",
     ylab = "Frekuensi",
     probability = FALSE) # plot counts, not densities
# Reference lines: mean in red, median in blue.
abline(v = mean(df_clean$horsepower), col = "red", lwd = 2, lty = 2)
abline(v = median(df_clean$horsepower), col = "blue", lwd = 2, lty = 2)
# Legend colours are listed in the same order as the labels (mean, median) so
# each key matches the line drawn above: red for the mean, blue for the median.
legend("topright",
       legend = c(paste("Mean =", round(mean(df_clean$horsepower), 1)),
                  paste("Median =", median(df_clean$horsepower))),
       col = c("red", "blue"),
       lty = 2, lwd = 2)
Statistik Deskriptif: Nilai rata-rata (mean) horsepower adalah 104.12 dengan nilai tengah (median) 95. Rentang horsepower berada antara 48 hingga 288.
Statistik Inferensia: Distribusi horsepower cenderung menceng ke kanan (positively skewed) karena nilai mean (104.12) lebih besar dari median (95). Uji normalitas Shapiro-Wilk menghasilkan p-value < 0.05, yang mengindikasikan bahwa data tidak berdistribusi normal. Hal ini menunjukkan adanya mobil-mobil dengan horsepower tinggi yang menarik mean ke kanan.
–
# Kernel density plot of car prices with dashed reference lines at the mean
# (red) and the median (blue).
# A large scipen makes R prefer fixed notation over scientific notation.
options(scipen = 999)
density_price <- density(df_clean[["price"]])
plot(
  density_price,
  col = "darkgreen",
  lwd = 2,
  main = "Density Plot: Distribusi Harga Mobil",
  xlab = "Harga (USD)",
  ylab = "Density"
)
# Shade the area under the curve with a translucent green.
polygon(
  density_price,
  border = "darkgreen",
  col = rgb(red = 0, green = 0.8, blue = 0, alpha = 0.2)
)
# Both reference lines in one vectorised call: mean first, then median.
abline(
  v = c(mean(df_clean[["price"]]), median(df_clean[["price"]])),
  col = c("red", "blue"),
  lwd = 2,
  lty = 2
)
legend(
  "topright",
  legend = c(
    paste("Mean = $", round(mean(df_clean[["price"]]), 0)),
    paste("Median = $", median(df_clean[["price"]]))
  ),
  col = c("red", "blue"),
  lwd = 2,
  lty = 2
)
Statistik Deskriptif: Rata-rata harga mobil adalah $13276.71 dengan nilai tengah $10295. Harga mobil bervariasi dari $5118 hingga $45400.
Statistik Inferensia: Distribusi harga sangat menceng ke kanan (ekor panjang ke kanan), terlihat dari nilai mean ($13276.71) yang jauh lebih besar dari median ($10295). Hal ini mengindikasikan adanya mobil-mobil mewah dengan harga sangat tinggi yang menjadi outlier dan menarik nilai rata-rata ke atas. Interval kepercayaan 95% untuk mean harga kira-kira $12177 hingga $14377 (mean ± 1.97 × SD/√n, dengan SD = $7988.85 dan n = 205), yang mencerminkan variasi harga yang cukup besar.
–
# Side-by-side boxplots of price for each fuel type, filled by fuel type.
ggplot(
  data = df_clean,
  mapping = aes(x = fueltype, y = price, fill = fueltype)
) +
  geom_boxplot() +
  ggtitle("Perbandingan Gas vs Diesel") +
  xlab("Tipe Bahan Bakar") +
  ylab("Harga (USD)") +
  theme_minimal()
Statistik Deskriptif: Mobil diesel memiliki median harga $13852.5 dengan kuartil 1 (Q1) $9120 dan kuartil 3 (Q3) $19375.5. Sementara mobil bensin memiliki median harga $9989 dengan Q1 $7689 dan Q3 $15998.
Statistik Inferensia: Berdasarkan uji t dua sampel independen, diperoleh p-value = 0.1346. Karena p-value > 0.05, tidak terdapat perbedaan yang signifikan antara rata-rata harga mobil diesel dan bensin. Dengan kata lain, data ini belum cukup membuktikan bahwa rata-rata harga mobil diesel dan bensin berbeda, meskipun median harga diesel pada sampel lebih tinggi.
–
# Mean of horsepower, printed unrounded.
mean_hp <- with(df_clean, mean(horsepower))
cat("Rata-rata horsepower:", mean_hp)
## Rata-rata horsepower: 104.1171
# Mean of price, printed to two decimals; the leading "\n" starts a new line
# because the previous cat() did not end with one.
mean_price <- with(df_clean, mean(price))
cat("\nRata-rata harga mobil: $", round(mean_price, 2), sep = "")
##
## Rata-rata harga mobil: $13276.71
# Median of horsepower.
median_hp <- with(df_clean, median(horsepower))
cat("Median horsepower:", median_hp)
## Median horsepower: 95
# Median of price.
median_price <- with(df_clean, median(price))
cat("\nMedian harga mobil: $", median_price, sep = "")
##
## Median harga mobil: $10295
# Return the most frequent value (mode) of `x`.
#
# x: a vector of values.
# Returns a length-one vector holding the value that occurs most often; when
# several values tie, the one that appears first in `x` is returned.
modus <- function(x) {
  distinct_values <- unique(x)
  # Map every element to the index of its distinct value, then count how
  # often each index occurs.
  value_index <- match(x, distinct_values)
  occurrences <- tabulate(value_index)
  # which.max() picks the first maximum, which gives the tie-breaking rule.
  most_frequent <- which.max(occurrences)
  distinct_values[most_frequent]
}
# Most frequent horsepower value.
modus_hp <- modus(df_clean[["horsepower"]])
cat("Modus horsepower:", modus_hp)
## Modus horsepower: 68
# Most frequent price value; the leading "\n" starts a new line because the
# previous cat() did not end with one.
modus_price <- modus(df_clean[["price"]])
cat("\nModus harga mobil: $", modus_price, sep = "")
##
## Modus harga mobil: $16500
# First (25%) and third (75%) quartiles of horsepower and price.
# Q1
q1_hp <- quantile(df_clean$horsepower, 0.25)
q1_price <- quantile(df_clean$price, 0.25)
# Q3
q3_hp <- quantile(df_clean$horsepower, 0.75)
q3_price <- quantile(df_clean$price, 0.75)
cat("Q1 Horsepower:", q1_hp, "\n")
## Q1 Horsepower: 70
cat("Q3 Horsepower:", q3_hp, "\n")
## Q3 Horsepower: 116
# Both price lines use sep = "" so the "$" sits directly against the number,
# and both end with a newline so later output starts on its own line.
cat("Q1 Harga: $", q1_price, "\n", sep = "")
## Q1 Harga: $7788
cat("Q3 Harga: $", q3_price, "\n", sep = "")
## Q3 Harga: $16503
# Range (max - min) of horsepower and price, printed together with the
# minimum and maximum. All pieces are joined with sep = "" and the spaces are
# written explicitly so the brackets hug the numbers.
# horsepower
range_hp <- max(df_clean$horsepower) - min(df_clean$horsepower)
cat("Range horsepower: ", range_hp,
    " (", min(df_clean$horsepower), " - ", max(df_clean$horsepower), ")",
    sep = "")
## Range horsepower: 240 (48 - 288)
# price; the leading "\n" starts a new line because the previous cat() did
# not end with one.
range_price <- max(df_clean$price) - min(df_clean$price)
cat("\nRange harga mobil: $", range_price,
    " ($", min(df_clean$price), " - $", max(df_clean$price), ")",
    sep = "")
##
## Range harga mobil: $40282 ($5118 - $45400)
# Sample variance of horsepower, printed to two decimals.
var_hp <- with(df_clean, var(horsepower))
cat("Varians horsepower:", round(var_hp, 2))
## Varians horsepower: 1563.74
# Sample variance of price.
# NOTE(review): a variance is in squared units, so the "$" prefix in this
# label is not strictly accurate; the label is kept as originally written.
var_price <- with(df_clean, var(price))
cat("\nVarians harga mobil: $", round(var_price, 2), sep = "")
##
## Varians harga mobil: $63821762
# Sample standard deviation of horsepower, printed to two decimals.
sd_hp <- with(df_clean, sd(horsepower))
cat("Standar deviasi horsepower:", round(sd_hp, 2))
## Standar deviasi horsepower: 39.54
# Sample standard deviation of price.
sd_price <- with(df_clean, sd(price))
cat("\nStandar deviasi harga mobil: $", round(sd_price, 2), sep = "")
##
## Standar deviasi harga mobil: $7988.85