library(ggplot2)
EDA pada dataset mtcars
# Compact structural overview of mtcars: its dimensions, then each column's
# type and first few values.
str(mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
# Per-column summary statistics (min, quartiles, median, mean, max).
summary(mtcars)
## mpg cyl disp hp
## Min. :10.40 Min. :4.000 Min. : 71.1 Min. : 52.0
## 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8 1st Qu.: 96.5
## Median :19.20 Median :6.000 Median :196.3 Median :123.0
## Mean :20.09 Mean :6.188 Mean :230.7 Mean :146.7
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0 3rd Qu.:180.0
## Max. :33.90 Max. :8.000 Max. :472.0 Max. :335.0
## drat wt qsec vs
## Min. :2.760 Min. :1.513 Min. :14.50 Min. :0.0000
## 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89 1st Qu.:0.0000
## Median :3.695 Median :3.325 Median :17.71 Median :0.0000
## Mean :3.597 Mean :3.217 Mean :17.85 Mean :0.4375
## 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90 3rd Qu.:1.0000
## Max. :4.930 Max. :5.424 Max. :22.90 Max. :1.0000
## am gear carb
## Min. :0.0000 Min. :3.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000
## Median :0.0000 Median :4.000 Median :2.000
## Mean :0.4062 Mean :3.688 Mean :2.812
## 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1.0000 Max. :5.000 Max. :8.000
hubungan mpg vs wt
# Scatter plot of fuel efficiency (mpg) against weight (wt) with a straight
# linear-model trend line drawn on top.
library(ggplot2)
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point(size = 3, color = "darkblue") + # draw one point per row
  # Add the trend line. The formula is spelled out so geom_smooth() does not
  # fall back to its default and print the "using formula = 'y ~ x'" message.
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "red") +
  labs(
    title = "Visualisasi mpg vs wt",
    x = "Berat (wt)",
    y = "Efisiensi (mpg)"
  ) +
  theme_minimal()
Dataset ChickWeight
# Mean chick weight at each measurement time, drawn as a connected
# points-and-lines chart.
avg_weight <- aggregate(weight ~ Time, data = ChickWeight, FUN = mean)
with(
  avg_weight,
  plot(
    Time, weight,
    type = "o", pch = 16, col = "blue",
    main = "Tren Berat Anak Ayam terhadap Waktu",
    xlab = "Waktu (Hari)",
    ylab = "Rata-rata Berat"
  )
)
Membedakan distribusi Sepal.Length antarspesies dalam dataset iris
# Compare the distribution of Sepal.Length between species: one box per
# Species level, each with its own fill colour.
boxplot(
  Sepal.Length ~ Species,
  data = iris,
  col = c("orange", "green", "purple"),
  main = "Distribusi Sepal Length per Spesies",
  xlab = "Spesies",
  ylab = "Sepal Length"
)
missing values
# Preview the data; NA entries already show up in the first rows.
head(airquality)
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
# Count the missing values in each column.
colSums(is.na(airquality))
## Ozone Solar.R Wind Temp Month Day
## 37 7 0 0 0 0
# Keep only the rows without a missing value in any column, then check how
# many rows and columns remain.
clean_data <- na.omit(airquality)
dim(clean_data)
## [1] 111 6
data diamonds
# Load the diamonds dataset and preview its first rows. The package is named
# explicitly so the lookup does not depend on ggplot2 already being attached.
data("diamonds", package = "ggplot2")
head(diamonds)
## # A tibble: 6 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
diamonds
# Inspect how the `cut` column is stored: first its class vector, then its
# structure (level ordering and the underlying integer codes).
class(diamonds[["cut"]])
## [1] "ordered" "factor"
str(diamonds[["cut"]])
## Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
Korelasi eruptions dan waiting (dataset faithful)
# Scatter plot of eruption duration against waiting time, with the fitted
# least-squares line overlaid in red.
plot(
  faithful$eruptions, faithful$waiting,
  main = "Korelasi Eruptions vs Waiting",
  xlab = "Durasi Eruptions",
  ylab = "Waiting Time"
)
# Fit with plain column names plus `data =` instead of `$` inside the formula;
# the coefficients, and therefore the drawn line, are the same.
abline(lm(waiting ~ eruptions, data = faithful), col = "red")
# Price distribution of the diamonds data, one box per level of `cut`.
boxplot(
  price ~ cut,
  data = diamonds,
  col = "lightblue",
  main = "Distribusi Price berdasarkan Cut",
  xlab = "Cut",
  ylab = "Price"
)
korelasi antara mpg dan cyl
# Correlation (cor()'s default method) between fuel efficiency and cylinder
# count, evaluated inside mtcars so the columns can be named directly.
with(mtcars, cor(mpg, cyl))
## [1] -0.852162
titanic
library(titanic)
## Warning: package 'titanic' was built under R version 4.5.3
# NOTE(review): `Titanic` looks like the contingency table from base R's
# datasets package, so this call presumably does not need the titanic
# package. Confirm whether library(titanic) is used elsewhere in the file.
# Sum the table over every dimension except the 4th (Survived) to get the
# total counts per survival status.
margin.table(Titanic, margin = 4)
## Survived
## No Yes
## 1490 711