library(ggplot2)
# diamonds ----
# Load the diamonds dataset (found via the attached ggplot2 package) and
# preview its first six rows. Lines starting with `## ` are recorded output.
data(diamonds)
head(diamonds, n = 6L)
## # A tibble: 6 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
# Titanic: penumpang selamat ----
# Count the surviving passengers in the Titanic training data.
library(titanic)
# NOTE(review): the name `Titanic` shadows the built-in datasets::Titanic
# table; it is kept here because later code may refer to it.
Titanic <- titanic_train
# Summing the Survived column yields the number of survivors
# (presumably coded 0/1 -- confirm against the titanic package docs).
total_selamat <- sum(Titanic[["Survived"]])
print(
  paste("Total penumpang yang selamat adalah:", total_selamat)
)
## [1] "Total penumpang yang selamat adalah: 342"
# Data faithful ----
# Explore the faithful dataset: preview the rows, measure the linear
# correlation between eruption duration and waiting time, then draw a
# scatter plot with a fitted regression line.
# Lines starting with `## ` are recorded output.
data("faithful")
head(faithful, n = 6L)
## eruptions waiting
## 1 3.600 79
## 2 1.800 54
## 3 3.333 74
## 4 2.283 62
## 5 4.533 85
## 6 2.883 55

# Pearson correlation (the default method of cor()).
with(faithful, cor(eruptions, waiting))
## [1] 0.9008112

# Scatter plot: eruption duration on x, waiting time on y, solid points.
with(
  faithful,
  plot(
    eruptions, waiting,
    pch = 19,
    main = "Scatter Plot Eruptions vs Waiting",
    xlab = "Durasi Erupsi",
    ylab = "Waktu Tunggu"
  )
)
# Overlay the least-squares line of waiting on eruptions in red.
abline(reg = lm(waiting ~ eruptions, data = faithful), col = "red")

# Data ChickWeight ----
# Plot chick growth over time: one line per chick coloured by diet, plus a
# single black LOESS curve showing the overall trend.
data(ChickWeight)
ggplot(
  data = ChickWeight,
  mapping = aes(x = Time, y = weight, group = Chick, colour = Diet)
) +
  # One semi-transparent trend line per chick.
  geom_line(alpha = 0.5) +
  # Average trend: group = 1 overrides the per-chick grouping so that a
  # single smooth is fitted through all observations.
  geom_smooth(
    mapping = aes(group = 1),
    method = "loess",
    colour = "black",
    se = FALSE
  ) +
  theme_minimal() +
  labs(
    title = "Tren Pertumbuhan Berat Anak Ayam",
    subtitle = "Berdasarkan Waktu dan Jenis Diet",
    x = "Waktu (Hari)",
    y = "Berat (gram)"
  )
## `geom_smooth()` using formula = 'y ~ x'

# EDA dalam mtcars ----
# Exploratory look at mtcars: structure, per-column summary statistics,
# first rows, dimensions, and a horsepower-vs-mpg scatter plot.
# Lines starting with `## ` are recorded output.

# Column types and a glimpse of the values.
str(mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...

# Five-number summary plus mean for every column.
summary(mtcars)
## mpg cyl disp hp
## Min. :10.40 Min. :4.000 Min. : 71.1 Min. : 52.0
## 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8 1st Qu.: 96.5
## Median :19.20 Median :6.000 Median :196.3 Median :123.0
## Mean :20.09 Mean :6.188 Mean :230.7 Mean :146.7
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0 3rd Qu.:180.0
## Max. :33.90 Max. :8.000 Max. :472.0 Max. :335.0
## drat wt qsec vs
## Min. :2.760 Min. :1.513 Min. :14.50 Min. :0.0000
## 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89 1st Qu.:0.0000
## Median :3.695 Median :3.325 Median :17.71 Median :0.0000
## Mean :3.597 Mean :3.217 Mean :17.85 Mean :0.4375
## 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90 3rd Qu.:1.0000
## Max. :4.930 Max. :5.424 Max. :22.90 Max. :1.0000
## am gear carb
## Min. :0.0000 Min. :3.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000
## Median :0.0000 Median :4.000 Median :2.000
## Mean :0.4062 Mean :3.688 Mean :2.812
## 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1.0000 Max. :5.000 Max. :8.000

# First six rows.
head(mtcars, n = 6L)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1

# Number of rows and columns.
dim(mtcars)
## [1] 32 11

# Scatter plot of horsepower (x) against fuel economy (y).
with(
  mtcars,
  plot(
    hp, mpg,
    main = "Hubungan HP vs MPG",
    xlab = "Horsepower",
    ylab = "Miles per Gallon"
  )
)

# Missing value ----
# Replace missing Ozone readings in airquality with the column median.
# Lines starting with `## ` are recorded output.

# Start from a fresh copy of the dataset. The imputation below overwrites the
# global `airquality`, so without this reload a second run in the same
# session would report 0 missing values instead of 37.
data("airquality")

# Missing Ozone readings before imputation.
sum(is.na(airquality$Ozone))
## [1] 37

# Fill every NA with the median of the observed values. Ozone is stored as
# integer, so assigning the median (31.5) promotes the column to double.
airquality$Ozone[is.na(airquality$Ozone)] <-
  median(airquality$Ozone, na.rm = TRUE)

# Confirm that nothing is missing any more.
sum(is.na(airquality$Ozone))
## [1] 0
head(airquality$Ozone)
## [1] 41.0 36.0 12.0 18.0 31.5 28.0
# diamonds ----
# Inspect the `cut` column of diamonds: its structure, class vector and
# the order of its levels. Lines starting with `## ` are recorded output.
str(diamonds[["cut"]])
## Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
class(diamonds[["cut"]])
## [1] "ordered" "factor"
levels(diamonds[["cut"]])
## [1] "Fair" "Good" "Very Good" "Premium" "Ideal"
# Hubungan antara mpg dan cyl ----
# Pearson correlation between fuel economy (mpg) and cylinder count (cyl)
# in mtcars. The line starting with `## ` is recorded output.
korelasi <- with(mtcars, cor(mpg, cyl))
print(korelasi)
## [1] -0.852162
# Membedakan distribusi Sepal.Length antar Species dalam dataset iris ----
# Compare the distribution of Sepal.Length across the iris species with
# one box per species, each filled with its own colour.
data("iris")
ggplot(
  data = iris,
  mapping = aes(x = Species, y = Sepal.Length, fill = Species)
) +
  geom_boxplot() +
  labs(title = "Box Plot Sepal Length per Spesies") +
  theme_minimal()
