# No 5 Titanic

# Load the built-in Titanic contingency table and flatten it into a data
# frame with one row per Class/Sex/Age/Survived combination. `Freq` holds the
# number of people in each combination.
data("Titanic")
titanic <- as.data.frame(Titanic)
titanic
##    Class    Sex   Age Survived Freq
## 1    1st   Male Child       No    0
## 2    2nd   Male Child       No    0
## 3    3rd   Male Child       No   35
## 4   Crew   Male Child       No    0
## 5    1st Female Child       No    0
## 6    2nd Female Child       No    0
## 7    3rd Female Child       No   17
## 8   Crew Female Child       No    0
## 9    1st   Male Adult       No  118
## 10   2nd   Male Adult       No  154
## 11   3rd   Male Adult       No  387
## 12  Crew   Male Adult       No  670
## 13   1st Female Adult       No    4
## 14   2nd Female Adult       No   13
## 15   3rd Female Adult       No   89
## 16  Crew Female Adult       No    3
## 17   1st   Male Child      Yes    5
## 18   2nd   Male Child      Yes   11
## 19   3rd   Male Child      Yes   13
## 20  Crew   Male Child      Yes    0
## 21   1st Female Child      Yes    1
## 22   2nd Female Child      Yes   13
## 23   3rd Female Child      Yes   14
## 24  Crew Female Child      Yes    0
## 25   1st   Male Adult      Yes   57
## 26   2nd   Male Adult      Yes   14
## 27   3rd   Male Adult      Yes   75
## 28  Crew   Male Adult      Yes  192
## 29   1st Female Adult      Yes  140
## 30   2nd Female Adult      Yes   80
## 31   3rd Female Adult      Yes   76
## 32  Crew Female Adult      Yes   20
# Total number of survivors.
# `Survived` is a factor with the levels "No"/"Yes" (see the printout above),
# so comparing it to 1 never matches. The rows are also aggregated groups, so
# the head count is the sum of `Freq` over the "Yes" rows, not a row count.
total_selamat <- sum(titanic$Freq[titanic$Survived == "Yes"])

# Show the result
total_selamat
## [1] 711

# No 8 Distribusi Price Berdasarkan Cut

# Attach ggplot2, which also supplies the diamonds dataset
library(ggplot2)

# Box plot of price for every cut category
ggplot(data = diamonds, mapping = aes(cut, price)) +
  geom_boxplot(fill = "lightblue") +
  ggtitle("Distribusi Price berdasarkan Cut") +
  xlab("Cut") +
  ylab("Price")

# No 9: Show the relationship between mpg and wt

# Make the built-in dataset available
data("mtcars")

# Base R scatter plot with weight on the x axis and mpg on the y axis
with(
  mtcars,
  plot(
    wt, mpg,
    col = "blue",
    pch = 19,
    xlab = "Weight (wt)",
    ylab = "MPG",
    main = "Hubungan antara Weight (wt) dan MPG"
  )
)

# No 15: Correlation between eruptions and waiting

# Make the built-in dataset available
data("faithful")

# Pearson correlation between eruption duration and waiting time
with(faithful, cor(eruptions, waiting))
## [1] 0.9008112
# Labelled scatter plot of the two variables
with(
  faithful,
  plot(
    eruptions, waiting,
    col = "darkgreen",
    pch = 19,
    xlab = "Durasi Erupsi",
    ylab = "Waktu Tunggu",
    main = "Hubungan Eruptions vs Waiting"
  )
)

# Second scatter plot with default axis labels, used as the canvas for the
# regression line drawn below
plot(x = faithful$eruptions, y = faithful$waiting,
     col = "darkgreen", pch = 19)

# Overlay the least-squares fit of waiting on eruptions
abline(reg = lm(waiting ~ eruptions, data = faithful), lwd = 2, col = "red")

# No 16: Correlation between mpg and cyl

# Make the built-in dataset available
data("mtcars")

# Pearson correlation between fuel economy and cylinder count
with(mtcars, cor(mpg, cyl))
## [1] -0.852162

# No 18 Berat Anak Ayam

# Make the built-in dataset available
data("ChickWeight")

# Keep only the measurements of chick 1 as an example for the line chart
chick1 <- subset(ChickWeight, subset = Chick == 1)

# Line chart of that chick's weight over time
with(
  chick1,
  plot(
    Time, weight,
    type = "l",
    lwd = 2,
    col = "blue",
    xlab = "Time",
    ylab = "Weight",
    main = "Tren Berat Ayam terhadap Waktu"
  )
)

# ggplot2 is needed for the multi-line chart
library(ggplot2)

# One line per chick, coloured by chick, showing weight over time
ggplot(data = ChickWeight, mapping = aes(Time, weight)) +
  geom_line(mapping = aes(group = Chick, colour = Chick)) +
  labs(title = "Tren Berat Ayam terhadap Waktu", x = "Time", y = "Weight")

# No 21. Purpose of EDA on the mtcars dataset

# Make the built-in dataset available
data("mtcars")

# Inspect the structure: dimensions, column names and column types
str(object = mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
# Per-column summary statistics (min, quartiles, mean, max)
summary(object = mtcars)
##       mpg             cyl             disp             hp       
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.20   Median :6.000   Median :196.3   Median :123.0  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
##       drat             wt             qsec             vs        
##  Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
##  1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
##  Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
##  Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
##  3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
##  Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
##        am              gear            carb      
##  Min.   :0.0000   Min.   :3.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :0.0000   Median :4.000   Median :2.000  
##  Mean   :0.4062   Mean   :3.688   Mean   :2.812  
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :5.000   Max.   :8.000
# Preview the first six rows
head(x = mtcars, n = 6L)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Pairwise Pearson correlations between all variables
cor(x = mtcars, method = "pearson")
##             mpg        cyl       disp         hp        drat         wt
## mpg   1.0000000 -0.8521620 -0.8475514 -0.7761684  0.68117191 -0.8676594
## cyl  -0.8521620  1.0000000  0.9020329  0.8324475 -0.69993811  0.7824958
## disp -0.8475514  0.9020329  1.0000000  0.7909486 -0.71021393  0.8879799
## hp   -0.7761684  0.8324475  0.7909486  1.0000000 -0.44875912  0.6587479
## drat  0.6811719 -0.6999381 -0.7102139 -0.4487591  1.00000000 -0.7124406
## wt   -0.8676594  0.7824958  0.8879799  0.6587479 -0.71244065  1.0000000
## qsec  0.4186840 -0.5912421 -0.4336979 -0.7082234  0.09120476 -0.1747159
## vs    0.6640389 -0.8108118 -0.7104159 -0.7230967  0.44027846 -0.5549157
## am    0.5998324 -0.5226070 -0.5912270 -0.2432043  0.71271113 -0.6924953
## gear  0.4802848 -0.4926866 -0.5555692 -0.1257043  0.69961013 -0.5832870
## carb -0.5509251  0.5269883  0.3949769  0.7498125 -0.09078980  0.4276059
##             qsec         vs          am       gear        carb
## mpg   0.41868403  0.6640389  0.59983243  0.4802848 -0.55092507
## cyl  -0.59124207 -0.8108118 -0.52260705 -0.4926866  0.52698829
## disp -0.43369788 -0.7104159 -0.59122704 -0.5555692  0.39497686
## hp   -0.70822339 -0.7230967 -0.24320426 -0.1257043  0.74981247
## drat  0.09120476  0.4402785  0.71271113  0.6996101 -0.09078980
## wt   -0.17471588 -0.5549157 -0.69249526 -0.5832870  0.42760594
## qsec  1.00000000  0.7445354 -0.22986086 -0.2126822 -0.65624923
## vs    0.74453544  1.0000000  0.16834512  0.2060233 -0.56960714
## am   -0.22986086  0.1683451  1.00000000  0.7940588  0.05753435
## gear -0.21268223  0.2060233  0.79405876  1.0000000  0.27407284
## carb -0.65624923 -0.5696071  0.05753435  0.2740728  1.00000000

# No 22. Tipe Data dari Kolom Cut Dataset Diamonds

# Attach ggplot2, which supplies the diamonds dataset
library(ggplot2)

# Inspect the structure of the cut column
str(diamonds[["cut"]])
##  Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
# Report the class vector of the cut column
class(diamonds[["cut"]])
## [1] "ordered" "factor"

# No 28. Missing Value Kolom Ozone

# Make the built-in dataset available
data("airquality")

# Number of missing Ozone readings before imputation
sum(is.na(airquality[["Ozone"]]))
## [1] 37
# Replace every missing Ozone reading with the median of the observed ones
airquality$Ozone <- replace(
  airquality$Ozone,
  is.na(airquality$Ozone),
  median(airquality$Ozone, na.rm = TRUE)
)

# Number of missing Ozone readings after imputation
sum(is.na(airquality[["Ozone"]]))
## [1] 0

# No 29. Membedakan Distribusi Sepal Length

# Make the built-in dataset available
data("iris")

# Base R box plot: one box of sepal length per species
with(
  iris,
  boxplot(
    split(Sepal.Length, Species),
    col = c("lightblue", "lightgreen", "lightpink"),
    xlab = "Species",
    ylab = "Sepal Length",
    main = "Distribusi Sepal Length berdasarkan Species"
  )
)

# ggplot2 is needed for the second version of the box plot
library(ggplot2)

# Same comparison with ggplot2, filling each box by species
ggplot(data = iris) +
  geom_boxplot(mapping = aes(Species, Sepal.Length, fill = Species)) +
  labs(
    y = "Sepal Length",
    x = "Species",
    title = "Distribusi Sepal Length berdasarkan Species"
  )