Data Titanic

data(Titanic)

# Passenger counts by survival status: collapse the 4-way table onto its
# "Survived" dimension, looked up by name (it is the fourth dimension)
survival_count <- margin.table(
  x = Titanic,
  margin = match("Survived", names(dimnames(Titanic)))
)
survival_count
## Survived
##   No  Yes 
## 1490  711
# Total number of passengers who survived
survival_count["Yes"]
## Yes 
## 711

Total Penumpang Titanic yang Selamat dengan sum()

data(Titanic)

# Flatten the contingency table into a data frame whose Freq column holds
# the cell counts
titanic_df <- as.data.frame(Titanic)

# Add up the counts of every row whose Survived value is "Yes"
with(titanic_df, sum(Freq[Survived == "Yes"]))
## [1] 711

Hubungan Durasi Erupsi vs Waktu Tunggu

# 1. Load the dataset
data("faithful")

# 2. Compute the Pearson correlation between eruption duration and waiting time
korelasi <- with(faithful, cor(eruptions, waiting, method = "pearson"))
print(paste("Nilai Korelasi:", korelasi))
## [1] "Nilai Korelasi: 0.900811168321813"
# 3. Visualize the relationship: scatter plot with a fitted linear trend line
library(ggplot2)
ggplot(data = faithful, mapping = aes(eruptions, waiting)) +
  geom_point(colour = "blue") +
  geom_smooth(colour = "red", method = "lm") +
  labs(
    x = "Durasi Erupsi (menit)",
    y = "Waktu Tunggu (menit)",
    title = "Hubungan Durasi Erupsi vs Waktu Tunggu"
  )
## `geom_smooth()` using formula = 'y ~ x'

Distribusi Harga Berlian berdasarkan Kualitas Potongan

library(ggplot2)

# Make sure the diamonds data set is loaded
data(diamonds)

# Box plot of the price distribution for each cut quality
ggplot(data = diamonds, mapping = aes(cut, price, fill = cut)) +
  geom_boxplot() +
  theme_minimal() +
  labs(
    x = "Kualitas Potongan (Cut)",
    y = "Harga (Price)",
    title = "Distribusi Harga Berlian berdasarkan Kualitas Potongan (Cut)"
  )

Menangani Missing Value di kolom Ozone dalam dataset airquality

# Load the data
data("airquality")

# Median of the Ozone column (na.rm = TRUE leaves the NA entries out)
median_ozone <- median(airquality[["Ozone"]], na.rm = TRUE)

# Fill every NA in Ozone with that median
airquality$Ozone <- replace(
  airquality$Ozone,
  is.na(airquality$Ozone),
  median_ozone
)

# Check whether any NA values remain
sum(is.na(airquality[["Ozone"]]))
## [1] 0

Membedakan distribusi Sepal.Length antar Species dalam dataset Iris

# Load the data
data(iris)

# Inspect the structure of the data
str(iris)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Summary statistics of Sepal.Length for each Species
aggregate(Sepal.Length ~ Species, data = iris, FUN = summary)
##      Species Sepal.Length.Min. Sepal.Length.1st Qu. Sepal.Length.Median
## 1     setosa             4.300                4.800               5.000
## 2 versicolor             4.900                5.600               5.900
## 3  virginica             4.900                6.225               6.500
##   Sepal.Length.Mean Sepal.Length.3rd Qu. Sepal.Length.Max.
## 1             5.006                5.200             5.800
## 2             5.936                6.300             7.000
## 3             6.588                6.900             7.900
# Box plot comparing Sepal.Length across Species
boxplot(
  Sepal.Length ~ Species,
  data = iris,
  col = c("lightblue", "lightgreen", "lightpink"),
  xlab = "Species",
  ylab = "Sepal.Length",
  main = "Distribusi Sepal.Length antar Species"
)

Korelasi antara mpg dan cyl dalam dataset mtcars

# Load the data
data(mtcars)

# Inspect the structure of the data
str(mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
# Compute the correlation between mpg and cyl
korelasi <- with(mtcars, cor(mpg, cyl))

# Print the correlation value
cat("Nilai korelasi antara mpg dan cyl:", korelasi, "\n")
## Nilai korelasi antara mpg dan cyl: -0.852162
# Scatter plot of mpg against the number of cylinders
plot(
  x = mtcars$cyl,
  y = mtcars$mpg,
  col = "blue",
  pch = 19,
  xlab = "Jumlah Silinder (cyl)",
  ylab = "Miles per Gallon (mpg)",
  main = "Scatter Plot mpg vs cyl"
)

# Hubungan antara mpg, hp, dan wt

# Load the built-in dataset
data(mtcars)

# Inspect the structure of the data
str(object = mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
# Show the first 6 rows
head(mtcars, n = 6L)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Summary statistics for every column
summary(object = mtcars)
##       mpg             cyl             disp             hp       
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.20   Median :6.000   Median :196.3   Median :123.0  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
##       drat             wt             qsec             vs        
##  Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
##  1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
##  Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
##  Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
##  3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
##  Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
##        am              gear            carb      
##  Min.   :0.0000   Min.   :3.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :0.0000   Median :4.000   Median :2.000  
##  Mean   :0.4062   Mean   :3.688   Mean   :2.812  
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :5.000   Max.   :8.000
# Size of the dataset (rows, columns)
dim(mtcars)
## [1] 32 11
# Column names
colnames(mtcars)
##  [1] "mpg"  "cyl"  "disp" "hp"   "drat" "wt"   "qsec" "vs"   "am"   "gear"
## [11] "carb"
# Scatter-plot matrix of the mpg, hp and wt columns
pairs(
  x = mtcars[c("mpg", "hp", "wt")],
  main = "Hubungan antara mpg, hp, dan wt"
)

Tipe Data Kolom cut pada Dataset diamonds

# Load the library that provides the diamonds data
library(ggplot2)

# Inspect the structure of the cut column
str(diamonds[["cut"]])
##  Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
# Inspect the class of the cut column
class(diamonds[["cut"]])
## [1] "ordered" "factor"
# List the category levels
levels(diamonds[["cut"]])
## [1] "Fair"      "Good"      "Very Good" "Premium"   "Ideal"

Tren berat anak ayam dari waktu ke waktu dalam dataset ChickWeight

# Mean chick weight at each value of Time
avg_weight <- aggregate(weight ~ Time, data = ChickWeight, FUN = mean)

# Line plot of the mean weight against Time
with(
  avg_weight,
  plot(
    Time, weight,
    type = "l",
    xlab = "Time",
    ylab = "Average Weight",
    main = "Tren Rata-rata Berat Anak Ayam terhadap Waktu"
  )
)

Hubungan mpg vs wt

# Scatter plot of mpg against wt
plot(
  x = mtcars$wt,
  y = mtcars$mpg,
  xlab = "wt",
  ylab = "mpg",
  main = "Hubungan mpg vs wt"
)