UTS Tsalisa Chulaili Sahri Nova

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

Soal visualisasi melihat hubungan mpg vs wt

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.5.2

# Scatter plot of fuel economy (mpg) against vehicle weight (wt) in mtcars.
ggplot(data = mtcars, mapping = aes(wt, mpg)) +
  geom_point(colour = "pink", size = 3) +
  ggtitle("Scatter Plot: mpg vs wt") +
  xlab("Weight (1000 lbs)") +
  ylab("Miles per Gallon") +
  theme_minimal()

Soal melihat weight dan time dalam dataset ChickWeight

library(ggplot2)

# Growth curves from ChickWeight: one faint line per chick, with the mean
# weight at each time point drawn on top as a thick orange line.
ggplot(ChickWeight, aes(x = Time, y = weight, group = Chick)) +
  geom_line(alpha = 0.3) +
  # group = 1 overrides the per-chick grouping so a single mean line is drawn.
  # Line thickness is set with `linewidth`: using `size` for lines has been
  # deprecated since ggplot2 3.4.0 and raises a deprecation warning.
  stat_summary(aes(group = 1), fun = "mean", geom = "line",
               linewidth = 1.5, color = "orange") +
  labs(title = "melihat weight dan time",
       x = "Waktu (hari)",
       y = "Berat (gram)") +
  theme_minimal()

Soal menghitung korelasi antara mpg dan cyl dalam dataset mtcars

# Pearson correlation between fuel economy (mpg) and cylinder count (cyl).
data("mtcars")
with(mtcars, cor(mpg, cyl))

## [1] -0.852162

Jadi, nilai korelasi sekitar -0,85 menunjukkan korelasi negatif yang kuat antara mpg dan cyl.

# Base-graphics scatter plot of mpg against cylinder count.
with(mtcars, plot(
  x = cyl, y = mpg,
  pch = 19, col = "green",
  main = "Korelasi mpg vs cyl",
  xlab = "Jumlah Silinder (cyl)",
  ylab = "Miles per Gallon (mpg)"
))

Soal penanganan missing value di kolom ozone dalam dataset airquality

# Count how many Ozone readings are missing in airquality.
data("airquality")
with(airquality, sum(is.na(Ozone)))

## [1] 37

# Median imputation: missing Ozone readings are filled with the median of the
# observed values. The result goes into a new column, so Ozone stays untouched.
med_ozone <- stats::median(airquality$Ozone, na.rm = TRUE)
airquality$Ozone_imputed <- replace(
  airquality$Ozone,
  is.na(airquality$Ozone),
  med_ozone
)

# Show the first six rows of the original and imputed columns side by side.
utils::head(airquality[c("Ozone", "Ozone_imputed")], n = 6L)

##   Ozone Ozone_imputed
## 1    41          41.0
## 2    36          36.0
## 3    12          12.0
## 4    18          18.0
## 5    NA          31.5
## 6    28          28.0

Soal membedakan distribusi sepal length antar species dalam dataset iris

library(ggplot2)

# Box plot of sepal length for each iris species, filled by species.
ggplot(data = iris, mapping = aes(Species, Sepal.Length, fill = Species)) +
  geom_boxplot() +
  ggtitle("perbedaan distribusi sepal length per spesies") +
  xlab("Species") +
  ylab("Sepal Length (cm)") +
  theme_minimal()

Soal mengecek total penumpang yang selamat (survived 1)

library(titanic)

## Warning: package 'titanic' was built under R version 4.5.3

# Load the dataset explicitly before it is used, matching the
# data()-then-use order of the other sections in this file.
data("titanic_train")

# Total of the Survived column; the section heading indicates survivors are
# coded as 1, so the sum is the number of passengers who survived.
sum(titanic_train$Survived)

## [1] 342

Soal korelasi eruptions dan waiting

# Pearson correlation between eruption duration and waiting time (faithful).
data("faithful")
with(faithful, cor(eruptions, waiting))

## [1] 0.9008112

# Base-graphics scatter plot of waiting time against eruption duration.
with(faithful, plot(
  x = eruptions, y = waiting,
  pch = 19, col = "brown",
  main = "Hubungan Eruptions vs Waiting",
  xlab = "Durasi Erupsi (menit)",
  ylab = "Waktu Tunggu (menit)"
))

Soal mengetahui tipe data kolom cut pada dataset diamonds

library(ggplot2)
# Inspect the type of diamonds$cut. class() is called once only; the original
# repeated the identical call back-to-back with the same output.
class(diamonds$cut)

## [1] "ordered" "factor"

# List the factor levels in their stored order.
levels(diamonds$cut)

## [1] "Fair"      "Good"      "Very Good" "Premium"   "Ideal"

# Compact structure: level ordering plus the first few integer codes.
str(diamonds$cut)

##  Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...

Soal struktur data mtcars

# Show the structure of mtcars: dimensions, column names and column types.
data("mtcars")
utils::str(mtcars)

## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...

# Summary statistics (min, quartiles, mean, max) for every column of mtcars.
base::summary(mtcars)

##       mpg             cyl             disp             hp       
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.20   Median :6.000   Median :196.3   Median :123.0  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
##       drat             wt             qsec             vs        
##  Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
##  1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
##  Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
##  Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
##  3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
##  Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
##        am              gear            carb      
##  Min.   :0.0000   Min.   :3.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :0.0000   Median :4.000   Median :2.000  
##  Mean   :0.4062   Mean   :3.688   Mean   :2.812  
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :5.000   Max.   :8.000

# Pairwise Pearson correlation matrix for mpg, hp and wt.
stats::cor(mtcars[c("mpg", "hp", "wt")])

##            mpg         hp         wt
## mpg  1.0000000 -0.7761684 -0.8676594
## hp  -0.7761684  1.0000000  0.6587479
## wt  -0.8676594  0.6587479  1.0000000

# Base-graphics scatter plot of mpg against weight.
with(mtcars, plot(
  x = wt, y = mpg,
  main = "hubungan antara Weight vs MPG",
  xlab = "Weight (1000 lbs)",
  ylab = "MPG"
))

Soal visualisasi terbaik untuk melihat distribusi price berdasarkan cut dalam dataset diamonds

library(ggplot2)

# Box plot of diamond price for each cut category, filled by cut.
ggplot(data = diamonds, mapping = aes(cut, price, fill = cut)) +
  geom_boxplot() +
  ggtitle("distribusi price berdasarkan cut") +
  xlab("cut (kualitas potongan)") +
  ylab("price (USD)") +
  theme_minimal()

Jadi kesimpulannya, Fair dan Good harganya paling murah dengan variasi yang sempit. Ideal harganya menengah, tetapi memiliki banyak sekali outlier mahal. Premium dan Very Good harganya lebih tinggi dengan sebaran yang lebar. Intinya, hampir semua kategori mempunyai outlier dengan harga yang sangat tinggi.

UTS Tsalisa Chulaili Sahri Nova

Tsalisa Chulaili Sahri Nova

2026-04-08

R Markdown

Including Plots