SOAL 1

# Load package dplyr
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Take the starwars dataset and drop every row that contains a missing value.
# NOTE(review): na.omit() inspects all columns, so rows missing any field
# (not only height/mass) are removed -- confirm this is the intended filter.
data <- na.omit(starwars)

# Coerce height and mass to numeric
data <- mutate(data, height = as.numeric(height), mass = as.numeric(mass))

# Keep only the "height" and "mass" variables
data_selected <- select(data, height, mass)

# Show the result
print(data_selected)
## # A tibble: 29 × 2
##    height  mass
##     <dbl> <dbl>
##  1    172    77
##  2    202   136
##  3    150    49
##  4    178   120
##  5    165    75
##  6    183    84
##  7    182    77
##  8    188    84
##  9    228   112
## 10    180    80
## # ℹ 19 more rows
# Arithmetic mean of each variable in the data
mean_height <- mean(data_selected[["height"]], na.rm = TRUE)
mean_mass <- mean(data_selected[["mass"]], na.rm = TRUE)

print(paste("Mean Height:", mean_height))
## [1] "Mean Height: 178.655172413793"
print(paste("Mean Mass:", mean_mass))
## [1] "Mean Mass: 77.7724137931035"
# Sample standard deviation of each variable in the data
sd_height <- sd(data_selected[["height"]], na.rm = TRUE)
sd_mass <- sd(data_selected[["mass"]], na.rm = TRUE)

print(paste("Standard Deviation of Height:", sd_height))
## [1] "Standard Deviation of Height: 22.3994194154175"
print(paste("Standard Deviation of Mass:", sd_mass))
## [1] "Standard Deviation of Mass: 23.0858474975428"
# Load library yang dibutuhkan
library(e1071)  # Untuk menghitung skewness
## Warning: package 'e1071' was built under R version 4.4.2
# Q1, median (Q2) and Q3 for height
Q1_height <- quantile(data_selected$height, 0.25, na.rm = TRUE)
Q2_height <- median(data_selected$height, na.rm = TRUE)
Q3_height <- quantile(data_selected$height, 0.75, na.rm = TRUE)

# Q1, median (Q2) and Q3 for mass
Q1_mass <- quantile(data_selected$mass, 0.25, na.rm = TRUE)
Q2_mass <- median(data_selected$mass, na.rm = TRUE)
Q3_mass <- quantile(data_selected$mass, 0.75, na.rm = TRUE)

# Skewness. Called as e1071::skewness() because EnvStats, attached later in
# this file, masks skewness(); an unqualified call would silently switch
# implementation if this chunk were re-run in the same session.
skewness_height <- e1071::skewness(data_selected$height, na.rm = TRUE)
skewness_mass <- e1071::skewness(data_selected$mass, na.rm = TRUE)

# Interquartile range (JAK = Jangkauan Antar Kuartil), computed as Q3 - Q1
JAK_height <- Q3_height - Q1_height
JAK_mass <- Q3_mass - Q1_mass

# Sample variance
var_height <- var(data_selected$height, na.rm = TRUE)
var_mass <- var(data_selected$mass, na.rm = TRUE)

# Print the results
print(paste("Q1 Height:", Q1_height))
## [1] "Q1 Height: 172"
print(paste("Q2 (Median) Height:", Q2_height))
## [1] "Q2 (Median) Height: 180"
print(paste("Q3 Height:", Q3_height))
## [1] "Q3 Height: 188"
print(paste("IQR Height:", JAK_height))
## [1] "IQR Height: 16"
print(paste("Skewness Height:", skewness_height))
## [1] "Skewness Height: -1.91075030371317"
print(paste("Variance Height:", var_height))
## [1] "Variance Height: 501.733990147783"
print(paste("Q1 Mass:", Q1_mass))
## [1] "Q1 Mass: 75"
print(paste("Q2 (Median) Mass:", Q2_mass))
## [1] "Q2 (Median) Mass: 79"
print(paste("Q3 Mass:", Q3_mass))
## [1] "Q3 Mass: 83"
print(paste("IQR Mass:", JAK_mass))
## [1] "IQR Mass: 8"
print(paste("Skewness Mass:", skewness_mass))
## [1] "Skewness Mass: 0.170225022331694"
print(paste("Variance Mass:", var_mass))
## [1] "Variance Mass: 532.956354679803"
# Split the plotting area into 1 row x 2 columns. par() returns the previous
# settings, which are kept so the layout can be restored exactly afterwards
# instead of assuming the default was active.
old_par <- par(mfrow = c(1, 2))
hist(
  data_selected$height,
  main = "Histogram Height", xlab = "Height",
  col = "skyblue", border = "black"
)
hist(
  data_selected$mass,
  main = "Histogram Mass", xlab = "Mass",
  col = "lightcoral", border = "black"
)

par(old_par)  # Restore the layout that was active before the histograms
# Normal Q-Q plot for height, with reference line
qqnorm(data_selected$height, col = "blue", main = "Q-Q Plot Height")
qqline(data_selected$height, col = "red")

# Normal Q-Q plot for mass, with reference line
qqnorm(data_selected$mass, col = "blue", main = "Q-Q Plot Mass")
qqline(data_selected$mass, col = "red")

# Extract the height and mass variables as plain vectors
height <- data_selected$height
mass <- data_selected$mass

# One-sample Kolmogorov-Smirnov test of height against a normal distribution
# whose mean and sd are taken from the sample itself.
# NOTE(review): ks.test() expects pre-specified parameters; estimating them
# from the same data (and the ties warned about below) makes the p-value
# approximate -- consider confirming with another normality test.
ks_test_height <- ks.test(height, "pnorm", mean = mean(height), sd = sd(height))
## Warning in ks.test.default(height, "pnorm", mean = mean(height), sd =
## sd(height)): ties should not be present for the one-sample Kolmogorov-Smirnov
## test
print(ks_test_height)
## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  height
## D = 0.21167, p-value = 0.1487
## alternative hypothesis: two-sided

\[ H_0: \text{Data berasal dari distribusi normal} \]

Hipotesis Alternatif

\[ H_1: \text{Data tidak berasal dari distribusi normal} \]

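P-Value = 0.1487 > 0.05, maka tak tolak \(H_0\). Artinya, tidak cukup bukti untuk menyatakan bahwa data height tidak berasal dari distribusi normal (data height dapat dianggap menyebar normal pada taraf nyata 5%)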

# One-sample Kolmogorov-Smirnov test of mass against a normal distribution
# whose mean and sd are taken from the sample itself.
# NOTE(review): ks.test() expects pre-specified parameters; estimating them
# from the same data (and the ties warned about below) makes the p-value
# approximate -- consider confirming with another normality test.
ks_test_mass <- ks.test(mass, "pnorm", mean = mean(mass), sd = sd(mass))
## Warning in ks.test.default(mass, "pnorm", mean = mean(mass), sd = sd(mass)):
## ties should not be present for the one-sample Kolmogorov-Smirnov test
print(ks_test_mass)
## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  mass
## D = 0.25574, p-value = 0.04504
## alternative hypothesis: two-sided

\[ H_0: \text{Data berasal dari distribusi normal} \]

Hipotesis Alternatif

\[ H_1: \text{Data tidak berasal dari distribusi normal} \]

P-Value = 0.04504 < 0.05, maka tolak \(H_0\). Artinya, data mass tidak berasal dari distribusi normal pada taraf nyata 5%

SOAL NO 2

# Load library ggplot2 untuk visualisasi
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
# Boxplot of the height variable
ggplot(data = data, mapping = aes(y = height)) +
  theme_minimal() +
  geom_boxplot(fill = "skyblue") +
  ggtitle("Boxplot Height")

# Boxplot of the mass variable
ggplot(data = data, mapping = aes(y = mass)) +
  theme_minimal() +
  geom_boxplot(fill = "lightcoral") +
  ggtitle("Boxplot Mass")

# Scatter plot of height against row index.
# seq_len() is used instead of 1:nrow() so an empty data frame yields an
# empty index rather than c(1, 0).
ggplot(data_selected, aes(x = seq_len(nrow(data_selected)), y = height)) +
  geom_point(color = "blue", alpha = 0.6) +
  labs(
    title = "Scatter Plot: Height",
    x = "Index",
    y = "Height (cm)"
  ) +
  theme_minimal()

# Scatter plot of mass against row index
ggplot(data_selected, aes(x = seq_len(nrow(data_selected)), y = mass)) +
  geom_point(color = "green", alpha = 0.6) +
  labs(
    title = "Scatter Plot: Mass",
    x = "Index",
    y = "Mass (kg)"
  ) +
  theme_minimal()

Menggunakan adjusted boxplot untuk height karena sebarannya menjulur (skewness = -1.91), sehingga boxplot biasa kurang sesuai

library(robustbase)
## Warning: package 'robustbase' was built under R version 4.4.3
# mc() from robustbase (medcouple) applied to height
mc_height <- mc(data_selected[["height"]])
## The default of 'doScale' is FALSE now for stability;
##   set options(mc_doScale_quiet=TRUE) to suppress this (once per session) message
print(mc_height)
## [1] 0
# Draw the adjusted boxplot for height
adjbox(
  data_selected[["height"]],
  main = "Adjusted Boxplot of Height",
  col = "lightblue"
)

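Menggunakan Rosner’s Test untuk mass. Catatan: uji ini mengasumsikan data (setelah pencilan dibuang) menyebar normal, sedangkan uji Kolmogorov-Smirnov di atas memberikan p-value = 0.04504 untuk mass, sehingga hasilnya perlu ditafsirkan dengan hati-hati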

Rosner’s Test atau Generalized ESD many-outliers test (GESD) digunakan untuk mendeteksi pencilan ganda (atas dan/atau bawah) dalam data univariat. Uji ini akurat untuk mendeteksi hingga 10 pencilan dengan jumlah sampel minimal 15, dengan asumsi data setelah pencilan dihilangkan berdistribusi normal. Pada uji ini, diperlukan parameter k untuk menentukan jumlah maksimal pencilan yang diuji.

\(H_0\): Tidak ada pencilan dalam data

\(H_1\): Terdapat hingga k pencilan dalam data

library(EnvStats)
## Warning: package 'EnvStats' was built under R version 4.4.3
## 
## Attaching package: 'EnvStats'
## The following objects are masked from 'package:e1071':
## 
##     kurtosis, skewness
## The following objects are masked from 'package:stats':
## 
##     predict, predict.lm
# Rosner test on mass, testing for at most k = 10 outliers
rosner_mass <- rosnerTest(data_selected[["mass"]], k = 10)

# Show the per-step statistics table of the test
rosner_mass[["all.stats"]]
##    i   Mean.i      SD.i Value Obs.Num    R.i+1 lambda.i+1 Outlier
## 1  0 77.77241 23.085847 136.0       2 2.522220   2.892705    TRUE
## 2  1 75.69286 20.558173  20.0      18 2.709037   2.876209    TRUE
## 3  2 77.75556 17.753295 120.0       4 2.379527   2.858923    TRUE
## 4  3 76.13077 15.926626 113.0      14 2.314943   2.840774    TRUE
## 5  4 74.65600 14.329052 112.0       9 2.606174   2.821681    TRUE
## 6  5 73.10000 12.291814  45.0      19 2.286074   2.801551    TRUE
## 7  6 74.32174 10.977454  49.0       3 2.306704   2.780277    TRUE
## 8  7 75.47273  9.711955  50.0      27 2.622822   2.757735    TRUE
## 9  8 76.68571  8.065376  55.0      22 2.688742   2.733780    TRUE
## 10 9 77.77000  6.518080  56.2      26 3.309257   2.708246    TRUE

SOAL NO 3

Trimmed Mean adalah nilai rataan yang dihitung setelah dataset dipangkas sebanyak \(\alpha\)% dari masing-masing ujung data yang telah diurutkan. Pada trimmed mean, nilai-nilai terkecil dan terbesar yang masuk dalam rentang \(\alpha\)% tersebut dipangkas terlebih dahulu sebelum rataan dihitung.

# Ordinary mean of height
rataan_biasa_height <- with(data_selected, mean(height, na.rm = TRUE))

# 10% trimmed mean of height (trim = 0.1)
rataan_terpangkas_height <- with(
  data_selected,
  mean(height, trim = 0.1, na.rm = TRUE)
)

# Print the results
cat("Rataan Biasa :", rataan_biasa_height, "\n")
## Rataan Biasa : 178.6552
cat("Rataan Terpangkas (10%) :", rataan_terpangkas_height, "\n")
## Rataan Terpangkas (10%) : 180.52
# Ordinary mean of mass
rataan_biasa_mass <- with(data_selected, mean(mass, na.rm = TRUE))

# 10% trimmed mean of mass (trim = 0.1)
rataan_terpangkas_mass <- with(
  data_selected,
  mean(mass, trim = 0.1, na.rm = TRUE)
)

# Print the results
cat("Rataan Biasa :", rataan_biasa_mass, "\n")
## Rataan Biasa : 77.77241
cat("Rataan Terpangkas (10%) :", rataan_terpangkas_mass, "\n")
## Rataan Terpangkas (10%) : 77.376

SOAL NO 4

# New observation to append. Defined once so the row that is added and the
# values checked against the fences below cannot drift apart.
new_height <- 210
new_mass <- 100

# Append the new observation to the data
data_selected <- rbind(
  data_selected,
  data.frame(height = new_height, mass = new_mass)
)
# Boxplot of height
boxplot(data_selected$height, main = "Boxplot of Height", col = "lightblue")

# Boxplot of mass
boxplot(data_selected$mass, main = "Boxplot of Mass", col = "lightgreen")

# Q1, Q3 and IQR for height, recomputed on the data including the new row
Q1_height <- quantile(data_selected$height, 0.25, na.rm = TRUE)
Q3_height <- quantile(data_selected$height, 0.75, na.rm = TRUE)
IQR_height <- Q3_height - Q1_height

# 1.5 * IQR fences
lower_bound_height <- Q1_height - 1.5 * IQR_height
upper_bound_height <- Q3_height + 1.5 * IQR_height

# Check whether the new height lies outside the fences
is_outlier_height <- (new_height < lower_bound_height) |
  (new_height > upper_bound_height)

cat("Apakah height =", new_height, "pencilan?", is_outlier_height, "\n")
## Apakah height = 210 pencilan? FALSE
# Q1, Q3 and IQR for mass, recomputed on the data including the new row
Q1_mass <- quantile(data_selected$mass, 0.25, na.rm = TRUE)
Q3_mass <- quantile(data_selected$mass, 0.75, na.rm = TRUE)
IQR_mass <- Q3_mass - Q1_mass

# 1.5 * IQR fences
lower_bound_mass <- Q1_mass - 1.5 * IQR_mass
upper_bound_mass <- Q3_mass + 1.5 * IQR_mass

# Check whether the new mass lies outside the fences
is_outlier_mass <- (new_mass < lower_bound_mass) |
  (new_mass > upper_bound_mass)

cat("Apakah mass =", new_mass, "pencilan?", is_outlier_mass, "\n")
## Apakah mass = 100 pencilan? TRUE