# Load package dplyr
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Start from the starwars dataset and keep only complete rows. Note that
# na.omit() drops a row when ANY column has a missing value, not only
# height or mass. Both variables are then coerced to numeric.
data <- starwars %>%
  na.omit() %>%
  mutate(across(c(height, mass), as.numeric))
# Keep only the "height" and "mass" variables
data_selected <- select(data, height, mass)
# Show the result
print(data_selected)
## # A tibble: 29 × 2
## height mass
## <dbl> <dbl>
## 1 172 77
## 2 202 136
## 3 150 49
## 4 178 120
## 5 165 75
## 6 183 84
## 7 182 77
## 8 188 84
## 9 228 112
## 10 180 80
## # ℹ 19 more rows
# Mean and standard deviation of each variable. All four statistics are
# computed up front and then reported in the same order as before.
mean_height <- mean(data_selected[["height"]], na.rm = TRUE)
mean_mass <- mean(data_selected[["mass"]], na.rm = TRUE)
sd_height <- sd(data_selected[["height"]], na.rm = TRUE)
sd_mass <- sd(data_selected[["mass"]], na.rm = TRUE)
print(paste("Mean Height:", mean_height))
## [1] "Mean Height: 178.655172413793"
print(paste("Mean Mass:", mean_mass))
## [1] "Mean Mass: 77.7724137931035"
print(paste("Standard Deviation of Height:", sd_height))
## [1] "Standard Deviation of Height: 22.3994194154175"
print(paste("Standard Deviation of Mass:", sd_mass))
## [1] "Standard Deviation of Mass: 23.0858474975428"
# Load library yang dibutuhkan
library(e1071) # Untuk menghitung skewness
## Warning: package 'e1071' was built under R version 4.4.2
# Q1, median (Q2) and Q3 of height
Q1_height <- quantile(data_selected$height, 0.25, na.rm = TRUE)
Q2_height <- median(data_selected$height, na.rm = TRUE)
Q3_height <- quantile(data_selected$height, 0.75, na.rm = TRUE)
# Q1, median (Q2) and Q3 of mass
Q1_mass <- quantile(data_selected$mass, 0.25, na.rm = TRUE)
Q2_mass <- median(data_selected$mass, na.rm = TRUE)
Q3_mass <- quantile(data_selected$mass, 0.75, na.rm = TRUE)
# Skewness. The call is namespace-qualified because EnvStats, attached later
# in this script, masks e1071's skewness(); an unqualified call would pick up
# the EnvStats version when the script is re-run in the same session.
skewness_height <- e1071::skewness(data_selected$height, na.rm = TRUE)
skewness_mass <- e1071::skewness(data_selected$mass, na.rm = TRUE)
# Interquartile range (JAK = "Jangkauan Antar Kuartil"): Q3 - Q1
JAK_height <- Q3_height - Q1_height
JAK_mass <- Q3_mass - Q1_mass
# Sample variance
var_height <- var(data_selected$height, na.rm = TRUE)
var_mass <- var(data_selected$mass, na.rm = TRUE)
# Report the results
print(paste("Q1 Height:", Q1_height))
## [1] "Q1 Height: 172"
print(paste("Q2 (Median) Height:", Q2_height))
## [1] "Q2 (Median) Height: 180"
print(paste("Q3 Height:", Q3_height))
## [1] "Q3 Height: 188"
print(paste("IQR Height:", JAK_height))
## [1] "IQR Height: 16"
print(paste("Skewness Height:", skewness_height))
## [1] "Skewness Height: -1.91075030371317"
print(paste("Variance Height:", var_height))
## [1] "Variance Height: 501.733990147783"
print(paste("Q1 Mass:", Q1_mass))
## [1] "Q1 Mass: 75"
print(paste("Q2 (Median) Mass:", Q2_mass))
## [1] "Q2 (Median) Mass: 79"
print(paste("Q3 Mass:", Q3_mass))
## [1] "Q3 Mass: 83"
print(paste("IQR Mass:", JAK_mass))
## [1] "IQR Mass: 8"
print(paste("Skewness Mass:", skewness_mass))
## [1] "Skewness Mass: 0.170225022331694"
print(paste("Variance Mass:", var_mass))
## [1] "Variance Mass: 532.956354679803"
# Histograms of height and mass side by side (1 row, 2 columns).
# par() returns the previous settings; they are restored afterwards instead
# of assuming the layout was c(1, 1) before this block ran.
old_par <- par(mfrow = c(1, 2))
hist(
  data_selected$height,
  main = "Histogram Height", xlab = "Height",
  col = "skyblue", border = "black"
)
hist(
  data_selected$mass,
  main = "Histogram Mass", xlab = "Mass",
  col = "lightcoral", border = "black"
)
par(old_par)
# Normal Q-Q plot of height with its reference line
qqnorm(data_selected$height, col = "blue", main = "Q-Q Plot Height")
qqline(data_selected$height, col = "red")
# Normal Q-Q plot of mass with its reference line
qqnorm(data_selected$mass, col = "blue", main = "Q-Q Plot Mass")
qqline(data_selected$mass, col = "red")
# Extract the height and mass variables as plain vectors
height <- data_selected$height
mass <- data_selected$mass
# One-sample Kolmogorov-Smirnov test of height against a normal distribution
# whose mean and sd are taken from the same sample.
# NOTE(review): the ks.test documentation expects the distribution parameters
# to be pre-specified rather than estimated from the data, so this p-value is
# only approximate -- consider a Lilliefors-type correction; confirm.
ks_test_height <- ks.test(height, "pnorm", mean = mean(height), sd = sd(height))
## Warning in ks.test.default(height, "pnorm", mean = mean(height), sd =
## sd(height)): ties should not be present for the one-sample Kolmogorov-Smirnov
## test
print(ks_test_height)
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: height
## D = 0.21167, p-value = 0.1487
## alternative hypothesis: two-sided
\[ H_0: \text{Data berasal dari distribusi normal} \]
Hipotesis Alternatif
\[ H_1: \text{Data tidak berasal dari distribusi normal} \]
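P-Value = 0.1487 > 0.05, maka tak tolak \(H_0\). Artinya, tidak cukup bukti untuk menyatakan bahwa data tidak berasal dari distribusi normal, sehingga data height dapat dianggap berasal dari distribusi normal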
# One-sample Kolmogorov-Smirnov test of mass against a normal distribution
# whose mean and sd are taken from the same sample (see the ks.test
# documentation regarding estimated parameters and ties).
ks_test_mass <- ks.test(mass, "pnorm", mean = mean(mass), sd = sd(mass))
## Warning in ks.test.default(mass, "pnorm", mean = mean(mass), sd = sd(mass)):
## ties should not be present for the one-sample Kolmogorov-Smirnov test
print(ks_test_mass)
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: mass
## D = 0.25574, p-value = 0.04504
## alternative hypothesis: two-sided
\[ H_0: \text{Data berasal dari distribusi normal} \]
Hipotesis Alternatif
\[ H_1: \text{Data tidak berasal dari distribusi normal} \]
P-Value = 0.04504 < 0.05, maka tolak \(H_0\). Artinya, data tidak berasal dari distribusi normal
# Load library ggplot2 untuk visualisasi
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
# Boxplot of height
ggplot(data, aes(y = height)) +
  geom_boxplot(fill = "skyblue") +
  ggtitle("Boxplot Height") +
  theme_minimal()
# Boxplot of mass
ggplot(data, aes(y = mass)) +
  geom_boxplot(fill = "lightcoral") +
  ggtitle("Boxplot Mass") +
  theme_minimal()
# Index plot of height: observation index on x, height on y.
# seq_len() replaces 1:nrow() so that an empty data frame yields an empty
# index instead of c(1, 0).
ggplot(data_selected, aes(x = seq_len(nrow(data_selected)), y = height)) +
  geom_point(color = "blue", alpha = 0.6) +
  labs(
    title = "Scatter Plot: Height",
    x = "Index",
    y = "Height (cm)"
  ) +
  theme_minimal()
# Index plot of mass: observation index on x, mass on y
ggplot(data_selected, aes(x = seq_len(nrow(data_selected)), y = mass)) +
  geom_point(color = "green", alpha = 0.6) +
  labs(
    title = "Scatter Plot: Mass",
    x = "Index",
    y = "Mass (kg)"
  ) +
  theme_minimal()
library(robustbase)
## Warning: package 'robustbase' was built under R version 4.4.3
# Medcouple of height, computed with robustbase's mc()
mc_height <- mc(data_selected[["height"]])
## The default of 'doScale' is FALSE now for stability;
## set options(mc_doScale_quiet=TRUE) to suppress this (once per session) message
print(mc_height)
## [1] 0
# Adjusted boxplot of height
adjbox(
  data_selected$height,
  col = "lightblue",
  main = "Adjusted Boxplot of Height"
)
Rosner’s Test atau Generalized ESD many-outliers test (GESD) digunakan untuk mendeteksi pencilan ganda (atas dan/atau bawah) dalam data univariat. Uji ini akurat untuk mendeteksi hingga 10 pencilan dengan jumlah sampel minimal 15, dengan asumsi data setelah pencilan dihilangkan berdistribusi normal. Pada uji ini, diperlukan parameter k untuk menentukan jumlah maksimal pencilan yang diuji.
\(H_0\): Tidak ada pencilan dalam data.
\(H_1\): Ada pencilan dalam data (paling banyak \(k\) pencilan).
library(EnvStats)
## Warning: package 'EnvStats' was built under R version 4.4.3
##
## Attaching package: 'EnvStats'
## The following objects are masked from 'package:e1071':
##
## kurtosis, skewness
## The following objects are masked from 'package:stats':
##
## predict, predict.lm
# Rosner's test on mass, allowing for up to k = 10 suspected outliers
rosner_mass <- rosnerTest(x = data_selected$mass, k = 10)
# Show the per-step statistics table stored in the test result
rosner_mass[["all.stats"]]
## i Mean.i SD.i Value Obs.Num R.i+1 lambda.i+1 Outlier
## 1 0 77.77241 23.085847 136.0 2 2.522220 2.892705 TRUE
## 2 1 75.69286 20.558173 20.0 18 2.709037 2.876209 TRUE
## 3 2 77.75556 17.753295 120.0 4 2.379527 2.858923 TRUE
## 4 3 76.13077 15.926626 113.0 14 2.314943 2.840774 TRUE
## 5 4 74.65600 14.329052 112.0 9 2.606174 2.821681 TRUE
## 6 5 73.10000 12.291814 45.0 19 2.286074 2.801551 TRUE
## 7 6 74.32174 10.977454 49.0 3 2.306704 2.780277 TRUE
## 8 7 75.47273 9.711955 50.0 27 2.622822 2.757735 TRUE
## 9 8 76.68571 8.065376 55.0 22 2.688742 2.733780 TRUE
## 10 9 77.77000 6.518080 56.2 26 3.309257 2.708246 TRUE
Trimmed Mean adalah nilai rataan yang dihitung setelah data dipangkas sebanyak \(\alpha\)% pada masing-masing ujung. Pada trimmed mean, data diurutkan terlebih dahulu, kemudian \(\alpha\)% amatan terkecil dan \(\alpha\)% amatan terbesar dibuang sebelum rataan dihitung.
# Height: ordinary mean versus the 10% trimmed mean (trim = 0.1)
rataan_biasa_height <- mean(data_selected[["height"]], na.rm = TRUE)
rataan_terpangkas_height <- mean(
  data_selected[["height"]],
  na.rm = TRUE,
  trim = 0.1
)
# Report both values
cat("Rataan Biasa :", rataan_biasa_height, "\n")
## Rataan Biasa : 178.6552
cat("Rataan Terpangkas (10%) :", rataan_terpangkas_height, "\n")
## Rataan Terpangkas (10%) : 180.52
# Mass: ordinary mean versus the 10% trimmed mean (trim = 0.1)
rataan_biasa_mass <- mean(data_selected[["mass"]], na.rm = TRUE)
rataan_terpangkas_mass <- mean(
  data_selected[["mass"]],
  na.rm = TRUE,
  trim = 0.1
)
# Report both values
cat("Rataan Biasa :", rataan_biasa_mass, "\n")
## Rataan Biasa : 77.77241
cat("Rataan Terpangkas (10%) :", rataan_terpangkas_mass, "\n")
## Rataan Terpangkas (10%) : 77.376
# New observation to append. It is defined once so that the outlier checks
# and messages below always refer to the values that were actually added.
new_obs <- data.frame(height = 210, mass = 100)
data_selected <- rbind(data_selected, new_obs)
# Boxplot of height
boxplot(data_selected$height, main = "Boxplot of Height", col = "lightblue")
# Boxplot of mass
boxplot(data_selected$mass, main = "Boxplot of Mass", col = "lightgreen")
# Q1, Q3 and IQR of height (including the new observation), and the
# 1.5 * IQR fences
Q1_height <- quantile(data_selected$height, 0.25, na.rm = TRUE)
Q3_height <- quantile(data_selected$height, 0.75, na.rm = TRUE)
IQR_height <- Q3_height - Q1_height
lower_bound_height <- Q1_height - 1.5 * IQR_height
upper_bound_height <- Q3_height + 1.5 * IQR_height
# Is the new height outside the fences?
is_outlier_height <- (new_obs$height < lower_bound_height) |
  (new_obs$height > upper_bound_height)
cat("Apakah height =", new_obs$height, "pencilan?", is_outlier_height, "\n")
## Apakah height = 210 pencilan? FALSE
# Q1, Q3 and IQR of mass (including the new observation), and the
# 1.5 * IQR fences
Q1_mass <- quantile(data_selected$mass, 0.25, na.rm = TRUE)
Q3_mass <- quantile(data_selected$mass, 0.75, na.rm = TRUE)
IQR_mass <- Q3_mass - Q1_mass
lower_bound_mass <- Q1_mass - 1.5 * IQR_mass
upper_bound_mass <- Q3_mass + 1.5 * IQR_mass
# Is the new mass outside the fences?
is_outlier_mass <- (new_obs$mass < lower_bound_mass) |
  (new_obs$mass > upper_bound_mass)
cat("Apakah mass =", new_obs$mass, "pencilan?", is_outlier_mass, "\n")
## Apakah mass = 100 pencilan? TRUE