# Draw 100 values from an exponential distribution (rate 1) under a fixed
# seed, then report the sample median.
set.seed(123)
data <- rexp(n = 100, rate = 1)
median(x = data)
## [1] 0.847754
# Count how many of 100 standard-normal draws lie more than two sample
# standard deviations away from the sample mean.
set.seed(123)
data <- rnorm(n = 100)
mean_data <- mean(data)
sd_data <- sd(data)
# A value is an outlier exactly when it is NOT inside the closed band
# [mean - 2 SD, mean + 2 SD].
within_band <- data >= (mean_data - 2 * sd_data) &
  data <= (mean_data + 2 * sd_data)
outliers <- sum(!within_band)
outliers
## [1] 5
5. Simulasikan 100 data dari distribusi Poisson dengan λ = 4. Berapa nilai modus dari data yang dihasilkan? Gunakan set.seed(123).
# Simulate 100 Poisson(lambda = 4) counts under a fixed seed.
set.seed(123)
data <- rpois(n = 100, lambda = 4)
# Mode = the most frequent value. The frequency table is ordered by value,
# and which.max() picks the first entry holding the highest count; its name
# is the value itself, converted back to a number.
modus <- as.numeric(names(which.max(table(data))))
print(modus)
## [1] 2
# Draw 100 standard-normal values under a fixed seed and keep only those
# strictly greater than 1; the count of kept values is printed.
set.seed(123)
data <- rnorm(n = 100, mean = 0, sd = 1)
data_filtered <- data[which(data > 1)]
length(data_filtered)
## [1] 17
# Sample 100 labels from A/B/C with unequal probabilities (0.2, 0.3, 0.5)
# under a fixed seed.
set.seed(123)
data <- sample(
  x = c("A", "B", "C"),
  size = 100,
  replace = TRUE,
  prob = c(0.2, 0.3, 0.5)
)
# Number of draws that came out as category "B".
sum(data %in% "B")
## [1] 29
8. Manakah kode berikut yang benar untuk membuat model regresi linear dari y terhadap x?
# Build the data: x is 1..10 and y is 2 * x plus standard-normal noise drawn
# under a fixed seed.
x <- seq_len(10)
set.seed(123)
y <- 2 * x + rnorm(n = length(x))
# Fit the simple linear regression of y on x.
reg <- lm(formula = y ~ x)
# Print the model summary.
summary(reg)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1348 -0.5624 -0.1393 0.3854 1.6814
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.5255 0.6673 0.787 0.454
## x 1.9180 0.1075 17.835 1e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9768 on 8 degrees of freedom
## Multiple R-squared: 0.9755, Adjusted R-squared: 0.9724
## F-statistic: 318.1 on 1 and 8 DF, p-value: 1e-07
set.seed(123)
x <- c(5, 8, 12, 13, 15, 18, 21)
# Draw 10,000 bootstrap resamples (same size as x, with replacement) and
# record the median of each one.
bootstrap_medians <- vapply(
  seq_len(10000),
  function(i) median(sample(x, size = length(x), replace = TRUE)),
  numeric(1)
)
# Percentile 95% confidence interval: the 2.5% and 97.5% quantiles of the
# bootstrap medians.
ci <- quantile(bootstrap_medians, probs = c(0.025, 0.975))
ci
## 2.5% 97.5%
## 8 18
# Data: x is 1..10; y is 2 * x plus N(0, 1) noise drawn under a fixed seed.
x <- seq_len(10)
set.seed(123)
noise <- rnorm(n = 10, mean = 0, sd = 1)
y <- 2 * x + noise
# Fit the linear regression of y on x.
model <- lm(formula = y ~ x)
# Show the fitted intercept and slope.
coef(model)
## (Intercept) x
## 0.5254674 1.9180288
# Fix the seed so the simulated noise is reproducible.
set.seed(123)
# Predictor 1..10 and a response that is twice the predictor plus
# standard-normal noise.
x <- 1:10
y <- 2 * x + rnorm(n = length(x), mean = 0, sd = 1)
# Ordinary least-squares fit of y on x.
model <- lm(y ~ x)
# Print the estimated coefficients (intercept and slope).
coefficients(model)
## (Intercept) x
## 0.5254674 1.9180288
# Generate 100 binomial counts (10 trials each, success probability 0.3)
# under a fixed seed.
set.seed(123)
data <- rbinom(n = 100, size = 10, prob = 0.3)
# Report the sample mean of the counts.
mean(x = data)
## [1] 3.02
12. Diberikan data: x <- rnorm(100) dan y <- 2*x + rnorm(100). Manakah kode berikut yang benar untuk melihat ringkasan dari model regresi?
# Generate the data described in the question (seeded for reproducibility)
# so the regression no longer depends on whatever x and y were left in the
# workspace by an earlier chunk.
set.seed(123)
x <- rnorm(100)
y <- 2 * x + rnorm(100)
# Fit y on x and print the full model summary.
# NOTE: the recorded output following this chunk reports 8 residual degrees
# of freedom, i.e. it was produced from a 10-observation x/y rather than the
# 100 observations defined here; re-run the chunk to refresh it.
summary(lm(y ~ x))
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1348 -0.5624 -0.1393 0.3854 1.6814
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.5255 0.6673 0.787 0.454
## x 1.9180 0.1075 17.835 1e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9768 on 8 degrees of freedom
## Multiple R-squared: 0.9755, Adjusted R-squared: 0.9724
## F-statistic: 318.1 on 1 and 8 DF, p-value: 1e-07
# Simulate 1000 binomial experiments, each with n = 10 trials and success
# probability p = 0.3, under a fixed seed.
set.seed(123)
data <- rbinom(n = 1000, size = 10, prob = 0.3)
# Average number of successes per experiment.
mean(x = data)
## [1] 2.989
# Draw 100 values from a normal distribution with mean 70 and standard
# deviation 5 under a fixed seed.
set.seed(123)
data <- rnorm(n = 100, mean = 70, sd = 5)
# Largest value in the sample (the upper end of its range).
range(data)[2]
## [1] 80.93666
# Draw 100 values from a normal distribution with mean 50 and standard
# deviation 10 under a fixed seed.
set.seed(123)
data <- rnorm(n = 100, mean = 50, sd = 10)
# Sample standard deviation, written as the square root of the sample
# variance.
sqrt(var(data))
## [1] 9.128159
# Simulate 100 values from a uniform distribution on [20, 80] under a fixed
# seed.
set.seed(123)
data <- runif(n = 100, min = 20, max = 80)
# Spread of the sample: range() yields c(min, max) and diff() subtracts the
# first from the second.
range_value <- diff(range(data))
range_value
## [1] 59.6187
# Print 100 draws from a normal distribution with mean 70 and standard
# deviation 15.
# NOTE(review): no seed is set in this chunk, so the values depend on the
# random-number state left by whatever ran before it; confirm whether a
# set.seed() call was intended here.
rnorm(n = 100, mean = 70, sd = 15)
## [1] 73.79978 69.57180 69.35694 90.52903 66.61344 92.74706 46.76871
## [8] 78.76921 71.85781 73.23912 75.69459 62.46515 65.00189 54.72137
## [15] 53.92313 74.55293 76.72315 70.79506 83.83401 100.75127 62.63453
## [22] 35.36247 85.08608 59.36199 59.67987 85.38357 65.72840 51.68923
## [29] 72.71955 67.91663 70.08646 75.77921 64.44010 79.66565 66.69270
## [36] 74.97673 86.45259 76.52772 65.11103 87.23211 84.90256 78.22595
## [43] 73.58098 60.58141 90.40979 60.99611 102.80999 92.98916 66.46449
## [50] 54.60369 59.34390 73.85326 66.29962 64.78686 55.72572 69.32458
## [57] 58.22643 44.98087 64.29660 83.78495 61.36980 79.11946 45.73176
## [64] 69.16657 77.79111 74.51730 71.58514 60.38941 57.25443 54.63807
## [71] 71.76470 55.78788 62.64164 66.15862 97.65793 60.22075 73.53080
## [78] 71.16941 55.57215 68.93038 91.66826 76.77256 70.61849 63.66255
## [85] 39.20129 86.97006 48.09040 81.09921 98.63655 48.34160 80.52677
## [92] 66.06704 46.41784 47.27999 45.97696 62.03640 48.07367 80.31875
## [99] 101.50163 50.69454
set.seed(123)
# Observed sample.
x <- c(5, 7, 8, 10, 12)
# Run 1000 bootstrap iterations: resample x with replacement (same size as
# x) and keep the mean of each resample.
bootstrap_means <- vapply(
  seq_len(1000),
  function(i) mean(sample(x, size = length(x), replace = TRUE)),
  numeric(1)
)
# Average of the bootstrap means.
mean(bootstrap_means)
## [1] 8.3572
set.seed(42)
# Simulated predictors. The random draws are made in a fixed order
# (x1, the x2 perturbation, x3, then the response noise) so the seeded
# results stay the same.
x1 <- rnorm(n = 100)
# x2 is x1 plus a tiny perturbation, so the two are almost perfectly
# correlated.
x2 <- x1 + rnorm(n = 100, mean = 0, sd = 0.01)
x3 <- rnorm(n = 100)
# The response depends on x1 and x3 only, plus standard-normal noise.
noise <- rnorm(n = 100)
y <- 3 + 2 * x1 - 1 * x3 + noise
# Regress y on all three predictors.
model <- lm(formula = y ~ x1 + x2 + x3)
# Print the model summary.
summary(model)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7944 -0.5867 -0.1038 0.6188 2.3280
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.03150 0.08914 34.007 <2e-16 ***
## x1 1.17483 9.89434 0.119 0.906
## x2 0.88292 9.89031 0.089 0.929
## x3 -1.03161 0.08882 -11.614 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8867 on 96 degrees of freedom
## Multiple R-squared: 0.8927, Adjusted R-squared: 0.8893
## F-statistic: 266.2 on 3 and 96 DF, p-value: < 2.2e-16
set.seed(123)
# First group: 50 draws from a normal with mean 100 and sd 15.
data1 <- rnorm(n = 50, mean = 100, sd = 15)
# Second group: 50 draws from a normal with mean 80 and sd 10.
data2 <- rnorm(n = 50, mean = 80, sd = 10)
# Pool the two groups into one vector, first group first.
combined_data <- append(data1, data2)
# Mean over all 100 pooled values.
mean_combined_data <- mean(x = combined_data)
# Print the pooled mean.
mean_combined_data
## [1] 90.99007