Установка пакетов
Разведочный анализ данных (EDA)
# Pairwise plot matrix of the iris columns, colored by species
ggpairs(
  iris,
  mapping = aes(color = Species, alpha = 0.4)
) +
  ggtitle("Pairplot of Iris Dataset")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Box plots of each measurement, split by species.
# melt() is called with Species as the id column; the aesthetics below use
# its `variable` and `value` columns.
ggplot(
  data = melt(iris, id.vars = "Species"),
  mapping = aes(x = variable, y = value, fill = Species)
) +
  geom_boxplot() +
  ggtitle("Boxplots of Iris Features by Species") +
  # Rotate the x-axis labels by 45 degrees and right-align them
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Бутстрап
# Nonparametric bootstrap of a statistic with a percentile confidence interval.
#
# Arguments:
#   data         Vector of observations to resample; must be non-empty.
#   n_bootstraps Number of bootstrap resamples to draw.
#   statistic    Function applied to each resample (default: mean).
#   ci           Confidence level in percent, between 0 and 100.
#
# Returns a list with:
#   samples  the statistic evaluated on each resample,
#   mean     the mean of `samples`,
#   ci       the lower and upper percentile bounds of `samples`.
bootstrap <- function(data, n_bootstraps = 1000, statistic = mean, ci = 95) {
  stopifnot(
    length(data) > 0,
    is.numeric(ci), length(ci) == 1, ci >= 0, ci <= 100
  )
  n <- length(data)
  boot_samples <- replicate(n_bootstraps, {
    # Resample by index: sample(data, ...) would interpret a single number
    # x >= 1 as the population 1:x instead of the one observation x.
    statistic(data[sample.int(n, size = n, replace = TRUE)])
  })
  # Probability mass left in each tail outside the interval
  tail_prob <- (100 - ci) / 200
  lower <- quantile(boot_samples, tail_prob)
  upper <- quantile(boot_samples, 1 - tail_prob)
  list(
    samples = boot_samples,
    mean = mean(boot_samples),
    ci = c(lower, upper)
  )
}
# Apply the bootstrap to the petal length (Petal.Length) of versicolor
set.seed(42)  # fix the RNG seed so the resampling results are reproducible
versicolor <- iris[iris$Species == "versicolor", "Petal.Length"]
result <- bootstrap(versicolor)
# Visualization: histogram of the bootstrap statistics with reference lines
hist(
  result$samples,
  breaks = 30,
  main = "Bootstrap Distribution of Mean Petal Length for Versicolor",
  xlab = "Mean Petal Length",
  col = "lightblue",
  border = "black"
)
# Dashed red line at the bootstrap mean
abline(v = result$mean, col = "red", lty = 2, lwd = 2)
# Dashed green lines at the interval bounds; abline() draws one vertical
# line per element of `v`
abline(v = result$ci, col = "green", lty = 2, lwd = 2)
legend(
  "topright",
  legend = c(
    paste("Bootstrap Mean:", round(result$mean, 2)),
    paste("95% CI: (", round(result$ci[1], 2), ",", round(result$ci[2], 2), ")")
  ),
  col = c("red", "green"),
  lty = 2,
  lwd = 2
)

# Print the plain sample mean next to the bootstrap estimates.
# The lines starting with `##` appear to be output captured from an earlier run.
cat("Original mean:", mean(versicolor), "\n")
## Original mean: 4.26
cat("Bootstrap mean:", result$mean, "\n")
## Bootstrap mean: 4.256592
# result$ci holds the lower and upper bound, so cat() prints both values
cat("95% CI:", result$ci, "\n")
## 95% CI: 4.116 4.38205