Установка пакетов

Разведочный анализ данных (EDA)

# Pair plot of every iris column, coloured by species.
# ggpairs()/wrap() are assumed to come from GGally and aes()/ggtitle() from
# ggplot2, both attached earlier in the report.
# The histogram panels (numeric variable vs. Species) get an explicit bin
# count. 30 is the value stat_bin() was already falling back to, so the plot
# is unchanged, but the repeated "Pick better value with `binwidth`" message
# is no longer emitted for each of those panels.
ggpairs(
  iris,
  mapping = aes(color = Species, alpha = 0.4),
  lower = list(combo = wrap("facethist", bins = 30))
) +
  ggtitle("Pairplot of Iris Dataset")

# Box plots of each measurement, grouped and filled by species.
# melt() (presumably reshape2 -- confirm) reshapes iris to long form, keeping
# Species as the id column; the plot then reads the `variable` and `value`
# columns of that long table.
ggplot(
  data = melt(iris, id.vars = "Species"),
  mapping = aes(x = variable, y = value, fill = Species)
) +
  geom_boxplot() +
  ggtitle("Boxplots of Iris Features by Species") +
  # Tilt the x-axis labels so the feature names do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Бутстреп

# Bootstrap function: nonparametric bootstrap of a statistic with a
# percentile confidence interval.
#
# Arguments:
#   data          Vector of observations to resample; must be non-empty.
#   n_bootstraps  Number of bootstrap replicates to draw (default 1000).
#   statistic     Function applied to each resample (default `mean`).
#   ci            Confidence level in percent, from 0 to 100 (default 95).
#
# Returns a list with:
#   samples  the statistic evaluated on each resample,
#   mean     the mean of `samples`,
#   ci       lower and upper percentile bounds, named by quantile().
bootstrap <- function(data, n_bootstraps = 1000, statistic = mean, ci = 95) {
  # Fail early with a clear message instead of an obscure quantile() error.
  stopifnot(
    length(data) > 0,
    is.function(statistic),
    is.numeric(n_bootstraps), length(n_bootstraps) == 1, n_bootstraps >= 1,
    is.numeric(ci), length(ci) == 1, ci >= 0, ci <= 100
  )

  n_obs <- length(data)
  boot_samples <- replicate(n_bootstraps, {
    # Resample by index. sample(data, ...) would interpret a single number x
    # (x >= 1) as the range 1:x rather than resampling the one observation.
    # For longer inputs this draws exactly the same random indices.
    resample <- data[sample.int(n_obs, size = n_obs, replace = TRUE)]
    statistic(resample)
  })

  # Split the excluded probability mass evenly between the two tails.
  tail_prob <- (100 - ci) / 200
  lower <- quantile(boot_samples, tail_prob)
  upper <- quantile(boot_samples, 1 - tail_prob)
  mean_val <- mean(boot_samples)

  list(samples = boot_samples, mean = mean_val, ci = c(lower, upper))
}

# Apply the bootstrap to the petal lengths (Petal.Length) of versicolor.
# The seed is fixed so that the resampled mean and interval reported below
# can be reproduced from run to run.
set.seed(123)
versicolor <- iris[iris$Species == "versicolor", "Petal.Length"]
result <- bootstrap(versicolor)

# Visualization: histogram of the bootstrap replicates, with the bootstrap
# mean (red) and both confidence bounds (green) drawn as dashed lines.
hist(
  result$samples,
  breaks = 30,
  col = "lightblue",
  border = "black",
  main = "Bootstrap Distribution of Mean Petal Length for Versicolor",
  xlab = "Mean Petal Length"
)
abline(v = result$mean, col = "red", lty = 2, lwd = 2)
# result$ci holds the lower and upper bound; abline() draws one line per value.
abline(v = result$ci, col = "green", lty = 2, lwd = 2)
legend(
  "topright",
  legend = c(
    paste("Bootstrap Mean:", round(result$mean, 2)),
    paste("95% CI: (", round(result$ci[1], 2), ",", round(result$ci[2], 2), ")")
  ),
  col = c("red", "green"),
  lty = 2,
  lwd = 2
)

# Print the sample mean of the original versicolor petal lengths.
# (Lines starting with `##` appear to be the console output recorded when the
# report was rendered.)
cat("Original mean:", mean(versicolor), "\n")
## Original mean: 4.26
# Print the mean of the bootstrap replicates for comparison.
cat("Bootstrap mean:", result$mean, "\n")
## Bootstrap mean: 4.256592
# Print the lower and upper bounds of the bootstrap confidence interval.
cat("95% CI:", result$ci, "\n")
## 95% CI: 4.116 4.38205