set.seed(0)
uniforme <- runif(1000, min = 0, max = 1)
exponentielle <- rexp(1000, rate = 2)
par(mfrow = c(1, 2))
hist(uniforme, probability = TRUE, main = "1000 réalisations de la loi U(0, 1)",
xlab = "x", col = "#02B4A1", breaks = 10)
curve(dunif(x, 0, 1), add = TRUE, col = "red", lwd = 2)
hist(exponentielle, probability = TRUE, main = "1000 réalisations de la loi Exp(2)",
xlab = "x", col = "#02B4A1", breaks = 15)
curve(dexp(x, rate = 2), add = TRUE, col = "red", lwd = 2)
La densité de la loi normale centrée réduite en 0 est : \[\frac{1}{\sqrt{2\pi}}\approx 0.3989\]
dens_norm <- dnorm(0, mean = 0, sd = 1)
cat("Densité de N(0,1) en 0 :", dens_norm)
## Densité de N(0,1) en 0 : 0.3989423
Pour la loi binomiale B(10, 0.5) : \[0.5^{10}\approx 0.000977\]
dens_binom <- dbinom(0, size = 10, prob = 0.5)
cat("Densité de B(10, 0.5) en 0 :", dens_binom)
## Densité de B(10, 0.5) en 0 : 0.0009765625
x <- seq(-4, 4, length.out = 1000)
y <- dnorm(x, mean = 0, sd = 1)
plot(x, y, type = "l", lwd = 2, col = "black",
main = "Densité de la loi normale centrée réduite N(0,1)",
xlab = "x", ylab = "f(x)", las = 1)
grid()
sample <- rnorm(10000, mean = 0, sd = 1)
hist(sample, probability = TRUE, breaks = 50,
main = "10000 réalisations de la loi N(0,1)",
xlab = "x", ylab = "Densité", col = "#02B4A1")
# Superposer la densité théorique
x_dens <- seq(-4, 4, length.out = 1000)
lines(x_dens, dnorm(x_dens, 0, 1), col = "red", lwd = 2)
legend("topright", legend = "Densité théorique", col = "red", lwd = 2)
La fonction de répartition de la loi normale centrée réduite en 0 est :
F_0 <- pnorm(0, mean = 0, sd = 1)
cat("F(0) pour N(0,1) :", F_0, "\n")
## F(0) pour N(0,1) : 0.5
x <- seq(-4, 4, length.out = 1000)
F_x <- pnorm(x, mean = 0, sd = 1)
plot(x, F_x, lwd = 2,
main = "Fonction de répartition de N(0,1)",
xlab = "x", ylab = "F(x)")
exp <- rexp(100, rate = 1)
ecdf <- ecdf(exp)
x_plot <- seq(0, max(exp), length.out = 1000)
plot(ecdf, main = "Fonction de répartition de Exp(1)",
xlab = "x", ylab = "F(x)", col = "#02B4A1", lwd = 3)
lines(x_plot, pexp(x_plot, rate = 1), col = "red", lwd = 3)
legend("bottomright", legend = c("Empirique", "Théorique)"),
col = c("#02B4A1", "red"), lwd = 3)
simulate_X <- function(n) {
sample(c(0, 1, 3), size = n, replace = TRUE, prob = c(0.5, 0.25, 0.25))
}
test_sample <- simulate_X(10)
sequence <- simulate_X(10000)
cumulative_means <- cumsum(sequence) / (1:10000)
plot(1:10000, cumulative_means, type = "l", col = "#02B4A1",
main = "Simulation de Xn",
xlab = "n", ylab = expression(bar(X)[n]), ylim = c(0, 2))
E_X <- 0 * 0.5 + 1 * 0.25 + 3 * 0.25
abline(h = E_X, col = "red", lwd = 2)
legend("topright", legend = c(expression(bar(X)[n]), "E[X] = 1"),
col = c("#02B4A1", "red"), lwd = 2)
On constate que \[\displaystyle \lim_{n \to \infty} \overline{X}_n = \mathbb{E}[X]\]
cat("Espérance théorique E[X] =", E_X)
## Espérance théorique E[X] = 1
cat("Moyenne empirique finale :", tail(cumulative_means, 1))
## Moyenne empirique finale : 0.9872
Pour n = 500 , on a : \[ \overline{X}_{500} \sim \mathcal{N}\left(\mathbb{E}[X], \dfrac{\mathrm{Var}(X)}{n}\right) \] et donc \[ \frac{\overline{X}_{500} - \mathbb{E}[X]}{\sqrt{\frac{\mathrm{Var}(X)}{n}}} \sim \mathcal{N}(0, 1) \] On a donc : \[ a = \sqrt{\dfrac{n}{\mathrm{Var}(X)}} \quad \text{et} \quad b = -\mathbb{E}[X] \]
Var_X <- 0^2 * 0.5 + 1^2 * 0.25 + 3^2 * 0.25 - E_X^2
a <- sqrt(500 / Var_X)
b <- -E_X
cat("Y =", a, "* (̄x_500-1)")
## Y = 18.25742 * (̄x_500-1)
Y_simulations <- numeric(10000)
for (i in 1:10000) {
X_sample <- simulate_X(500)
X_bar <- mean(X_sample)
Y_simulations[i] <- a * (X_bar + b)
}
cat("Moyenne de Y :", mean(Y_simulations))
## Moyenne de Y : 0.005743784
cat("Variance de Y :", var(Y_simulations))
## Variance de Y : 1.029102
Les valeurs simulées se rapprochent des valeurs de N(0, 1).
hist(Y_simulations, probability = TRUE, breaks = 50,
main = "Comparaison avec N(0, 1)",
xlab = "Y", ylab = "Densité", col = "#02B4A1")
# Superposer la densité N(0,1)
x_norm <- seq(-4, 4, length.out = 1000)
lines(x_norm, dnorm(x_norm, 0, 1), col = "red", lwd = 2)
legend("topright", legend = "N(0,1)",
col = "red", lwd = 2)
Les simulations suivent approximativement N(0, 1). On trouve un
décalage car l’échantillon reste relativement petit.
On choisit : \[\theta = 5\]
theta <- 5
n <- 100
N <- 1000
T1 <- numeric(N)
T2 <- numeric(N)
T3 <- numeric(N)
T4 <- numeric(N)
T5 <- numeric(N)
T6 <- numeric(N)
T7 <- numeric(N)
T8 <- numeric(N)
for (i in 1:N) {
X <- runif(n, min = 0, max = theta)
T1[i] <- (2/n) * sum(X)
T2[i] <- sqrt((3/n) * sum(X^2))
T3[i] <- ((4/n) * sum(X^3))^(1/3)
T4[i] <- ((3/(2*n)) * sum(sqrt(X)))^2
T5[i] <- ((1/(2*n)) * sum(1/sqrt(X)))^(-2)
T6[i] <- exp(1) * (prod(X))^(1/n)
T7[i] <- max(X)
T8[i] <- ((n+1)/n) * max(X)
}
T <- cbind(T1, T2, T3, T4, T5, T6, T7, T8)
T_df <- as.data.frame(T)
moyennes <- colMeans(T)
variances <- apply(T, 2, var)
biais <- moyennes - theta
EQM <- variances + biais^2
resultats <- data.frame(
Estimateur = colnames(T),
Moyenne = moyennes,
Variance = variances,
Biais = biais,
EQM = EQM
)
print(resultats, digits = 4)
## Estimateur Moyenne Variance Biais EQM
## T1 T1 4.997 0.085090 -0.003408 0.085102
## T2 T2 4.991 0.052026 -0.009401 0.052114
## T3 T3 4.989 0.037587 -0.010991 0.037708
## T4 T4 5.006 0.126117 0.005817 0.126151
## T5 T5 5.278 1.258047 0.278350 1.335526
## T6 T6 5.033 0.246873 0.033104 0.247969
## T7 T7 4.948 0.002538 -0.052280 0.005272
## T8 T8 4.997 0.002590 -0.002803 0.002597
L’estimateur le moins biaisé et avec une erreur quadratique la plus basse est T8.
boxplot(data.frame(T))
abline(h = theta, col = "red")
diveibull <- function(x, theta) {
ifelse(x >= 0, theta * x^(theta-1) * exp(-x^theta), 0)
}
x <- seq(0, 5, length.out = 1000)
theta_values <- c(0.5, 1, 2, 3)
plot(x, diveibull(x, theta_values[1]), type = "l", lwd = 2, col = "red",
ylim = c(0, 1.5), xlab = "x", ylab = "Densité",
main = "Densités de la loi de Weibull")
lines(x, diveibull(x, theta_values[2]), lwd = 2, col = "blue")
lines(x, diveibull(x, theta_values[3]), lwd = 2, col = "green")
lines(x, diveibull(x, theta_values[4]), lwd = 2, col = "black")
legend("topright",
legend = c("θ = 0.5", "θ = 1", "θ = 2", "θ = 3"),
col = c("red", "blue", "green", "black"),
lwd = 2, bty = "n")
data <- scan("weibull.txt", sep = ",")
log_vraisemblance <- function(theta, x) {
sum(log(theta) + (theta - 1) * log(x) - x^theta)
}
theta_grid <- seq(0.1, 5, length.out = 1000)
log_vrai_values <- sapply(theta_grid, function(th) log_vraisemblance(th, data))
theta_hat <- theta_grid[which.max(log_vrai_values)]
cat("Estimateur du maximum de vraisemblance θ =", round(theta_hat, 4), "\n")
## Estimateur du maximum de vraisemblance θ = 2.9596
hist(data, breaks = 30, probability = TRUE,
col = "#02B4A1", border = "white",
xlab = "Durée de vie (milliers d'heures)",
ylab = "Densité",
main = "Densité de Weibull estimée")
x_fit <- seq(min(data), max(data), length.out = 1000)
lines(x_fit, diveibull(x_fit, theta_hat),
col = "red", lwd = 2)
legend("topright",
legend = c(paste("Densité Weibull (θ =", round(theta_hat, 3), ")"), "Données"),
col = c("red", "#02B4A1"),
lwd = c(2, 5), lty = c(1, 1),
bty = "n")