¿Cuántos de los puntos están dentro del círculo? ¿Cuál es su estimación de \(π\)?
#set.seed(1032495899)
x <- runif(10000000, min=0, max=1)
set.seed(19953123)
y <- runif(10000000, min=0, max=1)
# Define the center
center_x <- 0.5
center_y <- 0.5
# Calculate the squared distance
squared_distance <- (x - center_x)^2 + (y - center_y)^2
# Find the points with squared distance less than 0.25
indices <- which(squared_distance < 0.25)
# Count the number of points inside the circle
num_points_inside_circle <- length(indices)
# Total number of points
total_points <- length(x)
# Approximate pi
pi_approximation <- 4 * num_points_inside_circle / total_points
# Calculate the difference
error_relativo <- round(((abs(pi - pi_approximation))/pi)*100,4)En busca de obtener un valor más robusto se muestra a continuación la aproximación obtenida a partir de 20 iteraciones.
set.seed(1204395)
# Initialize vectors to store the approximations and differences
pi_approximations <- numeric(20)
points_inside <- numeric(20)
differences <- numeric(20)
# Loop 20 times
for (i in 1:20) {
# Your existing vectors
x <- runif(10000000, min=0, max=1)
y <- runif(10000000, min=0, max=1)
# Define the center
center_x <- 0.5
center_y <- 0.5
# Calculate the squared distance
squared_distance <- (x - center_x)^2 + (y - center_y)^2
# Find the points with squared distance less than 0.25
indices <- which(squared_distance < 0.25)
# Count the number of points inside the circle
num_points_inside_circle <- length(indices)
# Total number of points
total_points <- length(x)
# Approximate pi
pi_approximation <- 4 * num_points_inside_circle / total_points
# Calculate the difference
difference <- abs(pi - pi_approximation)
# Store the approximation and difference
pi_approximations[i] <- pi_approximation
points_inside[i] <- num_points_inside_circle
differences[i] <- difference
}
# Calculate the mean, standard deviation, and mean difference
mean_approximation <- mean(pi_approximations)
sd_approximation <- round(sd(pi_approximations),4)
mean_difference <- mean(differences)
error_relativo <- round(((abs(pi - mean_approximation))/pi)*100,4)
mean_points <- round(mean(points_inside),0)
Sean \(X\_{1}, X\_{2}, X\_{3}\) y \(X\_{4}\), una muestra aleatoria de tamaño \(n=4\) cuya población la conforma una distribución exponencial con parámetro \(\theta\) desconocido. Determine las características de cada uno de los siguientes estimadores propuestos:
Suponga un valor para el parámetro \(\theta\) y genere una muestras de \(n=20, 50, 100\) y \(1000\) para cada uno de los estimadores planteados.
\(\theta=2\)
set.seed(1204395)
n<-4
m<-20*n
theta<-2
lambda<-1/theta
Y <- matrix(rexp(m, lambda), ncol = n)
Max=apply(Y,1,max)
Min=apply(Y,1,min)
theta1 <- apply(Y, 1, function(x) (x[1] + x[2])/6 + (x[3] + x[4])/3)
theta2 <- apply(Y, 1, function(x) (x[1] + (2*x[2]) + (3*x[3]) + (4*x[4]))/5)
theta3 <- apply(Y, 1, function(x) (x[1] + x[2] + x[3] + x[4])/4)
theta4 <- (Min+Max)/2
T1234 <- data.frame(theta1, theta2, theta3, theta4)
boxplot(T1234, las=1, main="Comparación estimadores con n=20", ylim=c(0,10))
abline(h=2, col="red") # Calculate the mean and standard deviation for each estimator
mean_values <- round(apply(T1234, 2, mean),4)
sd_values <- round(apply(T1234, 2, sd),4)
# Combine the mean and standard deviation into a single data frame
summary_table <- rbind(mean_values, sd_values)
# Convert to a data frame and add row names
summary_table <- as.data.frame(summary_table)
rownames(summary_table) <- c("mean", "sd")
# Print the table using kable
knitr::kable(summary_table, caption = "Resumen de estadísticas para estimadores con n=20")| theta1 | theta2 | theta3 | theta4 | |
|---|---|---|---|---|
| mean | 2.0394 | 4.0265 | 2.2087 | 2.6726 |
| sd | 0.7364 | 1.6521 | 0.7820 | 1.0911 |
set.seed(1204395)
n<-4
m<-50*n
theta<-2
lambda<-1/theta
Y <- matrix(rexp(m, lambda), ncol = n)
Max<-apply(Y,1,max)
Min<-apply(Y,1,min)
theta1 <- apply(Y, 1, function(x) (x[1] + x[2])/6 + (x[3] + x[4])/3)
theta2 <- apply(Y, 1, function(x) (x[1] + (2*x[2]) + (3*x[3]) + (4*x[4]))/5)
theta3 <- apply(Y, 1, function(x) (x[1] + x[2] + x[3] + x[4])/4)
theta4 <- (Min+Max)/2
T1234=data.frame(theta1, theta2, theta3, theta4)
boxplot(T1234, las=1, main="Comparación estimadores con n=50", ylim=c(0,10))
abline(h=2, col="red") # Calculate the mean and standard deviation for each estimator
mean_values <- round(apply(T1234, 2, mean),4)
sd_values <- round(apply(T1234, 2, sd),4)
# Combine the mean and standard deviation into a single data frame
summary_table <- rbind(mean_values, sd_values)
# Convert to a data frame and add row names
summary_table <- as.data.frame(summary_table)
rownames(summary_table) <- c("mean", "sd")
# Print the table using kable
knitr::kable(summary_table, caption = "Resumen de estadísticas para estimadores con n=50")| theta1 | theta2 | theta3 | theta4 | |
|---|---|---|---|---|
| mean | 1.9879 | 3.9075 | 2.0456 | 2.4021 |
| sd | 0.8364 | 1.7930 | 0.9236 | 1.1015 |
n<-4
m<-100*n
theta<-2
lambda<-1/theta
Y <- matrix(rexp(m, lambda), ncol = n)
Max<-apply(Y,1,max)
Min<-apply(Y,1,min)
theta1 <- apply(Y, 1, function(x) (x[1] + x[2])/6 + (x[3] + x[4])/3)
theta2 <- apply(Y, 1, function(x) (x[1] + (2*x[2]) + (3*x[3]) + (4*x[4]))/5)
theta3 <- apply(Y, 1, function(x) (x[1] + x[2] + x[3] + x[4])/4)
theta4 <- (Min+Max)/2
T1234=data.frame(theta1, theta2, theta3, theta4)
boxplot(T1234, las=1, main="Comparación estimadores con n=100", ylim=c(0,10))
abline(h=2, col="red")# Calculate the mean and standard deviation for each estimator
mean_values <- round(apply(T1234, 2, mean),4)
sd_values <- round(apply(T1234, 2, sd),4)
# Combine the mean and standard deviation into a single data frame
summary_table <- rbind(mean_values, sd_values)
# Convert to a data frame and add row names
summary_table <- as.data.frame(summary_table)
rownames(summary_table) <- c("mean", "sd")
# Print the table using kable
knitr::kable(summary_table, caption = "Resumen de estadísticas para estimadores con n=100")| theta1 | theta2 | theta3 | theta4 | |
|---|---|---|---|---|
| mean | 1.9327 | 3.8797 | 1.9676 | 2.2834 |
| sd | 1.0984 | 2.2567 | 1.0877 | 1.3399 |
n<-4
m<-1000*n
theta<-2
lambda<-1/theta
Y <- matrix(rexp(m, lambda), ncol = n)
Max=apply(Y,1,max)
Min=apply(Y,1,min)
theta1 <- apply(Y, 1, function(x) (x[1] + x[2])/6 + (x[3] + x[4])/3)
theta2 <- apply(Y, 1, function(x) (x[1] + (2*x[2]) + (3*x[3]) + (4*x[4]))/5)
theta3 <- apply(Y, 1, function(x) (x[1] + x[2] + x[3] + x[4])/4)
theta4 <- (Min+Max)/2
T1234=data.frame(theta1, theta2, theta3, theta4)
boxplot(T1234, las=1, main="Comparación estimadores con n=1000", ylim=c(0,10)) # gráfico de comparación
abline(h=2, col="red")# Calculate the mean and standard deviation for each estimator
mean_values <- round(apply(T1234, 2, mean),4)
sd_values <- round(apply(T1234, 2, sd),4)
# Combine the mean and standard deviation into a single data frame
summary_table <- rbind(mean_values, sd_values)
# Convert to a data frame and add row names
summary_table <- as.data.frame(summary_table)
rownames(summary_table) <- c("mean", "sd")
# Print the table using kable
knitr::kable(summary_table, caption = "Resumen de estadísticas para estimadores con n=1000")| theta1 | theta2 | theta3 | theta4 | |
|---|---|---|---|---|
| mean | 1.9913 | 3.9843 | 1.9836 | 2.3203 |
| sd | 1.0379 | 2.1326 | 0.9799 | 1.2248 |
library(ggplot2)
library(reshape2)
set.seed(1204395)
n <- 4
theta <- 2
lambda <- 1/theta
m_values <- c(20, 50, 100, 1000) * n
my_colors <- c("#3c687a","#2a9d8f","#e9c46a", "#f4a261", "#e76f51")
generate_data <- function(m) {
Y <- matrix(rexp(m, lambda), ncol = n)
Max <- apply(Y, 1, max)
Min <- apply(Y, 1, min)
theta1 <- apply(Y, 1, function(x) (x[1] + x[2])/6 + (x[3] + x[4])/3)
theta2 <- apply(Y, 1, function(x) (x[1] + (2*x[2]) + (3*x[3]) + (4*x[4]))/5)
theta3 <- apply(Y, 1, function(x) (x[1] + x[2] + x[3] + x[4])/4)
theta4 <- (Min+Max)/2
T1234 <- data.frame(theta1, theta2, theta3, theta4)
mean_values <- round(apply(T1234, 2, mean), 4)
sd_values <- round(apply(T1234, 2, sd), 4)
mse_values <- round((mean_values - theta)^2 + sd_values^2, 4)
summary_table <- rbind(mean_values, sd_values, mse_values)
summary_table <- as.data.frame(summary_table)
rownames(summary_table) <- c("mean", "sd", "mse")
return(list(T1234 = T1234, summary_table = summary_table))
}
results <- lapply(m_values, generate_data)
combined_summary_table <- do.call(rbind, lapply(results, function(x) x$summary_table))
best_estimators <- sapply(results, function(x) names(which.min(x$summary_table["mse", ])))
long_data <- do.call(rbind, lapply(seq_along(results), function(i) {
data <- results[[i]]$T1234
data$m <- m_values[i]
melt(data, id.vars = "m")
}))
best_data <- data.frame(m = m_values, variable = best_estimators, value = sapply(seq_along(best_estimators), function(i) mean(results[[i]]$T1234[[best_estimators[i]]])))
best_data$n = best_data$m/4
ggplot(long_data, aes(x=factor(m), y=value, fill=variable)) +
geom_boxplot() +
labs(x="n", y="Valor", fill="Estimador") +
theme_minimal() +
geom_hline(yintercept=2, color="red") +
scale_fill_manual(values = my_colors) +
theme(legend.position="top") +
scale_x_discrete(labels=c("20", "50", "100", "1000")) +
ggtitle(expression(paste("Estimadores para ", theta, "=2"))) +
geom_point(data = best_data, aes(x=factor(m), y=value), shape=25, size=3, color="black")Al comparar todos los estimadores se encuentra que los mejores resultados se obtienen con \(\theta_{1}\) y \(\theta_{3}\). Estos dos estimadores se pueden clasificar como insesgados, consistentes y eficientes. En el caso de \(\theta_{2}\) ningún estimador se acerca al valor real del parámetro y para \(\theta_{4}\) el primer valor para \(n=20\) supera el resto por lo que es inconsistente. A continuación se muestran los mejores estimadores por valor de \(n\). En general, el mejor estimador es \(\theta_{3}\) con \(n=50\).
table_best_data <- best_data[, c("n", "variable", "value")]
colnames(table_best_data) <- c("n", "Estimador", "MSE")
knitr::kable(table_best_data, caption = "MSE para mejores estimadores por n")| n | Estimador | MSE |
|---|---|---|
| 20 | theta1 | 2.039398 |
| 50 | theta3 | 1.924404 |
| 100 | theta3 | 1.983714 |
| 1000 | theta3 | 1.979917 |
El Teorema del Límite Central es uno de los más importantes en la inferencia estadística y habla sobre la convergencia de los estimadores como la proporción muestral a la distribución normal. Algunos autores afirman que esta aproximación es bastante buena a partir del umbral \(n>30\).
50% plantas enfermas:
Para todos los tamaños de muestra se realizaron 500 simulaciones.
Con los histogramas podemos confirmar que se logra una distribución
normal en todos los ejercicios. Esto lo validamos también con el test
Shapiro-Wilks, aunque modificamos los tamaños de muestra, siempre se
tuvo variabilidad en la simetría como se puede observar en el boxplot.
Las pruebas arrojan un sesgo bajo, el parámetro es cercano a la media
(en menor medida para muestras pequeñas). La varianza disminuye a medida
que las muestras son mas grandes, comprobando así el Teorema del Limite
Central. Finalmente se identifica en el diagrama qqnormal
que a mayor tamaño de la muestra, las simulaciones se acercan más a la
linea nominal.
Sabiendo que el test de Shapiro-Wilks nos permite rechazar la
hipótesis nula para un p-value menor o igual a \(0.05\) y al realizar el test para cada
\(n\), obtenemos que para \(n > 100\) la hipótesis nula no puede
rechazarse por lo que las distribuciones para \(n < 100\) no son normales.
10% plantas enfermas:
Al cambiar los tamaños de plantas enfermas en 10%, se puede
observar como para muestras pequeñas la varianza se incrementa
notablemente. Para muestras grandes no se identifica el impacto de forma
tan notable, situación evidenciable en las gráficas, principalmente
qqplot.
Del mismo modo que para el lote anterior, se revisan los valores
p-value obtenidos desde el test de Shapiro-Wilks. En este
caso no se puede rechazar la hipótesis nula para ninguna
distribución.
90% plantas enfermas:
Al cambiar los tamaños de plantas enfermas en 90%, se puede
observar como para muestras pequeñas la varianza se incrementa
notablemente. Para muestras grandes no se identifica el impacto de forma
tan notable, situación evidenciable en las gráficas, principalmente
qqplot.
Del mismo modo que para el lote anterior, se revisan los valores
p-value obtenidos desde el test de Shapiro-Wilks. En este
caso no se puede rechazar la hipótesis nula para ninguna
distribución.
suppressMessages(suppressWarnings(library(e1071)))
suppressMessages(suppressWarnings(library(pacman)))
suppressMessages(suppressWarnings(library(moments)))
suppressMessages(suppressWarnings(library(knitr)))
###############################################################################
#
# Functions
#
###############################################################################
# -----------------------------------------------------------------------------
# Sample from population with correct proportions
sample_population <- function(population, size, condition) {
test_sample <- sample(population, size)
p_sample <- sum(test_sample == condition)/size
return(p_sample)
}
# -----------------------------------------------------------------------------
# Take n simluations for ther required sample size
nsim_samples <- function(population, size, condition, nsim){
nsim_sample <- numeric(nsim)
for (i in 1:nsim) {
nsim_sample[i] = sample_population(population, size, condition)
}
return(nsim_sample)
}
# -----------------------------------------------------------------------------
# Run analysis for a predefined population and sample size
run_analysis <- function(population, sample_size) {
set.seed(123)
condition <- 1
nsim <- 1000
p_sample <- nsim_samples(population, sample_size, condition, nsim)
lines = mean(p_sample)
par(mfrow = c(1, 3))
hist(p_sample, main="")
abline(v = lines, col = "blue", lwd = 3)
boxplot(p_sample, las = 1)
abline(h = lines, col = "red")
qqnorm(p_sample, col= "black", lwd=2, lty=2, main="")
qqline(p_sample, col="red")
mtext(paste("Sample size ", sample_size), outer = TRUE, line = -1.5, cex = 1.2)
mtext(paste("Proportions ", "(",proportions[1], "-", proportions[2],")", ", n simulations", nsim), outer = TRUE, line = -2.8, cex = 0.8)
mean_values <- round(mean(p_sample),4)
median_values <- round(median(p_sample),4)
sd_values <- round(sd(p_sample),4)
var_values <- round(var(p_sample),4)
asim_values <- round(skewness(p_sample),4)
test_result <- shapiro.test(p_sample)
w_values = round(test_result$statistic,4)
p_values = round(test_result$p.value,4)
summary_table <- rbind(mean_values, median_values, sd_values, var_values, asim_values, w_values, p_values)
# Convert to a data frame and add row names
summary_table <- as.data.frame(summary_table)
rownames(summary_table) <- c("Mean", "Median", "sd", "Variance", "Asymmetry", "Shapiro w", "Shapiro p-value")
colnames(summary_table) <- c("Statistics")
# Print the table using kable
print(knitr::kable(summary_table, caption = paste("Estadísticas para tamaño de muestra", sample_size)))
}###############################################################################
#
# Population 1
#
###############################################################################
set.seed(123)
states <- c(0, 1)
proportions <- c(0.50, 0.50)
# Generate population
population1 <- sample(states, size = 1000, replace = TRUE, prob = proportions)
sample_size <- 500
condition <- 1
nsim <- 500
escenario500 = nsim_samples(population1, 500, 1, 500)
estimadormuestra <- sample_population(population1, 500, 1)
par(mfrow = c(1, 2))
hist(escenario500, main="")
line = mean(escenario500)
abline(v = line, col = "blue", lwd = 3)
media = mean(escenario500)
boxplot(escenario500, las = 1)
abline(h = media, col = "red")
mtext(paste("Sample size ", sample_size), outer = TRUE, line = -1.5, cex = 1.2)
mtext(paste("Proportions ", "(",proportions[1], "-", proportions[2],")", ", n simulations", nsim), outer = TRUE, line = -2.8, cex = 0.8)Comentario
skewness es 0.0416. De
acuerdo a lo anterior, podemos observar que el histograma no es
completamente simétrico, la asimetría lo confirma ya que es diferente de
0, la media para los 500 estimadores es muy cercana a los parámetros, lo
podemos confirmar en el boxplot. Podemos decir que los resultados con
insesgados con baja variabilidad.###############################################################################
#
# Population 1
#
###############################################################################
set.seed(123)
states <- c(0, 1)
proportions <- c(0.50, 0.50)
# Generate population
population1 <- sample(states, size = 1000, replace = TRUE, prob = proportions)
run_analysis(population1, 5)| Statistics | |
|---|---|
| Mean | 0.5028 |
| Median | 0.6000 |
| sd | 0.2345 |
| Variance | 0.0550 |
| Asymmetry | -0.1119 |
| Shapiro w | 0.9311 |
| Shapiro p-value | 0.0000 |
| Statistics | |
|---|---|
| Mean | 0.5057 |
| Median | 0.5000 |
| sd | 0.1601 |
| Variance | 0.0256 |
| Asymmetry | 0.0081 |
| Shapiro w | 0.9668 |
| Shapiro p-value | 0.0000 |
| Statistics | |
|---|---|
| Mean | 0.5091 |
| Median | 0.5333 |
| sd | 0.1311 |
| Variance | 0.0172 |
| Asymmetry | -0.0939 |
| Shapiro w | 0.9759 |
| Shapiro p-value | 0.0000 |
| Statistics | |
|---|---|
| Mean | 0.5052 |
| Median | 0.5000 |
| sd | 0.1130 |
| Variance | 0.0128 |
| Asymmetry | -0.0383 |
| Shapiro w | 0.9817 |
| Shapiro p-value | 0.0000 |
| Statistics | |
|---|---|
| Mean | 0.5063 |
| Median | 0.5000 |
| sd | 0.0887 |
| Variance | 0.0079 |
| Asymmetry | -0.0360 |
| Shapiro w | 0.9872 |
| Shapiro p-value | 0.0000 |
| Statistics | |
|---|---|
| Mean | 0.5048 |
| Median | 0.5000 |
| sd | 0.0695 |
| Variance | 0.0048 |
| Asymmetry | -0.0119 |
| Shapiro w | 0.9915 |
| Shapiro p-value | 0.0000 |
| Statistics | |
|---|---|
| Mean | 0.5046 |
| Median | 0.5000 |
| sd | 0.0631 |
| Variance | 0.0040 |
| Asymmetry | 0.0456 |
| Shapiro w | 0.9919 |
| Shapiro p-value | 0.0000 |
| Statistics | |
|---|---|
| Mean | 0.5044 |
| Median | 0.5000 |
| sd | 0.0487 |
| Variance | 0.0024 |
| Asymmetry | 0.0288 |
| Shapiro w | 0.9939 |
| Shapiro p-value | 0.0004 |
| Statistics | |
|---|---|
| Mean | 0.5054 |
| Median | 0.5050 |
| sd | 0.0319 |
| Variance | 0.0010 |
| Asymmetry | 0.0238 |
| Shapiro w | 0.9970 |
| Shapiro p-value | 0.0594 |
###############################################################################
#
# Population 2
#
###############################################################################
set.seed(123)
states <- c(0, 1)
proportions <- c(0.10, 0.90)
# Generate population
population2 <- sample(states, size = 1000, replace = TRUE, prob = proportions)
sample_size <- 500
condition <- 1
nsim <- 500
escenario500 = nsim_samples(population2, 500, 1, 500)
par(mfrow = c(1, 2))
hist(escenario500, main="")
line = mean(escenario500)
abline(v = line, col = "blue", lwd = 3)
media = mean(escenario500)
boxplot(escenario500, las = 1)
abline(h = media, col = "red")
mtext(paste("Sample size ", sample_size), outer = TRUE, line = -1.5, cex = 1.2)
mtext(paste("Proportions ", "(",proportions[1], "-", proportions[2],")", ", n simulations", nsim), outer = TRUE, line = -2.8, cex = 0.8)Comentario
skewness es 0.0421.###############################################################################
#
# Population 2
#
###############################################################################
set.seed(123)
states <- c(0, 1)
proportions <- c(0.10, 0.90)
# Generate population
population2 <- sample(states, size = 1000, replace = TRUE, prob = proportions)
run_analysis(population2, 5)| Statistics | |
|---|---|
| Mean | 0.9100 |
| Median | 1.0000 |
| sd | 0.1302 |
| Variance | 0.0170 |
| Asymmetry | -1.2741 |
| Shapiro w | 0.6807 |
| Shapiro p-value | 0.0000 |
| Statistics | |
|---|---|
| Mean | 0.9101 |
| Median | 0.9000 |
| sd | 0.0883 |
| Variance | 0.0078 |
| Asymmetry | -0.8177 |
| Shapiro w | 0.8268 |
| Shapiro p-value | 0.0000 |
| Statistics | |
|---|---|
| Mean | 0.9103 |
| Median | 0.9333 |
| sd | 0.0726 |
| Variance | 0.0053 |
| Asymmetry | -0.7483 |
| Shapiro w | 0.8824 |
| Shapiro p-value | 0.0000 |
| Statistics | |
|---|---|
| Mean | 0.9071 |
| Median | 0.9000 |
| sd | 0.0629 |
| Variance | 0.0040 |
| Asymmetry | -0.5975 |
| Shapiro w | 0.9192 |
| Shapiro p-value | 0.0000 |
| Statistics | |
|---|---|
| Mean | 0.9077 |
| Median | 0.9000 |
| sd | 0.0502 |
| Variance | 0.0025 |
| Asymmetry | -0.3821 |
| Shapiro w | 0.9507 |
| Shapiro p-value | 0.0000 |
| Statistics | |
|---|---|
| Mean | 0.9068 |
| Median | 0.9000 |
| sd | 0.0389 |
| Variance | 0.0015 |
| Asymmetry | -0.2666 |
| Shapiro w | 0.9716 |
| Shapiro p-value | 0.0000 |
| Statistics | |
|---|---|
| Mean | 0.9069 |
| Median | 0.9167 |
| sd | 0.0364 |
| Variance | 0.0013 |
| Asymmetry | -0.2860 |
| Shapiro w | 0.9740 |
| Shapiro p-value | 0.0000 |
| Statistics | |
|---|---|
| Mean | 0.9065 |
| Median | 0.9100 |
| sd | 0.0285 |
| Variance | 0.0008 |
| Asymmetry | -0.1688 |
| Shapiro w | 0.9867 |
| Shapiro p-value | 0.0000 |
| Statistics | |
|---|---|
| Mean | 0.9075 |
| Median | 0.9100 |
| sd | 0.0190 |
| Variance | 0.0004 |
| Asymmetry | -0.2546 |
| Shapiro w | 0.9896 |
| Shapiro p-value | 0.0000 |
###############################################################################
#
# Population 3
#
###############################################################################
set.seed(123)
states <- c(0, 1)
proportions <- c(0.90, 0.10)
# Generate population
population3 <- sample(states, size = 1000, replace = TRUE, prob = proportions)
sample_size <- 500
condition <- 1
nsim <- 500
escenario500 = nsim_samples(population3, 500, 1, 500)
par(mfrow = c(1, 2))
hist(escenario500, main="")
line = mean(escenario500)
abline(v = line, col = "blue", lwd = 3)
media = mean(escenario500)
boxplot(escenario500, las = 1)
abline(h = media, col = "red")
mtext(paste("Sample size ", sample_size), outer = TRUE, line = -1.5, cex = 1.2)
mtext(paste("Proportions ", "(",proportions[1], "-", proportions[2],")", ", n simulations", nsim), outer = TRUE, line = -2.8, cex = 0.8)Comentario
skewness es
-0.0421.###############################################################################
#
# Population 3
#
###############################################################################
set.seed(123)
states <- c(0, 1)
proportions <- c(0.90, 0.10)
# Generate population
population3 <- sample(states, size = 1000, replace = TRUE, prob = proportions)
run_analysis(population3, 5)| Statistics | |
|---|---|
| Mean | 0.0900 |
| Median | 0.0000 |
| sd | 0.1302 |
| Variance | 0.0170 |
| Asymmetry | 1.2741 |
| Shapiro w | 0.6807 |
| Shapiro p-value | 0.0000 |
| Statistics | |
|---|---|
| Mean | 0.0899 |
| Median | 0.1000 |
| sd | 0.0883 |
| Variance | 0.0078 |
| Asymmetry | 0.8177 |
| Shapiro w | 0.8268 |
| Shapiro p-value | 0.0000 |
| Statistics | |
|---|---|
| Mean | 0.0897 |
| Median | 0.0667 |
| sd | 0.0726 |
| Variance | 0.0053 |
| Asymmetry | 0.7483 |
| Shapiro w | 0.8824 |
| Shapiro p-value | 0.0000 |
| Statistics | |
|---|---|
| Mean | 0.0929 |
| Median | 0.1000 |
| sd | 0.0629 |
| Variance | 0.0040 |
| Asymmetry | 0.5975 |
| Shapiro w | 0.9192 |
| Shapiro p-value | 0.0000 |
| Statistics | |
|---|---|
| Mean | 0.0923 |
| Median | 0.1000 |
| sd | 0.0502 |
| Variance | 0.0025 |
| Asymmetry | 0.3821 |
| Shapiro w | 0.9507 |
| Shapiro p-value | 0.0000 |
| Statistics | |
|---|---|
| Mean | 0.0932 |
| Median | 0.1000 |
| sd | 0.0389 |
| Variance | 0.0015 |
| Asymmetry | 0.2666 |
| Shapiro w | 0.9716 |
| Shapiro p-value | 0.0000 |
| Statistics | |
|---|---|
| Mean | 0.0931 |
| Median | 0.0833 |
| sd | 0.0364 |
| Variance | 0.0013 |
| Asymmetry | 0.2860 |
| Shapiro w | 0.9740 |
| Shapiro p-value | 0.0000 |
| Statistics | |
|---|---|
| Mean | 0.0935 |
| Median | 0.0900 |
| sd | 0.0285 |
| Variance | 0.0008 |
| Asymmetry | 0.1688 |
| Shapiro w | 0.9867 |
| Shapiro p-value | 0.0000 |
| Statistics | |
|---|---|
| Mean | 0.0925 |
| Median | 0.0900 |
| sd | 0.0190 |
| Variance | 0.0004 |
| Asymmetry | 0.2546 |
| Shapiro w | 0.9896 |
| Shapiro p-value | 0.0000 |
datos_ejercicio = c(7.69, 4.97, 4.56, 6.49, 4.34, 6.24, 4.45)
generacion_muestras = sample(datos_ejercicio,1000,replace=TRUE)
matriz_muestras = matrix(generacion_muestras,nrow=1000,ncol=7, byrow =TRUE)
calculo_medias = apply(matriz_muestras,1,mean)
ic1=quantile(calculo_medias, probs=c(0.025, 0.975))
ic1=round(ic1,4)Intervalo de confianza para el método 1 es 4.7486, 6.4373.
ic2=c(2*mean(calculo_medias)-ic1[2], 2*mean(calculo_medias)-ic1[1])
ic2=round(ic2,4)
hist(calculo_medias, las=1, main="Histograma", ylab = "Frecuencias", xlab = "Medias", col="#034A94")
abline(v=ic1, col="#FF7F00",lwd=2)
abline(v=ic2, col="#0EB0C6",lwd=2)
legend("topright", legend=c("IC1", "IC2"), col=c("#FF7F00", "#0EB0C6"), lty=1, lwd=2, bty="n")Intervalo de confianza para el método 2 es 4.6113, 6.3.
Comentario
Para el ejercicio se usaron muestras significativas y podemos observa en el histograma que presenta tendencia a una distribución normal, con una leve asimetría a la derecha. Si es posible confiar en la estimaciones de ambos métodos, contienen los valores de las medias.