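It is the theorem stating that, when you repeatedly draw sufficiently large samples from a population with finite variance, the means of those samples are approximately normally distributed around the population mean, so the average of the sample means roughly equals the mean of the population.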
# Session reset: clear the workspace, run garbage collection, clear the console.
# Remove every object that ls() reports in the current environment.
# NOTE(review): this does not detach packages or reset options, so it is not
# equivalent to starting a fresh R session.
rm(list = ls())
# Run the garbage collector; at top level its memory-usage table is printed.
gc()
# Recorded output of gc() from one run (figures vary by machine and session):
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 515625 27.6 1138545 60.9 NA 669337 35.8
## Vcells 946738 7.3 8388608 64.0 16384 1839792 14.1
# Write a form-feed character; presumably intended to clear the console
# (RStudio treats it that way; other front ends may simply print it).
cat("\f")
##### Step 1
# Simulate the "population": 1000 observations, each counting the successes in
# 1000 Bernoulli trials with success probability 0.05.
?rbinom # simulates a series of Bernoulli trials and returns the results
# Seed the random number generator so that these draws, and every resample
# taken from them later in the script, are reproducible from run to run.
set.seed(42)
mybinom <- rbinom(
  n = 1000,    # observations
  size = 1000, # trials/observation
  prob = 0.05  # prob of success
)
# The "##" lines below were recorded from an earlier, unseeded run; with the
# seed above the exact values will differ.
mybinom[1:16]
## [1] 58 66 33 47 48 57 54 59 62 50 57 41 56 50 57 53
# Mean of the simulated counts (binomial theory: 1000 * 0.05 = 50).
mu <- mean(mybinom)
mu
## [1] 50.101
# Standard deviation of the simulated counts
# (binomial theory: sqrt(1000 * 0.05 * 0.95), about 6.89).
sigma <- sd(mybinom)
sigma
## [1] 6.878817
# Attach psych for describe(), which prints a one-row table of summary
# statistics (n, mean, sd, median, skew, kurtosis, ...).
library(psych)
describe(mybinom)
# Recorded output from one run (exact values depend on the random draws):
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 1000 50.1 6.88 50 49.97 7.41 28 71 43 0.17 -0.01 0.22
# Histogram of the simulated counts; the x-axis label is deliberately blank.
hist(mybinom, main = "Histogram of Binomial Distribution (n=1000)", xlab = "")
# Preallocate a 1000 x 1 matrix of zeros that will later hold sample means.
?matrix # creates a matrix from the given set of values
?rep    # replicates the values in x the requested number of times
z <- matrix(rep(0, times = 1000), ncol = 1) # nrow follows from the data length
# Confirm the placeholder contains only zeros.
z[1:16]
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
# Skew and kurtosis are NaN because a constant column has zero variance.
describe(z)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 1000 0 0 0 0 0 0 0 0 NaN NaN 0
?sample # draws a sample of the given size from x, with or without replacement
# sample(x, size, replace = FALSE, prob = NULL)
# Fill every row of z with the mean of 100 values drawn with replacement from
# mybinom. The draws happen row by row, in order.
z[, 1] <- vapply(
  seq_len(nrow(z)),
  function(draw) mean(sample(mybinom, size = 100, replace = TRUE)),
  numeric(1)
)
# The "##" lines below were recorded from one run; exact values depend on the
# random draws.
z[1:16]
## [1] 50.43 48.47 50.34 49.99 50.82 50.50 51.23 50.12 49.16 49.45 50.96 49.43
## [13] 49.13 50.65 49.51 50.81
# Summary statistics of the 1000 sample means.
describe(z)
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 1000 50.1 0.69 50.12 50.1 0.73 47.92 52.43 4.51 -0.04 -0.09
## se
## X1 0.02
# Distribution of the sample means.
hist(z)
# Compare the sampling distribution of the mean across several sample sizes.
# The number of columns, the loop bounds and the column labels are all derived
# from `n`, so changing the sample sizes here cannot leave them out of sync.
n <- c(2, 6, 30, 100)

# One column per sample size, 1000 resampled means per column.
z <- matrix(0, nrow = 1000, ncol = length(n))

for (j in seq_along(n)) {        # indexes the columns (one per sample size)
  for (i in seq_len(nrow(z))) {  # indexes the rows of matrix
    # Mean of n[j] values drawn with replacement from mybinom.
    z[i, j] <- mean(sample(x = mybinom, size = n[j], replace = TRUE))
  }
}

# Yields "Sample size=2", "Sample size=6", "Sample size=30", "Sample size=100".
colnames(z) <- paste0("Sample size=", n)

# Per-column five-number summary plus mean. The "##" lines below were recorded
# from one run; exact values depend on the random draws.
summary(z)
## Sample size=2 Sample size=6 Sample size=30 Sample size=100
## Min. :37.50 Min. :40.17 Min. :46.83 Min. :47.91
## 1st Qu.:47.00 1st Qu.:48.33 1st Qu.:49.27 1st Qu.:49.62
## Median :50.00 Median :50.17 Median :50.10 Median :50.09
## Mean :50.19 Mean :50.23 Mean :50.10 Mean :50.08
## 3rd Qu.:53.00 3rd Qu.:52.33 3rd Qu.:50.97 3rd Qu.:50.56
## Max. :66.00 Max. :59.00 Max. :54.93 Max. :52.10
# Arrange the following plots on a 3-row by 2-column grid.
par(mfrow = c(3, 2))
# Confirm the number of simulated observations.
length(mybinom)
## [1] 1000
# First panel: the simulated counts themselves.
hist(mybinom, main = "Histogram of a Binomial Distribution, N=1000", xlab = "")
?hist
# Remaining panels: one histogram of sample means per column of z, drawn on a
# shared x-axis range so the spreads can be compared directly.
for (col_idx in seq_len(ncol(z))) {
  axis_label <- paste0(
    "Sample Size ", n[col_idx], " (Column ", col_idx, " from matrix)"
  )
  hist(
    z[, col_idx],
    main = "Histogram of Binomial Distribution",
    xlim = c(1, 100),
    xlab = axis_label
  )
}
Not attempted