rm(list = ls()) # Clear environment
gc() # Clear unused memory
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 533803 28.6 1190437 63.6 NA 669428 35.8
## Vcells 986611 7.6 8388608 64.0 16384 1851749 14.2
cat("\f") # Clear the console
# dev.off() # Clear par(mfrow=c(3,2)) type commands
library("psych")
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 150 39.4 4.91 39 39.51 4.45 27 53 26 -0.08 0.01 0.4
The rbinom function was used to generate a random sample from a given binomial distribution. The distribution is governed by the parameters of sample size (n=150) and probability (p=.4). The size corresponds to the number of trials (100).
The mean of the binomial distribution is 39.4, with a standard deviation of 4.91 (measure of variation around the mean).
mu <- mean(random.binomial)
sigma <- sd(random.binomial)
hist(x = random.binomial,
main = "Binomial Distribution (n = 150, p = .4)",
xlab = ""
)
#creating emty matrix to store means generated from a random sample
z <- matrix(data = rep(x = 0, times = 10000),
nrow = 10000,
ncol = 1)
describe(z)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 10000 0 0 0 0 0 0 0 0 NaN NaN 0
for (i in 1:10000){
z[i,] <- mean(sample( x = random.binomial, #random.binomial vector used to generate distribution
size = 1000, #taking a random sample of 1000 observations
replace = TRUE #binomial sampling must be with replacement
))}
describe(z)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 10000 39.4 0.16 39.4 39.4 0.16 38.81 39.94 1.13 -0.03 -0.03 0
hist(z, xlab = "", main = "Histogram of Sample Mean (n = 1000)")
#expanding matrix
z.1 <- matrix(data = rep(x = 0, times = 40000),
nrow = 10000,
ncol = 4)
n <- c(50, 100, 1000, 5000) #sample size
for (j in 1:4){ # indexing columns of matrix, each corresponds to a different sample size
for (i in 1:10000){ # indexing rows of matrix
z.1[i,j] <- mean(sample( x = random.binomial, #computing mean/assigning to row
size = n[j],
replace = TRUE
))}}
colnames(z.1) <- c("n=50","n=100","n=1000","n=5000")
summary(z.1)
## n=50 n=100 n=1000 n=5000
## Min. :36.52 Min. :37.52 Min. :38.85 Min. :39.18
## 1st Qu.:38.92 1st Qu.:39.07 1st Qu.:39.30 1st Qu.:39.35
## Median :39.40 Median :39.40 Median :39.40 Median :39.40
## Mean :39.40 Mean :39.40 Mean :39.40 Mean :39.40
## 3rd Qu.:39.88 3rd Qu.:39.73 3rd Qu.:39.51 3rd Qu.:39.45
## Max. :42.38 Max. :41.32 Max. :39.97 Max. :39.66
par(mfrow=c(3,2))
length(random.binomial)
## [1] 150
hist(x = random.binomial,
main = "Histogram of Uniform Distribution, N=10,000",
xlab = ""
)
for (k in 1:4){
hist(x = z.1[,k], # matrix column 1-4 which contain sample means of randomly chosen samples from uniform dist
main = "Histogram of Sample Mean of Uniform Distribution",
xlim = c(36, 42), # plot the domain of X axis from x=36 to x=42 only
xlab = paste0("Sample Size ", n[k], " (Column ", k, " from matrix)")
)}
#creating emty matrix to store means generated from a random sample
z.2 <- matrix(data = rep(x = 0, times = 10000),
nrow = 10000,
ncol = 1)
describe(z.2)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 10000 0 0 0 0 0 0 0 0 NaN NaN 0
for (i in 1:10000) {
z.2[i,] <- median(sample(x = random.binomial, size = 1000, replace = TRUE))
}
describe(z.2)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 10000 39.21 0.4 39 39.14 0 39 40 1 1.44 0.11 0
hist(z.2, xlab = "", main = "Histogram of Sample Median (n = 1000)")
z.3 <- matrix(data = rep(x = 0, times = 40000),
nrow = 10000,
ncol = 4)
n <- c(50, 100, 1000, 5000) #sample size
for (j in 1:4){ # indexing columns of matrix, each corresponds to a different sample size
for (i in 1:10000){ # indexing rows of matrix
z.3[i,j] <- median(sample( x = random.binomial, #computing mean/assigning to row
size = n[j],
replace = TRUE
))}}
colnames(z.3) <- c("n=50","n=100","n=1000","n=5000")
summary(z.3)
## n=50 n=100 n=1000 n=5000
## Min. :36.5 Min. :37.00 Min. :39.00 Min. :39.00
## 1st Qu.:39.0 1st Qu.:39.00 1st Qu.:39.00 1st Qu.:39.00
## Median :39.0 Median :39.00 Median :39.00 Median :39.00
## Mean :39.4 Mean :39.37 Mean :39.21 Mean :39.03
## 3rd Qu.:40.0 3rd Qu.:40.00 3rd Qu.:39.00 3rd Qu.:39.00
## Max. :43.0 Max. :42.00 Max. :40.00 Max. :40.00
par(mfrow=c(3,2))
length(random.binomial)
## [1] 150
hist(x = random.binomial,
main = "Histogram of Binomial Distribution, N=10,000",
xlab = ""
)
for (k in 1:4){
hist(x = z.3[,k], # matrix column 1-4 which contain sample means of randomly chosen samples from uniform dist
main = "Histogram of Sample Median of Binomial Distribution",
xlim = c(36, 42), # plot the domain of X axis from x=38 to x=42 only
xlab = paste0("Sample Size ", n[k], " (Column ", k, " from matrix)")
)}