# Plot the standard normal density over [-3, 3] and mark both ends of the
# interval with dashed red vertical lines.
x_values <- seq(from = -3, to = 3, length.out = 100000)
plot(
  x_values, dnorm(x_values, mean = 0, sd = 1),
  type = "l"
)
abline(v = c(-3, 3), lty = 2, lwd = 2, col = "red")

# Percentage of the total probability mass lying within three standard
# deviations of the mean. By symmetry of the normal density,
# P(-3 < X < 3) = 2 * P(X < 3) - 1.
area_below_3 <- pnorm(3, mean = 0, sd = 1)
total_area <- pnorm(Inf, mean = 0, sd = 1)
area_between <- 100 * ((2 * area_below_3 - 1) / total_area)
print(paste(area_between, "%"))
## [1] "99.730020393674 %"
\[Area= \frac{(2 \times P(X<3) -1)}{P(total)} = 99.730020393674 \% \]
\[ P(\text{exactly one of each value}) = \left(\frac{1}{4}\right)^4 \times 4! = \frac{3}{32} \]
\[ P(\text{3 different values with one repeated}) = \frac{4!}{2!} \times 1 \times \left(\frac{1}{4}\right)^3 \times \binom{3}{1} = \frac{9}{16} \]
# Draw 200 names at random, with replacement, using unequal selection
# probabilities, then count how often each name was drawn. The seed is fixed
# so that the sample is reproducible.
set.seed(1)
values <- c("Buzz", "Michael", "Neil")
probabilities <- c(0.25, 0.01, 0.74)
sample_data_size_200 <- sample(
  values,
  size = 200, replace = TRUE, prob = probabilities
)
# Tabulate against the full set of possible values: a name that is never
# drawn (plausible for "Michael" at probability 0.01) then still appears with
# a count of 0, and the counts are always ordered like `values`. `dnn` keeps
# the header that table() would otherwise derive from the variable name.
table_of_200_data <- table(
  factor(sample_data_size_200, levels = values),
  dnn = "sample_data_size_200"
)
table_of_200_data
## sample_data_size_200
## Buzz Michael Neil
## 48 3 149
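For comparison, the theoretical probabilities are shown alongside the observed sample proportions and frequencies: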
# Side-by-side comparison of the theoretical probabilities with the
# proportions and counts observed in the sample. Wrapping the table in c()
# drops its table class but keeps the names, which become the row names.
data.frame(
  values = c("Buzz", "Michael", "Neil"),
  probabilities = c(0.25, 0.01, 0.74),
  # Proportions are taken relative to the number of draws actually recorded
  # in the table instead of repeating the sample size as a literal.
  sample_probabilities = c(prop.table(table_of_200_data)),
  sample_frequencies = c(table_of_200_data)
)
## values probabilities sample_probabilities sample_frequencies
## Buzz Buzz 0.25 0.240 48
## Michael Michael 0.01 0.015 3
## Neil Neil 0.74 0.745 149
# Four independent standard-normal samples of size 100, drawn one after the
# other from a fixed seed so the results are reproducible.
set.seed(12345)
random_normal_1 <- rnorm(100, mean = 0, sd = 1)
random_normal_2 <- rnorm(100, mean = 0, sd = 1)
random_normal_3 <- rnorm(100, mean = 0, sd = 1)
random_normal_4 <- rnorm(100, mean = 0, sd = 1)

# One density-scaled histogram per sample on a 2 x 2 grid, each overlaid with
# the normal density that uses that sample's own mean and standard deviation.
par(mfrow = c(2, 2))
local({
  samples <- list(
    random_normal_1, random_normal_2, random_normal_3, random_normal_4
  )
  ordinals <- c("1st", "2nd", "3rd", "4th")
  fills <- c("darkseagreen1", "lightsalmon", "moccasin", "paleturquoise")

  for (k in seq_along(samples)) {
    draws <- samples[[k]]
    hist(
      draws,
      freq = FALSE, xlim = c(-3, 3),
      main = paste0(ordinals[k], " set of data \n normal ", k),
      col = fills[k], xlab = ""
    )
    # curve() evaluates the expression over `x`; `draws` is looked up in the
    # calling environment.
    curve(
      dnorm(x, mean = mean(draws), sd = sd(draws)),
      add = TRUE, col = "blue", lty = 2, lwd = 2
    )
  }
})
# Four independent exponential samples (rate = 2) of size 100, drawn in this
# order from the current random-number stream.
random_exp_1 <- rexp(100, rate = 2)
random_exp_2 <- rexp(100, rate = 2)
random_exp_3 <- rexp(100, rate = 2)
random_exp_4 <- rexp(100, rate = 2)

# One density-scaled histogram per sample on a 2 x 2 grid, each overlaid with
# the true Exp(rate = 2) density. All four panels share xlim = c(0, 5) so they
# can be compared directly; previously the first panel used the default axis
# range while the other three were fixed to [0, 5].
par(mfrow = c(2, 2))
local({
  samples <- list(random_exp_1, random_exp_2, random_exp_3, random_exp_4)
  ordinals <- c("1st", "2nd", "3rd", "4th")
  fills <- c("royalblue", "seagreen1", "plum", "wheat")

  for (k in seq_along(samples)) {
    hist(
      samples[[k]],
      freq = FALSE, xlim = c(0, 5),
      main = paste0(ordinals[k], " set of data \n exp ", k),
      col = fills[k], xlab = ""
    )
    curve(
      dexp(x, rate = 2),
      add = TRUE, col = "red", lty = 2, lwd = 2
    )
  }
})
# Normal Q-Q plots of the four exponential samples on a 2 x 2 grid. Each
# panel plots the sample against standard-normal theoretical quantiles and
# adds a red reference line via qqline(), whose default reference
# distribution is already the normal (qnorm).
par(mfrow = c(2, 2))
local({
  samples <- list(random_exp_1, random_exp_2, random_exp_3, random_exp_4)
  ordinals <- c("1st", "2nd", "3rd", "4th")

  for (k in seq_along(samples)) {
    qqnorm(
      samples[[k]],
      main = paste0(
        "qq plot for both ", ordinals[k], " sets of data \n exp to norm"
      ),
      pch = 20, col = "blue"
    )
    qqline(samples[[k]], col = "red")
  }
})
From the plots, most of the points fit the reference line poorly, because an exponential distribution differs considerably from a normal distribution. The theoretical normal distribution used by R here has mean = 0 and sd = 1. The x-axis shows the theoretical quantiles of a standard normal distribution, which take both negative and positive values. The y-axis shows the observed quantiles of the exponential data, which are always positive.
# Q-Q plots of each exponential sample against the theoretical quantiles of
# an Exp(rate = 2) distribution, on a 2 x 2 grid. The dashed red line is
# y = x, i.e. where the points would fall under perfect agreement.
par(mfrow = c(2, 2))
local({
  samples <- list(random_exp_1, random_exp_2, random_exp_3, random_exp_4)
  ordinals <- c("1st", "2nd", "3rd", "4th")

  for (k in seq_along(samples)) {
    qqplot(
      qexp(ppoints(100), rate = 2), samples[[k]],
      # Spell out the axis labels that qqplot() would otherwise derive from
      # the argument expressions, so they do not show the loop variable.
      xlab = "qexp(ppoints(100), rate = 2)",
      ylab = paste0("random_exp_", k),
      main = paste0(
        "qq plot for both ", ordinals[k], " set of data \n exp to exp"
      ),
      pch = 20, col = "blue"
    )
    abline(a = 0, b = 1, lty = 2, col = "red")
  }
})
They appear to be a good fit: the points lie close to the reference line y = x.