Exercise 8.4 – an optional one

# Fix the random number generator seed so that every sample drawn below is
# reproducible from one run of the script to the next.
set.seed(seed = 12345)

Generate 4 sets of 100 points from the N(0, 1) distribution

# Draw four independent samples of 100 values each from N(mean = 0, sd = 1).
# The samples consume the random number stream in this order, so the order of
# these lines determines which values each sample receives.
random_normal_1 <- rnorm(n = 100, mean = 0, sd = 1)
random_normal_2 <- rnorm(n = 100, mean = 0, sd = 1)
random_normal_3 <- rnorm(n = 100, mean = 0, sd = 1)
random_normal_4 <- rnorm(n = 100, mean = 0, sd = 1)

Compare each of the sets using a histogram

# Arrange the next four plots on a 2 x 2 grid.
par(mfrow = c(2, 2))

# One panel per Normal sample: a density-scaled histogram on a common
# x-range of [-3, 3], overlaid with a dashed blue Normal curve whose mean and
# sd are estimated from that same sample (not the theoretical 0 and 1).
invisible(Map(
  function(values, panel_title, fill) {
    hist(values, probability = TRUE, xlim = c(-3, 3),
         main = panel_title, col = fill, xlab = "")
    curve(dnorm(x, mean = mean(values), sd = sd(values)),
          lwd = 2, col = "blue", lty = 2, add = TRUE)
  },
  list(random_normal_1, random_normal_2, random_normal_3, random_normal_4),
  c("1st set of data \n normal 1",
    "2nd set of data \n normal 2",
    "3rd set of data \n normal 3",
    "4th set of data \n normal 4"),
  c("darkseagreen1", "lightsalmon", "moccasin", "paleturquoise")
))

Generate 4 sets of 100 points from the exp(2) distribution

# Draw four independent samples of 100 values each from the Exponential
# distribution with rate 2 (mean 1 / 2). As with the Normal samples, the
# order of these lines fixes which random values each sample receives.
random_exp_1 <- rexp(n = 100, rate = 2)
random_exp_2 <- rexp(n = 100, rate = 2)
random_exp_3 <- rexp(n = 100, rate = 2)
random_exp_4 <- rexp(n = 100, rate = 2)

Compare each of the sets, using a histogram, to the shape of the corresponding Exponential distribution

# Arrange the next four plots on a 2 x 2 grid.
par(mfrow = c(2, 2))

# One panel per Exponential sample: a density-scaled histogram overlaid with
# the theoretical Exp(rate = 2) density as a dashed red curve.
# Every panel is drawn with the same xlim = c(0, 5) so the four histograms
# share an x-scale and can be compared side by side.
invisible(Map(
  function(values, panel_title, fill) {
    hist(values, probability = TRUE, xlim = c(0, 5),
         main = panel_title, col = fill, xlab = "")
    curve(dexp(x, rate = 2), add = TRUE,
          col = "red", lty = 2, lwd = 2)
  },
  list(random_exp_1, random_exp_2, random_exp_3, random_exp_4),
  c("1st set of data \n exp 1",
    "2nd set of data \n exp 2",
    "3rd set of data \n exp 3",
    "4th set of data \n exp 4"),
  c("royalblue", "seagreen1", "plum", "wheat")
))

qq plots comparing the exponential data to a normal distribution

# Arrange the next four plots on a 2 x 2 grid.
par(mfrow = c(2, 2))

# One Normal Q-Q plot per Exponential sample: sample quantiles (y-axis)
# against standard Normal theoretical quantiles (x-axis), with a red
# reference line added by qqline(). qqline() uses qnorm as its reference
# distribution by default, so it is not passed explicitly.
invisible(Map(
  function(values, panel_title) {
    qqnorm(values, main = panel_title, col = "blue", pch = 20)
    qqline(values, col = "red")
  },
  list(random_exp_1, random_exp_2, random_exp_3, random_exp_4),
  c("qq plot for both 1st sets of data \n exp to norm",
    "qq plot for both 2nd sets of data \n exp to norm",
    "qq plot for both 3rd sets of data \n exp to norm",
    "qq plot for both 4th sets of data \n exp to norm")
))

Does there appear to be a good fit?

What values for the parameters of the Normal distribution will be used by R in the comparison?

Why do the x axes have negative numbers but the y axes not?

From the plots, most of the samples appear to be a good fit. The parameters of the Normal distribution used by R are mean = 0 and sd = 1. The x-axis represents the theoretical quantiles of a standard Normal distribution, which include both negative and positive numbers. The y-axis represents the observed quantiles of the exponential data, which are always positive.

Q-Q plots comparing the exponential data to the Exp(2) distribution

# Arrange the next four plots on a 2 x 2 grid.
par(mfrow = c(2, 2))

# Each panel plots the theoretical Exp(rate = 2) quantiles at 100 probability
# points (x-axis) against one Exponential sample (y-axis). The dashed red
# line is y = x (intercept 0, slope 1); points close to it indicate that the
# sample quantiles agree with the theoretical ones.
# The four calls are written out rather than wrapped in a helper because
# qqplot() builds its default axis labels from the argument expressions.
qqplot(qexp(ppoints(100), rate = 2), random_exp_1,
       pch = 20, col = "blue",
       main = "qq plot for both 1st set of data \n exp to exp")
abline(a = 0, b = 1, lty = 2, col = "red")

qqplot(qexp(ppoints(100), rate = 2), random_exp_2,
       pch = 20, col = "blue",
       main = "qq plot for both 2nd set of data \n exp to exp")
abline(a = 0, b = 1, lty = 2, col = "red")

qqplot(qexp(ppoints(100), rate = 2), random_exp_3,
       pch = 20, col = "blue",
       main = "qq plot for both 3rd set of data \n exp to exp")
abline(a = 0, b = 1, lty = 2, col = "red")

qqplot(qexp(ppoints(100), rate = 2), random_exp_4,
       pch = 20, col = "blue",
       main = "qq plot for both 4th set of data \n exp to exp")
abline(a = 0, b = 1, lty = 2, col = "red")

They appear to be a good fit.