Exercise 7.1

Consider a random variable X with X ~ Bin(20, 0.4).

X ~ Bin (20, 0.4)

n1 <- 20
p1 <- 0.4

aa <- pbinom (13, n1, p1)

miu1 <- n1*p1
sigma1 <- sqrt (n1*p1*(1-p1))

bb <- pnorm (13+0.5, miu1, sigma1)
## The probability using binomial distribution is 0.9935341.
## The probability using approximated normal distribution is 0.9939702.

X ~ Bin (50, 0.8)

n2 <- 50
p2 <- 0.8

aa <- pbinom (41 - 1, n2, p2, lower.tail = FALSE)

miu2 <- n2*p2
sigma2 <- sqrt (n2*p2*(1-p2))

bb <- pnorm (41 - 1 - 0.5, miu2, sigma2)
## The probability using binomial distribution is 0.4437404.
## The probability using approximated normal distribution is 0.4298419.

Visualize these two scenarios (plot)

___

  • The approximation is closer for the first case (B (20, 0.4)), than the other.

Exercise 7.2.

Reproduce the plots in the lecture notes

binnor <- function (n, p) {
  
plot (0:n, dbinom (0:n, n, p),
      col = "blue", type = "h",
      main = paste("X ~ Binomial (", n,   ","   ,p,   ")"),
      xlab = "", ylab = "probability")
points (0:n, dbinom (0:n, n, p),
        col = "blue", pch = 16)

miu <- n*p
sigma <- sqrt (n*p*(1-p))

x <- seq (0, n, length.out = 1000)
lines (x, dnorm (x, miu, sigma),
       col = "red", lwd = 2)

legend ("topright", legend = c ("Binomial", "Normal"),
        col = c ("blue", "red"),
        lty = rep (1, times = 2), cex = 1.2)
}

Use the function four times

par (mfrow = c (2, 2))

binnor (n = 20, p = 0.1)
binnor (n = 20, p = 0.3)
binnor (n = 40, p = 0.1)
binnor (n = 40, p = 0.3)

or use Loop function:

par (mfrow = c (2, 2))
ns <- c (20, 40)
ps <- c (0.1, 0.3)

for (n in ns) {
  for (p in ps) {
    binnor(n, p)
  }
}

Exercise 7.3.

Plot the \(\chi_3^2\) distribution over the range (0, 10), using the function dchisq().

x <- seq (0, 10, length.out = 1000)

par (cex.lab = 0.8)

plot (x, dchisq (x, df = 3),
      type = "l", col = "blue", lwd = 2,
      ylab = substitute (chi[3]^2))

Exercise 7.4.

I create a function to output the mean of samples

mean.dr <- function (n, m){
  mtx <- matrix (nrow = n, ncol = m) # This gives me the structure of a matrix
  
  mtx[1:n,] <- replicate (m, rchisq (n = n, df = 3))
  # I put values into the matrix
  
  M <- colMeans (mtx)
  
  meanM <- mean(M)
  sdM <- sd(M)
  
  
  # save them as "result"
  result_toplot <- as.numeric (M)
  
  # plot these sample means as a histogram
  hist (result_toplot, probability = TRUE,
        col = "cornflowerblue", border = "darkgreen",
        main = paste("histogram of means ( n =", n,   ", df = "   ,3,   ")"))
  
  # plot the corresponding curve of the normal distribution
  x <- seq (min(result_toplot), max(result_toplot), 
            length.out = 1000)
  mean_result <- meanM
  sd_result <- sqrt (6 / n)
  
  lines (x, dnorm (x, mean = mean_result, sd = sd_result),
         col = "red", lwd = 3)
}
  • Try it:
mean.dr (n = 500, m = 1000)

Use the function four times

par (mfrow = c (2, 2))

mean.dr (n = 100, m = 1000)
mean.dr (n = 200, m = 1000)
mean.dr (n = 100, m = 2000)
mean.dr (n = 200, m = 2000)

par (mfrow = c (1, 1))

Or use Loop function:

par (mfrow = c (2, 2))

ns <- c (100, 200)
ps <- c (1000, 2000)

for (n in ns) {
  for (p in ps) {
    mean.dr(n, p)
  }
}

par (mfrow = c (1, 1))

Exercise 7.5.

Using data from N (3, 6)

mean.nor <- function (n, m, miu, var){
  mtx <- matrix (nrow = n, ncol = m)
  
  mtx[1:n,] <- replicate (m, rnorm (n = n, mean = miu, sd = sqrt(var)))
  
  M <- colMeans (mtx)
  
  meanM <- mean(M)
  sdM <- sd(M)
  
  
  # save them as "result"
  result_toplot <- as.numeric (M)
  
  # plot these means as a histogram
  hist (result_toplot, probability = TRUE,
        col = "cornflowerblue", border = "darkgreen",
        main = paste("histogram of means (",substitute (miu), "=", miu,   ", variance ="   ,var,   ")"))
  
  x <- seq (min(result_toplot), max(result_toplot), 
            length.out = 1000)
  mean_result <- meanM
  sd_result <- sqrt (var/n)
  
  lines (x, dnorm (x, mean = mean_result, sd = sd_result),
         col = "red", lwd = 3)
}

Try it with different values

par (mfrow = c (2, 2))

ns <- c (100, 200)
ms <- c (1000, 2000)
mius <- 3
vars <- 6

for (n in ns) {
  for (m in ms) {
    for (miu in mius){
      for (var in vars){
            mean.nor(n, m, miu, var)
      }
    }
  }
}

Exercise 7.6.

This exercise is not critical, but you might be bored. Finish rewriting the loop with k specified in the for statement by replacing the command in the first line of the loop for k in terms of j with one with j in terms of k. Ensure that it produces the same matrix as the code above.

The matrix produced in the lecture notes:

std_norm_table <- matrix(nrow = 601, ncol = 2) 

for (j in 1:601) {
  k <- -3 + (j - 1) / 100
  std_norm_table[j,] <- c(k, round(x = pnorm(q = k), digits = 5))
}
colnames(std_norm_table) <- c("x", paste('U+03A6', "(x)", sep = ""))

The argument in the for loop () has changed instead to: for (k in seq(from = -3, to = 3, length = 601))

The another version of creating this matrix

std_norm_table76 <- matrix(nrow = 601, ncol = 2) 

for (k in seq(from = -3, to = 3, length = 601)) {
  
  j <- round( (  k - (-3)  ) *100  )
  # float / double
  std_norm_table76[j,] <- c(k, round(x = pnorm(q = k), digits = 5))
  
}

colnames(std_norm_table76) <- c("x", paste('U+03A6', "(x)", sep = ""))
  • j needs to be round because it represents the rows of a matrix, it should be an integer

  • try the function: typeof(0.8) ; typeof(1)

Take a look at the tale:

##          x U+03A6(x)
## [1,] -2.99   0.00139
## [2,] -2.98   0.00144
## [3,] -2.97   0.00149
## [4,] -2.96   0.00154
## [5,] -2.95   0.00159
## [6,] -2.94   0.00164

Exercise 7.7.

chi_table77 <- matrix(nrow = 601, ncol = 2) 

for (k in seq (from = 0, 
               to = 30, 
               length = nrow(chi_table77)
               )
     ) {
  
  j <- round (k * 20)
  chi_table77[j,] <- c (k, 
                             round (x = pchisq (q = k, df = 3), 
                                    digits = 5)
                             )
}
  colnames (chi_table77) <- c ("x", "cdf")

plot (chi_table77[,1], chi_table77[,2])

##         x     cdf
## [1,] 0.05 0.00293
## [2,] 0.10 0.00816
## [3,] 0.15 0.01477
## [4,] 0.20 0.02241
## [5,] 0.25 0.03086
## [6,] 0.30 0.03997

Exercise 7.8.

Amend your program from Exercise 7.5 or that from the answer to Exercise 7.4 so that it uses a loop to write the 4 plots. Is it possible to write this program without a loop or without repeating the code four times?

Exercise 7.9.

cd <- read.csv ("Country_data.csv")

cd$continent <- as.factor(cd$continent)
levels(cd$continent) <- c(levels(cd$continent), "NAM")
cd[is.na (cd$continent) == TRUE, 2] <- "NAM"

df_cd_log <- data.frame (
  age = log(cd$age_median),
  kcal = log(cd$kcals_day),
  gdp = log(cd$gdp_head),
  con = cd$continent
)


par(mfrow = c(1, 3))

for (i in 1:2) {
  for(j in 2:3) {
    if (i != j) {
      
      colors <- rainbow(6)
      
      plot (df_cd_log [, i], df_cd_log [, j],
            col = colors[df_cd_log$con],
            pch = 16,
            xlab = paste("log",colnames(df_cd_log)[i]), 
            ylab = paste("log",colnames(df_cd_log)[j]),
            )
      
      legend ("topleft", legend = levels(cd$continent),
              col = colors, pch = 20,
              title = "Continents")
      
      fit <- lm (df_cd_log [, j] ~ df_cd_log [, i])
      
      abline (fit, col = "red", lwd = 2)
      
    }
  }
}

par(mfrow = c(1, 1))