Data 604: Homework 3

Kelton et al. (2014). Simio and Simulation: Modeling, Analysis, Applications (3rd Ed.)

Section 3.5, Problems 1, 5 & 17

1) Extend the simulation of throwing two dice in Section 3.2.1 in each of the following ways (one at a time, not cumulatively):

Instead of 50 throws, extend the spreadsheet to make 500 throws, and compare your results. Tap the F9 key to get a “fresh” set of random numbers and thus a fresh set of results.
Load the dice by changing the probabilities of the faces to be something other than uniform at 1/6 for each face. Be careful to ensure that your probabilities sum to 1.
Use @RISK (see Section 3.2.3), or another Excel add-in for the static Monte Carlo spreadsheet simulation, to make 10,000 throws of the pair of fair dice, and compare your results to the true probabilities of getting the sum equal to each of 2,3,…,12 as well as to the true expected value of 7.

library(ggplot2)
library(dplyr)
library(tidyr)

fair <- rep(1/6, 6)

get_sums <- function (n, probs) {
  dice <- data.frame(toss = 1:n, die_1 = integer(n), die_2 = integer(n), sum = integer(n))
  dice$die_1 <- sample(1:6, n, replace = TRUE, prob = probs)
  dice$die_2 <- sample(1:6, n, replace = TRUE, prob = probs)
  dice$sum <- dice$die_1 + dice$die_2

  return(dice)  
}

knitr::kable(head(get_sums(500, fair)), caption = "Results of rolling two fair dice")

Results of rolling two fair dice
toss	die_1	die_2	sum
1	2	2	4
2	4	5	9
3	5	3	8
4	3	4	7
5	3	1	4
6	2	4	6

sums <- data.frame(tosses = integer(10),
                   mean = numeric(10), 
                   min = numeric(10), 
                   max = numeric(10))
for (i in 1:10) {
  if (i < 6) {
    result <- get_sums(50, fair)$sum
    sums[i, ] <- c(50, mean(result), min(result), max(result))
  } else {
    result <- get_sums(500, fair)$sum
    sums[i, ] <- c(500, mean(result), min(result), max(result))
  }
}
knitr::kable(sums, caption = "Mean, minima, and maxima of sum of two fair dice from 5 iterations each of 50 and 500 tosses")

Mean, minima, and maxima of sum of two fair dice from 5 iterations each of 50 and 500 tosses
tosses	mean	min	max
50	6.900	2	12
50	7.660	3	12
50	6.940	2	12
50	6.980	2	12
50	7.040	3	11
500	7.012	2	12
500	7.114	2	12
500	6.786	2	12
500	6.900	2	12
500	6.900	2	12

# P(6) = 3 * P(x); x != 6
loaded <- c(rep(1/8, 5), 3/8)
sum(loaded)

## [1] 1

get_sums <- function (n, probs) {
  dice <- data.frame(toss = 1:n, die_1 = integer(n), die_2 = integer(n), sum = integer(n))
  dice$die_1 <- sample(1:6, n, replace = TRUE, prob = probs)
  dice$die_2 <- sample(1:6, n, replace = TRUE, prob = probs)
  dice$sum <- dice$die_1 + dice$die_2

  return(dice)  
}

sums <- data.frame(tosses = integer(5),
                   mean = numeric(5), 
                   min = numeric(5), 
                   max = numeric(5))

for (i in 1:5) {
  result <- get_sums(50, loaded)$sum
  sums[i, ] <- c(50, mean(result), min(result), max(result))
}
knitr::kable(sums, caption = "Mean, minima, and maxima of sum of two loaded dice (with 6 being 3 times more likely to appear than any other face value) in 5 iterations of 50 tosses")

Mean, minima, and maxima of sum of two loaded dice (with 6 being 3 times more likely to appear than any other face value) in 5 iterations of 50 tosses
tosses	mean	min	max
50	8.54	3	12
50	8.84	3	12
50	7.70	2	12
50	8.44	3	12
50	8.60	3	12

result <- get_sums(10000, fair)

obs_exp <- data.frame(sum = 2:12, observed = numeric(length(2:12)), expected = numeric(length(2:12)))

for (val in 2:12) {
  obs_exp[obs_exp$sum == val, "observed"] <- sum(result$sum == val) / 10000
  obs_exp[obs_exp$sum == val, "expected"]  <- 
    sum(rowSums(expand.grid(1:6, 1:6)) == val) / nrow(expand.grid(1:6, 1:6))
}

knitr::kable(obs_exp, caption = "Observed and expected probabilities for the sum of two fair dice tossed 10000 times")

Observed and expected probabilities for the sum of two fair dice tossed 10000 times
sum	observed	expected
2	0.0273	0.0277778
3	0.0616	0.0555556
4	0.0834	0.0833333
5	0.1077	0.1111111
6	0.1450	0.1388889
7	0.1630	0.1666667
8	0.1375	0.1388889
9	0.1124	0.1111111
10	0.0812	0.0833333
11	0.0538	0.0555556
12	0.0271	0.0277778

5) In the Monte Carlo integration of Section 3.2.2, add to the spreadsheet calculation the standard deviation calculation of the 50 individual values, and use that, together with the mean already in cell H4, to compute a 95% confidence interval on the exact integral in cell I4; does your confidence interval contain, or “cover” the exact integral? How often (tap F9 repeatedly and keep track manually)? Repeat all this but with a 90% confidence interval, and then with a 99% confidence interval.

mu <- 5.8
theta <- 2.3
a <- 4.5
b <- 6.7

exact <- pnorm(b, mu, theta) - pnorm(a, mu, theta)
x_i <- runif(50, a, b)
h <- dnorm(x_i, mu, theta)

nrml <- data.frame(cbind(i = 1:50, x_i = x_i, area = h * (b - a)))

mc <- data.frame(est = mean(nrml$area), 
                 sd = sd(nrml$area),
                 exact = exact)

mc <- mc %>% 
  mutate(ci_90 = paste0("(", 
                        formatC(est - (qnorm(0.95) * sd / sqrt(nrow(nrml))), 
                                digits = 3, format = "f"),
                        ", ",
                        formatC(est + (qnorm(0.95) * sd / sqrt(nrow(nrml))), 
                                digits = 3, format = "f"),
                        ")"),
         ci_95 = paste0("(", 
                        formatC(est - (qnorm(0.975) * sd / sqrt(nrow(nrml))), 
                                digits = 3, format = "f"),
                        ", ",
                        formatC(est + (qnorm(0.975) * sd / sqrt(nrow(nrml))), 
                                digits = 3, format = "f"),
                        ")"),
         ci_99 = paste0("(", 
                        formatC(est - (qnorm(0.995) * sd / sqrt(nrow(nrml))), 
                                digits = 3, format = "f"),
                        ", ",
                        formatC(est + (qnorm(0.995) * sd / sqrt(nrow(nrml))), 
                                digits = 3, format = "f"),
                        ")"))

knitr::kable(mc, col.names = c("Estimate", "SD", "Exact",
                               "90% CI", "95% CI", "99% CI"),
             align = rep("r", 6),
             caption = "Static Monte Carlo simulation of normal density function")

Static Monte Carlo simulation of normal density function
Estimate	SD	Exact	90% CI	95% CI	99% CI
0.3623702	0.0162009	0.3662509	(0.359, 0.366)	(0.358, 0.367)	(0.356, 0.368)

runs <- 10000
cover <- data.frame(est = numeric(runs),
                    sd = numeric(runs),
                    lo_90 = numeric(runs),
                    hi_90 = numeric(runs),
                    lo_95 = numeric(runs),
                    hi_95 = numeric(runs),
                    lo_99 = numeric(runs),
                    hi_99 = numeric(runs))
              
for (i in 1:runs) {
  x_i <- runif(50, a, b)
  h <- dnorm(x_i, mu, theta)

  nrml <- data.frame(cbind(i = 1:50, x_i = x_i, area = h * (b - a)))

  cover$est[i] <- mean(nrml$area)
  cover$sd[i] <- sd(nrml$area)
  cover$lo_90 <- cover$est - (qnorm(0.95) * cover$sd / sqrt(nrow(nrml))) 
  cover$hi_90 <- cover$est + (qnorm(0.95) * cover$sd / sqrt(nrow(nrml))) 
  cover$lo_95 <- cover$est - (qnorm(0.975) * cover$sd / sqrt(nrow(nrml))) 
  cover$hi_95 <- cover$est + (qnorm(0.975) * cover$sd / sqrt(nrow(nrml))) 
  cover$lo_99 <- cover$est - (qnorm(0.995) * cover$sd / sqrt(nrow(nrml))) 
  cover$hi_99 <- cover$est + (qnorm(0.995) * cover$sd / sqrt(nrow(nrml))) 
}

cover <- cover %>% 
  mutate(in_90 <- exact >= lo_90 & exact <= hi_90,
         in_95 <- exact >= lo_95 & exact <= hi_95,
         in_99 <- exact >= lo_99 & exact <= hi_99)

knitr::kable(data.frame(cbind(sum(cover$in_90) / runs,
                              sum(cover$in_95) / runs,
                              sum(cover$in_99) / runs)),
             col.names = c("90% CI coverage", 
                           "95% CI coverage", 
                           "99% CI coverage"),
             caption = paste("Proportion of",
                             runs,
                             "runs with confidence intervals containing exact integral"))

Proportion of 10000 runs with confidence intervals containing exact integral
90% CI coverage	95% CI coverage	99% CI coverage
0.8854	0.9351	0.9823

17) Walther has a roadside produce stand where he sells oats, peas, beans, and barley. He buys these products at per-pound wholesale prices of, respectively, $1.05, $3.17, $1.99, and $0.95; he sells them at per-pound retail prices of, respectively, $1.29, $3.76, $2.33, and $1.65. Each day the amount demanded (in pounds) could be as little as zero for each product, and as much as 10, 8, 14, and 11 for oats, peas, beans, and barley, respectively; he sells only whole-pound amounts, no partial pounds. Assume a discrete uniform distribution for daily demand for each product over its range; assume as well that Walther always has enough inventory to satisfy all demand. The summer selling season is 90 days, and demand each day is independent of demand on other days. Create a spreadsheet simulation that will, for each day as well as for the whole season, simulate Walther’s total cost, total revenue and total profit.

produce <- matrix(NA, nrow = 90, ncol = 4)
colnames(produce) <- c("oats", "peas", "beans", "barley")

crp <- as.data.frame(matrix(NA, nrow = 90, ncol = 3))
colnames(crp) <- c("cost", "revenue", "profit")

cost <- c(1.05, 3.17, 1.99, 0.95)
revenue <- c(1.29, 3.76, 2.33, 1.65)
n_max <- c(10, 8, 14, 11)

sim <- as.data.frame(matrix(NA, nrow = 1000, ncol = 7))
colnames(sim) <- c(colnames(produce), colnames(crp))

for (n in 1:nrow(sim)) {
  for (i in 1:4) {
    produce[, i] <- sample(0:n_max[i], 90, replace = TRUE, 
                         prob = rep((1 / length(0:n_max[i])), length(0:n_max[i])))
    sim[n, i] <- sum(produce[, i])
  }
  crp$cost <- as.numeric(produce %*% cost)
  crp$revenue <- as.numeric(produce %*% revenue)
  crp$profit <- crp$revenue - crp$cost
  
  sim[n, ]$cost <- sum(crp$cost)   
  sim[n, ]$revenue <- sum(crp$revenue)
  sim[n, ]$profit <- sum(crp$profit)
}

knitr::kable(produce[1:10, ], caption = "Produce demand for first 10 days of 1000th simulated summer")

Produce demand for first 10 days of 1000th simulated summer
oats	peas	beans	barley
7	7	4	4
9	2	10	6
0	4	9	8
5	6	4	11
9	1	12	4
8	1	11	2
3	4	11	10
4	4	9	1
9	1	9	7
0	4	12	2

knitr::kable(sim %>% summarize(avg_cost = mean(cost),
                               avg_revenue = mean(revenue),
                               avg_profit = mean(profit)),
             caption = "Average values for 1000 simulated summer seasons")

Average values for 1000 simulated summer seasons
avg_cost	avg_revenue	avg_profit
3331.553	4211.288	879.7348

res <- sim %>% 
  mutate(summer = row_number()) %>% 
  select(summer, cost:profit) %>%
  gather(name, value, cost:profit)

res$name <- factor(res$name, levels = c("cost", "revenue", "profit"))

ggplot(res) +
  geom_histogram(aes(value, fill = name), alpha = 0.6) +
  scale_fill_manual(name = "",
    values = c("cost" = "red", "revenue" = "blue", "profit" = "green"),
    labels = c("Cost", "Profit", "Revenue")) +
  labs(x = "Dollars", 
       y = "Count", 
       title = paste("Distribution of Total Cost, Revenue & Profit across",
                     "1000 Simulated 90-day Summer Seasons",
                     sep = "\n")
  )

ggplot(res) +
  geom_line(aes(summer, value, color = name, group = name), alpha = 0.6) +
  scale_color_manual(name = "",
    values = c("cost" = "red", "revenue" = "blue", "profit" = "green"),
    labels = c("Cost", "Profit", "Revenue")) +
  labs(x = "Summer", 
       y = "Dollars", 
       title = "Total Cost, Revenue & Profit for each Simulated Summer Season")

Data 604: Homework 3

Judd Anderman

September 17, 2017

Kelton et al. (2014). Simio and Simulation: Modeling, Analysis, Applications (3rd Ed.)