Data609

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
library(knitr)
library(stringr)

Pg 193 #3

3. Using Monte Carlo simulation, write an algorithm to calculate an approximation to pi by considering the number of random points selected inside the quarter circle.

\[ Q : { x^2}+{y^2} = 1 \]

\[ x â¥ 0, y â¥ 0 \]

\[ S : {0 â¤ x â¤ 1} and {0 â¤ y â¤ 1} \]

\[ \frac{Ï}{4} = \frac{area of Q}{area of S} \]

Solution:

Input n random points to be generated

Step 1 Initialize the COUNTER to 0

Step 2 For i= 1 to n, repeat steps 3 to 5

Step 3 Calculate random points xi and yi that satisfy 0 â¤ x1 â¤ 1, 0 â¤ yi â¤ 1

Step 4 Calculate f(xi) for random xi and yi coordinates.

Step 5 If f(xi,yi) â¤ 1, then increment the COUNTER by 1. Otherwise, leave COUNTER as is.

Step 6 Calculate PI = 4 * ((1(1-0)COUNTER/n) / S)

Step 7 OUTPUT (PI) - STOP

# Estimate pi using sample size, n
est_pi <- function(n){
    myrand.x <- runif(n) # generate random x values between 0 and 1
    myrand.y <- runif(n) #  gnerate random y values between 0 and 1
    sum(myrand.x^2 + myrand.y^2 <=1) / n * 4
}

# specify various sample sizes
n <- c(10,100,1000,10000,100000,1000000)

# apply pi estimate function for specified sample sizes 
set.seed(1900)
est <- sapply(n, est_pi)
est

## [1] 2.800000 3.240000 3.144000 3.136800 3.140560 3.140464

Pg 194:#1

1. Use the middle-square method to generate

10 random numbers using x0 D 1009.
20 random numbers using x0 D 653217.
15 random numbers using x0 D 3043.
Comment about the results of each sequence. Was there cycling? Did each sequence degenerate rapidly?

# random number generator function

rand <- function(n, x0){
  
  # initialize with seed value
  x <- x0
  
  # number of digits required in squared number, assume odd length numbers have leading zero
  num_dig <- (nchar(x) + nchar(x) %% 2) * 2
  
  # helper function to calculate next number in sequence
  mid_sq <- function(z){
    
    # square number, add leading zeros if necessary
    z <- str_sub(paste(c(replicate(num_dig,"0"),z^2), collapse = ""),-num_dig)
    
    # return middle digits, convert to integer
    as.integer(str_sub(z,start = 1 + num_dig/4 , end = num_dig - num_dig/4))
  }
  
  # apply repeated iterations of mid_sq() based on previous output; append to vector
  if (n > 1) {
    for (i in 2:n) {
      x <- append(x, mid_sq(x[length(x)]))
    }
  }
  x
}

a) 10 random numbers starting with x0 = 1009

n <- 10
x0 <- 1009
rand1 <- rand(n, x0)
rand1

##  [1] 1009  180  324 1049 1004   80   64   40   16    2

b) 20 random numbers starting with x0 = 653217

n <- 20
x0 <- 653217
rand2 <- rand(n, x0)
rand2

##  [1] 653217 692449 485617 823870 761776 302674 611550 993402 847533 312186
## [11] 460098 690169 333248  54229 940784  74534 555317 376970 106380 316704

c) 15 random numbers using x0 = 3043.

n <- 15
x0 <- 3043
rand3 <- rand(n, x0)
rand3

##  [1] 3043 2598 7496 1900 6100 2100 4100 8100 6100 2100 4100 8100 6100 2100
## [15] 4100

d)Comment about the results of each sequence. Was there cycling? Did each sequence degenerate rapidly?

No cycling in the first sequence, but the sequence is degenerating rapidly. After the first ten numbers in the sequence, all values are equal to zero.
No cycling or degeneration in the first 20 draws.
The sequence begins to cycle at the 9th value in the sequence. At this point, we see the following repeating sequence : 6,100, 2,100, 4,100, 8,100

Page 201: #4

4.

# data
horse <- c("Euler's Folly","Leapin' Leibniz","Newton Lobell","Count Cauchy","Pumped up Poisson",
           "Loping L'Hopital","Steamin' Stokes","Dancin' Dantzig")
payoff <- c(7,5,9,12,4,35,15,4)
stake <- rep(1,8)
odds <- paste0(payoff,"-",stake)


mydf <- data.frame(number = 1:8,horse=horse, odds=odds)
mydf

##   number             horse odds
## 1      1     Euler's Folly  7-1
## 2      2   Leapin' Leibniz  5-1
## 3      3     Newton Lobell  9-1
## 4      4      Count Cauchy 12-1
## 5      5 Pumped up Poisson  4-1
## 6      6  Loping L'Hopital 35-1
## 7      7   Steamin' Stokes 15-1
## 8      8   Dancin' Dantzig  4-1

# probability of each horse winning
probs <- 1 - payoff / (payoff + stake)
probs

## [1] 0.12500000 0.16666667 0.10000000 0.07692308 0.20000000 0.02777778
## [7] 0.06250000 0.20000000

sum(probs)

## [1] 0.9588675

true_probs <- probs / sum(probs)
true_probs

## [1] 0.13036212 0.17381616 0.10428969 0.08022284 0.20857939 0.02896936
## [7] 0.06518106 0.20857939

sum(true_probs)

## [1] 1

# simulate 1k random U(0,1) variables 
cumu_prob <- cumsum(true_probs)
sims <- 1000
set.seed(5678)
myrand <- runif(sims)

# match random numbers with horse cumulative probability
winners <- findInterval(myrand,cumu_prob)+1

# summarize simulation results
sim_results <- data.frame(number = winners) %>%
  group_by(number) %>%
  count() %>%
  rename(wins = n) %>%
  mutate(sim_prob = sprintf("%.2f%%", 100* wins/sims))

# combine sim results with original odds data frame; print
mydf <- mydf %>%
  inner_join(sim_results, by="number")
mydf$true_prob <- sprintf("%.2f%%", 100*true_probs)

mydf

##   number             horse odds wins sim_prob true_prob
## 1      1     Euler's Folly  7-1  136   13.60%    13.04%
## 2      2   Leapin' Leibniz  5-1  186   18.60%    17.38%
## 3      3     Newton Lobell  9-1  112   11.20%    10.43%
## 4      4      Count Cauchy 12-1   90    9.00%     8.02%
## 5      5 Pumped up Poisson  4-1  198   19.80%    20.86%
## 6      6  Loping L'Hopital 35-1   25    2.50%     2.90%
## 7      7   Steamin' Stokes 15-1   52    5.20%     6.52%
## 8      8   Dancin' Dantzig  4-1  201   20.10%    20.86%

Which horse won the most races?

Dancinâ Dantzig

Which horse won the the fewest races?

Loping LâHopital

Do these results surprise you?

No, the simulation results are alligned with our long run expectations.

Page 211 # 3

In many situations, the time T between deliveries and the order quantity Q is not fixed. Instead, an order is placed for a specific amount of gasoline. Depending on how many orders are placed in a given time interval, the time to fill an order varies.You have no reason to believe that the performance of the delivery operation will change. Therefore, you have examined records for the past 100 deliveries and found the following lag times, or extra days, required to fill your order:

Construct a Monte Carlo simulation for the lag time submodel. If you have a handheld calculator or computer available, test your submodel by running 1000 trials and comparing the number of occurrences of the various lag times with the historical data.

lagtime <- 2:7
occurrences <- c(10, 25, 30, 20, 13, 2)

lagtime <- c(1, lagtime)
occurrences <- c(0, occurrences)

# calculating the probabilities
probs <- occurrences / sum(occurrences)
# Calculate cumulative probabilities
cum.probs <- cumsum(probs)

trials <- 1000
model <- approxfun(cum.probs, lagtime, method="linear")
sim <- model(runif(trials))

## Comparing the historical data with the data from the model:
  
model_probs <- as.numeric(table(round(sim))/trials)
df <- data.frame(lagtime, "historical"=probs, "model"=round(model_probs, 2))
df

##   lagtime historical model
## 1       1       0.00  0.05
## 2       2       0.10  0.18
## 3       3       0.25  0.27
## 4       4       0.30  0.25
## 5       5       0.20  0.18
## 6       6       0.13  0.07
## 7       7       0.02  0.00

The historical and model’s probability are alligned.

Page 221: #2

Use a smooth polynomial to fit the data in Table 5.18 to obtain arrivals and unloading times. Compare results to those in Tables 5.19 and 5.20.

# data from table 5.18

arrival.lb <- 0:12
arrival.lb <- arrival.lb * 10 + 15

arrival.ub <- arrival.lb + 9
arrival.ub[length(arrival.ub)] <- 145

arrival.mid <- (arrival.lb + arrival.ub) / 2
arrival.occur <- c(11, 35, 42, 61, 108, 193, 240, 207, 150, 85, 44, 21, 3)

unload.lb <- 0:8
unload.lb <- unload.lb * 5 + 45

unload.ub <- unload.lb + 4
unload.ub[length(unload.ub)] <- 90

unload.mid <- (unload.lb + unload.ub) / 2
unload.occur <- c(20, 54, 114, 103, 156, 221, 250, 171, 109)

arrival.probs <- arrival.occur / sum(arrival.occur)
arrival.cum <- cumsum(arrival.probs)

unload.probs <- unload.occur / sum(unload.occur)
unload.cum <- cumsum(unload.probs)

# Linear Models
arrival.lin <- approxfun(arrival.cum, arrival.mid, method="linear")
unload.lin <- approxfun(unload.cum, unload.mid, method="linear")

plot(arrival.cum, arrival.mid, main="Arrival Data")
points(arrival.cum, arrival.lin(arrival.cum), type="l")

plot(unload.cum, unload.mid, main="Unload Data")
points(unload.cum, unload.lin(unload.cum), type="l")

# Arrival
arrival.lm <- lm(arrival.mid ~ poly(arrival.cum, 3))
unload.lm <- lm(unload.mid ~ poly(unload.cum, 3))

plot(arrival.cum, arrival.mid, main="Arrival Data")
points(arrival.cum, arrival.lin(arrival.cum), type="l")
new.data <- data.frame(arrival.cum=seq(from=0, to=1, by=0.01))
y <- predict(arrival.lm, newdata=new.data)
points(new.data$arrival.cum, y, type="l", col="red")

plot(unload.cum, unload.mid, main="Unload Data")
points(unload.cum, unload.lin(unload.cum), type="l")
new.data <- data.frame(unload.cum=seq(from=0, to=1, by=0.01))
y <- predict(unload.lm, newdata=new.data)
points(new.data$unload.cum, y, type="l", col="red")

For unloading data, both models are good representations of the data.

Data609_hw4

Dhananjay Kumar

11/01/2017