set.seed(123)
# create simulation function
roll_dice = function(rolls) {
tbl_results = data.table(roll = seq(1:rolls),
dice1 = sample(seq(1:6), rolls, replace = TRUE),
dice2 = sample(seq(1:6), rolls, replace = TRUE))
tbl_results[, dice1_2 := dice1 + dice2]
}
roll_dice_50 = roll_dice(50)
roll_dice_500 = roll_dice(500)
roll_dice_10000 = roll_dice(10000)
data.table(rolls = c('Actual probality', 50, 500, 10000),
rbind(c(1, 2, 3, 4, 5, 6, 5, 4, 3, 2, 1)/36,
prop.table(table(roll_dice_50$dice1_2)),
prop.table(table(roll_dice_500$dice1_2)),
prop.table(table(roll_dice_10000$dice1_2))))
## Warning in rbind(c(1, 2, 3, 4, 5, 6, 5, 4, 3, 2, 1)/36,
## prop.table(table(roll_dice_50$dice1_2)), : number of columns of result is
## not a multiple of vector length (arg 2)
## rolls 2 3 4 5 6
## 1: Actual probality 0.02777778 0.05555556 0.08333333 0.1111111 0.1388889
## 2: 50 0.06000000 0.08000000 0.06000000 0.0600000 0.1200000
## 3: 500 0.03000000 0.05600000 0.09200000 0.1020000 0.1620000
## 4: 10000 0.02810000 0.05550000 0.08240000 0.1157000 0.1482000
## 7 8 9 10 11 12
## 1: 0.1666667 0.1388889 0.1111111 0.08333333 0.05555556 0.02777778
## 2: 0.2000000 0.1600000 0.0800000 0.10000000 0.08000000 0.06000000
## 3: 0.1600000 0.1400000 0.0840000 0.08800000 0.06200000 0.02400000
## 4: 0.1614000 0.1380000 0.1116000 0.07830000 0.05390000 0.02690000
set.seed(123)
# create simulation function
roll_loaded_dice = function(rolls) {
tbl_results = data.table(roll = seq(1:rolls),
# load dice by making 1s three times more likely to appear than other number
dice1 = sample(c(1, 1, seq(1:6)), rolls, replace = TRUE),
dice2 = sample(c(1, 1, seq(1:6)), rolls, replace = TRUE))
tbl_results[, dice1_2 := dice1 + dice2]
}
roll_loaded_dice_50 = roll_loaded_dice(50)
roll_loaded_dice_500 = roll_loaded_dice(500)
roll_loaded_dice_10000 = roll_loaded_dice(10000)
# calculate actual probabilites
actual_probs = data.frame(dice = c(1, 1, seq(1:6)))
actual_probs = actual_probs %>% merge(actual_probs, by = NULL) %>%
mutate(dice1_2 = dice.x + dice.y) %>%
select(dice1_2)
data.table(rolls = c('Actual probality', 50, 500, 10000),
rbind(prop.table(table(actual_probs)),
prop.table(table(roll_loaded_dice_50$dice1_2)),
prop.table(table(roll_loaded_dice_500$dice1_2)),
prop.table(table(roll_loaded_dice_10000$dice1_2))))
## Warning in rbind(prop.table(table(actual_probs)),
## prop.table(table(roll_loaded_dice_50$dice1_2)), : number of columns of
## result is not a multiple of vector length (arg 2)
## rolls 2 3 4 5 6 7
## 1: Actual probality 0.140625 0.09375 0.109375 0.1250 0.140625 0.15625
## 2: 50 0.180000 0.10000 0.060000 0.0600 0.200000 0.14000
## 3: 500 0.152000 0.09400 0.102000 0.1400 0.160000 0.12000
## 4: 10000 0.137900 0.10240 0.107900 0.1251 0.150000 0.14970
## 8 9 10 11 12
## 1: 0.078125 0.0625 0.046875 0.03125 0.015625
## 2: 0.100000 0.0400 0.060000 0.06000 0.180000
## 3: 0.072000 0.0620 0.052000 0.03200 0.014000
## 4: 0.078000 0.0587 0.045400 0.03030 0.014600
Using 90, 95 and 99% confidence intervals and 1000 runs, I generally saw results within +/- 1-3% from the stated confidence interval.
# create function for simulation
integrate_sim = function(mu, sig, a, b, n, ci) {
tbl_results = data.table(sim = seq(1:n), Xi = runif(n) * (b - a ) + a)
tbl_results[, est := (b - a) * dnorm(Xi, mu, sig)]
tbl_results = tbl_results[, .(mean = mean(est), se = sd(est) / sqrt(n))]
ci = qnorm(mean(c(ci, 1)))
return(list(lowerCI = tbl_results$mean - ci * tbl_results$se,
upperCI = tbl_results$mean + ci * tbl_results$se))
}
exact = (pnorm(6.7, 5.8, 2.3) - pnorm(4.5, 5.8, 2.3))
df = data.frame(ci_90 = rep(0, 100), ci_95 = rep(0, 100), ci_99 = rep(0, 100))
for (i in 1:1000) {
c = 1
for (test in c(0.90, 0.95, 0.99)) {
ci = integrate_sim(5.8, 2.3, 4.5, 6.7, 50, test)
df[i, c] = ci$lowerCI < exact & exact < ci$upperCI
c = c + 1
}
}
colSums(df)/1000
## ci_90 ci_95 ci_99
## 0.881 0.932 0.972
set.seed(123)
# create simulation function for one season
run_sim = function(days = 90) {
# create data table to house inputs
tbl_items = data.table(item = c('oats', 'peas', 'beans', 'barley'),
cost = c(1.05, 3.17, 1.99, 0.95),
price = c(1.29, 3.76, 2.23, 1.65),
minq = c(0, 0, 0, 0),
maxq = c(10, 8, 14, 11))
tbl_items = tbl_items[, profit := price - cost]
# create data frame to house results
tbl_sim = data.frame(day = rep(0, days), total_revenue = rep(0, days),
total_cost = rep(0, days), total_profit = rep(0, days))
for (i in 1:days) {
qty = NULL
revenue = 0
cost = 0
profit = 0
for (r in 1:nrow(tbl_items)) {
qty = sample(tbl_items[r, minq]:tbl_items[r, maxq], 1)
revenue = revenue + tbl_items[r, price] * qty
cost = cost + tbl_items[r, cost] * qty
profit = profit + tbl_items[r, profit] * qty
}
tbl_sim[i,] = c(i, revenue, cost, profit)
}
return(data.table(tbl_sim))
}
# create simulation function for x seasons
run_sim_season = function(seasons = 1000, days = 90) {
tbl_daily = data.frame(day = rep(0, seasons*days), total_revenue = rep(0, seasons*days),
total_cost = rep(0, seasons*days), total_profit = rep(0, seasons*days))
tbl_season = data.frame(season = rep(0, seasons), total_revenue = rep(0, seasons),
total_cost = rep(0, seasons), total_profit = rep(0, seasons))
for (i in 1:seasons) {
## REPLACE LOGIC BELOW WITH A SIMPLE ARRAY AND THEN RETURN A DATA.TABLE
sim = run_sim(days = days)
tbl_daily[(i*days-days+1):(i*days),] = sim
tbl_season[i,] = c(season = i, sim[, lapply(.SD, sum), .SDcols = 2:4])
}
return(list(tbl_daily = data.table(tbl_daily), tbl_season = data.table(tbl_season)))
}
# run simulation x times and aggregate results
sim_results = run_sim_season(seasons = 100, days = 90)
summary(sim_results$tbl_daily)
## day total_revenue total_cost total_profit
## Min. : 1.0 Min. : 1.29 Min. : 0.95 Min. : 0.240
## 1st Qu.:23.0 1st Qu.:34.90 1st Qu.:27.62 1st Qu.: 6.707
## Median :45.5 Median :45.61 Median :36.78 Median : 9.020
## Mean :45.5 Mean :45.77 Mean :36.75 Mean : 9.019
## 3rd Qu.:68.0 3rd Qu.:56.72 3rd Qu.:45.80 3rd Qu.:11.300
## Max. :90.0 Max. :91.06 Max. :73.22 Max. :17.940
summary(sim_results$tbl_season)
## season total_revenue total_cost total_profit
## Min. : 1.00 Min. :3787 Min. :3036 Min. :733.6
## 1st Qu.: 25.75 1st Qu.:4038 1st Qu.:3231 1st Qu.:795.4
## Median : 50.50 Median :4128 Median :3311 Median :814.3
## Mean : 50.50 Mean :4120 Mean :3308 Mean :811.7
## 3rd Qu.: 75.25 3rd Qu.:4198 3rd Qu.:3375 3rd Qu.:828.5
## Max. :100.00 Max. :4507 Max. :3638 Max. :869.0
sim_results$tbl_daily %>% select(-day) %>%
gather('metric', 'value') %>%
mutate('metric' = factor(metric, levels = c('total_revenue', 'total_cost', 'total_profit'))) %>%
ggplot(aes(x = value)) +
geom_histogram() +
facet_grid(. ~ metric, scales = 'free')
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
sim_results$tbl_season %>% select(-season) %>%
gather('metric', 'value') %>%
mutate('metric' = factor(metric, levels = c('total_revenue', 'total_cost', 'total_profit'))) %>%
ggplot(aes(x = value)) +
geom_histogram() +
facet_grid(. ~ metric, scales = 'free')
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.