# The following code is taken directly from aschinchon's blog post. I haven't
# figured out yet why the result below happens, but it seems quite important
# to know. To be investigated further.
library(ggplot2)
# Simulation parameters.
steps <- 2 * 60 * 60  # Number of steps in each walk
walks <- 90           # Number of walks

# Simulate `walks` random walks of `steps` unit moves (-1 or +1, sampled with
# replacement) and record, for each walk:
#   neg.steps  - number of steps where the cumulative position is below zero
#   pos.steps  - number of steps where it is above zero
#   zero.steps - number of steps where it is exactly zero
#   ind.neg    - 1 when neg.steps >= pos.steps, otherwise 0
# The rows are written into a preallocated matrix instead of growing a data
# frame with rbind() on every iteration.
counts <- matrix(
  NA_real_,
  nrow = walks,
  ncol = 4,
  dimnames = list(NULL, c("neg.steps", "pos.steps", "zero.steps", "ind.neg"))
)
for (i in seq_len(walks)) {
  # `state` is intentionally a top-level variable: once the loop finishes it
  # still holds the path of the last simulated walk.
  state <- cumsum(sample(c(-1, 1), steps, replace = TRUE))
  neg <- sum(state < 0)
  pos <- sum(state > 0)
  counts[i, ] <- c(neg, pos, sum(state == 0), if (neg >= pos) 1 else 0)
}
results <- as.data.frame(counts)

# Share of each walk spent in its most frequent region (below, above or at
# zero). Only the three count columns enter the ratio; the 0/1 flag `ind.neg`
# must not be added to the denominator.
step_counts <- counts[, c("neg.steps", "pos.steps", "zero.steps"), drop = FALSE]
results$max.steps <- apply(step_counts, 1, max) / rowSums(step_counts)
# Plot of one of these walks: the path currently stored in `state`.
# Largest distance from zero reached by the walk; it makes the y axis
# symmetric around zero.
mfar <- max(abs(state))
walk_path <- data.frame(step = seq_along(state), location = state)
# ggplot() + geom_path() is used instead of qplot(), which is deprecated in
# current ggplot2 releases. The horizontal line marks location zero.
ggplot(walk_path, aes(x = step, y = location)) +
  geom_path() +
  geom_hline(yintercept = 0) +
  scale_x_continuous(
    limits = c(0, length(state)),
    breaks = c(1, steps / 4, steps / 2, 3 * steps / 4, steps)
  ) +
  scale_y_continuous(
    limits = c(-mfar, mfar),
    breaks = c(-mfar, -mfar / 2, 0, mfar / 2, mfar)
  ) +
  labs(
    title = "The First Walk Of Ms. Positive And Mr. Negative",
    x = "Step",
    y = "Location"
  ) +
  theme(
    plot.title = element_text(size = 20),
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20)
  )
# Summary of all walks: histogram of `max.steps` with three bins of width 0.2
# between 0.4 and 1, one fill colour per bin. The y-axis breaks run in steps
# of 10 up to the number of walks with max.steps > 0.8, plus 10.
ggplot(results, aes(x = max.steps)) +
  geom_histogram(
    breaks = seq(0.4, 1, by = 0.2),
    fill = c("blue", "orange", "red"),
    colour = "white"
  ) +
  scale_y_continuous(
    breaks = seq(0, sum(results$max.steps > 0.8) + 10, by = 10)
  ) +
  labs(
    title = paste0("What Happened After ", walks, " Walks?"),
    x = "Maximum Steps In The Same Location (%)",
    y = "Number of Walks"
  ) +
  theme_bw() +
  theme(plot.title = element_text(size = 20))
# Data for waterfall chart.
# One row per bar: the total number of walks, followed by the walks in each
# category, where the category is decided by `max.steps` (and by `ind.neg`
# for the two "awful" groups):
#   Satisfactory          max.steps <  0.6
#   Uncomfortable         0.6 <= max.steps < 0.8
#   Awful Walks for Mr. + max.steps >= 0.8 and ind.neg == 1
#   Awful Walks for Ms. - max.steps >= 0.8 and ind.neg == 0
# `start` and `end` are the running totals each bar spans; `amount` is the
# size of the bar.
# The frame is built with data.frame() rather than as.data.frame(cbind(...)):
# cbind() of character and numeric vectors produces a character matrix, which
# would turn start/end/amount into strings.
waterfall <- local({
  share <- results$max.steps
  total <- nrow(results)
  satisfactory <- sum(share < 0.6)
  uncomfortable <- sum(share >= 0.6 & share < 0.8)
  awful_neg <- sum(share >= 0.8 & results$ind.neg == 1)
  awful_pos <- sum(share >= 0.8 & results$ind.neg == 0)
  # Walks still unassigned after removing each category in turn.
  remaining <- total - cumsum(c(satisfactory, uncomfortable, awful_neg))
  data.frame(
    desc = c(
      "Total Walks", "Satisfactory Walks", "Uncomfortable Walks",
      "Awful Walks for Mr. +", "Awful Walks for Ms. -"
    ),
    type = c("a", "b", "c", "d", "d"),
    start = c(0, total, remaining),
    end = c(total, remaining, 0),
    amount = c(total, satisfactory, uncomfortable, awful_neg, awful_pos),
    stringsAsFactors = FALSE
  )
})
waterfall$id <- seq_along(waterfall$amount)
# Fix the factor levels to the row order so bars are not sorted alphabetically.
waterfall$desc <- factor(waterfall$desc, levels = waterfall$desc)
# Waterfall chart: one rectangle per row of `waterfall`, 0.9 units wide and
# centred on the row's `id`, spanning `end` to `start` on the y axis and
# coloured by `type`. The legend is hidden.
# The x mapping (`desc`) comes from the plot-level aes(), so it is not
# repeated inside geom_rect(). The title takes the number of walks from
# `walks` instead of hard-coding it.
ggplot(waterfall, aes(desc, fill = type)) +
  geom_rect(
    aes(xmin = id - 0.45, xmax = id + 0.45, ymin = end, ymax = start)
  ) +
  labs(
    title = paste0("The Ultimate Proof (After ", walks, " Walks)"),
    x = "Kind of Walk",
    y = "Number of Walks"
  ) +
  theme(
    plot.title = element_text(size = 20),
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20),
    legend.position = "none"
  )