Robert Shiller has all kinds of historical data available for free on his Yale website. We’ll grab some S&P 500 historical data.
library(readxl)
library(RCurl)
library(lubridate)
library(data.table)
# Download Shiller's spreadsheet, read its "Data" sheet, and derive a parsed
# Date column plus a period-over-period Returns column from the price column P.
url <- "http://www.econ.yale.edu/~shiller/data/ie_data.xls"
# create a temp file to receive the download
temp_file <- tempfile(fileext = ".xls")
# get the data; mode = "wb" writes the workbook in binary mode
download.file(url = url,
              destfile = temp_file,
              mode = "wb",
              quiet = TRUE)
# load the "Data" sheet from temp, skipping the first 7 rows of the sheet
data <- readxl::read_excel(temp_file, skip = 7, sheet = "Data")
# the sheet is in memory now, so the temp file can go
unlink(temp_file)
# create dates
# NOTE(review): Date appears to be stored as a number of the form YYYY.MM
# (confirm against the sheet). Coercing such a number straight to text turns
# October (e.g. 1871.10) into "1871.1", which then parses as January, so the
# value is formatted with two fixed decimals before parsing.
date_text <- sprintf("%.2f", as.numeric(data$Date))
data$Date <- parse_date_time(gsub("\\.", "-", date_text), "ym")
# create monthly returns: P[t] / P[t - 1] - 1
# shift() lags P by one row and pads with NA, so the first return is NA
data$Returns <- (data$P / data.table::shift(data$P, 1)) - 1
# drop the last few data points as there's some garbage in there...
data <- head(data, -5)
One chronic problem that we have in financial market research is that there’s never enough data. Below, I build 50 fake versions of the S&P 500 by resampling its historical monthly returns with replacement. Pretty simplistic, but effective.
# Build simulated price paths by resampling historical returns with
# replacement, then plot the real price series and the simulated ones.
n_paths <- 50    # number of fake series to generate
n_steps <- 500   # number of resampled returns per series

# bootstrap the data
# Only finite returns are eligible for sampling. Returns contains at least one
# NA (the first observation has no prior price), and a single NA drawn into a
# path would make cumprod() return NA for the rest of that path.
hist_returns <- data$Returns[is.finite(data$Returns)]
new <- replicate(n_paths, sample(hist_returns, n_steps, replace = TRUE)) + 1
# index to a start value of 1
new <- rbind(rep(1, ncol(new)), new)
# take cumprod down each column to build something that looks like a market
new <- apply(new, 2, cumprod)
# plot the orig data (log-scaled y axis)
plot(data$P, type = "l", main = "Original S&P Data", log = "y")
# plot the new data, one line per simulated path
matplot(new, type = "l", main = "Fake S&P Data", log = "y")