# make quantmod available
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
# Download Facebook's daily price history from Yahoo Finance.
# auto.assign is set explicitly so the data keeps landing in a variable named
# `FB` once quantmod switches its default to auto.assign=FALSE (announced for 0.5-0).
# NOTE(review): Facebook's ticker was later renamed to META - confirm "FB" still resolves.
getSymbols("FB", src = "yahoo", auto.assign = TRUE)
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
##
## WARNING: There have been significant changes to Yahoo Finance data.
## Please see the Warning section of '?getSymbols.yahoo' for details.
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.yahoo.warning"=FALSE).
## Warning in as.POSIXlt.POSIXct(Sys.time()): unknown timezone 'zone/tz/2018e.
## 1.0/zoneinfo/America/New_York'
## [1] "FB"
# turn the downloaded price series into a plain data frame
FB <- data.frame(FB)
# the trading dates are stored as row names; copy them into a Date column
FB[["date"]] <- as.Date(rownames(FB))
# preview the first six rows
head(FB, n = 6)
## FB.Open FB.High FB.Low FB.Close FB.Volume FB.Adjusted
## 2012-05-18 42.05 45.00 38.00 38.23 573576400 38.23
## 2012-05-21 36.53 36.66 33.00 34.03 168192700 34.03
## 2012-05-22 32.61 33.59 30.94 31.00 101786600 31.00
## 2012-05-23 31.37 32.50 31.36 32.00 73600000 32.00
## 2012-05-24 32.95 33.21 31.77 33.03 50237200 33.03
## 2012-05-25 32.90 32.95 31.11 31.91 37149800 31.91
## date
## 2012-05-18 2012-05-18
## 2012-05-21 2012-05-21
## 2012-05-22 2012-05-22
## 2012-05-23 2012-05-23
## 2012-05-24 2012-05-24
## 2012-05-25 2012-05-25
# make ggplot2 available
library(ggplot2)
# time series plot of Facebook's daily closing price
ggplot(FB, aes(x = date, y = FB.Close)) +
  geom_line() +
  labs(x = "Year", y = "Facebook Closing Price")
# make dplyr available
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
##
## first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Create a one-day lag of the closing price.
# dplyr::lag() takes the shift size in `n`; `k` is the argument name used by
# stats::lag() and is not a dplyr::lag() parameter, so it was never applied.
# The namespace prefix makes sure dplyr's version is used regardless of the
# order in which packages were attached.
FB$lFB.Close <- dplyr::lag(FB$FB.Close, n = 1)
# correlate closing price with the previous day's closing price,
# dropping the row where the lag is NA
cor(FB$FB.Close, FB$lFB.Close, use = "complete.obs")
## [1] 0.9992317
Do you expect the previous day closing price to be a good predictor of the current closing price based on the correlation? Explain.
The correlation of 0.999 between the previous day’s closing price and the current day’s closing price is a very strong, positive correlation. Therefore I expect the previous day’s closing price to be a good predictor of the current closing price. Put another way, the two series move together almost perfectly: when the previous day’s closing price is high, the current day’s closing price tends to be high as well, and vice versa.
# the stocks chosen by the student will not necessarily match
# but their code should be identical
# Download both comparison stocks from Yahoo Finance. auto.assign is set
# explicitly so each series keeps being stored under its ticker name once
# quantmod changes its default to auto.assign=FALSE.
getSymbols("MSFT", src = "yahoo", auto.assign = TRUE)
## [1] "MSFT"
getSymbols("AMZN", src = "yahoo", auto.assign = TRUE)
## [1] "AMZN"
# convert both price series to plain data frames
MSFT <- data.frame(MSFT)
AMZN <- data.frame(AMZN)
# the trading dates are stored as row names; copy them into a Date column
MSFT$date <- as.Date(row.names(MSFT))
AMZN$date <- as.Date(row.names(AMZN))
What companies did you choose?
Microsoft and Amazon.
What are their stock symbols?
Microsoft’s symbol is MSFT. Amazon’s symbol is AMZN.
Why do you think these companies might be good predictors for the price of Facebook?
I believe these two companies are good predictors of the price of Facebook because they are both in the same industry (technology). Generally, there is a positive correlation between companies in the same industry; if the price of one company increases, the price of another company within the same industry will tend to increase as well.
# make dplyr available
library(dplyr)
# create one-day lags of each comparison stock's closing price
# (dplyr::lag is called by namespace so the result does not depend on which
# package's lag() happens to be first on the search path)
MSFT$lMSFT.Close <- dplyr::lag(MSFT$MSFT.Close, n = 1)
AMZN$lAMZN.Close <- dplyr::lag(AMZN$AMZN.Close, n = 1)
# keep only the merge key (date) and the lagged closing price
MSFT <- subset(MSFT, select = c(date, lMSFT.Close))
AMZN <- subset(AMZN, select = c(date, lAMZN.Close))
# inner-join both data frames onto the Facebook data by date;
# all = FALSE keeps only dates present in both inputs
# (FALSE is spelled out because F is an ordinary, reassignable variable)
FB <- merge(FB, MSFT, by = "date", all = FALSE)
FB <- merge(FB, AMZN, by = "date", all = FALSE)
# show the first few rows of FB to make sure you have:
# FB.Close, lFB.Close, and the lag of two additional stocks
head(FB)
## date FB.Open FB.High FB.Low FB.Close FB.Volume FB.Adjusted
## 1 2012-05-18 42.05 45.00 38.00 38.23 573576400 38.23
## 2 2012-05-21 36.53 36.66 33.00 34.03 168192700 34.03
## 3 2012-05-22 32.61 33.59 30.94 31.00 101786600 31.00
## 4 2012-05-23 31.37 32.50 31.36 32.00 73600000 32.00
## 5 2012-05-24 32.95 33.21 31.77 33.03 50237200 33.03
## 6 2012-05-25 32.90 32.95 31.11 31.91 37149800 31.91
## lFB.Close lMSFT.Close lAMZN.Close
## 1 NA 29.72 218.36
## 2 38.23 29.27 213.85
## 3 34.03 29.75 218.11
## 4 31.00 29.76 215.33
## 5 32.00 29.11 217.28
## 6 33.03 29.07 215.24
# correlation of Facebook's close with each lagged comparison stock,
# using only rows where both values are present
with(FB, cor(FB.Close, lMSFT.Close, use = "complete.obs"))
## [1] 0.9413599
with(FB, cor(FB.Close, lAMZN.Close, use = "complete.obs"))
## [1] 0.9029125
# scatter plot: Facebook close against lagged Microsoft close
ggplot(FB, aes(x = FB.Close, y = lMSFT.Close)) +
  geom_point()
# scatter plot: Facebook close against lagged Amazon close
ggplot(FB, aes(x = FB.Close, y = lAMZN.Close)) +
  geom_point()
Based on the calculated correlation coefficients and plots, which stocks do you expect to be good predictors of Facebook stock price?
Both Microsoft (MSFT) and Amazon (AMZN) can be expected to be good predictors of Facebook’s (FB) stock price because of the strong positive correlation coefficients of 0.941 and 0.903, respectively. These strong positive correlations mean that most of the time when the respective company’s stock price increases, Facebook’s stock price will increase as well.
# fit a linear regression of Facebook's close on the three lagged closes
mod <- lm(
  FB.Close ~ lFB.Close + lMSFT.Close + lAMZN.Close,
  data = FB
)
# print coefficients, significance tests and fit statistics
summary(mod)
##
## Call:
## lm(formula = FB.Close ~ lFB.Close + lMSFT.Close + lAMZN.Close,
## data = FB)
##
## Residuals:
## Min 1Q Median 3Q Max
## -41.127 -0.752 -0.022 0.878 14.566
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1257231 0.3048903 0.412 0.680
## lFB.Close 1.0019796 0.0030310 330.582 <2e-16 ***
## lMSFT.Close -0.0005448 0.0151167 -0.036 0.971
## lAMZN.Close -0.0003265 0.0005797 -0.563 0.573
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.076 on 1601 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.9985, Adjusted R-squared: 0.9985
## F-statistic: 3.472e+05 on 3 and 1601 DF, p-value: < 2.2e-16
# in-sample forecast: predict on the same data frame the model was fitted to
predicted <- predict(mod, newdata = FB)
# correlation between actual and predicted closing prices
cor(FB$FB.Close, predicted, use = "complete.obs")
## [1] 0.9992324
# mean absolute error; na.rm = TRUE skips rows with no prediction
# (TRUE is spelled out because T is an ordinary, reassignable variable)
mean(abs(FB$FB.Close - predicted), na.rm = TRUE)
## [1] 1.22224
Which variables were significant in the model?
The only variable that was significant in this model was the lag of Facebook’s close price. This is because this variable has a p-value of p < 0.001.
Did the model results match your expectations from #8? Briefly discuss.
The model did not match my expectations from #8. I expected Microsoft’s and Amazon’s stock prices to be strong predictors of Facebook’s stock price. Even though there was a strong correlation between each of the two companies’ stock prices and Facebook’s, the lack of significance for the lagged Microsoft and lagged Amazon stock price variables indicates that these two companies are not good predictors of Facebook’s stock price in our model.
Make a forecast using the model.
From the output we can derive the model: Predicted Facebook closing price = 0.126 + 1.002(lFB.Close) - 0.001(lMSFT.Close) - 0.0003(lAMZN.Close)
On October 1 Facebook’s stock price was $162.44, Microsoft’s was $115.61, and Amazon’s was $2,004.36. Using the model, this yields a predicted Facebook closing price for October 2 of about $162.17. Facebook’s true stock price on October 2 was $159.33.
Evaluate the forecast accuracy using correlation and mean absolute error (MAE). Discuss your model’s performance in terms of each.
In terms of correlation, the forecast is accurate. There is a correlation coefficient of 0.999 indicating a very strong positive correlation.
The mean absolute error shows that even though there is a strong correlation between the predicted stock price and the true stock price, there is still variability. The MAE of 1.222 shows that on average the predicted stock price will be off by $1.22.
# ---- backtest: expanding training window, three lagged predictors ----
# forecasting starts on the first trading day after this date
initialTrainDate <- as.Date("2014-12-31")
# row numbers of every day that will be forecast
forecastDates <- which(FB$date > initialTrainDate)
# one slot per row of FB; stays NA for rows that are never forecast
predicted.series <- rep(NA, nrow(FB))
# starting cash
bank <- 100000
# number of shares currently owned
holding <- 0
for (i in forecastDates) {
  # train on every row before the row being forecast
  train.data <- FB[seq(from = 1, to = i - 1), ]
  # refit the model on the data available so far
  mod <- lm(FB.Close ~ lFB.Close + lMSFT.Close + lAMZN.Close, data = train.data)
  # row i supplies the predictors; they are lagged closes, i.e. the
  # previous row's prices
  test.data <- FB[i, ]
  # forecast the closing price for row i and store it
  priceTomorrow <- predict(mod, newdata = test.data)
  predicted.series[i] <- priceTomorrow
  # the latest actual close is the last training row; every trade in this
  # iteration is executed at this price
  priceToday <- tail(train.data$FB.Close, 1)
  # forecast is above today's close: buy 100 shares if the cash covers it
  if (predicted.series[i] > priceToday) {
    if (bank >= 100 * priceToday) {
      bank <- bank - (100 * priceToday)
      holding <- holding + 100
    }
  }
  # forecast is below today's close: sell everything we hold
  if (priceTomorrow < priceToday) {
    if (holding > 0) {
      bank <- bank + (priceToday * holding)
      holding <- 0
    }
  }
}
# close out: sell whatever is still held at the last observed price
(bank <- bank + (priceToday * holding))
## [1] 212206
holding <- 0
How much did you gain / lose using your model and trading strategy?
I gained $112,206 from this strategy.
# ---- backtest: expanding training window, lagged Facebook close only ----
# forecasting starts on the first trading day after this date
initialTrainDate <- as.Date("2014-12-31")
# row numbers of every day that will be forecast
forecastDates <- which(FB$date > initialTrainDate)
# one slot per row of FB; stays NA for rows that are never forecast
predicted.series <- rep(NA, nrow(FB))
# starting cash
bank <- 100000
# number of shares currently owned
holding <- 0
for (i in forecastDates) {
  # train on every row before the row being forecast
  train.data <- FB[seq(from = 1, to = i - 1), ]
  # refit the single-predictor model on the data available so far
  mod <- lm(FB.Close ~ lFB.Close, data = train.data)
  # row i supplies the predictor; it is a lagged close, i.e. the
  # previous row's price
  test.data <- FB[i, ]
  # forecast the closing price for row i and store it
  priceTomorrow <- predict(mod, newdata = test.data)
  predicted.series[i] <- priceTomorrow
  # the latest actual close is the last training row; every trade in this
  # iteration is executed at this price
  priceToday <- tail(train.data$FB.Close, 1)
  # forecast is above today's close: buy 100 shares if the cash covers it
  if (predicted.series[i] > priceToday) {
    if (bank >= 100 * priceToday) {
      bank <- bank - (100 * priceToday)
      holding <- holding + 100
    }
  }
  # forecast is below today's close: sell everything we hold
  if (priceTomorrow < priceToday) {
    if (holding > 0) {
      bank <- bank + (priceToday * holding)
      holding <- 0
    }
  }
}
# close out: sell whatever is still held at the last observed price
(bank <- bank + (priceToday * holding))
## [1] 211479
holding <- 0
What happened to your gains/losses?
My gains decreased from $112,206 to $111,479.
Discuss how including / excluding the additional stock prices impacted your ability to make money in this scenario.
In this scenario, including additional stock prices increased my ability to make money. This can be expected because of everyday market forces. When the technology industry moves in a specific direction, Facebook will generally follow. Removing Microsoft and Amazon from the model removes this added data and decreases the confidence of the model.
Suggest another stock you think might be a better predictor of Facebook stock prices and give your reasoning.
Alphabet (GOOGL) might be a better predictor of Facebook stock prices. This is because Google provides services more similar to Facebook’s than Microsoft or Amazon do. Google is a web-based company that sells a service instead of a product, similar to Facebook. While Microsoft and Amazon are both technology companies, their main business is selling products.
Do you think recent (only the last 90 trading days) or more (all price history) data will return the largest gains? Why?
I believe more data (all price history) will return larger gains than only recent data because with a larger data set, a more accurate prediction can be drawn. Only the last 90 days is a sample of the price change of Facebook’s stock price leading to more fluctuation in the data. All price history will allow for more accurate statistics that better model the change in Facebook’s stock price.
# 90 days
# ---- backtest: rolling 90-row training window, three lagged predictors ----
# number of most recent trading rows used to fit each day's model
windowSize <- 90
# forecasting starts on the first trading day after this date
initialTrainDate <- as.Date("2014-12-31")
# row numbers of every day that will be forecast
forecastDates <- which(FB$date > initialTrainDate)
# one slot per row of FB; stays NA for rows that are never forecast
predicted.series <- rep(NA, nrow(FB))
# starting cash
bank <- 100000
# number of shares currently owned
holding <- 0
for (i in forecastDates) {
  # rolling training window: the windowSize rows immediately before row i.
  # The earlier seq(i-91, i-1) selected 91 rows, one more than the stated 90.
  # max() keeps the start index from dropping below row 1.
  train.data <- FB[seq(max(1, i - windowSize), i - 1), ]
  # refit the model on the current window
  mod <- lm(FB.Close ~ lFB.Close + lMSFT.Close + lAMZN.Close, data = train.data)
  # row i supplies the predictors; they are lagged closes, i.e. the
  # previous row's prices
  test.data <- FB[i, ]
  # forecast the closing price for row i and store it
  priceTomorrow <- predict(mod, newdata = test.data)
  predicted.series[i] <- priceTomorrow
  # the latest actual close is the last training row; every trade in this
  # iteration is executed at this price
  priceToday <- tail(train.data$FB.Close, 1)
  # forecast is above today's close: buy 100 shares if the cash covers it
  if (priceTomorrow > priceToday && bank >= priceToday * 100) {
    holding <- holding + 100
    bank <- bank - (priceToday * 100)
  }
  # forecast is below today's close: sell everything we hold
  if (priceTomorrow < priceToday && holding > 0) {
    bank <- bank + (holding * priceToday)
    holding <- 0
  }
}
# sell any remaining shares and deposit the money in bank
(bank <- bank + (holding * priceToday))
# NOTE(review): the total below was recorded with the earlier 91-row window;
# re-run to refresh it
## [1] 92100.98
holding <- 0
# 60 days
# ---- backtest: rolling 60-row training window, three lagged predictors ----
# number of most recent trading rows used to fit each day's model
windowSize <- 60
# forecasting starts on the first trading day after this date
initialTrainDate <- as.Date("2014-12-31")
# row numbers of every day that will be forecast
forecastDates <- which(FB$date > initialTrainDate)
# one slot per row of FB; stays NA for rows that are never forecast
predicted.series <- rep(NA, nrow(FB))
# starting cash
bank <- 100000
# number of shares currently owned
holding <- 0
for (i in forecastDates) {
  # rolling training window: the windowSize rows immediately before row i.
  # The earlier seq(i-61, i-1) selected 61 rows, one more than the stated 60.
  # max() keeps the start index from dropping below row 1.
  train.data <- FB[seq(max(1, i - windowSize), i - 1), ]
  # refit the model on the current window
  mod <- lm(FB.Close ~ lFB.Close + lMSFT.Close + lAMZN.Close, data = train.data)
  # row i supplies the predictors; they are lagged closes, i.e. the
  # previous row's prices
  test.data <- FB[i, ]
  # forecast the closing price for row i and store it
  priceTomorrow <- predict(mod, newdata = test.data)
  predicted.series[i] <- priceTomorrow
  # the latest actual close is the last training row; every trade in this
  # iteration is executed at this price
  priceToday <- tail(train.data$FB.Close, 1)
  # forecast is above today's close: buy 100 shares if the cash covers it
  if (priceTomorrow > priceToday && bank >= priceToday * 100) {
    holding <- holding + 100
    bank <- bank - (priceToday * 100)
  }
  # forecast is below today's close: sell everything we hold
  if (priceTomorrow < priceToday && holding > 0) {
    bank <- bank + (holding * priceToday)
    holding <- 0
  }
}
# sell any remaining shares and deposit the money in bank
(bank <- bank + (holding * priceToday))
# NOTE(review): the total below was recorded with the earlier 61-row window;
# re-run to refresh it
## [1] 125341
holding <- 0
# 30 days
# ---- backtest: rolling 30-row training window, three lagged predictors ----
# number of most recent trading rows used to fit each day's model
windowSize <- 30
# forecasting starts on the first trading day after this date
initialTrainDate <- as.Date("2014-12-31")
# row numbers of every day that will be forecast
forecastDates <- which(FB$date > initialTrainDate)
# one slot per row of FB; stays NA for rows that are never forecast
predicted.series <- rep(NA, nrow(FB))
# starting cash
bank <- 100000
# number of shares currently owned
holding <- 0
for (i in forecastDates) {
  # rolling training window: the windowSize rows immediately before row i.
  # The earlier seq(i-31, i-1) selected 31 rows, one more than the stated 30.
  # max() keeps the start index from dropping below row 1.
  train.data <- FB[seq(max(1, i - windowSize), i - 1), ]
  # refit the model on the current window
  mod <- lm(FB.Close ~ lFB.Close + lMSFT.Close + lAMZN.Close, data = train.data)
  # row i supplies the predictors; they are lagged closes, i.e. the
  # previous row's prices
  test.data <- FB[i, ]
  # forecast the closing price for row i and store it
  priceTomorrow <- predict(mod, newdata = test.data)
  predicted.series[i] <- priceTomorrow
  # the latest actual close is the last training row; every trade in this
  # iteration is executed at this price
  priceToday <- tail(train.data$FB.Close, 1)
  # forecast is above today's close: buy 100 shares if the cash covers it
  if (priceTomorrow > priceToday && bank >= priceToday * 100) {
    holding <- holding + 100
    bank <- bank - (priceToday * 100)
  }
  # forecast is below today's close: sell everything we hold
  if (priceTomorrow < priceToday && holding > 0) {
    bank <- bank + (holding * priceToday)
    holding <- 0
  }
}
# sell any remaining shares and deposit the money in bank
(bank <- bank + (holding * priceToday))
# NOTE(review): the total below was recorded with the earlier 31-row window;
# re-run to refresh it
## [1] 125613
holding <- 0
Which test returned the largest profit (or smallest loss if you never returned a profit)?
Of the tests run using only recent data, the past 30 days test produced the largest profit of $25,613, narrowly ahead of the past 60 days test ($25,341).
Of all the tests run, the largest profit ($112,206) was returned by using all data history and including Microsoft and Amazon in the model.
Would you trade stock using an approach like this? If so, what gives you confidence enough to invest real dollars? If not, what else would you like to see before you could be convinced to invest?
I would trade stocks using this approach. Using this algorithm for day trading can be effective, as the backtesting showed. While we cannot conclude with complete certainty that this approach will always be correct, using this model in the long run can be effective.