Load Libraries
library(data.table)
library(Quandl)
library(quantmod)
library(dplyr)
library(tidyr)
library(GGally)
library(purrr)
library(modelr)
library(ggplot2)
Download the ticker list, then filter out junk and duplicate tickers
# Download the ticker list (nasdaq100.csv) from Quandl's static S3 location.
qURL <- "https://s3.amazonaws.com/static.quandl.com/tickers/nasdaq100.csv"
mydf <- read.csv(qURL, header = TRUE)

# Coerce every column to character; assigning into `mydf[]` keeps the
# data.frame structure instead of replacing it with a plain list.
mydf[] <- lapply(mydf, as.character)

# Build the working tibble of tickers.
# `as_tibble()` replaces the deprecated `as_data_frame()`.
# NOTE: the result is still grouped by `ticker` (no `ungroup()` is applied),
# so later verbs on `mystocktibble` operate per ticker.
mystocktibble <- as_tibble(mydf) %>%
  # Drop rows that have no free dataset code.
  filter(free_code != "") %>%
  # Drop dataset codes that were excluded by hand.
  filter(
    free_code != "WIKI/LBTYK" &
      free_code != "GOOG/NASDAQ_LMCK" &
      free_code != "WIKI/NXPI"
  ) %>%
  # De-duplicate: per ticker keep the row(s) ranking highest on `free_code`,
  # then the row(s) ranking highest on `name`. The grouping set here is
  # preserved by `top_n()`, so it does not need to be re-applied in between.
  group_by(ticker) %>%
  top_n(n = 1, wt = free_code) %>%
  top_n(n = 1, wt = name) %>%
  # Keep only codes that contain both a literal "A" and a literal "N".
  filter(
    grepl("A", free_code, fixed = TRUE) &
      grepl("N", free_code, fixed = TRUE)
  ) %>%
  filter(free_code != "GOOG/NASDAQ_VIP")
Create function to access Quandl API
# Fetch one dataset from the Quandl API and return it with a numeric Date.
#
# Args:
#   string:  Quandl dataset code, passed unchanged to Quandl()
#            (for example "WIKI/AMZN").
#   api_key: Quandl API key. Defaults to the QUANDL_API_KEY environment
#            variable; when that is unset it falls back to the key that was
#            previously hard-coded here, so existing callers keep working.
#            SECURITY: prefer setting QUANDL_API_KEY and removing the literal
#            key from source control.
#
# Returns:
#   The object returned by Quandl(), with its `Date` column replaced by a
#   number built from the date text with the dashes removed.
#
# Side effects:
#   Prints `string` so progress is visible when mapped over many codes.
get_Quandl_Chart_Data <- function(
    string,
    api_key = Sys.getenv("QUANDL_API_KEY", unset = "evdFxFw2Tf2BDSGtXUvg")) {
  print(string)
  mydf <- Quandl(string, api_key = api_key)
  # Assumes Date formats as "YYYY-MM-DD", giving numbers such as 20180102.
  # NOTE(review): that encoding is not evenly spaced in time (20171231 is
  # followed by 20180101), which distorts any regression on Date - confirm
  # this is intended before relying on fitted slopes.
  mydf$Date <- as.numeric(gsub("-", "", as.character(mydf$Date), fixed = TRUE))
  mydf
}
Apply the Quandl data-extraction function over the ticker codes and add each chart as nested data in the tibble
# Download the price history for every dataset code and store each result
# as one element of the new `chart` list-column.
mystocktibble <- mutate(
  mystocktibble,
  chart = map(free_code, function(code) get_Quandl_Chart_Data(code))
)
## [1] "WIKI/ALXN"
## [1] "WIKI/AMGN"
## [1] "WIKI/AMZN"
## [1] "WIKI/LVNTA"
## [1] "WIKI/NTAP"
## [1] "WIKI/NVDA"
# Print the tibble to inspect the `chart` list-column.
mystocktibble
## # A tibble: 6 x 5
## # Groups: ticker [6]
## ticker name premium_code free_code chart
## <chr> <chr> <chr> <chr> <list>
## 1 ALXN ALEXION PHARM INC EOD/ALXN WIKI/ALXN <data.frame [5,557 x~
## 2 AMZN AMAZON.COM INC EOD/AMZN WIKI/AMZN <data.frame [5,248 x~
## 3 AMGN AMGEN EOD/AMGN WIKI/AMGN <data.frame [8,458 x~
## 4 LVNTA LIBERTY VNTRS SRS A EOD/LVNTA WIKI/LVNTA <data.frame [1,402 x~
## 5 NTAP NETAPP INC. EOD/NTAP WIKI/NTAP <data.frame [5,623 x~
## 6 NVDA NVIDIA CORPORATION EOD/NVDA WIKI/NVDA <data.frame [4,825 x~
Create a linear modeling function, apply it over the stock tickers, and add residual values
# Fit a straight-line trend of closing price against date.
#
# Args:
#   df: data frame that contains the columns `Close` and `Date`.
#
# Returns:
#   The fitted `lm` object for the formula Close ~ Date.
stock_model <- function(df) {
  lm(data = df, formula = Close ~ Date)
}
# Fit one linear model per ticker and keep it in the `model` list-column,
# then print the tibble to check the result.
mystocktibble <- mutate(
  mystocktibble,
  model = map(chart, function(prices) stock_model(prices))
)
mystocktibble
## # A tibble: 6 x 6
## # Groups: ticker [6]
## ticker name premium_code free_code chart model
## <chr> <chr> <chr> <chr> <list> <list>
## 1 ALXN ALEXION PHARM INC EOD/ALXN WIKI/ALXN <data.frame [~ <S3: ~
## 2 AMZN AMAZON.COM INC EOD/AMZN WIKI/AMZN <data.frame [~ <S3: ~
## 3 AMGN AMGEN EOD/AMGN WIKI/AMGN <data.frame [~ <S3: ~
## 4 LVNTA LIBERTY VNTRS SRS A EOD/LVNTA WIKI/LVNTA <data.frame [~ <S3: ~
## 5 NTAP NETAPP INC. EOD/NTAP WIKI/NTAP <data.frame [~ <S3: ~
## 6 NVDA NVIDIA CORPORATION EOD/NVDA WIKI/NVDA <data.frame [~ <S3: ~
# Pair each price table with its fitted model and store the table returned by
# add_residuals() in the `resids` list-column.
mystocktibble <- mutate(
  mystocktibble,
  resids = map2(chart, model, function(prices, fit) add_residuals(prices, fit))
)
Unnest exposes the nested data
# Expand the nested `resids` list-column into ordinary rows and columns,
# then print the flattened table.
resids <- mystocktibble %>%
  unnest(resids)
resids
## # A tibble: 31,113 x 18
## # Groups: ticker [6]
## ticker name premium_code free_code Date Open High Low Close
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALXN ALEXION P~ EOD/ALXN WIKI/ALXN 2.02e7 112. 113. 108. 108.
## 2 ALXN ALEXION P~ EOD/ALXN WIKI/ALXN 2.02e7 112. 112. 108. 112.
## 3 ALXN ALEXION P~ EOD/ALXN WIKI/ALXN 2.02e7 114. 115. 110. 111.
## 4 ALXN ALEXION P~ EOD/ALXN WIKI/ALXN 2.02e7 117. 118 114. 114.
## 5 ALXN ALEXION P~ EOD/ALXN WIKI/ALXN 2.02e7 120. 120. 118. 118.
## 6 ALXN ALEXION P~ EOD/ALXN WIKI/ALXN 2.02e7 123. 123. 119. 120.
## 7 ALXN ALEXION P~ EOD/ALXN WIKI/ALXN 2.02e7 125. 126. 121. 122.
## 8 ALXN ALEXION P~ EOD/ALXN WIKI/ALXN 2.02e7 128. 131. 126. 127.
## 9 ALXN ALEXION P~ EOD/ALXN WIKI/ALXN 2.02e7 137. 138. 124. 127.
## 10 ALXN ALEXION P~ EOD/ALXN WIKI/ALXN 2.02e7 122. 123. 120. 123.
## # ... with 31,103 more rows, and 9 more variables: Volume <dbl>,
## # `Ex-Dividend` <dbl>, `Split Ratio` <dbl>, `Adj. Open` <dbl>, `Adj.
## # High` <dbl>, `Adj. Low` <dbl>, `Adj. Close` <dbl>, `Adj.
## # Volume` <dbl>, resid <dbl>
Plot of residuals by stock with regression line
## `geom_smooth()` using method = 'gam'
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.