This is just a quick look at some stock data. Marky Marc and I are exploring some AMZN returns data at the handlebar. He is learning some R. I am learning about finance data.
This document is published here:
http://rpubs.com/williamsurles/283417
library(readr)
library(dplyr)
library(tidyr)
library(rCharts)
library(ggvis)
# Read the AMZN price history and convert it to a base data.frame
# (the echoed output shows `Adj Close` becoming Adj.Close after this step).
df_amzn <- data.frame(read_csv("AMZN.csv"))
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_integer()
## )
# Preview the first rows of the AMZN data.
df_amzn %>% head()
## Date Open High Low Close Adj.Close Volume
## 1 1997-05-15 2.437500 2.500000 1.927083 23.500 1.958333 72156000
## 2 1997-05-16 1.968750 1.979167 1.708333 20.750 1.729167 14700000
## 3 1997-05-19 1.760417 1.770833 1.625000 20.500 1.708333 6106800
## 4 1997-05-20 1.729167 1.750000 1.635417 19.625 1.635417 5467200
## 5 1997-05-21 1.635417 1.645833 1.375000 17.125 1.427083 18853200
## 6 1997-05-22 1.437500 1.447917 1.312500 16.750 1.395833 11776800
# Show the column types and dimensions of the AMZN data.
df_amzn %>% str()
## 'data.frame': 5049 obs. of 7 variables:
## $ Date : Date, format: "1997-05-15" "1997-05-16" ...
## $ Open : num 2.44 1.97 1.76 1.73 1.64 ...
## $ High : num 2.5 1.98 1.77 1.75 1.65 ...
## $ Low : num 1.93 1.71 1.62 1.64 1.38 ...
## $ Close : num 23.5 20.8 20.5 19.6 17.1 ...
## $ Adj.Close: num 1.96 1.73 1.71 1.64 1.43 ...
## $ Volume : int 72156000 14700000 6106800 5467200 18853200 11776800 15937200 8697600 4574400 3472800 ...
# Read the index price history from ^SP500TR.csv and convert it to a base
# data.frame, mirroring how the AMZN file is loaded.
df_sp <- data.frame(read_csv("^SP500TR.csv"))
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_integer()
## )
# Preview the first rows of the index data.
df_sp %>% head()
## Date Open High Low Close Adj.Close Volume
## 1 1997-05-15 1114.86 1114.86 1114.86 1114.86 1114.86 0
## 2 1997-05-16 1099.00 1099.00 1099.00 1099.00 1099.00 0
## 3 1997-05-19 1103.68 1103.68 1103.68 1103.68 1103.68 0
## 4 1997-05-20 1114.88 1114.88 1114.88 1114.88 1114.88 0
## 5 1997-05-21 1111.94 1111.94 1111.94 1111.94 1111.94 0
## 6 1997-05-22 1107.08 1107.08 1107.08 1107.08 1107.08 0
# Show the column types and dimensions of the index data.
df_sp %>% str()
## 'data.frame': 5049 obs. of 7 variables:
## $ Date : Date, format: "1997-05-15" "1997-05-16" ...
## $ Open : num 1115 1099 1104 1115 1112 ...
## $ High : num 1115 1099 1104 1115 1112 ...
## $ Low : num 1115 1099 1104 1115 1112 ...
## $ Close : num 1115 1099 1104 1115 1112 ...
## $ Adj.Close: num 1115 1099 1104 1115 1112 ...
## $ Volume : int 0 0 0 0 0 0 0 0 0 0 ...
Let's look at how Amazon has changed over the years.
# Daily and cumulative change in Amazon's adjusted close.
#   diff_day_amzn      - change versus the previous row (0 on the first row,
#                        because the lag is padded with the first value)
#   perc_diff_day_amzn - that change as a fraction of the previous row's value
#   diff_cum_amzn      - running total of the daily changes
#   perc_diff_cum_amzn - (running total + first value) / first value
# Rows are used in file order; nothing here sorts by Date.
df_amzn2 <- df_amzn %>%
  transmute(
    Date,
    adj_close_amzn = Adj.Close,
    diff_day_amzn = adj_close_amzn -
      lag(adj_close_amzn, default = adj_close_amzn[1]),
    perc_diff_day_amzn = diff_day_amzn /
      lag(adj_close_amzn, default = adj_close_amzn[1]),
    diff_cum_amzn = cumsum(diff_day_amzn),
    perc_diff_cum_amzn = (diff_cum_amzn + adj_close_amzn[1]) / adj_close_amzn[1]
  )
# Line chart of perc_diff_cum_amzn against Date.
df_amzn2 %>%
  ggvis(x = ~Date, y = ~perc_diff_cum_amzn) %>%
  layer_lines(strokeWidth := 1)
Let's look at how the S&P 500 has changed over the years.
# Daily and cumulative change in the index's adjusted close, built the same
# way as the Amazon table.
#   diff_day_sp      - change versus the previous row (0 on the first row,
#                      because the lag is padded with the first value)
#   perc_diff_day_sp - that change as a fraction of the previous row's value
#   diff_cum_sp      - running total of the daily changes
#   perc_diff_cum_sp - (running total + first value) / first value
# Rows are used in file order; nothing here sorts by Date.
df_sp2 <- df_sp %>%
  transmute(
    Date,
    adj_close_sp = Adj.Close,
    diff_day_sp = adj_close_sp -
      lag(adj_close_sp, default = adj_close_sp[1]),
    perc_diff_day_sp = diff_day_sp /
      lag(adj_close_sp, default = adj_close_sp[1]),
    diff_cum_sp = cumsum(diff_day_sp),
    perc_diff_cum_sp = (diff_cum_sp + adj_close_sp[1]) / adj_close_sp[1]
  )
# Line chart of perc_diff_cum_sp against Date.
df_sp2 %>%
  ggvis(x = ~Date, y = ~perc_diff_cum_sp) %>%
  layer_lines(strokeWidth := 1)
Let's combine them and compare the percent change (each series is scaled by its own first adjusted close, so both start at 1).
# Put the two cumulative series side by side, matched on Date (every Amazon
# row is kept), then stack them into long form: one row per Date and stock,
# with the value in percent_change.
df2 <- left_join(
  select(df_amzn2, Date, amzn = perc_diff_cum_amzn),
  select(df_sp2, Date, sp = perc_diff_cum_sp),
  by = 'Date'
) %>%
  gather(key = stock, value = percent_change, amzn, sp)
# One line per stock, coloured by the stock column.
df2 %>%
  ggvis(x = ~Date, y = ~percent_change, stroke = ~stock) %>%
  layer_lines(strokeWidth := 1)
I should have put all my money into Amazon.
# Rebuild df2 from the daily fractional changes instead of the cumulative
# ones: match the two series on Date (every Amazon row is kept), then stack
# them into long form with the value in percent_change_daily.
# This overwrites the df2 created for the cumulative comparison.
df2 <- left_join(
  select(df_amzn2, Date, amzn = perc_diff_day_amzn),
  select(df_sp2, Date, sp = perc_diff_day_sp),
  by = 'Date'
) %>%
  gather(key = stock, value = percent_change_daily, amzn, sp)
# Overlaid histograms of the daily fractional changes, one fill colour per
# stock. Bins are 0.01 wide with a bin boundary at 0, and the two series are
# drawn semi-transparently on top of each other rather than stacked.
df2 %>%
  group_by(stock) %>%
  ggvis(~percent_change_daily, fill = ~stock) %>%
  layer_histograms(
    width = 0.01,
    boundary = 0,
    # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned.
    stack = FALSE,
    opacity := 0.5
  )
# Density curves of the daily fractional changes, one per stock.
layer_densities(
  ggvis(group_by(df2, stock), ~percent_change_daily, fill = ~stock)
)
You could gain or lose more money in Amazon than in the S&P 500 on any given day.