Introduction

This is just a quick look at some stock data. Marky Marc and I are exploring some AMZN returns data at the handlebar. He is learning some R. I am learning about finance data.

This document is published here:
http://rpubs.com/williamsurles/283417

library(readr)
library(dplyr)
library(tidyr)
library(rCharts)
library(ggvis)

Load Amazon Data

# Read the AMZN price history from AMZN.csv. Wrapping the result in
# data.frame() yields a base data frame and turns the `Adj Close` header into
# the syntactic column name Adj.Close that the rest of the analysis uses.
df_amzn <- data.frame(read_csv("AMZN.csv"))
## Parsed with column specification:
## cols(
##   Date = col_date(format = ""),
##   Open = col_double(),
##   High = col_double(),
##   Low = col_double(),
##   Close = col_double(),
##   `Adj Close` = col_double(),
##   Volume = col_integer()
## )
# Preview the first six rows to sanity-check the import.
head(df_amzn, n = 6L)
##         Date     Open     High      Low  Close Adj.Close   Volume
## 1 1997-05-15 2.437500 2.500000 1.927083 23.500  1.958333 72156000
## 2 1997-05-16 1.968750 1.979167 1.708333 20.750  1.729167 14700000
## 3 1997-05-19 1.760417 1.770833 1.625000 20.500  1.708333  6106800
## 4 1997-05-20 1.729167 1.750000 1.635417 19.625  1.635417  5467200
## 5 1997-05-21 1.635417 1.645833 1.375000 17.125  1.427083 18853200
## 6 1997-05-22 1.437500 1.447917 1.312500 16.750  1.395833 11776800
# Show the dimensions and column types of the imported data.
df_amzn %>% str()
## 'data.frame':    5049 obs. of  7 variables:
##  $ Date     : Date, format: "1997-05-15" "1997-05-16" ...
##  $ Open     : num  2.44 1.97 1.76 1.73 1.64 ...
##  $ High     : num  2.5 1.98 1.77 1.75 1.65 ...
##  $ Low      : num  1.93 1.71 1.62 1.64 1.38 ...
##  $ Close    : num  23.5 20.8 20.5 19.6 17.1 ...
##  $ Adj.Close: num  1.96 1.73 1.71 1.64 1.43 ...
##  $ Volume   : int  72156000 14700000 6106800 5467200 18853200 11776800 15937200 8697600 4574400 3472800 ...

Load S&P 500 Data

# Read the S&P 500 data from ^SP500TR.csv. Wrapping the result in
# data.frame() yields a base data frame and turns the `Adj Close` header into
# the syntactic column name Adj.Close that the rest of the analysis uses.
df_sp <- data.frame(read_csv("^SP500TR.csv"))
## Parsed with column specification:
## cols(
##   Date = col_date(format = ""),
##   Open = col_double(),
##   High = col_double(),
##   Low = col_double(),
##   Close = col_double(),
##   `Adj Close` = col_double(),
##   Volume = col_integer()
## )
# Preview the first six rows to sanity-check the import.
head(df_sp, n = 6L)
##         Date    Open    High     Low   Close Adj.Close Volume
## 1 1997-05-15 1114.86 1114.86 1114.86 1114.86   1114.86      0
## 2 1997-05-16 1099.00 1099.00 1099.00 1099.00   1099.00      0
## 3 1997-05-19 1103.68 1103.68 1103.68 1103.68   1103.68      0
## 4 1997-05-20 1114.88 1114.88 1114.88 1114.88   1114.88      0
## 5 1997-05-21 1111.94 1111.94 1111.94 1111.94   1111.94      0
## 6 1997-05-22 1107.08 1107.08 1107.08 1107.08   1107.08      0
# Show the dimensions and column types of the imported data.
df_sp %>% str()
## 'data.frame':    5049 obs. of  7 variables:
##  $ Date     : Date, format: "1997-05-15" "1997-05-16" ...
##  $ Open     : num  1115 1099 1104 1115 1112 ...
##  $ High     : num  1115 1099 1104 1115 1112 ...
##  $ Low      : num  1115 1099 1104 1115 1112 ...
##  $ Close    : num  1115 1099 1104 1115 1112 ...
##  $ Adj.Close: num  1115 1099 1104 1115 1112 ...
##  $ Volume   : int  0 0 0 0 0 0 0 0 0 0 ...

Percent Change Over Time

Let's look at how Amazon has changed over the years.

# Derive Amazon's daily and cumulative changes from the adjusted close.
#   diff_day_amzn      - change in adjusted close versus the previous row
#   perc_diff_day_amzn - that change as a fraction of the previous close
#   diff_cum_amzn      - running total of the daily changes
#   perc_diff_cum_amzn - (running total + first close) / first close, so the
#                        series starts at 1 on the first row
# The first row has no predecessor, so its "previous" close falls back to its
# own value and its daily change is 0.
df_amzn2 <- df_amzn %>%
  select(Date, adj_close_amzn = Adj.Close) %>%
  mutate(
    prev_close = lag(adj_close_amzn, default = adj_close_amzn[1]),
    diff_day_amzn = adj_close_amzn - prev_close,
    perc_diff_day_amzn = diff_day_amzn / prev_close,
    diff_cum_amzn = cumsum(diff_day_amzn),
    perc_diff_cum_amzn =
      (diff_cum_amzn + adj_close_amzn[1]) / adj_close_amzn[1]
  ) %>%
  # prev_close was only a stepping stone; keep the published columns.
  select(-prev_close)

# Line chart of Amazon's cumulative series over time.
df_amzn2 %>%
  ggvis(x = ~Date, y = ~perc_diff_cum_amzn) %>%
  layer_lines(strokeWidth := 1)

Let's look at how the S&P 500 has changed over the years.

# Derive the S&P 500's daily and cumulative changes from the adjusted close.
#   diff_day_sp      - change in adjusted close versus the previous row
#   perc_diff_day_sp - that change as a fraction of the previous close
#   diff_cum_sp      - running total of the daily changes
#   perc_diff_cum_sp - (running total + first close) / first close, so the
#                      series starts at 1 on the first row
# The first row has no predecessor, so its "previous" close falls back to its
# own value and its daily change is 0.
df_sp2 <- df_sp %>%
  select(Date, adj_close_sp = Adj.Close) %>%
  mutate(
    prev_close = lag(adj_close_sp, default = adj_close_sp[1]),
    diff_day_sp = adj_close_sp - prev_close,
    perc_diff_day_sp = diff_day_sp / prev_close,
    diff_cum_sp = cumsum(diff_day_sp),
    perc_diff_cum_sp =
      (diff_cum_sp + adj_close_sp[1]) / adj_close_sp[1]
  ) %>%
  # prev_close was only a stepping stone; keep the published columns.
  select(-prev_close)

# Line chart of the S&P 500's cumulative series over time.
df_sp2 %>%
  ggvis(x = ~Date, y = ~perc_diff_cum_sp) %>%
  layer_lines(strokeWidth := 1)

Let's combine them and compare the percent change.

# Line up the two cumulative series by Date, then stack them into long form
# (one row per Date and stock) so both can be drawn on a single chart.
df2 <- left_join(
  select(df_amzn2, Date, amzn = perc_diff_cum_amzn),
  select(df_sp2, Date, sp = perc_diff_cum_sp),
  by = "Date"
) %>%
  gather(key = stock, value = percent_change, amzn, sp)

# Overlay both cumulative series, with the line colour keyed to the stock.
df2 %>%
  ggvis(x = ~Date, y = ~percent_change, stroke = ~stock) %>%
  layer_lines(strokeWidth := 1)

Shoulda put all my money into Amazon

Daily Percent Change Variation

# Line up the two daily fractional-change series by Date, then stack them into
# long form (one row per Date and stock). Note that this replaces the
# cumulative df2 built earlier.
df2 <- left_join(
  select(df_amzn2, Date, amzn = perc_diff_day_amzn),
  select(df_sp2, Date, sp = perc_diff_day_sp),
  by = "Date"
) %>%
  gather(key = stock, value = percent_change_daily, amzn, sp)

# Overlaid histograms of the daily changes, one per stock, using bins 0.01
# wide with a bin edge at zero. Bars are left unstacked and half-transparent
# so both distributions stay visible.
df2 %>%
  group_by(stock) %>%
  ggvis(~percent_change_daily, fill = ~stock) %>%
  layer_histograms(
    width = .01,
    boundary = 0,
    # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned.
    stack = FALSE,
    opacity := .5
  )
# The same daily-change comparison drawn as density curves, one per stock.
group_by(df2, stock) %>%
  ggvis(x = ~percent_change_daily, fill = ~stock) %>%
  layer_densities()

You could gain or lose more money in Amazon than in the S&P 500 on any given day.