Retrieving Data


> import numpy as np
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ import pandas_datareader
+ import datetime
+ import pandas_datareader.data as web

IMPORT HISTORICAL DATA - CAR COMPANIES

> start = datetime.datetime(2012, 1, 1)
+ end = datetime.datetime(2017, 1, 1)
+ tesla = web.DataReader("TSLA", 'yahoo', start, end)
+ ford = web.DataReader("F", 'yahoo', start, end)
+ gm = web.DataReader("GM",'yahoo',start,end)
> tesla.head()
             High    Low   Open  Close   Volume  Adj Close
Date                                                      
2012-01-03  5.900  5.530  5.788  5.616  4640500      5.616
2012-01-04  5.734  5.500  5.642  5.542  3150500      5.542
2012-01-05  5.586  5.370  5.552  5.424  5027500      5.424
2012-01-06  5.558  5.282  5.440  5.382  4931500      5.382
2012-01-09  5.498  5.224  5.400  5.450  4485000      5.450
> ford.head()
             High    Low   Open  Close      Volume  Adj Close
Date                                                         
2012-01-03  11.25  10.99  11.00  11.13  45709900.0   7.511252
2012-01-04  11.53  11.07  11.15  11.30  79725200.0   7.625980
2012-01-05  11.63  11.24  11.33  11.59  67877500.0   7.821692
2012-01-06  11.80  11.52  11.74  11.71  59840700.0   7.902675
2012-01-09  11.95  11.70  11.83  11.80  53981500.0   7.963414
> gm.head()
                 High        Low       Open      Close      Volume  Adj Close
Date                                                                         
2012-01-03  21.180000  20.750000  20.830000  21.049999   9321300.0  16.103352
2012-01-04  21.370001  20.750000  21.049999  21.150000   7856700.0  16.179853
2012-01-05  22.290001  20.959999  21.100000  22.170000  17880600.0  16.960161
2012-01-06  23.030001  22.240000  22.260000  22.920000  18234500.0  17.533915
2012-01-09  23.430000  22.700001  23.200001  22.840000  12084500.0  17.472712

Data Visualization


Price, Volume, Value

ADJUSTED CLOSE

> tesla['Adj Close'].plot(label='Tesla',
+       figsize=(8,4),title='Adj Close');
+ gm['Adj Close'].plot(label='GM');
+ ford['Adj Close'].plot(label='Ford');
+ plt.legend();
+ plt.show()

VOLUME

> tesla['Volume'].plot(label='Tesla',
+     figsize=(8,4),title='Volume Traded');
+ gm['Volume'].plot(label='gm');
+ ford['Volume'].plot(label='ford');
+ plt.legend();
+ plt.show()

Ford had a big spike in 2013

> ford['Volume'].max() 
220363000.0
> ford['Volume'].idxmax() 
Timestamp('2013-12-18 00:00:00')
> # What happened:
+ # http://money.cnn.com/2013/12/18/news/companies/ford-profit/
+ # https://www.usatoday.com/story/money/cars/2013/12/18/ford-2014-profit-warning/4110015/
+ # https://media.ford.com/content/dam/fordmedia/North%20America/US/2014/01/28/4QFinancials.pdf

VALUE TRADED

The product of the average price and total volume would yield the value that was traded. For simplicity I have used the adjusted closing price.

> tesla['Total Traded'] = tesla['Adj Close']*tesla['Volume']
+ ford['Total Traded'] = ford['Adj Close']*ford['Volume']
+ gm['Total Traded'] = gm['Adj Close']*gm['Volume']
> tesla['Total Traded'].plot(label='Tesla',figsize=(8,4));
+ gm['Total Traded'].plot(label='GM');
+ ford['Total Traded'].plot(label='Ford');
+ plt.legend();
+ plt.ylabel('Total Traded');
+ plt.show()

Tesla had a big spike in 2014.

> tesla['Total Traded'].idxmax()
Timestamp('2014-02-25 00:00:00')
> # What happened?
+ # http://money.cnn.com/2014/02/25/investing/tesla-record-high/
+ # https://blogs.wsj.com/moneybeat/2014/02/25/tesla-shares-surge-on-morgan-stanley-report/
+ # https://www.washingtonpost.com/news/wonk/wp/2014/02/25/teslas-stock-is-up-644-why-it-may-not-last/
+ # http://www.cnbc.com/2014/02/25/tesla-soars-ford-falls-in-consumer-reports-study.html

PLOT WITH MOVING AVERAGE - GM

> gm['MA50'] = gm['Adj Close'].rolling(50).mean()
+ gm['MA200'] = gm['Adj Close'].rolling(200).mean()
+ gm[['Adj Close','MA50','MA200']].plot(
+                 label='gm',figsize=(8,4));
+ plt.show()

Scatter Matrix

> from pandas.plotting import scatter_matrix
> car_comp = pd.concat([tesla['Adj Close'],
+                       gm['Adj Close'],
+                       ford['Adj Close']],axis=1)
+ car_comp.columns = ['Tesla Adj Close',
+                     'GM Adj Close',
+                     'Ford Adj Close']
+ car_comp
            Tesla Adj Close  GM Adj Close  Ford Adj Close
Date                                                     
2012-01-03         5.616000     16.103352        7.511252
2012-01-04         5.542000     16.179853        7.625980
2012-01-05         5.424000     16.960161        7.821692
2012-01-06         5.382000     17.533915        7.902675
2012-01-09         5.450000     17.472712        7.963414
...                     ...           ...             ...
2016-12-23        42.667999     30.899082       10.105044
2016-12-27        43.905998     30.769215       10.048276
2016-12-28        43.948002     30.431576        9.934735
2016-12-29        42.936001     30.422913        9.918513
2016-12-30        42.737999     30.163183        9.837415

[1258 rows x 3 columns]
> scatter_matrix(car_comp,figsize=(8,8),
+                alpha=0.2,hist_kwds={'bins':50});
+ plt.show()

Candlestick Chart

FORD JAN 2012

> from mplfinance.original_flavor import candlestick_ohlc
+ from matplotlib.dates import DateFormatter, date2num, WeekdayLocator, DayLocator, MONDAY
+ 
+ # Rest the index to get a column of January Dates
+ ford_reset = ford.loc['2012-01':'2012-01'].reset_index()
+ 
+ # Create a new column of numerical "date" values for matplotlib to use
+ ford_reset['date_ax'] = ford_reset['Date']. \
+ apply(lambda date: date2num(date))
+ ford_values = [tuple(vals) for vals in ford_reset \
+ [['date_ax', 'Open', 'High', 'Low', 'Close']].values]
+ 
+ mondays = WeekdayLocator(MONDAY)        # major ticks on the mondays
+ alldays = DayLocator()              # minor ticks on the days
+ weekFormatter = DateFormatter('%b %d')  # e.g., Jan 12
+ dayFormatter = DateFormatter('%d')      # e.g., 12
+ 
+ #Plot it
+ fig, ax = plt.subplots()
+ fig.subplots_adjust(bottom=0.2)
+ ax.xaxis.set_major_locator(mondays)
+ ax.xaxis.set_minor_locator(alldays)
+ ax.xaxis.set_major_formatter(weekFormatter)
+ 
+ candlestick_ohlc(ax, ford_values, width=0.6,
+ colorup='g',colordown='r');
+ plt.show()

Basic Financial Analysis


Daily Percentage Change

\[ r_t = \frac{p_t}{p_{t-1}} -1\]

> # Method 1: Using shift
+ tesla['returns'] = (tesla['Adj Close'] / 
+                     tesla['Adj Close'].shift(1) ) - 1
> tesla.head()
             High    Low   Open  ...  Adj Close  Total Traded   returns
Date                             ...                                   
2012-01-03  5.900  5.530  5.788  ...      5.616  2.606105e+07       NaN
2012-01-04  5.734  5.500  5.642  ...      5.542  1.746007e+07 -0.013177
2012-01-05  5.586  5.370  5.552  ...      5.424  2.726916e+07 -0.021292
2012-01-06  5.558  5.282  5.440  ...      5.382  2.654133e+07 -0.007743
2012-01-09  5.498  5.224  5.400  ...      5.450  2.444325e+07  0.012635

[5 rows x 8 columns]
> # Method 2: Using pct_change()
+ tesla['returns'] = tesla['Adj Close'].pct_change(1)
+ ford['returns'] = ford['Adj Close'].pct_change(1)
+ gm['returns'] = gm['Adj Close'].pct_change(1)
> tesla.head()
             High    Low   Open  ...  Adj Close  Total Traded   returns
Date                             ...                                   
2012-01-03  5.900  5.530  5.788  ...      5.616  2.606105e+07       NaN
2012-01-04  5.734  5.500  5.642  ...      5.542  1.746007e+07 -0.013177
2012-01-05  5.586  5.370  5.552  ...      5.424  2.726916e+07 -0.021292
2012-01-06  5.558  5.282  5.440  ...      5.382  2.654133e+07 -0.007743
2012-01-09  5.498  5.224  5.400  ...      5.450  2.444325e+07  0.012635

[5 rows x 8 columns]
> ford.head()
             High    Low   Open  ...  Adj Close  Total Traded   returns
Date                             ...                                   
2012-01-03  11.25  10.99  11.00  ...   7.511252  3.433386e+08       NaN
2012-01-04  11.53  11.07  11.15  ...   7.625980  6.079828e+08  0.015274
2012-01-05  11.63  11.24  11.33  ...   7.821692  5.309169e+08  0.025664
2012-01-06  11.80  11.52  11.74  ...   7.902675  4.729016e+08  0.010354
2012-01-09  11.95  11.70  11.83  ...   7.963414  4.298770e+08  0.007686

[5 rows x 8 columns]
> gm.head()
                 High        Low       Open  ...  MA50  MA200   returns
Date                                         ...                       
2012-01-03  21.180000  20.750000  20.830000  ...   NaN    NaN       NaN
2012-01-04  21.370001  20.750000  21.049999  ...   NaN    NaN  0.004751
2012-01-05  22.290001  20.959999  21.100000  ...   NaN    NaN  0.048227
2012-01-06  23.030001  22.240000  22.260000  ...   NaN    NaN  0.033829
2012-01-09  23.430000  22.700001  23.200001  ...   NaN    NaN -0.003491

[5 rows x 10 columns]

HISTOGRAMS

> ford['returns'].hist(bins=50);
+ plt.show()

> gm['returns'].hist(bins=50);
+ plt.show()

> tesla['returns'].hist(bins=50);
+ plt.show()

> tesla['returns'].hist(bins=100,label='Tesla',
+           figsize=(10,8),alpha=0.5);
+ gm['returns'].hist(bins=100,label='GM',alpha=0.5);
+ ford['returns'].hist(bins=100,label='Ford',alpha=0.5);
+ plt.legend();
+ plt.show()

KDE PLOT

> tesla['returns'].plot(kind='kde',label='Tesla',
+                             figsize=(12,6));
+ gm['returns'].plot(kind='kde',label='GM');
+ ford['returns'].plot(kind='kde',label='Ford');
+ plt.legend();
+ plt.show()

BOX PLOT

> box_df = pd.concat([tesla['returns'],
+   gm['returns'],ford['returns']],axis=1)
+ box_df.columns = ['Tesla Returns',' GM Returns','Ford Returns']
+ box_df.plot(kind='box',figsize=(8,11),colormap='jet');
+ plt.show()

Comparing Daily Returns between Stocks

SCATTER MATRIX - RETURNS

> scatter_matrix(box_df,figsize=(8,8),
+                alpha=0.2,hist_kwds={'bins':50});
+ plt.show()

SCATTER - GM & FORD RETURNS

> box_df.plot(kind='scatter',x=' GM Returns',
+             y='Ford Returns',alpha=0.4,figsize=(10,8));
+ plt.show()

Cumulative Daily Returns

\[\text{Cumulative value at time } t = \prod_{i=1}^t(1+r_i)\]

df[daily_cumulative_return] = ( 1 + df[pct_daily_return] ).cumprod()

This formula will yield the ending value for each dollar invested.

> tesla['Cumulative Return'] = (1 + tesla['returns']).cumprod()
> tesla.tail()
                 High        Low  ...   returns  Cumulative Return
Date                              ...                             
2016-12-23  42.689999  41.542000  ...  0.023459           7.597578
2016-12-27  44.450001  42.883999  ...  0.029015           7.818019
2016-12-28  44.759998  43.439999  ...  0.000957           7.825499
2016-12-29  43.840000  42.824001  ... -0.023027           7.645299
2016-12-30  43.500000  42.335999  ... -0.004612           7.610042

[5 rows x 9 columns]

Over this time frame, $1 invested in Tesla would have resulted in an ending value of $7.61

> ford['Cumulative Return'] = (1 + ford['returns']).cumprod()
+ gm['Cumulative Return'] = (1 + gm['returns']).cumprod()

CUMULATIVE RETURN PLOT

> tesla['Cumulative Return'].plot(label='Tesla',
+         figsize=(8,4),title='Cumulative Return');
+ ford['Cumulative Return'].plot(label='Ford');
+ gm['Cumulative Return'].plot(label='GM');
+ plt.legend();
+ plt.show()

CUMULATIVE RETURN PLOT - GM & FORD

> ford['Cumulative Return'].plot(label='Ford',
+         figsize=(8,4),title='Cumulative Return');
+ gm['Cumulative Return'].plot(label='GM');
+ plt.legend();
+ plt.show()