Introduction

Finally, I figure out how to implment python code in Rstuido. I would like to say that Rstudio is the most elegant IDE I have ever seen. I really love it! Okay, now I gonna show you why Rstudio is much better to learn and show our programming process for any languages.

Simple vs. Logarithmic Returns

  1. Explain different methods to compute retruns from financial data. Discuss the advantages and disadvantages of the different concepts.
import numpy as np 
import matplotlib.pyplot as plt 
from scipy import stats 
import seaborn as sns 
import pandas as pd 
from pandas import read_csv 
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
# End Imports 
sns.set(color_codes = True)
np.set_printoptions(precision = 4, suppress = True)
################ Functions we wrote last time #####################
def lr(x):
    return np.log(x[1:]) - np.log(x[:-1])
def dstats(x):
    T = np.size(x,0)
    mu = np.sum(x,0)/T
    sigma2 = np.sum((x-mu)**2,axis=0)/T
    sigma = np.sqrt(sigma2)
    skew = np.sum((x-mu)**3/(sigma**3),axis=0)/T
    kurt = np.sum((x-mu)**4/(sigma**4),axis=0)/T
    d = np.array([mu, sigma, skew, kurt])
    return d
csv_data = read_csv("ibm_2001_2013.csv", header = None)
print(csv_data.head()) # we can use the head() function to see the dataset
##         0
## 0  176.85
## 1  177.80
## 2  175.77
## 3  174.97
## 4  172.86

From the above dataset, we can see that there is only one column. To make the calculation easier, we can transform the dataframe into the array.

IBM = np.array(csv_data)
# Generate simple return 
def sr(x):
    return x[1:]/x[:-1] - 1
ibm_sim = sr(IBM)
# Generate log return 
ibm_log = lr(IBM)
# Descriptive Statistics
print('Descriptive Statistics for simple and log returns')
## Descriptive Statistics for simple and log returns
print(dstats(np.column_stack((ibm_sim,ibm_log))))
## [[-0.0002 -0.0003]
##  [ 0.016   0.0161]
##  [-0.0049 -0.2073]
##  [ 9.2478  9.5724]]
def JBtest(x):
    T = np.size(x,0)
    moments = dstats(x)
    '''Jarque-Bera test for normality'''
    t_JB = (T/6)*(moments[2]**2 + ((moments[3]-3)**2)/4)
    pv_JB =(1- stats.chi2.cdf(t_JB,2))
    return np.column_stack((t_JB,pv_JB))
JBtest(np.column_stack((ibm_sim,ibm_log)))
# We don't see much of a difference, just logarithmic returns are more negatively skewed
print(JBtest(np.column_stack((ibm_sim,ibm_log))))
## [[5242.0661    0.    ]
##  [5823.9296    0.    ]]

As the tests shows that there is no difference between simple and log returns, we need check the datasets again. This also means that the normal distribtuion is not that good for being a benchmark.

Now we will standardize the simple and and log returns by their empirical standard deviation and compare them with a standard normal and a standardized \(t_r\) distribtuion. We will see which type of distribtuion seem to be more approriate to fid the data.

ibm_s = ibm_sim/ibm_sim.std() # standarlized the distribtuion 
ibm_l = ibm_log/ibm_log.std() # standarlized the log return 
fig = plt.figure()
fig.add_subplot(1,2,1)
plt.hist(ibm_l, bins = 50, color = 'silver', density = True)
xt = plt.xticks()[0]
xmin, xmax = min(xt), max(xt)
lnspc = np.linspace(xmin, xmax, len(ibm_l))
plt.plot(lnspc, stats.norm.pdf(lnspc, 0,1), lw=3, color = 'red')
plt.plot(lnspc, stats.t.pdf(lnspc, 1), lw = 3, color = 'green')
plt.plot(lnspc, stats.t.pdf(lnspc, 5), lw = 3, color = 'darkblue')
plt.legend(['N(0,1)', 't(1)', 't(5)', 'return'],fontsize = 12)
plt.title('PDF Comparison for Log Returns')
fig.add_subplot(1,2,2)
plt.hist(ibm_s, bins = 50, color='silver',density = True)
xt = plt.xticks()[0]  
xmin, xmax = min(xt), max(xt)  
lnspc = np.linspace(xmin, xmax, len(ibm_s))
plt.plot(lnspc, stats.norm.pdf(lnspc, 0, 1),lw = 3,color = 'red')
plt.plot(lnspc, stats.t.pdf(lnspc,1),lw = 3,color = 'green')
plt.plot(lnspc, stats.t.pdf(lnspc,5),lw = 3,color = 'darkblue')
plt.legend(['N(0,1)','t(1)','t(5)','return'],fontsize=12)
plt.title('PDF Comparison for Simple Returns')
plt.show()

From the above the figure, we still cannot tell which distribtuion fits the data better. It seems both normal and t-student distribution don’t fit the data that well, expecailly in terms of kurtosis. Now we will write a program which generates aggregate log returns. Then we will analyze the distributional properties of weekly and monthly and (maybe yearly) returns and compare them with daily returns.

def aggret(x,freq):
    if freq==1:
        total = 5 #weekly aggregation
    elif freq==2:
        total =20 #monthly aggregation
    elif freq==3:
        total = 250 #yearly aggregation 
    else:
        print('Error: Wrong Aggregation frequency specified')
    T = np.size(x,0)
    T_freq = int(np.floor(T/total))
    res = np.zeros((T_freq,1))
    for j in range(T_freq):
        agg = np.cumsum(x[j*total:total + j*total,])
        res[j] = agg[-1]
    return res
ibm_w = aggret(ibm_log,1)
ibm_m = aggret(ibm_log,2)
ibm_y = aggret(ibm_log,3)
#Histogram
fig = plt.figure()
fig.add_subplot(2,2,1)
x = ibm_log
(mu, sigma) = stats.norm.fit(x) # fit normal
# the histogram of the data
n, bins, patches = plt.hist(x, bins = 60, density=1)
y = stats.norm.pdf( bins, mu, sigma) # add the fitted normal
l = plt.plot(bins, y, color = 'red', linewidth=4)
plt.title(r'$\mathrm{Histogram\ of\ daily\ log\ returns :}\ \mu=%.3f,\ \sigma=%.3f$' %(mu, sigma))
fig.add_subplot(2,2,2)
x = ibm_w
(mu, sigma) = stats.norm.fit(x) # fit normal
# the histogram of the data
n, bins, patches = plt.hist(x, bins = 40, density=1)
y = stats.norm.pdf( bins, mu, sigma) # add the fitted normal
l = plt.plot(bins, y, color = 'red', linewidth=4)
plt.title(r'$\mathrm{Histogram\ of\ weekly\ log\ returns :}\ \mu=%.3f,\ \sigma=%.3f$' %(mu, sigma))
fig.add_subplot(2,2,3)
x = ibm_m
(mu, sigma) = stats.norm.fit(x) # fit normal
# the histogram of the data
n, bins, patches = plt.hist(x, bins = 20, density=1)
y = stats.norm.pdf( bins, mu, sigma) # add the fitted normal
l = plt.plot(bins, y, color = 'red', linewidth=4)
plt.title(r'$\mathrm{Histogram\ of\ monthly\ log\ returns :}\ \mu=%.3f,\ \sigma=%.3f$' %(mu, sigma))
fig.add_subplot(2,2,4)
x = ibm_y
(mu, sigma) = stats.norm.fit(x) # fit normal
# the histogram of the data
n, bins, patches = plt.hist(x,bins =5, density=1)
y = stats.norm.pdf( bins, mu, sigma) # add the fitted normal
l = plt.plot(bins, y, color = 'red', linewidth=4)
plt.title(r'$\mathrm{Histogram\ of\ yearly\ log\ returns :}\ \mu=%.3f,\ \sigma=%.3f$' %(mu, sigma))
# Descriptive statistic
print(np.column_stack((dstats(ibm_log),dstats(ibm_w),dstats(ibm_m),dstats(ibm_y))))
## [[-0.0003 -0.0012 -0.005  -0.0485]
##  [ 0.0161  0.0371  0.0764  0.1795]
##  [-0.2073  0.222   0.3671  0.1355]
##  [ 9.5724  8.0651  7.6142  2.5941]]
print(np.row_stack((JBtest(ibm_log),JBtest(ibm_w),JBtest(ibm_m),JBtest(ibm_y))))
## [[5823.9296    0.    ]
##  [ 693.7069    0.    ]
##  [ 146.4443    0.    ]
##  [   0.1191    0.9422]]
plt.show()