Instalo e importo librerias

In [ ]:
pip install yfinance > /dev/null
In [2]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

#gráficos
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
plt.style.use('fivethirtyeight') 
color_pal = sns.color_palette()

#modelados
import numpy as np
import sklearn as sk
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from statsmodels.tsa.seasonal import seasonal_decompose
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.metrics import mean_squared_error
from math import sqrt

#database
import yfinance as yf

#fechas
import datetime as dt
from datetime import timedelta

Creo dataframe

Obtenemos precio de accion Microsoft para los ultimos 5 años, y nos quedamos con el precio de cierre ajustado por si hubieron splits

In [229]:
df = pd.DataFrame(yf.download(tickers='MSFT',period='5y',rounding=True)['Adj Close'])
df.columns = ['MSFT']
[*********************100%***********************]  1 of 1 completed
In [189]:
df.tail()
Out[189]:
MSFT
Date
2022-07-15 256.72
2022-07-18 254.25
2022-07-19 259.53
2022-07-20 262.27
2022-07-21 264.84

Dimensiones

In [230]:
df.shape
Out[230]:
(1258, 1)

Visualización simple

In [233]:
df.plot()
plt.title('Microsoft price over the last 5y')
plt.show()
In [234]:
df.head(1),df.tail(1)
Out[234]:
(             MSFT
 Date             
 2017-07-24  68.94,               MSFT
 Date              
 2022-07-21  264.84)

Observo estacionalidades y tendencias

In [235]:
from pylab import rcParams
import statsmodels.api as sm
rcParams['figure.figsize'] = 11, 9
decomposed_volume = sm.tsa.seasonal_decompose(df["MSFT"],freq=360) # The frequncy is annual
figure = decomposed_volume.plot()
plt.show()

Preprocesamiento

Normalizar y separar la base

In [240]:
scaler = MinMaxScaler()
dataset = scaler.fit_transform(df)
In [241]:
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]

Genero formato para modelo

In [242]:
train_data_gen = TimeseriesGenerator(train, train,
                                    length=3, sampling_rate=1,stride=1,
                                    batch_size=3)
test_data_gen = TimeseriesGenerator(test, test,
                                    length=3, sampling_rate=1,stride=1,
                                    batch_size=1)
In [243]:
X,y = test_data_gen[0]
print(f'Given the Array: \n{X.flatten()}')
print(f'Predict this y: \n {y}')
Given the Array: 
[0.78964036 0.80238061 0.80016016]
Predict this y: 
 [[0.79109639]]
In [207]:
print(X.shape)
y.shape
(1, 3, 1)
Out[207]:
(1, 1)

Modelo

Construccion

In [244]:
model = Sequential()
model.add(LSTM(4, input_shape=(3, 1)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
history = model.fit_generator(train_data_gen, epochs=6).history
Epoch 1/6
335/335 [==============================] - 4s 3ms/step - loss: 0.0791
Epoch 2/6
335/335 [==============================] - 1s 3ms/step - loss: 0.0355
Epoch 3/6
335/335 [==============================] - 1s 3ms/step - loss: 0.0222
Epoch 4/6
335/335 [==============================] - 1s 3ms/step - loss: 0.0086
Epoch 5/6
335/335 [==============================] - 1s 3ms/step - loss: 0.0015
Epoch 6/6
335/335 [==============================] - 1s 3ms/step - loss: 3.6371e-04
In [245]:
model.summary()
Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 lstm_7 (LSTM)               (None, 4)                 96        
                                                                 
 dense_7 (Dense)             (None, 1)                 5         
                                                                 
=================================================================
Total params: 101
Trainable params: 101
Non-trainable params: 0
_________________________________________________________________
In [246]:
loss_per_epoch = history['loss']
plt.plot(range(len(loss_per_epoch)),loss_per_epoch)
plt.show()

Aplicacion

In [247]:
model.evaluate_generator(test_data_gen)
trainPredict = model.predict_generator(train_data_gen)
testPredict = model.predict_generator(test_data_gen)
In [248]:
model.evaluate_generator(test_data_gen)
Out[248]:
0.0011689517414197326
In [249]:
trainPredict.shape,testPredict.shape
Out[249]:
((1003, 1), (249, 1))

Predicciones

Desnormalizar

In [250]:
trainPredict = scaler.inverse_transform(trainPredict)
testPredict = scaler.inverse_transform(testPredict)

Funcion importada para obtener predicciones

In [251]:
def get_y_from_generator(gen):
    '''
    Get all targets y from a TimeseriesGenerator instance.
    '''
    y = None
    for i in range(len(gen)):
        batch_y = gen[i][1]
        if y is None:
            y = batch_y
        else:
            y = np.append(y, batch_y)
    y = y.reshape((-1,1))
    print(y.shape)
    return y
In [252]:
trainY = get_y_from_generator(train_data_gen)
testY = get_y_from_generator(test_data_gen)
(1003, 1)
(249, 1)
In [253]:
trainY = scaler.inverse_transform(trainY)
testY = scaler.inverse_transform(testY)

Calculo RMSE

In [254]:
# calculate root mean squared error
import math
trainScore = math.sqrt(mean_squared_error(trainY[:,0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[:, 0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))
Train Score: 4.89 RMSE
Test Score: 9.39 RMSE
In [281]:
len(testPredict)
len(trainPredict)
Out[281]:
1003
In [255]:
dataset = scaler.inverse_transform(dataset)
dataset.shape
Out[255]:
(1258, 1)

Gráfico resultados

Preparo train y test para plot

In [312]:
trainPredictPlot = np.empty_like(dataset)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[3:len(trainPredict)+3, :] = trainPredict
trainPredictPlot = pd.DataFrame(trainPredictPlot,df.index)
In [311]:
testPredictPlot = np.empty_like(dataset)
testPredictPlot[:, :] = np.nan
testPredictPlot[len(trainPredict)+(3*2):len(dataset), :] = testPredict

testPredictPlot = pd.DataFrame(testPredictPlot,df.index)

Visualizo

In [317]:
plt.plot(df)
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.legend(['True Price', 'Prediction Train','Prediction Test'],loc='center right')
plt.title('Microsoft Price Prediction - Long-Short-Term-Memory Model')

plt.show()