pip install yfinance > /dev/null
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
#gráficos
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
plt.style.use('fivethirtyeight')
color_pal = sns.color_palette()
#modelados
import numpy as np
import sklearn as sk
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from statsmodels.tsa.seasonal import seasonal_decompose
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.metrics import mean_squared_error
from math import sqrt
#database
import yfinance as yf
#fechas
import datetime as dt
from datetime import timedelta
Obtenemos el precio de la acción de Microsoft para los últimos 5 años y nos quedamos con el precio de cierre ajustado, por si hubo splits
# Download five years of MSFT quotes and keep only the 'Adj Close' column
# as a one-column frame named after the ticker.
quotes = yf.download(tickers='MSFT', period='5y', rounding=True)
df = pd.DataFrame(quotes['Adj Close'])
df.columns = ['MSFT']
df.tail()
Dimensiones
# Notebook cell output: (rows, columns) of the price frame.
df.shape
Visualización simple
# Line chart of the price series; the title is set on the axes pandas returns.
ax = df.plot()
ax.set_title('Microsoft price over the last 5y')
plt.show()
# First and last rows, to see which dates the download actually covers.
df.head(1), df.tail(1)
Observo estacionalidades y tendencias
from pylab import rcParams
import statsmodels.api as sm

rcParams['figure.figsize'] = 11, 9
# Split the series into trend, seasonal and residual components.
# `period` is the current name of the cycle-length argument; the older `freq`
# keyword is no longer accepted by recent statsmodels releases.
# The cycle length of 360 rows is meant to represent one year.
# NOTE(review): if df holds one row per trading day, a year is closer to
# 252 rows than 360 - confirm the intended cycle length.
decomposed_volume = sm.tsa.seasonal_decompose(df["MSFT"], period=360)
figure = decomposed_volume.plot()
plt.show()
# Chronological 80/20 split: the first 80% of rows train the model, the
# remainder is held out for testing.
train_size = int(len(df) * 0.8)
test_size = len(df) - train_size

# Fit the scaler on the training rows only, so that the min/max of the
# held-out period does not leak into the training data; the same fitted
# transform is then applied to the whole series. Test values may therefore
# fall outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(df.iloc[:train_size])
dataset = scaler.transform(df)

train, test = dataset[:train_size, :], dataset[train_size:, :]
# Both generators use the same window: 3 consecutive values as input and the
# value that follows as target. Only the batch size differs.
window_kwargs = dict(length=3, sampling_rate=1, stride=1)
train_data_gen = TimeseriesGenerator(train, train, batch_size=3, **window_kwargs)
test_data_gen = TimeseriesGenerator(test, test, batch_size=1, **window_kwargs)

# Peek at the first test batch to check the input/target layout.
X, y = test_data_gen[0]
print(f'Given the Array: \n{X.flatten()}')
print(f'Predict this y: \n {y}')
print(X.shape)
y.shape
# Small recurrent regressor: one LSTM layer with 4 units over windows of
# 3 time steps x 1 feature, followed by a single linear output.
model = Sequential()
model.add(LSTM(4, input_shape=(3, 1)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')

# fit / evaluate / predict take the batch generators directly; the
# *_generator variants are deprecated and missing from newer Keras releases.
history = model.fit(train_data_gen, epochs=6).history
model.summary()

# Training loss per epoch.
loss_per_epoch = history['loss']
plt.plot(range(len(loss_per_epoch)), loss_per_epoch)
plt.show()

# Loss on the held-out windows (still in scaled units).
model.evaluate(test_data_gen)

# Scaled predictions for every window of each split.
trainPredict = model.predict(train_data_gen)
testPredict = model.predict(test_data_gen)
trainPredict.shape, testPredict.shape
Desnormalizar
# Undo the scaler on both prediction arrays.
trainPredict, testPredict = (
    scaler.inverse_transform(scaled) for scaled in (trainPredict, testPredict)
)
Función importada para obtener los valores objetivo (y) de un generador
def get_y_from_generator(gen):
    """Collect every target value produced by a batch generator.

    Args:
        gen: Indexable batch source supporting ``len(gen)`` and ``gen[i]``,
            where ``gen[i][1]`` holds the targets of batch ``i``.

    Returns:
        A NumPy array of shape ``(n, 1)`` holding all targets in batch
        order; shape ``(0, 1)`` when ``gen`` has no batches.

    Side effects:
        Prints the shape of the returned array.
    """
    # Index access (rather than iteration) is used because only len() and
    # [] are relied upon. Each batch is flattened and everything is joined
    # in a single concatenate instead of re-copying the array per batch.
    batches = [np.ravel(gen[i][1]) for i in range(len(gen))]
    if batches:
        y = np.concatenate(batches).reshape((-1, 1))
    else:
        # No batches: return an empty column instead of failing on None.
        y = np.empty((0, 1))
    print(y.shape)
    return y
# Gather the true targets of each split and undo the scaler on them.
trainY = scaler.inverse_transform(get_y_from_generator(train_data_gen))
testY = scaler.inverse_transform(get_y_from_generator(test_data_gen))
Calculo RMSE
# Root mean squared error of the unscaled predictions, per split.
import math

train_mse = mean_squared_error(trainY[:, 0], trainPredict[:, 0])
trainScore = math.sqrt(train_mse)
print('Train Score: %.2f RMSE' % (trainScore))

test_mse = mean_squared_error(testY[:, 0], testPredict[:, 0])
testScore = math.sqrt(test_mse)
print('Test Score: %.2f RMSE' % (testScore))

len(testPredict)
len(trainPredict)

# Undo the scaler on the full series as well.
dataset = scaler.inverse_transform(dataset)
dataset.shape
Preparo train y test para plot
# NaN-filled arrays shaped like the full series; predictions are written
# at the rows they belong to, so they line up with the dates in df.index.
# Train predictions start at row 3 (the first 3 rows only serve as input).
trainPredictPlot = np.full_like(dataset, np.nan)
trainPredictPlot[3:3 + len(trainPredict), :] = trainPredict
trainPredictPlot = pd.DataFrame(trainPredictPlot, index=df.index)

# Test predictions start 2 * 3 rows after the last train prediction and
# run to the end of the series.
testPredictPlot = np.full_like(dataset, np.nan)
testPredictPlot[len(trainPredict) + 2 * 3:, :] = testPredict
testPredictPlot = pd.DataFrame(testPredictPlot, index=df.index)
Visualizo
# Draw the true series first, then the train and test predictions, so the
# line order matches the legend entries below.
for series in (df, trainPredictPlot, testPredictPlot):
    plt.plot(series)
plt.legend(
    ['True Price', 'Prediction Train', 'Prediction Test'],
    loc='center right',
)
plt.title('Microsoft Price Prediction - Long-Short-Term-Memory Model')
plt.show()