S&P 500 Stock Analysis Using an LSTM Model¶

Get S&P 500 stock data (the SPXL ticker) from Yahoo Finance and use it to forecast how prices move in the near future.

First, a quick pandas date_range check that builds a 50-day daily calendar:

In [ ]:
rng = pd.date_range('2024-03-01', periods=50, freq='D')
rng
Out[ ]:
DatetimeIndex(['2024-03-01', '2024-03-02', '2024-03-03', '2024-03-04',
               '2024-03-05', '2024-03-06', '2024-03-07', '2024-03-08',
               '2024-03-09', '2024-03-10', '2024-03-11', '2024-03-12',
               '2024-03-13', '2024-03-14', '2024-03-15', '2024-03-16',
               '2024-03-17', '2024-03-18', '2024-03-19', '2024-03-20',
               '2024-03-21', '2024-03-22', '2024-03-23', '2024-03-24',
               '2024-03-25', '2024-03-26', '2024-03-27', '2024-03-28',
               '2024-03-29', '2024-03-30', '2024-03-31', '2024-04-01',
               '2024-04-02', '2024-04-03', '2024-04-04', '2024-04-05',
               '2024-04-06', '2024-04-07', '2024-04-08', '2024-04-09',
               '2024-04-10', '2024-04-11', '2024-04-12', '2024-04-13',
               '2024-04-14', '2024-04-15', '2024-04-16', '2024-04-17',
               '2024-04-18', '2024-04-19'],
              dtype='datetime64[ns]', freq='D')
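
Calendar-day frequency includes weekends; for market data a business-day calendar usually lines up better with trading days. A minimal variation of the cell above, assuming pandas is already imported as pd:

# 'B' (business-day) frequency skips Saturdays and Sundays
bdays = pd.date_range('2024-03-01', periods=50, freq='B')
bdays[:5]   # 2024-03-01 (Fri), then 2024-03-04 (Mon) onward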

Import the libraries and get the data from Yahoo Finance

In [ ]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
plt.style.use("fivethirtyeight")
%matplotlib inline

# For reading stock data from yahoo
from pandas_datareader.data import DataReader
import yfinance as yf
from pandas_datareader import data as pdr

yf.pdr_override()

# For time stamps
from datetime import datetime
In [ ]:
# Get the stock quote
df = pdr.get_data_yahoo('SPXL', start='2022-01-01', end=datetime.now())
# Show the data
df.tail(10)
[*********************100%%**********************]  1 of 1 completed
Out[ ]:
Open High Low Close Adj Close Volume
Date
2024-04-15 127.010002 127.099998 118.599998 119.260002 119.260002 8718500
2024-04-16 119.529999 120.620003 117.589996 118.519997 118.519997 5997700
2024-04-17 120.260002 120.360001 115.360001 116.349998 116.349998 6666500
2024-04-18 117.220001 118.720001 114.839996 115.529999 115.529999 5587900
2024-04-19 115.379997 116.099998 111.540001 112.550003 112.550003 11614600
2024-04-22 114.209999 117.309998 112.570000 115.449997 115.449997 6315100
2024-04-23 116.879997 119.870003 116.500000 119.510002 119.510002 5019700
2024-04-24 120.150002 120.720001 117.739998 119.349998 119.349998 10395500
2024-04-25 114.790001 118.419998 113.599998 117.839996 117.839996 6755700
2024-04-26 119.849998 122.309998 119.379997 121.139999 121.139999 6042400
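
An equivalent way to pull the same quotes is yf.download, which skips the pandas-datareader shim. A minimal sketch (column details such as Adj Close can vary with the yfinance version):

import yfinance as yf
from datetime import datetime

# Same ticker and date range as above, fetched directly through yfinance
df_alt = yf.download('SPXL', start='2022-01-01', end=datetime.now())
df_alt.tail()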
In [ ]:
plt.figure(figsize=(16,6))
plt.title('Close Price History')
plt.plot(df['Close'])
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.show()
[Figure: SPXL close price history]

Resample the close price to business days and overlay a 60-day moving average (MA) and a 60-day exponentially weighted moving average (EWMA).

In [ ]:
plt.figure(figsize=(16,6))
# Resample to business-day frequency, forward-filling any gaps
close_px = df['Close'].resample('B').ffill()
# 60-day exponentially weighted moving average of the close
ewma60 = df['Close'].ewm(span=60).mean()
df['Close'].plot(label='Normal')
ewma60.plot(style='k-', label='EW MA')
# 60-day simple moving average on the resampled series
close_px.rolling(60).mean().plot(label='MA 60')
plt.legend()
Out[ ]:
<matplotlib.legend.Legend at 0x167f673d0>
[Figure: close price with 60-day EWMA and 60-day moving average]
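
Pandas derives the EWMA decay from span as alpha = 2 / (span + 1), so ewm(span=60) gives recent closes progressively more weight than a plain 60-day rolling mean. A minimal sketch of that equivalence, assuming the df from the cells above:

span = 60
alpha = 2 / (span + 1)               # how pandas maps span to the smoothing factor

ewma_span  = df['Close'].ewm(span=span).mean()
ewma_alpha = df['Close'].ewm(alpha=alpha).mean()

# Both calls produce the same series
print(np.allclose(ewma_span, ewma_alpha))   # True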

Convert the data to a NumPy array and set the training split

In [ ]:
# Create a new dataframe with only the 'Close' column
data = df.filter(['Close'])
# Convert the dataframe to a numpy array
dataset = data.values
# Get the number of rows to train the model on
training_data_len = int(np.ceil( len(dataset) * .95 ))

training_data_len
Out[ ]:
553

Scale the data to the range [0, 1]

In [ ]:
# Scale the data
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(dataset)

scaled_data[:10]
Out[ ]:
array([[1.        ],
       [0.99817559],
       [0.90727624],
       [0.90330549],
       [0.8864563 ],
       [0.88076839],
       [0.91972531],
       [0.93185242],
       [0.87046585],
       [0.87110973]])
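
MinMaxScaler with feature_range=(0, 1) simply maps each value to (x - min) / (max - min), using the column minimum and maximum. A quick manual check of that formula, assuming dataset and scaled_data from above:

# Manual min-max scaling should reproduce scaler.fit_transform exactly
manual = (dataset - dataset.min()) / (dataset.max() - dataset.min())
print(np.allclose(manual, scaled_data))   # True

One caveat: the scaler here is fit on the full series before the train/test split, so the scaling constants carry a little information about the test period; a stricter setup would fit MinMaxScaler on the training rows only and reuse it to transform the test rows.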

Create the training data set

In [ ]:
# Create the scaled training data set
train_data = scaled_data[0:int(training_data_len), :]
# Split the data into x_train and y_train data sets
x_train = []
y_train = []

for i in range(60, len(train_data)):
    x_train.append(train_data[i-60:i, 0])
    y_train.append(train_data[i, 0])
    # Print the first two (window, target) pairs to show the structure
    if i <= 61:
        print(x_train)
        print(y_train)
        print()
        
# Convert the x_train and y_train to numpy arrays 
x_train, y_train = np.array(x_train), np.array(y_train)

# Reshape the data
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
x_train.shape
[array([1.        , 0.99817559, 0.90727624, 0.90330549, 0.8864563 ,
       0.88076839, 0.91972531, 0.93185242, 0.87046585, 0.87110973,
       0.79544969, 0.75252202, 0.70916506, 0.63329041, 0.64960297,
       0.60130934, 0.59680188, 0.57716253, 0.66333981, 0.72955572,
       0.7568148 , 0.79405457, 0.69660874, 0.7156042 , 0.70261859,
       0.73481434, 0.78986912, 0.71957504, 0.64166129, 0.63125132,
       0.68652071, 0.69210135, 0.61086074, 0.58671393, 0.5518352 ,
       0.48883882, 0.53820562, 0.61010951, 0.60109466, 0.54979611,
       0.610968  , 0.5916506 , 0.56342561, 0.46458466, 0.4399013 ,
       0.52006873, 0.50719039, 0.46576518, 0.44322817, 0.50804896,
       0.58199186, 0.62330973, 0.66130072, 0.66215929, 0.70401379,
       0.65604211, 0.71077487, 0.72826786, 0.75638551, 0.80543037])]
[0.780961614064877]

[array([1.        , 0.99817559, 0.90727624, 0.90330549, 0.8864563 ,
       0.88076839, 0.91972531, 0.93185242, 0.87046585, 0.87110973,
       0.79544969, 0.75252202, 0.70916506, 0.63329041, 0.64960297,
       0.60130934, 0.59680188, 0.57716253, 0.66333981, 0.72955572,
       0.7568148 , 0.79405457, 0.69660874, 0.7156042 , 0.70261859,
       0.73481434, 0.78986912, 0.71957504, 0.64166129, 0.63125132,
       0.68652071, 0.69210135, 0.61086074, 0.58671393, 0.5518352 ,
       0.48883882, 0.53820562, 0.61010951, 0.60109466, 0.54979611,
       0.610968  , 0.5916506 , 0.56342561, 0.46458466, 0.4399013 ,
       0.52006873, 0.50719039, 0.46576518, 0.44322817, 0.50804896,
       0.58199186, 0.62330973, 0.66130072, 0.66215929, 0.70401379,
       0.65604211, 0.71077487, 0.72826786, 0.75638551, 0.80543037]), array([0.99817559, 0.90727624, 0.90330549, 0.8864563 , 0.88076839,
       0.91972531, 0.93185242, 0.87046585, 0.87110973, 0.79544969,
       0.75252202, 0.70916506, 0.63329041, 0.64960297, 0.60130934,
       0.59680188, 0.57716253, 0.66333981, 0.72955572, 0.7568148 ,
       0.79405457, 0.69660874, 0.7156042 , 0.70261859, 0.73481434,
       0.78986912, 0.71957504, 0.64166129, 0.63125132, 0.68652071,
       0.69210135, 0.61086074, 0.58671393, 0.5518352 , 0.48883882,
       0.53820562, 0.61010951, 0.60109466, 0.54979611, 0.610968  ,
       0.5916506 , 0.56342561, 0.46458466, 0.4399013 , 0.52006873,
       0.50719039, 0.46576518, 0.44322817, 0.50804896, 0.58199186,
       0.62330973, 0.66130072, 0.66215929, 0.70401379, 0.65604211,
       0.71077487, 0.72826786, 0.75638551, 0.80543037, 0.78096161])]
[0.780961614064877, 0.7193604326959115]

Out[ ]:
(493, 60, 1)
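
The loop above builds the overlapping 60-step windows one at a time; the same (493, 60, 1) array can be built in a single call with NumPy's sliding_window_view (NumPy 1.20+), which makes the window/target alignment easy to verify. A sketch assuming train_data, x_train, and y_train from the cell above:

from numpy.lib.stride_tricks import sliding_window_view

series = train_data[:, 0]                      # 1-D scaled training series
windows = sliding_window_view(series, 60)      # every 60-step window
x_alt = windows[:-1][..., np.newaxis]          # drop the final window -> (493, 60, 1)
y_alt = series[60:]                            # target = the value right after each window

print(np.array_equal(x_alt, x_train), np.array_equal(y_alt, y_train))   # True True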
In [ ]:
from keras.models import Sequential
from keras.layers import Dense, LSTM

# Build the LSTM model
model = Sequential()
model.add(LSTM(128, return_sequences=True, input_shape= (x_train.shape[1], 1)))
model.add(LSTM(64, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(x_train, y_train, batch_size=1, epochs=1)
2024-04-29 18:45:32.085199: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
/Users/anaconda3/lib/python3.11/site-packages/keras/src/layers/rnn/rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
493/493 ━━━━━━━━━━━━━━━━━━━━ 10s 16ms/step - loss: 0.0082
Out[ ]:
<keras.src.callbacks.history.History at 0x166ff9050>
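
One epoch with batch_size=1 keeps the demo fast, but in practice you would usually train longer with a larger batch size and stop on a held-out slice. A hedged sketch of that variation; the epoch count, batch size, and patience below are illustrative choices, not settings from the original run:

from keras.callbacks import EarlyStopping

# Stop once the validation loss stops improving, keeping the best weights
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(
    x_train, y_train,
    batch_size=32,
    epochs=50,
    validation_split=0.1,    # last 10% of the training windows used for validation
    callbacks=[early_stop],
)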
In [ ]:
# Create the testing data set
# Create a new array containing the scaled values from index training_data_len - 60 to the end
test_data = scaled_data[training_data_len - 60: , :]
# Create the data sets x_test and y_test
x_test = []
y_test = dataset[training_data_len:, :]
for i in range(60, len(test_data)):
    x_test.append(test_data[i-60:i, 0])
    
# Convert the data to a numpy array
x_test = np.array(x_test)

# Reshape the data
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1 ))

# Get the model's predicted price values
predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)

# Get the root mean squared error (RMSE)
rmse = np.sqrt(np.mean(((predictions - y_test) ** 2)))
rmse
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 56ms/step
Out[ ]:
4.171123001238474
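
The RMSE is in the same units as the close price (about $4.17 here). Reporting the mean absolute error and mean absolute percentage error alongside it can make the size of the errors easier to read; a small sketch assuming predictions and y_test from the cell above:

# Complementary error metrics over the validation window
mae  = np.mean(np.abs(predictions - y_test))              # average error in dollars
mape = np.mean(np.abs((predictions - y_test) / y_test))   # average error as a fraction of price
print(f'MAE:  {mae:.2f} USD')
print(f'MAPE: {mape:.2%}')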
In [ ]:
# Plot the data
train = data[:training_data_len]
valid = data[training_data_len:].copy()  # copy so the new column can be added without a SettingWithCopyWarning
valid['Predictions'] = predictions
# Visualize the data
plt.figure(figsize=(16,6))
plt.title('Model')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.plot(train['Close'])
plt.plot(valid[['Close', 'Predictions']])
plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')
plt.show()
[Figure: training close prices alongside validation close prices and model predictions]
In [ ]:
valid.tail(10)
Out[ ]:
Close Predictions
Date
2024-04-15 119.260002 124.863167
2024-04-16 118.519997 122.711395
2024-04-17 116.349998 120.118851
2024-04-18 115.529999 117.307701
2024-04-19 112.550003 114.741470
2024-04-22 115.449997 112.224693
2024-04-23 119.510002 110.807648
2024-04-24 119.349998 111.123055
2024-04-25 117.839996 112.439835
2024-04-26 121.139999 113.900604

One-step forecast from the last 5 days

In [ ]:
# Take the last 5 scaled closes as the input window
# (note: the model was trained on 60-step windows, so a 60-step input
#  would match the training setup more closely -- see the sketch below)
last_input_data = scaled_data[-5:]
last_input_data = np.reshape(last_input_data, (1, 5, 1))
forecast = model.predict(last_input_data)

# You'll need to inverse scale the forecasted values if you used MinMaxScaler
forecast = scaler.inverse_transform(forecast)
forecast
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 58ms/step
Out[ ]:
array([[119.75069]], dtype=float32)

Fed the last five days of scaled closes, the model predicts a next value of roughly $119.75 for SPXL. Note that this is a single one-step-ahead estimate rather than a five-day path, and the 5-step input is shorter than the 60-step windows the model was trained on; see the sketch below for a window that matches training.
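
Because the network was trained on 60-step windows, a forecast that matches the training setup would feed the last 60 scaled closes instead of the last 5. A sketch of that variant, assuming model, scaled_data, and scaler from above:

# Use the same window length the model saw during training (60 steps)
last_window = scaled_data[-60:].reshape(1, 60, 1)
next_scaled = model.predict(last_window)

# Map the prediction back to price space
next_price = scaler.inverse_transform(next_scaled)
print(next_price)   # one-step-ahead (next trading day) close estimate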

Plotting the forecast

In [ ]:
plt.figure(figsize=(12,6))
data_30d = data.iloc[-30:]

# Plot the original time series data
plt.plot(data_30d.index, data_30d['Close'], label='Original Data')

# Forecast values for the next 7 days
# (calendar-day frequency; freq='B' would limit the axis to trading days)
next_time_points = pd.date_range(start=data.index[-1], periods=8, freq='D')[1:]
forecast = []  # Store forecasted values
# Seed the recursive forecast with the last 5 scaled closes
# (again shorter than the 60-step windows used in training)
last_input_data = scaled_data[-5:].reshape((1, 5, 1))
for _ in range(7):
    # Forecast next value
    next_value = model.predict(last_input_data)
    # Store forecasted value
    forecast.append(next_value[0, 0])
    # Update last input data for the next iteration
    last_input_data = np.append(last_input_data[:, 1:, :], next_value.reshape(1, 1, 1), axis=1)

# Inverse scale the forecasted values
forecast = scaler.inverse_transform(np.array(forecast).reshape(-1, 1))

# Plot the forecasted values
plt.plot(next_time_points, forecast, label='Forecast', marker='o')

plt.xlabel('Date')
plt.ylabel('Close')
plt.xticks(rotation=45, fontsize=12)
plt.title('Forecast S&P500 for Next 7 Days')
plt.legend()
plt.show()
[Figure: last 30 days of SPXL close prices with the 7-day recursive forecast]