S&P500 Stocks Analysis Using SLTM Model¶
Getting stock data of S&P500 from Yahoo Finance to forcast the change of values in near future.
In [ ]:
rng = pd.date_range('2024-03-01', periods=50, freq='D')
rng
Out[ ]:
DatetimeIndex(['2024-03-01', '2024-03-02', '2024-03-03', '2024-03-04', '2024-03-05', '2024-03-06', '2024-03-07', '2024-03-08', '2024-03-09', '2024-03-10', '2024-03-11', '2024-03-12', '2024-03-13', '2024-03-14', '2024-03-15', '2024-03-16', '2024-03-17', '2024-03-18', '2024-03-19', '2024-03-20', '2024-03-21', '2024-03-22', '2024-03-23', '2024-03-24', '2024-03-25', '2024-03-26', '2024-03-27', '2024-03-28', '2024-03-29', '2024-03-30', '2024-03-31', '2024-04-01', '2024-04-02', '2024-04-03', '2024-04-04', '2024-04-05', '2024-04-06', '2024-04-07', '2024-04-08', '2024-04-09', '2024-04-10', '2024-04-11', '2024-04-12', '2024-04-13', '2024-04-14', '2024-04-15', '2024-04-16', '2024-04-17', '2024-04-18', '2024-04-19'], dtype='datetime64[ns]', freq='D')
Getting data from Yahoo Finance
In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
plt.style.use("fivethirtyeight")
%matplotlib inline
# For reading stock data from yahoo
from pandas_datareader.data import DataReader
import yfinance as yf
from pandas_datareader import data as pdr
yf.pdr_override()
# For time stamps
from datetime import datetime
In [ ]:
# Get the stock quote
df = pdr.get_data_yahoo('SPXL', start='2022-01-01', end=datetime.now())
# Show teh data
df.tail(10)
[*********************100%%**********************] 1 of 1 completed
Out[ ]:
Open | High | Low | Close | Adj Close | Volume | |
---|---|---|---|---|---|---|
Date | ||||||
2024-04-15 | 127.010002 | 127.099998 | 118.599998 | 119.260002 | 119.260002 | 8718500 |
2024-04-16 | 119.529999 | 120.620003 | 117.589996 | 118.519997 | 118.519997 | 5997700 |
2024-04-17 | 120.260002 | 120.360001 | 115.360001 | 116.349998 | 116.349998 | 6666500 |
2024-04-18 | 117.220001 | 118.720001 | 114.839996 | 115.529999 | 115.529999 | 5587900 |
2024-04-19 | 115.379997 | 116.099998 | 111.540001 | 112.550003 | 112.550003 | 11614600 |
2024-04-22 | 114.209999 | 117.309998 | 112.570000 | 115.449997 | 115.449997 | 6315100 |
2024-04-23 | 116.879997 | 119.870003 | 116.500000 | 119.510002 | 119.510002 | 5019700 |
2024-04-24 | 120.150002 | 120.720001 | 117.739998 | 119.349998 | 119.349998 | 10395500 |
2024-04-25 | 114.790001 | 118.419998 | 113.599998 | 117.839996 | 117.839996 | 6755700 |
2024-04-26 | 119.849998 | 122.309998 | 119.379997 | 121.139999 | 121.139999 | 6042400 |
In [ ]:
plt.figure(figsize=(16,6))
plt.title('Close Price History')
plt.plot(df['Close'])
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.show()
Resampling date and set 60-day moving average (MA).
Moving average and exponential weighted function
In [ ]:
plt.figure(figsize=(16,6))
close_px = df['Close'].resample('B').ffill()
ewma60 = df['Close'].ewm(span=60).mean()
df['Close'].plot(label = 'Normal')
ewma60.plot(style='k-', label='EW MA')
close_px.rolling(60).mean().plot(label = 'MA 60')
plt.legend()
Out[ ]:
<matplotlib.legend.Legend at 0x167f673d0>
Convert data into array
In [ ]:
# Create a new dataframe with only the 'Close column
data = df.filter(['Close'])
# Convert the dataframe to a numpy array
dataset = data.values
# Get the number of rows to train the model on
training_data_len = int(np.ceil( len(dataset) * .95 ))
training_data_len
Out[ ]:
553
Scale data
In [ ]:
# Scale the data
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(dataset)
scaled_data[:10]
Out[ ]:
array([[1. ], [0.99817559], [0.90727624], [0.90330549], [0.8864563 ], [0.88076839], [0.91972531], [0.93185242], [0.87046585], [0.87110973]])
Create the training data set
In [ ]:
# Create the training data set
# Create the scaled training data set
train_data = scaled_data[0:int(training_data_len), :]
# Split the data into x_train and y_train data sets
x_train = []
y_train = []
for i in range(60, len(train_data)):
x_train.append(train_data[i-60:i, 0])
y_train.append(train_data[i, 0])
if i<= 61:
print(x_train)
print(y_train)
print()
# Convert the x_train and y_train to numpy arrays
x_train, y_train = np.array(x_train), np.array(y_train)
# Reshape the data
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
x_train.shape
[array([1. , 0.99817559, 0.90727624, 0.90330549, 0.8864563 , 0.88076839, 0.91972531, 0.93185242, 0.87046585, 0.87110973, 0.79544969, 0.75252202, 0.70916506, 0.63329041, 0.64960297, 0.60130934, 0.59680188, 0.57716253, 0.66333981, 0.72955572, 0.7568148 , 0.79405457, 0.69660874, 0.7156042 , 0.70261859, 0.73481434, 0.78986912, 0.71957504, 0.64166129, 0.63125132, 0.68652071, 0.69210135, 0.61086074, 0.58671393, 0.5518352 , 0.48883882, 0.53820562, 0.61010951, 0.60109466, 0.54979611, 0.610968 , 0.5916506 , 0.56342561, 0.46458466, 0.4399013 , 0.52006873, 0.50719039, 0.46576518, 0.44322817, 0.50804896, 0.58199186, 0.62330973, 0.66130072, 0.66215929, 0.70401379, 0.65604211, 0.71077487, 0.72826786, 0.75638551, 0.80543037])] [0.780961614064877] [array([1. , 0.99817559, 0.90727624, 0.90330549, 0.8864563 , 0.88076839, 0.91972531, 0.93185242, 0.87046585, 0.87110973, 0.79544969, 0.75252202, 0.70916506, 0.63329041, 0.64960297, 0.60130934, 0.59680188, 0.57716253, 0.66333981, 0.72955572, 0.7568148 , 0.79405457, 0.69660874, 0.7156042 , 0.70261859, 0.73481434, 0.78986912, 0.71957504, 0.64166129, 0.63125132, 0.68652071, 0.69210135, 0.61086074, 0.58671393, 0.5518352 , 0.48883882, 0.53820562, 0.61010951, 0.60109466, 0.54979611, 0.610968 , 0.5916506 , 0.56342561, 0.46458466, 0.4399013 , 0.52006873, 0.50719039, 0.46576518, 0.44322817, 0.50804896, 0.58199186, 0.62330973, 0.66130072, 0.66215929, 0.70401379, 0.65604211, 0.71077487, 0.72826786, 0.75638551, 0.80543037]), array([0.99817559, 0.90727624, 0.90330549, 0.8864563 , 0.88076839, 0.91972531, 0.93185242, 0.87046585, 0.87110973, 0.79544969, 0.75252202, 0.70916506, 0.63329041, 0.64960297, 0.60130934, 0.59680188, 0.57716253, 0.66333981, 0.72955572, 0.7568148 , 0.79405457, 0.69660874, 0.7156042 , 0.70261859, 0.73481434, 0.78986912, 0.71957504, 0.64166129, 0.63125132, 0.68652071, 0.69210135, 0.61086074, 0.58671393, 0.5518352 , 0.48883882, 0.53820562, 0.61010951, 0.60109466, 0.54979611, 0.610968 , 0.5916506 , 0.56342561, 0.46458466, 0.4399013 , 0.52006873, 0.50719039, 0.46576518, 0.44322817, 0.50804896, 0.58199186, 0.62330973, 0.66130072, 0.66215929, 0.70401379, 0.65604211, 0.71077487, 0.72826786, 0.75638551, 0.80543037, 0.78096161])] [0.780961614064877, 0.7193604326959115]
Out[ ]:
(493, 60, 1)
In [ ]:
from keras.models import Sequential
from keras.layers import Dense, LSTM
# Build the LSTM model
model = Sequential()
model.add(LSTM(128, return_sequences=True, input_shape= (x_train.shape[1], 1)))
model.add(LSTM(64, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))
# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')
# Train the model
model.fit(x_train, y_train, batch_size=1, epochs=1)
2024-04-29 18:45:32.085199: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. /Users/anaconda3/lib/python3.11/site-packages/keras/src/layers/rnn/rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
493/493 ━━━━━━━━━━━━━━━━━━━━ 10s 16ms/step - loss: 0.0082
Out[ ]:
<keras.src.callbacks.history.History at 0x166ff9050>
In [ ]:
# Create the testing data set
# Create a new array containing scaled values from index 1543 to 2002
test_data = scaled_data[training_data_len - 60: , :]
# Create the data sets x_test and y_test
x_test = []
y_test = dataset[training_data_len:, :]
for i in range(60, len(test_data)):
x_test.append(test_data[i-60:i, 0])
# Convert the data to a numpy array
x_test = np.array(x_test)
# Reshape the data
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1 ))
# Get the models predicted price values
predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)
# Get the root mean squared error (RMSE)
rmse = np.sqrt(np.mean(((predictions - y_test) ** 2)))
rmse
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 56ms/step
Out[ ]:
4.171123001238474
In [ ]:
# Plot the data
train = data[:training_data_len]
valid = data[training_data_len:]
valid['Predictions'] = predictions
# Visualize the data
plt.figure(figsize=(16,6))
plt.title('Model')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.plot(train['Close'])
plt.plot(valid[['Close', 'Predictions']])
plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')
plt.show()
/var/folders/cx/3wbhcqyd3cld6gvk_xjkvr_40000gn/T/ipykernel_24004/2388977846.py:4: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy valid['Predictions'] = predictions
In [ ]:
valid.tail(10)
Out[ ]:
Close | Predictions | |
---|---|---|
Date | ||
2024-04-15 | 119.260002 | 124.863167 |
2024-04-16 | 118.519997 | 122.711395 |
2024-04-17 | 116.349998 | 120.118851 |
2024-04-18 | 115.529999 | 117.307701 |
2024-04-19 | 112.550003 | 114.741470 |
2024-04-22 | 115.449997 | 112.224693 |
2024-04-23 | 119.510002 | 110.807648 |
2024-04-24 | 119.349998 | 111.123055 |
2024-04-25 | 117.839996 | 112.439835 |
2024-04-26 | 121.139999 | 113.900604 |
Forecast for next 5 days
In [ ]:
last_input_data = scaled_data[-5:]
last_input_data = np.reshape(last_input_data, (1, 5, 1))
forecast = model.predict(last_input_data)
# You'll need to inverse scale the forecasted values if you used MinMaxScaler
forecast = scaler.inverse_transform(forecast)
forecast
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 58ms/step
Out[ ]:
array([[119.75069]], dtype=float32)
Value of SPXL for next 5 day will be 119.75069 $
Ploting forecasts
In [ ]:
plt.figure(figsize=(12,6))
data_30d = data.iloc[-30:]
# Plot the original time series data
plt.plot(data_30d.index, data_30d['Close'], label='Original Data')
# Forecast values for the next 7 days
next_time_points = pd.date_range(start=data.index[-1], periods=8, freq='D')[1:]
forecast = [] # Store forecasted values
last_input_data = scaled_data[-5:].reshape((1, 5, 1)) # Last input data
for _ in range(7):
# Forecast next value
next_value = model.predict(last_input_data)
# Store forecasted value
forecast.append(next_value[0, 0])
# Update last input data for the next iteration
last_input_data = np.append(last_input_data[:, 1:, :], next_value.reshape(1, 1, 1), axis=1)
# Inverse scale the forecasted values
forecast = scaler.inverse_transform(np.array(forecast).reshape(-1, 1))
# Plot the forecasted values
plt.plot(next_time_points, forecast, label='Forecast', marker='o')
plt.xlabel('Date')
plt.ylabel('Close')
plt.xticks(rotation=45, fontsize=12)
plt.title('Forecast S&P500 for Next 7 Days')
plt.legend()
plt.show()
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 24ms/step 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 23ms/step 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 22ms/step 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 22ms/step 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step