### Microsoft Corporation's (MSFT)'s historical price data of past 1 year (Aug 28,2023-Aug 28,2024) data analysis using python packages
## Codes using python packages : Numpy, Pandas, Matplotlib
#Importing the packages
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
##Numpy usage
#Making nd (1D,2D,3D) arrays using numpy and reshaping those
#1d
a = np.arange(0, 27).reshape(3, 3, 3)
print(a)
[[[ 0 1 2]
[ 3 4 5]
[ 6 7 8]]
[[ 9 10 11]
[12 13 14]
[15 16 17]]
[[18 19 20]
[21 22 23]
[24 25 26]]]
#Let's make a look a bit flatten
a_1d = a.flatten()
print(a_1d)
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26]
#2D
a_2d = a.reshape(9, 3)
print(a_2d)
[[ 0 1 2]
[ 3 4 5]
[ 6 7 8]
[ 9 10 11]
[12 13 14]
[15 16 17]
[18 19 20]
[21 22 23]
[24 25 26]]
#3D
a_3d = a.reshape(3, 3,3)
print(a_3d)
[[[ 0 1 2]
[ 3 4 5]
[ 6 7 8]]
[[ 9 10 11]
[12 13 14]
[15 16 17]]
[[18 19 20]
[21 22 23]
[24 25 26]]]
#Indexing and slicing of a_2d
#1. Extract the first 2 rows and the last 2 columns
print(a_2d[:2, -2:])
[[1 2]
[4 5]]
#2. Extract a 2x2 submatrix starting from the 4th row and 2nd column
submatrix = a_2d[3:5, 1:3]
print(submatrix)
[[10 11]
[13 14]]
#3. Extract elements greater than 15
print(a_2d[a_2d > 15])
[16 17 18 19 20 21 22 23 24 25 26]
## Transpose the 3D array (a_3d)
a_transpose = a_3d.transpose()
print(a_transpose)
[[[ 0 9 18]
[ 3 12 21]
[ 6 15 24]]
[[ 1 10 19]
[ 4 13 22]
[ 7 16 25]]
[[ 2 11 20]
[ 5 14 23]
[ 8 17 26]]]
##Pandas and Matplotlib's usage on Microsoft Corporation (MSFT)'s historical price data of past 1 year (Aug 28,2023-Aug 28,2024)
# Read the CSV data into a DataFrame
df = pd.read_csv(r"/Users/macbookpro2015/Downloads/MSFT.csv")
print(df)
Date Open High Low Close Adj Close \
0 2023-08-28 325.660004 326.149994 321.720001 323.700012 321.288391
1 2023-08-29 321.880005 328.980011 321.880005 328.410004 325.963287
2 2023-08-30 328.670013 329.809998 326.450012 328.790009 326.340485
3 2023-08-31 329.200012 330.910004 326.779999 327.760010 325.318176
4 2023-09-01 331.309998 331.989990 326.779999 328.660004 326.211456
.. ... ... ... ... ... ...
247 2024-08-21 424.079987 426.399994 421.720001 424.140015 424.140015
248 2024-08-22 424.359985 426.790009 414.609985 415.549988 415.549988
249 2024-08-23 416.980011 419.260010 412.089996 416.790009 416.790009
250 2024-08-26 416.369995 417.279999 411.339996 413.489990 413.489990
251 2024-08-27 412.859985 414.359985 410.250000 413.839996 413.839996
Volume
0 14808500
1 19284600
2 15222100
3 26411000
4 14931200
.. ...
247 16067300
248 19361900
249 18493800
250 13152800
251 13473900
[252 rows x 7 columns]
#Defining first few rows
df.head()
| 0 |
2023-08-28 |
325.660004 |
326.149994 |
321.720001 |
323.700012 |
321.288391 |
14808500 |
| 1 |
2023-08-29 |
321.880005 |
328.980011 |
321.880005 |
328.410004 |
325.963287 |
19284600 |
| 2 |
2023-08-30 |
328.670013 |
329.809998 |
326.450012 |
328.790009 |
326.340485 |
15222100 |
| 3 |
2023-08-31 |
329.200012 |
330.910004 |
326.779999 |
327.760010 |
325.318176 |
26411000 |
| 4 |
2023-09-01 |
331.309998 |
331.989990 |
326.779999 |
328.660004 |
326.211456 |
14931200 |
# Basic analysis
# Some descriptive statistics
print(df.describe())
Open High Low Close Adj Close \
count 252.000000 252.000000 252.000000 252.000000 252.000000
mean 393.527460 396.561589 390.121031 393.593770 391.970006
std 39.418508 39.451499 39.107757 39.348420 39.915183
min 310.989990 314.299988 309.450012 312.140015 309.814545
25% 369.274994 373.095001 366.942497 370.517502 368.399093
50% 405.970001 408.540008 402.720001 405.569992 403.365478
75% 420.832505 424.632507 417.592506 421.415001 419.976647
max 467.000000 468.350006 464.459991 467.559998 466.718781
Volume
count 2.520000e+02
mean 2.202851e+07
std 8.068537e+06
min 9.932800e+06
25% 1.681705e+07
50% 2.034405e+07
75% 2.488688e+07
max 7.847820e+07
# Time series analysis
df['Date'] = pd.to_datetime(df['Date'])
# Setting the 'Date' column as the index
df.set_index('Date', inplace=True)
# Resampling to daily frequency and calculating the mean
daily_df = df.resample('D').mean()
print(daily_df.head())
Open High Low Close Adj Close \
Date
2023-08-28 325.660004 326.149994 321.720001 323.700012 321.288391
2023-08-29 321.880005 328.980011 321.880005 328.410004 325.963287
2023-08-30 328.670013 329.809998 326.450012 328.790009 326.340485
2023-08-31 329.200012 330.910004 326.779999 327.760010 325.318176
2023-09-01 331.309998 331.989990 326.779999 328.660004 326.211456
Volume Daily Return
Date
2023-08-28 14808500.0 NaN
2023-08-29 19284600.0 1.455048
2023-08-30 15222100.0 0.115711
2023-08-31 26411000.0 -0.313270
2023-09-01 14931200.0 0.274589
# Calculating the daily return (percentage change)
df['Daily Return'] = df['Close'].pct_change() * 100
print(df['Daily Return'])
0 NaN
1 1.455048
2 0.115711
3 -0.313270
4 0.274589
...
247 -0.155361
248 -2.025281
249 0.298405
250 -0.791770
251 0.084647
Name: Daily Return, Length: 252, dtype: float64
# Analyzing volatility (standard deviation of daily returns)
print(df['Daily Return'].std())
1.2502064239309325
# Finding out the highest and lowest closing prices
print(f"Highest Closing Price: ${df['Close'].max()}")
print(f"Lowest Closing Price: ${df['Close'].min()}")
Highest Closing Price: $467.559998
Lowest Closing Price: $312.140015
# Visualizing the closing price over time using chart (Here the green colour is used as Microsoft's logo colour)
import matplotlib.dates as mdates
plt.figure(figsize=(16, 8))
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
plt.gca().xaxis.set_major_locator(mdates.MonthLocator())
plt.plot(df['Date'], df['Close'], marker='o', color='green', linestyle='-', label='MSFT Closing Price')
plt.title('MSFT Closing Price Over Time')
plt.xlabel('Date')
plt.ylabel('Closing Price')
plt.grid(True)
plt.show()
png
#Histogram of Daily Returns in percentage change
df['Daily Return'] = df['Close'].pct_change()
plt.figure(figsize=(18, 9))
plt.hist(df['Daily Return'], bins=50, color='green', alpha=0.8)
plt.title('Distribution of Daily Returns')
plt.xlabel('Daily Return (%)')
plt.ylabel('Frequency')
plt.show()
png