Python-Project-1

### Microsoft Corporation's (MSFT)'s historical price data of past 1 year (Aug 28,2023-Aug 28,2024) data analysis using python packages

## Codes using python packages : Numpy, Pandas, Matplotlib

#Importing the packages

import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

##Numpy usage
#Making nd (1D,2D,3D) arrays using numpy and reshaping those
#1d
a = np.arange(0, 27).reshape(3, 3, 3)

print(a)

[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]]

 [[ 9 10 11]
  [12 13 14]
  [15 16 17]]

 [[18 19 20]
  [21 22 23]
  [24 25 26]]]

#Let's make a look a bit flatten
a_1d = a.flatten()

print(a_1d)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26]

#2D
a_2d = a.reshape(9, 3)

print(a_2d)

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]
 [12 13 14]
 [15 16 17]
 [18 19 20]
 [21 22 23]
 [24 25 26]]

#3D
a_3d = a.reshape(3, 3,3)

print(a_3d)

[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]]

 [[ 9 10 11]
  [12 13 14]
  [15 16 17]]

 [[18 19 20]
  [21 22 23]
  [24 25 26]]]

#Indexing and slicing of a_2d

#1.  Extract the first 2 rows and the last 2 columns
print(a_2d[:2, -2:])

[[1 2]
 [4 5]]

#2.  Extract a 2x2 submatrix starting from the 4th row and 2nd column
submatrix = a_2d[3:5, 1:3]
print(submatrix)

[[10 11]
 [13 14]]

#3. Extract elements greater than 15
print(a_2d[a_2d > 15])

[16 17 18 19 20 21 22 23 24 25 26]

## Transpose the 3D array (a_3d)

a_transpose = a_3d.transpose()
print(a_transpose)

[[[ 0  9 18]
  [ 3 12 21]
  [ 6 15 24]]

 [[ 1 10 19]
  [ 4 13 22]
  [ 7 16 25]]

 [[ 2 11 20]
  [ 5 14 23]
  [ 8 17 26]]]

##Pandas and Matplotlib's usage on Microsoft Corporation (MSFT)'s historical price data of past 1 year (Aug 28,2023-Aug 28,2024)


# Read the CSV data into a DataFrame 
df = pd.read_csv(r"/Users/macbookpro2015/Downloads/MSFT.csv")
print(df)

           Date        Open        High         Low       Close   Adj Close  \
0    2023-08-28  325.660004  326.149994  321.720001  323.700012  321.288391   
1    2023-08-29  321.880005  328.980011  321.880005  328.410004  325.963287   
2    2023-08-30  328.670013  329.809998  326.450012  328.790009  326.340485   
3    2023-08-31  329.200012  330.910004  326.779999  327.760010  325.318176   
4    2023-09-01  331.309998  331.989990  326.779999  328.660004  326.211456   
..          ...         ...         ...         ...         ...         ...   
247  2024-08-21  424.079987  426.399994  421.720001  424.140015  424.140015   
248  2024-08-22  424.359985  426.790009  414.609985  415.549988  415.549988   
249  2024-08-23  416.980011  419.260010  412.089996  416.790009  416.790009   
250  2024-08-26  416.369995  417.279999  411.339996  413.489990  413.489990   
251  2024-08-27  412.859985  414.359985  410.250000  413.839996  413.839996   

       Volume  
0    14808500  
1    19284600  
2    15222100  
3    26411000  
4    14931200  
..        ...  
247  16067300  
248  19361900  
249  18493800  
250  13152800  
251  13473900  

[252 rows x 7 columns]

#Defining first few rows
df.head()

	Date	Open	High	Low	Close	Adj Close	Volume
0	2023-08-28	325.660004	326.149994	321.720001	323.700012	321.288391	14808500
1	2023-08-29	321.880005	328.980011	321.880005	328.410004	325.963287	19284600
2	2023-08-30	328.670013	329.809998	326.450012	328.790009	326.340485	15222100
3	2023-08-31	329.200012	330.910004	326.779999	327.760010	325.318176	26411000
4	2023-09-01	331.309998	331.989990	326.779999	328.660004	326.211456	14931200

# Basic analysis
# Some descriptive statistics
print(df.describe())

             Open        High         Low       Close   Adj Close  \
count  252.000000  252.000000  252.000000  252.000000  252.000000   
mean   393.527460  396.561589  390.121031  393.593770  391.970006   
std     39.418508   39.451499   39.107757   39.348420   39.915183   
min    310.989990  314.299988  309.450012  312.140015  309.814545   
25%    369.274994  373.095001  366.942497  370.517502  368.399093   
50%    405.970001  408.540008  402.720001  405.569992  403.365478   
75%    420.832505  424.632507  417.592506  421.415001  419.976647   
max    467.000000  468.350006  464.459991  467.559998  466.718781   

             Volume  
count  2.520000e+02  
mean   2.202851e+07  
std    8.068537e+06  
min    9.932800e+06  
25%    1.681705e+07  
50%    2.034405e+07  
75%    2.488688e+07  
max    7.847820e+07

# Time series analysis

df['Date'] = pd.to_datetime(df['Date'])

# Setting the 'Date' column as the index
df.set_index('Date', inplace=True)

# Resampling to daily frequency and calculating the mean
daily_df = df.resample('D').mean()

print(daily_df.head())

                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2023-08-28  325.660004  326.149994  321.720001  323.700012  321.288391   
2023-08-29  321.880005  328.980011  321.880005  328.410004  325.963287   
2023-08-30  328.670013  329.809998  326.450012  328.790009  326.340485   
2023-08-31  329.200012  330.910004  326.779999  327.760010  325.318176   
2023-09-01  331.309998  331.989990  326.779999  328.660004  326.211456   

                Volume  Daily Return  
Date                                  
2023-08-28  14808500.0           NaN  
2023-08-29  19284600.0      1.455048  
2023-08-30  15222100.0      0.115711  
2023-08-31  26411000.0     -0.313270  
2023-09-01  14931200.0      0.274589

# Calculating the daily return (percentage change)

df['Daily Return'] = df['Close'].pct_change() * 100
print(df['Daily Return'])

0           NaN
1      1.455048
2      0.115711
3     -0.313270
4      0.274589
         ...   
247   -0.155361
248   -2.025281
249    0.298405
250   -0.791770
251    0.084647
Name: Daily Return, Length: 252, dtype: float64

# Analyzing volatility (standard deviation of daily returns)

print(df['Daily Return'].std())

1.2502064239309325

# Finding out the highest and lowest closing prices

print(f"Highest Closing Price: ${df['Close'].max()}")
print(f"Lowest Closing Price: ${df['Close'].min()}")

Highest Closing Price: $467.559998
Lowest Closing Price: $312.140015

# Visualizing the closing price over time using chart (Here the green colour is used as Microsoft's logo colour)

import matplotlib.dates as mdates

plt.figure(figsize=(16, 8))
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
plt.gca().xaxis.set_major_locator(mdates.MonthLocator()) 
plt.plot(df['Date'], df['Close'], marker='o', color='green', linestyle='-', label='MSFT Closing Price')
plt.title('MSFT Closing Price Over Time')
plt.xlabel('Date')
plt.ylabel('Closing Price')
plt.grid(True)
plt.show()

png

#Histogram of Daily Returns in percentage change

df['Daily Return'] = df['Close'].pct_change()

plt.figure(figsize=(18, 9))
plt.hist(df['Daily Return'], bins=50, color='green', alpha=0.8)
plt.title('Distribution of Daily Returns')
plt.xlabel('Daily Return (%)')
plt.ylabel('Frequency')
plt.show()

png