import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import NullFormatter
import plotly.express as px
import plotly.graph_objects as go
import arabic_reshaper
from bidi.algorithm import get_display
from matplotlib.ticker import MultipleLocator
import seaborn as sn
from statsmodels.graphics.tsaplots import month_plot
from statsmodels.tsa.seasonal import seasonal_decompose
data_g=pd.read_excel(r"C:\Users\Desktop\me_\Gold.xlsx",sheet_name="gold")
data_o=pd.read_excel(r"C:\Users\Desktop\me_\Gold.xlsx",sheet_name="oil")
data_i=pd.read_excel(r"C:\Users\Desktop\me_\Gold.xlsx",sheet_name="USA_index")
data_g.columns
Index(['Date', 'High (kg)', 'Low (kg)', 'Close (kg)', 'High (troy oz)',
       'Low (troy oz)', 'Close (troy oz)', 'Unnamed: 7'],
      dtype='object')
data_g.head()
| Date | High (kg) | Low (kg) | Close (kg) | High (troy oz) | Low (troy oz) | Close (troy oz) | Unnamed: 7 | |
|---|---|---|---|---|---|---|---|---|
| 0 | 2025-09-09 | 126822.48 | 116212.39 | 126516.64 | 3944.62 | 3614.61 | 3935.11 | NaN | 
| 1 | 2025-08-10 | 117227.97 | 106471.54 | 116870.29 | 3646.20 | 3311.64 | 3635.07 | -0.076247 | 
| 2 | 2025-07-11 | 110566.42 | 105081.58 | 109272.35 | 3439.00 | 3268.40 | 3398.75 | -0.065011 | 
| 3 | 2025-06-11 | 110956.49 | 104455.52 | 106868.28 | 3451.13 | 3248.93 | 3323.98 | -0.021999 | 
| 4 | 2025-05-12 | 109417.27 | 100364.74 | 106992.94 | 3403.26 | 3121.69 | 3327.85 | 0.001164 | 
def calculate_log_returns_and_volatility(df):
    # Calculate daily log returns: log(P_t / P_{t-1})
    df['Daily_Log_Return'] = np.log(df['Close (troy oz)'] / df['Close (troy oz)'].shift(1))
    
    # Calculate 30-day rolling volatility (annualized)
    df['Daily_Volatility'] = df['Daily_Log_Return'].rolling(window=30).std() * np.sqrt(252)
    
    # Resample to monthly, taking the last closing price of each month
    monthly_df = df[['Close (troy oz)']].resample('ME').last()
    monthly_df['Monthly_Log_Return'] = np.log(monthly_df['Close (troy oz)'] / monthly_df['Close (troy oz)'].shift(1))
    
    # Calculate 12-month rolling volatility (annualized)
    monthly_df['Monthly_Volatility'] = monthly_df['Monthly_Log_Return'].rolling(window=12).std() * np.sqrt(12)
    # Drop NaN values
    df = df.dropna()
    monthly_df = monthly_df.dropna()
    
    return df[['Close (troy oz)', 'Daily_Log_Return', 'Daily_Volatility']], monthly_df[['Close (troy oz)', 'Monthly_Log_Return', 'Monthly_Volatility']]
data_g['Daily_Log_Return'] = np.log(data_g['Close (troy oz)'] / data_g['Close (troy oz)'].shift(1))
# Calculate daily log returns: log(P_t / P_{t-1})
data_g['Daily_Volatility'] = data_g['Daily_Log_Return'].rolling(window=30).std() * np.sqrt(252)
 # Calculate 30-day rolling volatility (annualized)
data_g.head()
| Date | High (kg) | Low (kg) | Close (kg) | High (troy oz) | Low (troy oz) | Close (troy oz) | Unnamed: 7 | Daily_Log_Return | Daily_Volatility | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2025-09-09 | 126822.48 | 116212.39 | 126516.64 | 3944.62 | 3614.61 | 3935.11 | NaN | NaN | NaN | 
| 1 | 2025-08-10 | 117227.97 | 106471.54 | 116870.29 | 3646.20 | 3311.64 | 3635.07 | -0.076247 | -0.079310 | NaN | 
| 2 | 2025-07-11 | 110566.42 | 105081.58 | 109272.35 | 3439.00 | 3268.40 | 3398.75 | -0.065011 | -0.067221 | NaN | 
| 3 | 2025-06-11 | 110956.49 | 104455.52 | 106868.28 | 3451.13 | 3248.93 | 3323.98 | -0.021999 | -0.022245 | NaN | 
| 4 | 2025-05-12 | 109417.27 | 100364.74 | 106992.94 | 3403.26 | 3121.69 | 3327.85 | 0.001164 | 0.001164 | NaN | 
# Convert the 'Date' column to datetime (if it's not already)
data_g['Date1'] = pd.to_datetime(data_g['Date'])
# Set the 'Date' column as the index
data_g.set_index('Date1', inplace=True)
# Calculate the mean by year and rename the 'Price' column to 'Mean'
df_1 = data_g.groupby(data_g.index.year)['Close (troy oz)'].mean().rename('Mean')
# Calculate the standard deviation by year and rename the 'Price' column to 'std'
df_std = data_g.groupby(data_g.index.year)['Close (troy oz)'].std().rename('std')
# Merge the mean and standard deviation DataFrames
df_1 = df_1.to_frame().merge(df_std.to_frame(), left_index=True, right_index=True)
# Calculate the coefficient of variation percentage
df_1['Cov_pct'] = ((df_1['std'] / df_1['Mean']) * 100).round(2)
# Display the first few rows
df_1.head()
| Mean | std | Cov_pct | |
|---|---|---|---|
| Date1 | |||
| 2020 | 1802.516667 | 139.883474 | 7.76 | 
| 2021 | 1791.037692 | 51.249610 | 2.86 | 
| 2022 | 1802.735000 | 99.041236 | 5.49 | 
| 2023 | 1957.726667 | 53.281227 | 2.72 | 
| 2024 | 2444.355000 | 210.947118 | 8.63 | 
x=data_g['Date']
y=data_g['Close (troy oz)']
# plotting line graph
fig = px.line(data_g, x = x, y =y)#, text=y)
# displaying data labels
fig.update_layout(
    yaxis=dict(range=[0, max(y) + 10]),  # Adjust max value as needed
    #title="Scatter Plot with Y-axis Starting at Zero",
    xaxis_title="Date",
    yaxis_title="Oun USD Dollar"
)
fig.update_layout(width=1000, height=700)  # Set figure size
fig.update_traces(texttemplate='%{text:.1s}', textposition='top center')
# displaying the graph
fig.show()
_ax = plt.subplots(figsize=(25, 8))  # Creates a figure and axes for the plot
sn.boxplot(x=data_g.index.year, y=data_g['Close (troy oz)'])  # Creates a boxplot using Seaborn
plt.title("Gold Prices Monthly Since 1950 and Onwards")  # Adds a title to the plot
plt.xlabel('Year')  # Sets the label for the x-axis
plt.ylabel('Price')  # Sets the label for the y-axis
plt.xticks(rotation=90)  # Rotates the x-axis labels by 90 degrees for better visibility
plt.grid(True)  # Enables grid lines on the plot
decomposition = seasonal_decompose(data_g['Close (troy oz)'], model='additive')
# Plot the decomposition results
decomposition.plot()
plt.show()
# Extract and analyze components
trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid
data_g.columns
Index(['Date', 'High (kg)', 'Low (kg)', 'Close (kg)', 'High (troy oz)',
       'Low (troy oz)', 'Close (troy oz)', 'Unnamed: 7', 'Daily_Log_Return',
       'Daily_Volatility'],
      dtype='object')
df=data_g[['Date','Close (troy oz)']]
month_plot(df.resample('M').mean())
plt.show()
#volatility analysis for gold prices
 # Plot daily log returns with rolling volatility
fig, ax1 = plt.subplots(figsize=(12, 6))
ax1.plot(data_g.index, data_g['Daily_Log_Return'], 'b-', label='Gold Daily Log Returns')
ax1.axhline(0, color='black', linestyle='--', linewidth=0.5)
ax1.set_xlabel('Date', fontsize=12)
ax1.set_ylabel('Log Return', fontsize=12, color='blue')
ax1.tick_params(axis='y', labelcolor='blue')
ax1.grid(True)
    
ax2 = ax1.twinx()
ax2.plot(data_g.index, data_g['Daily_Volatility'], 'r-', alpha=0.5, label='30-Day Rolling Volatility (Annualized)')
ax2.set_ylabel('Volatility', fontsize=12, color='red')
ax2.tick_params(axis='y', labelcolor='red')
  
plt.title('Daily Log Returns of Gold with 30-Day Rolling Volatility (Past 5 Years)', fontsize=14)
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines1 + lines2, labels1 + labels2, loc='upper left')
<matplotlib.legend.Legend at 0x1a90bd8f370>
data_g['YoY_Percent_Change'] = data_g['Close (troy oz)'].pct_change(periods=1) * 100
data_g['YoY_Percent_Change']
Date1
2025-09-09         NaN
2025-08-10   -7.624692
2025-07-11   -6.501113
2025-06-11   -2.199926
2025-05-12    0.116427
                ...   
2020-05-08   -5.630774
2020-04-08    1.715974
2020-03-09   -3.829641
2020-02-08    3.127483
2020-01-09   -7.659234
Name: YoY_Percent_Change, Length: 70, dtype: float64
x=data_g['Date']
y=data_g['YoY_Percent_Change']
# plotting line graph
fig = px.line(data_g, x = x, y =y)#, text=y)
# displaying data labels
fig.update_layout(
    yaxis=dict(range=[0, max(y) + 10]),  # Adjust max value as needed
    #title="Scatter Plot with Y-axis Starting at Zero",
    xaxis_title="Date",
    yaxis_title="% change"
)
fig.update_layout(width=900, height=700)  # Set figure size
fig.update_traces(texttemplate='%{text:.1s}', textposition='top center')
# displaying the graph
fig.show()
data_o.columns
Index(['observation_date', 'WTISPLC', 'Unnamed: 2', 'Unnamed: 3',
       'Unnamed: 4'],
      dtype='object')
x=data_o['observation_date']
y=data_o['WTISPLC']
# plotting line graph
fig = px.line(data_o, x = x, y =y)#, text=y)
# displaying data labels
fig.update_layout(
    yaxis=dict(range=[0, max(y) + 10]),  # Adjust max value as needed
    #title="Scatter Plot with Y-axis Starting at Zero",
    xaxis_title="Date",
    yaxis_title="Oil Prices USD"
)
fig.update_layout(width=900, height=700)  # Set figure size
fig.update_traces(texttemplate='%{text:.1s}', textposition='top center')
# displaying the graph
fig.show()
data_o['YoY_Percent_Change'] = data_o['WTISPLC'].pct_change(periods=1) * 100
data_o['YoY_Percent_Change']
0           NaN
1    -12.134910
2    -42.204195
3    -43.341321
4     72.567976
        ...    
63    -6.887456
64    -2.156122
65     9.650957
66     0.322723
67    -5.161573
Name: YoY_Percent_Change, Length: 68, dtype: float64
data_i.columns
Index(['observation_date', 'DTWEXBGS'], dtype='object')
x=data_i['observation_date']
y=data_i['DTWEXBGS']
# plotting line graph
fig = px.line(data_i, x = x, y =y)#, text=y)
# displaying data labels
fig.update_layout(
    yaxis=dict(range=[0, max(y) + 10]),  # Adjust max value as needed
    #title="Scatter Plot with Y-axis Starting at Zero",
    xaxis_title="Date",
    yaxis_title="USD index"
)
fig.update_layout(width=900, height=700)  # Set figure size
fig.update_traces(texttemplate='%{text:.1s}', textposition='top center')
# displaying the graph
fig.show()
data_i['YoY_Percent_Change'] = data_i['DTWEXBGS'].pct_change(periods=1) * 100
data_i['YoY_Percent_Change']
0            NaN
1            NaN
2       0.010089
3      -0.026960
4       0.166587
          ...   
1493    0.024539
1494   -0.157427
1495    0.454452
1496    0.299414
1497   -0.084323
Name: YoY_Percent_Change, Length: 1498, dtype: float64
data_all=pd.read_excel(r"C:\Users\Desktop\me_\Gold.xlsx",sheet_name="all")
data_all.corr()
| Ounce_price(USD) | Index | Oil | |
|---|---|---|---|
| Ounce_price(USD) | 1.000000 | 0.501699 | 0.405505 | 
| Index | 0.501699 | 1.000000 | 0.624673 | 
| Oil | 0.405505 | 0.624673 | 1.000000 | 
plt.figure(figsize=(10, 8))  # Set the size of the heat map
sn.heatmap(data_all.corr(), center=0, cmap="Blues",annot=True)
plt.show()