import pandas as pd
import numpy as np
import plotly.graph_objects as go
from scipy.stats import chisquare
# Load the Superstore sales dataset.
# NOTE(review): this path is machine-specific; adjust it when running elsewhere.
file_path = 'C:\\Users\\loydt\\Downloads\\Superstore Sales Dataset.csv'
data = pd.read_csv(file_path)

# Observed frequency of each customer segment. value_counts() skips missing
# values and orders the result by descending count.
observed_counts = data['Segment'].value_counts()

# Null hypothesis: records are spread evenly across the segments, so every
# segment is expected to hold the same share of the total.
n_segments = len(observed_counts)
if n_segments == 0:
    # Without this guard the division below fails with an opaque
    # ZeroDivisionError when the column is empty or entirely missing.
    raise ValueError("No customer segments found in the 'Segment' column.")
total_counts = observed_counts.sum()
expected_counts = [total_counts / n_segments] * n_segments

# Chi-squared goodness-of-fit test of the observed counts against the uniform
# expectation (degrees of freedom = n_segments - 1).
chi2_stat, p_value = chisquare(observed_counts, expected_counts)

# Report the outcome so the test result is not silently discarded.
print(f'Chi-squared statistic: {chi2_stat:.4f}')
print(f'Degrees of freedom: {n_segments - 1}')
print(f'p-value: {p_value:.4g}')
# Build a grouped bar chart comparing observed and expected segment counts.
# Each spec is (legend name, bar heights, bar colour); both traces share the
# same segment labels on the x axis.
segment_labels = observed_counts.index
bar_specs = (
    ('Observed', observed_counts.values, 'darkorange'),
    ('Expected', expected_counts, 'darkslateblue'),
)
fig = go.Figure(
    data=[
        go.Bar(x=segment_labels, y=heights, name=label, marker=dict(color=colour))
        for label, heights, colour in bar_specs
    ]
)

# Dark theme: grey plotting area on a black canvas with near-white text.
# barmode='group' places the two bars for each segment side by side.
layout_options = dict(
    title='Observed vs. Expected Frequencies of Customer Segments',
    xaxis_title='Customer Segment',
    yaxis_title='Frequency',
    plot_bgcolor='grey',
    paper_bgcolor='black',
    font=dict(color='ghostwhite'),
    barmode='group',
)
fig.update_layout(**layout_options)

# Render the figure.
fig.show()