# chi-squared-goodness-fit.py

import pandas as pd
import numpy as np
import plotly.graph_objects as go
from scipy.stats import chisquare

# Load the Superstore sales dataset.
# NOTE: the path is machine-specific; point it at a local copy of the CSV.
file_path = 'C:\\Users\\loydt\\Downloads\\Superstore Sales Dataset.csv'
data = pd.read_csv(file_path)

# Observed frequencies: number of rows per customer segment.
# value_counts() skips missing values and orders segments by count.
observed_counts = data['Segment'].value_counts()

# Null hypothesis: rows are spread evenly across the segments, so each
# segment is expected to hold the same share of the total.
n_segments = len(observed_counts)
total_counts = observed_counts.sum()
expected_counts = [total_counts / n_segments] * n_segments

# Chi-squared goodness-of-fit test of observed vs. expected frequencies
chi2_stat, p_value = chisquare(observed_counts, expected_counts)

# Report the test outcome so the statistic and p-value are visible to the
# person running the script.
print(f'Chi-squared statistic: {chi2_stat:.4f}')
print(f'p-value: {p_value:.4g}')

# Grouped bar chart comparing observed and expected segment frequencies.
segment_labels = observed_counts.index

# One entry per bar series, in drawing order: (legend name, heights, colour)
bar_series = (
    ('Observed', observed_counts.values, 'darkorange'),
    ('Expected', expected_counts, 'darkslateblue'),
)

fig = go.Figure()
for series_name, heights, colour in bar_series:
    fig.add_trace(
        go.Bar(x=segment_labels, y=heights, name=series_name, marker_color=colour)
    )

# Dark theme: grey plotting area on a black page, with light text for contrast
layout_options = dict(
    title='Observed vs. Expected Frequencies of Customer Segments',
    xaxis_title='Customer Segment',
    yaxis_title='Frequency',
    plot_bgcolor='grey',
    paper_bgcolor='black',
    font=dict(color='ghostwhite'),
    barmode='group',  # place the two series side by side per segment
)
fig.update_layout(**layout_options)

# Render the figure
fig.show()