import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm

# Load ED_LOS.csv into a DataFrame. The path is a single string, split
# across adjacent literals only to keep the line length readable.
df = pd.read_csv(
    "D:/Users/User/Documents/Application Harvard University/"
    "Harvard University 2025/MCSO Fall 2025/"
    "CSO 707 Quantitative Science and Clinical Research in "
    "Healthcare Service Operations/ED_LOS.csv"
)

# Quick inspection: column summary, then the first rows.
# NOTE: `df.head()` is only displayed when run as the last expression of a
# notebook cell; in a plain script wrap it in print() to see it.
df.info()
df.head()
# Distinct values in the "Network" column (displayed when run in a notebook).
df["Network"].unique()

## Perform Linear Regression
from sklearn.linear_model import LinearRegression

# 1. Columns used on the x and y axes of every plot below.
x = df["ALOS_Avg"]
y = df["EDOS_Avg"]

# Raw scatter of EDOS against ALOS, drawn as round markers only.
plt.plot(x, y, 'o', zorder=4, label="df")

# 2. Set the style for better visualization (optional)
sns.set_style("whitegrid")

# 3. Create the plot
# 'ci=95' adds a 95% confidence interval band around the regression line.
plt.figure(figsize=(8, 5))
sns.regplot(
    x=x,
    y=y,
    data=df,
    ci=95,
    scatter_kws={'color': 'darkblue'},
    line_kws={'color': 'red'},
)

# 4. Add titles and labels
plt.title('Regression Plot of ALOS vs EDOS')
plt.xlabel('ALOS')
plt.ylabel('EDOS')

# 5. Display the plot
plt.show()

# Same regression drawn through seaborn's figure-level interface.
sns.lmplot(data=df, x="ALOS_Avg", y="EDOS_Avg")

# Visualizing the relationship between ALOS and EDOS for each category of
# Network: one regression line per distinct "Network" value.
sns.lmplot(data=df, x='ALOS_Avg', y='EDOS_Avg', hue='Network')
plt.show()

# Number of distinct networks (displayed when run in a notebook).
df["Network"].nunique()
# Distribution of ALOS
# Plotting the distribution
plt.figure(figsize=(8, 5))
sns.histplot(df['ALOS_Avg'], kde=True)  # kde=True adds a Kernel Density Estimate line
plt.title('Distribution of ALOS')
plt.xlabel('Avg ALOS')
plt.ylabel('Frequency/Density')

ALOS_mean = df['ALOS_Avg'].mean()
ALOS_std = df['ALOS_Avg'].std()

# Optional: add vertical lines for the mean and +/- 1 standard deviation
plt.axvline(ALOS_mean, color='r', linestyle='--', label=f'Mean: {ALOS_mean:.2f}')
plt.axvline(ALOS_mean - ALOS_std, color='g', linestyle=':', label=f'-1 Std Dev: {ALOS_mean - ALOS_std:.2f}')
plt.axvline(ALOS_mean + ALOS_std, color='g', linestyle=':', label=f'+1 Std Dev: {ALOS_mean + ALOS_std:.2f}')
plt.legend()
# Render explicitly, as the regression plots do, so the figure also appears
# when this runs as a plain script rather than a notebook cell.
plt.show()

# Distribution of EDOS
# Plotting the distribution
plt.figure(figsize=(8, 5))
sns.histplot(df['EDOS_Avg'], kde=True)  # kde=True adds a Kernel Density Estimate line
plt.title('Distribution of EDOS')
plt.xlabel('Avg EDOS')
plt.ylabel('Frequency/Density')

EDOS_mean = df['EDOS_Avg'].mean()
EDOS_std = df['EDOS_Avg'].std()

# Optional: add vertical lines for the mean, -1 standard deviation and
# +2 standard deviations. The band is asymmetric, unlike the ALOS plot.
# NOTE(review): confirm the +2 upper line is intended rather than +1.
plt.axvline(EDOS_mean, color='r', linestyle='--', label=f'Mean: {EDOS_mean:.2f}')
plt.axvline(EDOS_mean - EDOS_std, color='g', linestyle=':', label=f'-1 Std Dev: {EDOS_mean - EDOS_std:.2f}')
plt.axvline(EDOS_mean + 2*EDOS_std, color='g', linestyle=':', label=f'+2 Std Dev: {EDOS_mean + 2*EDOS_std:.2f}')
plt.legend()
plt.show()

# Making waterfall charts
# The `waterfallcharts` package must be installed beforehand. The original
# cell used the IPython-only shell escape below, which is a syntax error in
# a plain Python file, so it is kept here as a comment:
#   !pip install waterfallcharts
import waterfall_chart as wc
import matplotlib.pyplot as plt
from waterfall_chart import plot as waterfall
from waterfallcharts import quick_charts as qc
# Two-column frame (hospital name and its average ALOS) used as the input
# of the waterfall chart.
data = {
    'Hospital': df['Hospital'],
    'ALOS_Avg': df['ALOS_Avg'],
}
dfw = pd.DataFrame(data)

# Print the column summary of the new frame.
dfw.info()
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

# --- Waterfall chart: average ALOS per hospital ---------------------------
# One bar per hospital, taken straight from `df`.
data = {
    'Hospital': df['Hospital'],
    'ALOS_Avg': df['ALOS_Avg'],
}
dfw = pd.DataFrame(data)

# Every bar is a "relative" step stacked on the previous one; no "total"
# row is added. The measure list is sized to the data instead of being
# hard-coded to two entries, so it stays correct for any number of hospitals.
fig = go.Figure(go.Waterfall(
    name="ALOS",
    orientation="v",
    measure=["relative"] * len(dfw),
    x=dfw['Hospital'],
    y=dfw['ALOS_Avg'],
    connector={"line": {"color": "rgb(10, 20, 30)"}},
    increasing={"marker": {"color": "Teal"}},
))
fig.update_layout(
    title="Siloam Hospitals: Avg ALOS for 2024",
    xaxis_title="Hospital",
    yaxis_title="Average Length of Stay",
    showlegend=True,
)
fig.show()

# Save the figure as an HTML file.
# ':' is not a valid filename character on Windows (this script reads its
# data from a D:/ path), so the file name uses '-' where the title has ':'.
fig.write_html("Siloam Hospitals - Avg ALOS for 2024.html")

# --- Waterfall chart: average EDOS per hospital ---------------------------
data = {
    'Hospital': df['Hospital'],
    'EDOS_Avg': df['EDOS_Avg'],
}
dfw = pd.DataFrame(data)

fig = go.Figure(go.Waterfall(
    name="EDOS",
    orientation="v",
    measure=["relative"] * len(dfw),
    x=dfw['Hospital'],
    y=dfw['EDOS_Avg'],
    connector={"line": {"color": "rgb(160, 160, 160)"}},
    increasing={"marker": {"color": "rgb(255, 102, 102)"}},
))
fig.update_layout(
    title="Siloam Hospitals: Avg EDOS for 2024",
    xaxis_title="Hospital",
    yaxis_title="ED Overcrowding Score",
    showlegend=True,
)
fig.show()