About the Assignment

This data set covers healthcare-associated infections reported by hospitals. This project specifically looks at the average infection score for each state to show which states had the highest scores.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Base directory for data files (root of the U: drive).
# NOTE(review): nothing in the visible code references this variable; the
# read_csv call below uses its own literal path. Confirm whether it is still needed.
file_path = "U:\\"

Bar Chart

This bar chart shows the average score for every state and territory in the data set (54 in total, including DC, GU, PR, and VI) and shows that New York had the highest score.

# Load the raw infection records.
# NOTE(review): the path is anchored at the filesystem root and does not use
# the `file_path` variable defined earlier in the file; confirm the location.
df = pd.read_csv('/Healthcare_Associated_Infections_-_Hospital.csv')

# Force the Score column to numeric; entries that cannot be parsed become NaN.
df['Score'] = pd.to_numeric(df['Score'], errors='coerce')

# Mean score per state, without states that have no numeric score at all,
# ordered alphabetically by state code.
state_scores = (
    df.groupby('State')['Score']
    .mean()
    .dropna()
    .sort_index()
)

# Leave 10% of headroom above the tallest bar.
y_upper = state_scores.values.max() * 1.1

plt.figure(figsize=(14, 6))
plt.bar(state_scores.index, state_scores.values)
plt.title('Average Hospital Infection Scores by State')
plt.xlabel('State')
plt.ylabel('Average Score')
plt.xticks(rotation=90)
plt.ylim(0, y_upper)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

Scatter Chart

This scatter plot shows the same per-state average scores as individual points, which makes the spread of scores across states easier to see.

# Scatter plot of the per-state mean score: one point per state, with states
# on the x-axis in the order of the state_scores index.
scatter_y_upper = state_scores.values.max() * 1.1  # 10% headroom above the highest point

plt.figure(figsize=(14, 6))
plt.scatter(state_scores.index, state_scores.values)
plt.title('Average Hospital Infection Scores by State')
plt.xlabel('State')
plt.ylabel('Average Score')
plt.xticks(rotation=90)
plt.ylim(0, scatter_y_upper)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

Horizontal Bar Chart

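This horizontal bar chart shows the average score for every state and territory in the data set (54 in total, including DC, GU, PR, and VI), with each bar labeled with its value, and shows that New York had the highest score.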

import matplotlib.pyplot as plt

# Horizontal bar chart of the per-state mean score, with the numeric value
# printed at the end of every bar.
plt.figure(figsize=(10, 12))
plt.barh(state_scores.index, state_scores.values)
plt.ylabel('State')
plt.xlabel('Average Score')
plt.title('Average Hospital Infection Scores by State')

x_max = max(state_scores.values)
plt.xlim(0, x_max * 1.1)  # Add buffer to x-axis so the value labels have room
plt.grid(axis='x', linestyle='--', alpha=0.7)

# Add labels to each bar. The gap between bar end and label is a fraction of
# the axis range: a fixed offset in data units (such as 0.1) is invisible when
# the scores are in the thousands, so the text would sit flush on the bar.
label_pad = x_max * 0.005
for row, score in enumerate(state_scores.values):
    # Bars for categorical y values are drawn at positions 0, 1, 2, ...,
    # so the enumeration index is the bar's y coordinate.
    plt.text(score + label_pad, row, f'{score:.2f}', va='center')

plt.tight_layout()
plt.show()

Heatmap

This heatmap shows which states had higher average scores, with red indicating higher scores and blue indicating lower scores.

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Single-row heatmap: reshape the per-state means into a 1 x N frame
# (one row, one column per state) so seaborn draws one colored cell per state.
heatmap_data = state_scores.to_frame().T

plt.figure(figsize=(14, 2))
sns.heatmap(
    heatmap_data,
    cmap="coolwarm",
    annot=True,  # print each state's mean inside its cell
    fmt=".2f",
    cbar_kws={'label': 'Average Score'},
)

plt.title('Average Hospital Infection Scores by State (Heatmap)')
plt.xticks(rotation=90)  # state codes read vertically
plt.yticks(rotation=0)   # keep the single row label horizontal
plt.tight_layout()
plt.show()

Waterfall Diagram

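This waterfall diagram stacks the states' average hospital infection scores end to end: each state's bar starts where the previous state's bar ended, so the horizontal axis shows the running total.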

import matplotlib.pyplot as plt
import numpy as np

# Waterfall-style chart: one horizontal bar per state, each starting at the
# running total of all preceding states' mean scores.
# NOTE(review): the title says "Score Changes", but the plotted values are the
# per-state averages themselves, not differences; confirm the intended meaning.
from itertools import accumulate

values = np.asarray(state_scores.values).flatten()
labels = state_scores.index

# Left edge of bar i is the sum of values[0..i-1]; the first bar starts at 0.
cumulative = list(accumulate(values[:-1], initial=0))

# Green for non-negative values, red for negative ones; the first and last
# bars are highlighted in blue. Guarded so an empty series does not raise.
colors = ['green' if v >= 0 else 'red' for v in values]
if colors:
    colors[0] = colors[-1] = 'blue'

fig, ax = plt.subplots(figsize=(12, 8))

for i, (left, width, color) in enumerate(zip(cumulative, values, colors)):
    ax.barh(y=i, width=width, left=left, color=color, edgecolor='black')
    # Value label centered inside the bar.
    ax.text(left + width / 2, i, f'{width:.2f}', va='center', ha='center', color='white', fontweight='bold')

ax.set_yticks(range(len(labels)))
ax.set_yticklabels(labels)
ax.set_xlabel('Cumulative Score')
ax.set_title('Waterfall Diagram: Hospital Infection Score Changes by State')
ax.grid(axis='x', linestyle='--', alpha=0.7)

plt.tight_layout()
plt.show()