This data set covers healthcare-associated infections in hospitals. This project looks specifically at the average infection score of each state to show which states had the highest scores.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Base directory on the U: drive. Nothing in the visible part of this file
# reads this variable (the read_csv call uses its own literal path).
# NOTE(review): presumably meant as the data folder -- confirm whether it is still needed.
file_path = "U:\\"
This bar chart shows the average score for every state and territory in the data set (54 codes in total, including DC, GU, PR, and VI) and shows that New York had the highest score.
# Read the hospital-infection CSV and convert the 'Score' column to numbers;
# entries that cannot be parsed become NaN (errors='coerce').
df = pd.read_csv('/Healthcare_Associated_Infections_-_Hospital.csv')
df['Score'] = pd.to_numeric(df['Score'], errors='coerce')

# Mean score per state, with states whose mean is NaN removed, ordered by
# state code. Later sections of the file reuse `state_scores`.
state_scores = (
    df.groupby('State')['Score']
    .mean()
    .dropna()
    .sort_index()
)

# Vertical bar chart: one bar per state.
plt.figure(figsize=(14, 6))
plt.bar(state_scores.index, state_scores.values)
plt.title('Average Hospital Infection Scores by State')
plt.xlabel('State')
plt.ylabel('Average Score')
plt.xticks(rotation=90)

# Leave 10% of headroom above the tallest bar.
# Recorded output of this call in the original run: (0.0, 5390.749749733334)
score_axis_top = max(state_scores.values) * 1.1
plt.ylim(0, score_axis_top)

plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()
This scatter plot shows the same state averages as individual points, which makes the spread of scores across the states easier to see.
# Scatter plot of the per-state averages held in `state_scores`
# (computed earlier in the file): one point per state.
plt.figure(figsize=(14, 6))
plt.scatter(state_scores.index, state_scores.values)
plt.title('Average Hospital Infection Scores by State')
plt.xlabel('State')
plt.ylabel('Average Score')
plt.xticks(rotation=90)

# Leave 10% of headroom above the highest point.
# Recorded output of this call in the original run: (0.0, 5390.749749733334)
scatter_axis_top = max(state_scores.values) * 1.1
plt.ylim(0, scatter_axis_top)

plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()
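This horizontal bar chart shows the average score for every state and territory in the data set (54 codes in total, including DC, GU, PR, and VI), labels each bar with its value, and shows that New York had the highest score.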
import matplotlib.pyplot as plt
# Horizontal bar chart of the per-state averages held in `state_scores`
# (computed earlier in the file), with every bar annotated with its value.
plt.figure(figsize=(10, 12))
plt.barh(state_scores.index, state_scores.values)
plt.ylabel('State')
plt.xlabel('Average Score')
plt.title('Average Hospital Infection Scores by State')

# Leave 10% of headroom to the right of the longest bar.
# Recorded output of this call in the original run: (0.0, 5390.749749733334)
x_axis_max = max(state_scores.values) * 1.1
plt.xlim(0, x_axis_max)
plt.grid(axis='x', linestyle='--', alpha=0.7)

# Add labels to each bar. The enumerate index is used as the y position so
# the labels follow the order in which the bars were passed to barh().
# The gap between bar end and label scales with the axis range; a fixed
# offset of 0.1 is invisible on an axis that is thousands of units wide.
label_gap = 0.005 * x_axis_max
for i, score in enumerate(state_scores.values):
    plt.text(score + label_gap, i, f'{score:.2f}', va='center')

plt.tight_layout()
plt.show()
This heat map shows which states had higher scores, with red indicating higher averages and blue indicating lower ones.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
# Single-row heat map: the per-state averages in `state_scores` (computed
# earlier in the file) are turned into a one-row frame, one column per state.
heatmap_data = state_scores.to_frame().T

plt.figure(figsize=(14, 2))
sns.heatmap(
    heatmap_data,
    annot=True,  # print the value inside every cell
    fmt=".2f",
    cmap="coolwarm",
    cbar_kws={'label': 'Average Score'},
)
plt.title('Average Hospital Infection Scores by State (Heatmap)')

# Keep the single row label horizontal and turn the state codes vertical.
# Recorded output of the original run: one y tick labelled 'Score', and 54
# x ticks at 0.5 ... 53.5 labelled 'AK' through 'WY' (the codes include
# DC, GU, PR and VI).
plt.yticks(rotation=0)
plt.xticks(rotation=90)

plt.tight_layout()
plt.show()
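This waterfall diagram lays the states' average hospital infection scores end to end: each state's bar starts where the previous state's bar ends, so the horizontal axis shows the running total of the averages.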
import matplotlib.pyplot as plt
import numpy as np
# Waterfall-style chart of the per-state averages held in `state_scores`
# (computed earlier in the file): each bar starts where the previous bar
# ended, so the x-axis carries the running total of the averages.
values = np.asarray(state_scores.values).flatten()
labels = state_scores.index

# Left edge of every bar: 0 for the first bar, then the running total of all
# preceding values. The last value is excluded because nothing starts after it.
cumulative = [0, *np.cumsum(values[:-1])]

# Green for non-negative values, red for negative ones; the first and last
# bars are highlighted in blue. The guard keeps an empty input from raising
# IndexError on colors[0].
colors = ['green' if v >= 0 else 'red' for v in values]
if colors:
    colors[0] = 'blue'
    colors[-1] = 'blue'

fig, ax = plt.subplots(figsize=(12, 8))
for i, (left, width) in enumerate(zip(cumulative, values)):
    ax.barh(y=i, width=width, left=left, color=colors[i], edgecolor='black')
    # Print the bar's own value at its horizontal centre.
    ax.text(left + width / 2, i, f'{width:.2f}', va='center', ha='center', color='white', fontweight='bold')

ax.set_yticks(range(len(labels)))
ax.set_yticklabels(labels)
ax.set_xlabel('Cumulative Score')
ax.set_title('Waterfall Diagram: Hospital Infection Score Changes by State')
ax.grid(axis='x', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()