#add these two lines underneath the chunk where you have included the use_python line.
import os
os.environ['QT_QPA_PLATFORM_PLUGIN_PATH'] = 'C:/ProgramData/Anaconda3/Library/plugins/platforms'
import folium
import seaborn as sns
import matplotlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Folder that holds the input CSV; the same folder is reused when the map is saved.
path = "C:/Users/srich/OneDrive/Documents/IS 460 Data Visualization SP24/"
filename = path + "National_Obesity_By_State.csv"
df = pd.read_csv(filename)
#print(df.columns)
# The data reports the obesity percentage within the population by state;
# it includes Puerto Rico and Washington DC.
# This sort must run BEFORE the rename below: it refers to the raw 'Obesity' column name.
df_sorted = df.sort_values(by='Obesity', ascending=False)
# Rename in place so every later step can use the friendlier 'Obesity Rate %' / 'State' labels.
df.rename(columns={'Obesity': 'Obesity Rate %', 'NAME': 'State'}, inplace=True)
# Calculate the average obesity rate across all rows (states, DC and Puerto Rico).
avg_obesity_rate = df['Obesity Rate %'].mean()
# Rows ordered from highest to lowest obesity rate; the charts below all read from this frame.
ObesityRanks = df.sort_values(by="Obesity Rate %", ascending=False)
#ObesityRanks
This bar chart shows the obesity rate (%) of every state, sorted in descending order from left to right. A horizontal line marks the average rate across all states, which makes it easy to see which states have an above-average obese population.
# Visualization 1: one bar per state, highest obesity rate first, with a
# horizontal reference line drawn at the overall average.
plt.figure(figsize=(24, 12))
plt.bar(
    ObesityRanks['State'],
    ObesityRanks['Obesity Rate %'],
    label='Obesity Rate %',
    color='maroon',
)
plt.axhline(
    y=avg_obesity_rate,
    label='Average Obesity Rate',
    color='black',
    linewidth=2,
    linestyle='-',
)
# Write the average value level with the line, at an x position beyond the last bar.
plt.text(
    len(ObesityRanks) + 2.2,
    avg_obesity_rate,
    '{:.2f}%'.format(avg_obesity_rate),
    ha='left',
    va='center',
)
plt.title('Obesity Rates by States compared to Avg', fontsize=16)
plt.ylabel('Obesity Rate %', fontsize=14)
plt.xlabel('STATES', fontsize=14)
# Slant the state names so the labels stay readable.
plt.xticks(rotation=55, ha='right')
## ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51], [Text(0, 0, 'Louisiana'), Text(1, 0, 'Alabama'), Text(2, 0, 'Mississippi'), Text(3, 0, 'West Virginia'), Text(4, 0, 'Kentucky'), Text(5, 0, 'Arkansas'), Text(6, 0, 'Kansas'), Text(7, 0, 'Oklahoma'), Text(8, 0, 'Tennessee'), Text(9, 0, 'Texas'), Text(10, 0, 'Missouri'), Text(11, 0, 'Iowa'), Text(12, 0, 'South Carolina'), Text(13, 0, 'Nebraska'), Text(14, 0, 'Indiana'), Text(15, 0, 'Michigan'), Text(16, 0, 'North Dakota'), Text(17, 0, 'Illinois'), Text(18, 0, 'Wisconsin'), Text(19, 0, 'Georgia'), Text(20, 0, 'South Dakota'), Text(21, 0, 'North Carolina'), Text(22, 0, 'Oregon'), Text(23, 0, 'Pennsylvania'), Text(24, 0, 'Maine'), Text(25, 0, 'Ohio'), Text(26, 0, 'Alaska'), Text(27, 0, 'Delaware'), Text(28, 0, 'Puerto Rico'), Text(29, 0, 'Virginia'), Text(30, 0, 'Wyoming'), Text(31, 0, 'Maryland'), Text(32, 0, 'New Mexico'), Text(33, 0, 'Idaho'), Text(34, 0, 'Arizona'), Text(35, 0, 'Florida'), Text(36, 0, 'Nevada'), Text(37, 0, 'Washington'), Text(38, 0, 'New Hampshire'), Text(39, 0, 'Minnesota'), Text(40, 0, 'Rhode Island'), Text(41, 0, 'New Jersey'), Text(42, 0, 'Connecticut'), Text(43, 0, 'Vermont'), Text(44, 0, 'New York'), Text(45, 0, 'Utah'), Text(46, 0, 'Massachusetts'), Text(47, 0, 'California'), Text(48, 0, 'Montana'), Text(49, 0, 'Hawaii'), Text(50, 0, 'District of Columbia'), Text(51, 0, 'Colorado')])
# Add the legend (it picks up the label= entries set on the bars and the average line),
# then render the finished bar chart.
plt.legend()
plt.show()
This visualization provides an overview of the states with the lowest obesity rates (%). The horizontal bar chart ranks the 25 states with the lowest obesity rates according to the data.
# Visualization 2: horizontal bars for the 25 lowest-obesity entries.
# ObesityRanks runs from highest to lowest rate, so its last 25 rows are the lowest.
top_25_healthiest_states = ObesityRanks.iloc[-25:]
plt.figure(figsize=(24, 12))
plt.barh(
    y=top_25_healthiest_states['State'],
    width=top_25_healthiest_states['Obesity Rate %'],
    color='darkgreen',
)
# Title and axis captions.
plt.title('Top 25 Healthiest States based on Lowest Obesity Rating', fontsize=16)
plt.ylabel('States', fontsize=14)
plt.xlabel('Obesity Rating %', fontsize=14)
plt.tight_layout()
plt.show()
This pie chart required adding a new column to my data frame called Obesity Category. I created three categories for the obesity rates: High Obesity states have rates of 30% or higher, Moderate Obesity states have rates between 24% and 29.9%, and Low Obesity states have rates below 24%. The pie chart shows the number of states in each category.
#create visualization 3
def categorize_obesity_rate(obesity_rate):
    """Classify an obesity rate (in percent) into a named band.

    Args:
        obesity_rate: Obesity rate as a percentage, e.g. ``31.2``.

    Returns:
        ``'High Obesity'`` for rates of 30 or more, ``'Moderate Obesity'``
        for rates from 24 up to (but not including) 30, ``'Low Obesity'``
        for rates below 24, and ``None`` when the value compares false
        against every threshold (NaN).
    """
    if obesity_rate >= 30:
        return 'High Obesity'
    # Everything below 30 and at least 24 is moderate. The earlier upper bound
    # of 29.9 left values such as 29.95 without any category at all.
    if obesity_rate >= 24:
        return 'Moderate Obesity'
    if obesity_rate < 24:
        return 'Low Obesity'
    # Only NaN reaches this point; leave it uncategorised rather than guess.
    return None
# Apply the function to create a new column 'Obesity Category'
ObesityRanks['Obesity Category'] = ObesityRanks['Obesity Rate %'].apply(categorize_obesity_rate)
#Print the DataFrame with the new column
#ObesityRanks
# Number of rows per category. value_counts() orders the result by frequency,
# not by severity, so the position of a category here depends on the data.
category_counts = ObesityRanks['Obesity Category'].value_counts()
total_states = int(category_counts.sum())
# Tie each colour to its category by name. A positional colour list would paint
# whichever category happens to be most frequent red, regardless of its meaning.
category_colors = {
    'High Obesity': 'red',
    'Moderate Obesity': 'orange',
    'Low Obesity': 'green',
}
colors = [category_colors.get(category, 'gray') for category in category_counts.index]
# Create the pie chart
fig, ax = plt.subplots()
wedges, texts, autotexts = ax.pie(
    category_counts,
    labels=category_counts.index,
    labeldistance=1.1,
    # Each wedge shows its share plus the absolute count. The count is rebuilt
    # from the percentage, so round it: truncating with int() can turn a value
    # like 18.9999 into 18 and under-report the category by one.
    autopct=lambda pct: f"{pct:.1f}%\n({int(round(float(pct) / 100 * total_states))})",
    startangle=90,
    colors=colors,
    wedgeprops=dict(linewidth=1, edgecolor='w'),
)
plt.title('Distribution of States based on Obesity Category')
# Equal aspect ratio keeps the pie circular.
ax.axis('equal')
## (-1.0999987365618598, 1.0999860816892946, -1.0999972590152571, 1.099999869476917)
plt.tight_layout()
# Show the plot
plt.show()
For this visualization, I created another new column called Region. I organized the states into their respective regions: Midwest, Northeast, Southeast, Southwest, and West, with Puerto Rico as its own region. The heatmap shows each state's obesity rate within its respective region.
# Visual 4 (data preparation): label every state with a region, compute a few
# per-region summaries, and pivot the rates into the Region x State grid that
# the heatmap is drawn from.
regions = {
    'Northeast': ['Maine', 'New Hampshire', 'Vermont', 'Delaware', 'Massachusetts', 'Rhode Island', 'Connecticut', 'New York', 'New Jersey', 'Pennsylvania'],
    'Southeast': ['District of Columbia','West Virginia', 'Maryland', 'Virginia', 'North Carolina', 'South Carolina', 'Georgia', 'Florida', 'Kentucky', 'Tennessee', 'Mississippi', 'Alabama', 'Arkansas', 'Louisiana'],
    'Southwest':['Texas', 'Oklahoma', 'Arizona', 'New Mexico', 'Nevada'],
    'Midwest': ['Ohio', 'Indiana', 'Michigan', 'Illinois', 'Missouri', 'Wisconsin', 'Minnesota', 'Iowa', 'Kansas', 'Nebraska', 'South Dakota', 'North Dakota'],
    'West': ['Montana', 'Idaho', 'Wyoming', 'Colorado', 'Utah', 'California', 'Oregon', 'Washington', 'Alaska', 'Hawaii'],
    'Puerto Rico': ['Puerto Rico'],  # Treat Puerto Rico as a separate region
}
# Invert the table above into a state -> region lookup.
state_to_region = {}
for region_name, member_states in regions.items():
    state_to_region.update(dict.fromkeys(member_states, region_name))
# Region column on the base frame.
df['Region'] = df['State'].map(state_to_region)
rates_by_region = df.groupby('Region')['Obesity Rate %']
# Row of the highest-rate state within each region.
highest_obesity_by_region = df.loc[rates_by_region.idxmax()]
# Mean rate per region, and the single region whose mean is largest.
region_obesity_avg = rates_by_region.mean().reset_index()
highest_avg_region = region_obesity_avg.loc[region_obesity_avg['Obesity Rate %'].idxmax()]
# Same Region column on the ranked frame used for plotting.
ObesityRanks['Region'] = ObesityRanks['State'].map(state_to_region)
# Drop the two SHAPE_* columns, which the heatmap does not use.
ObesityRanks_modified = ObesityRanks.drop(columns=['SHAPE_Length', 'SHAPE_Area'])
# One row per region, one column per state; cells hold the obesity rate.
heatmap_data = ObesityRanks_modified.pivot_table(
    values='Obesity Rate %',
    index='Region',
    columns='State',
)
# Draw the Region x State heatmap on a large canvas, with each cell annotated
# to two decimals and a horizontal colorbar.
plt.figure(figsize=(30, 25))
ax = sns.heatmap(
    heatmap_data,
    cmap='YlOrBr',
    annot=True,
    fmt=".2f",
    annot_kws={'size': 12},
    linewidths=0.5,
    square=True,
    cbar_kws={'orientation': 'horizontal'},
)
plt.xlabel('State', labelpad=20, fontsize=18)
plt.ylabel('Region', labelpad=20, fontsize=18)
plt.title('Obesity Rate by State and Region', pad=20, fontsize=22)
# Slant the state names along the x-axis for better visibility.
plt.xticks(rotation=60)
## (array([ 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5,
## 11.5, 12.5, 13.5, 14.5, 15.5, 16.5, 17.5, 18.5, 19.5, 20.5, 21.5,
## 22.5, 23.5, 24.5, 25.5, 26.5, 27.5, 28.5, 29.5, 30.5, 31.5, 32.5,
## 33.5, 34.5, 35.5, 36.5, 37.5, 38.5, 39.5, 40.5, 41.5, 42.5, 43.5,
## 44.5, 45.5, 46.5, 47.5, 48.5, 49.5, 50.5, 51.5]), [Text(0.5, 0, 'Alabama'), Text(1.5, 0, 'Alaska'), Text(2.5, 0, 'Arizona'), Text(3.5, 0, 'Arkansas'), Text(4.5, 0, 'California'), Text(5.5, 0, 'Colorado'), Text(6.5, 0, 'Connecticut'), Text(7.5, 0, 'Delaware'), Text(8.5, 0, 'District of Columbia'), Text(9.5, 0, 'Florida'), Text(10.5, 0, 'Georgia'), Text(11.5, 0, 'Hawaii'), Text(12.5, 0, 'Idaho'), Text(13.5, 0, 'Illinois'), Text(14.5, 0, 'Indiana'), Text(15.5, 0, 'Iowa'), Text(16.5, 0, 'Kansas'), Text(17.5, 0, 'Kentucky'), Text(18.5, 0, 'Louisiana'), Text(19.5, 0, 'Maine'), Text(20.5, 0, 'Maryland'), Text(21.5, 0, 'Massachusetts'), Text(22.5, 0, 'Michigan'), Text(23.5, 0, 'Minnesota'), Text(24.5, 0, 'Mississippi'), Text(25.5, 0, 'Missouri'), Text(26.5, 0, 'Montana'), Text(27.5, 0, 'Nebraska'), Text(28.5, 0, 'Nevada'), Text(29.5, 0, 'New Hampshire'), Text(30.5, 0, 'New Jersey'), Text(31.5, 0, 'New Mexico'), Text(32.5, 0, 'New York'), Text(33.5, 0, 'North Carolina'), Text(34.5, 0, 'North Dakota'), Text(35.5, 0, 'Ohio'), Text(36.5, 0, 'Oklahoma'), Text(37.5, 0, 'Oregon'), Text(38.5, 0, 'Pennsylvania'), Text(39.5, 0, 'Puerto Rico'), Text(40.5, 0, 'Rhode Island'), Text(41.5, 0, 'South Carolina'), Text(42.5, 0, 'South Dakota'), Text(43.5, 0, 'Tennessee'), Text(44.5, 0, 'Texas'), Text(45.5, 0, 'Utah'), Text(46.5, 0, 'Vermont'), Text(47.5, 0, 'Virginia'), Text(48.5, 0, 'Washington'), Text(49.5, 0, 'West Virginia'), Text(50.5, 0, 'Wisconsin'), Text(51.5, 0, 'Wyoming')])
# Tighten the layout so rotated labels are not clipped.
plt.tight_layout()
# Put a tick on every whole percent from min_count up to (not including) max_count.
min_count = 21
max_count = 37
my_colorbar_ticks = list(range(min_count, max_count))
# The colorbar belongs to the first collection seaborn added to the axes.
cbar = ax.collections[0].colorbar
cbar.set_ticks(my_colorbar_ticks)
cbar.set_label('Obesity Rate %', labelpad=20, fontsize=18)
plt.show()
#end of viz 4
#end of viz 4
This marker map lets the user see where each state is located, along with its obesity rate (%) and obesity category.
#Visual 5
import folium
# Same object as ObesityRanks (not a copy): the coordinate columns added below
# also appear on ObesityRanks.
ob_mapdf = ObesityRanks
# Manually defined latitude and longitude for each state; the coordinates are
# the centre of each state according to Google.
# The two lists are positional: entry i is written to row i of the frame, which
# is sorted from highest to lowest obesity rate. If the data or the sort order
# changes, these lists must be reordered to match.
latitude = [30.5191, 32.3182, 32.3437, 38.5976,37.8393, 35.2010, 39.0119, 35.0078, 35.5175, 31.9686, 37.9643, 42.0167, 33.8361, 41.4925, 40.5512, 44.3148, 47.1164, 40.6331, 43.7844, 32.1574, 43.9695, 35.7596, 43.8041, 40.8766, 45.2538, 40.4173, 63.5888, 38.9108, 18.2208, 37.4316, 43.0760, 39.0458, 34.9727, 44.0682, 34.0489, 27.6648, 38.8026, 47.7511, 43.1939, 46.7296, 41.5801, 40.0583, 41.6032, 44.5588, 43.2994, 39.3210, 42.4072, 36.7783, 46.8797, 19.8987, 38.9072, 39.5501]
longitude = [-91.5209, -86.9023, -89.3985, -80.4549, -84.2700, -91.8318, -98.4842, -97.0929, -86.5804, -99.9018, -91.8318, -93.1635, -81.1637, -99.9018, -85.6024, -85.6024, -101.2996, -89.3985, -88.7879, -82.9071, -99.9018, -79.0193, -120.5542, -77.8367, -69.4455, -82.9071, -154.4931, -75.5277, -66.5901, -78.6569, -107.2903, -76.6413, -105.0324, -114.7420, -111.0937, -81.5158, -116.4194, -120.7401, -71.5724, -94.6859, -71.4774, -74.4057, -73.0877, -72.5778, -74.2179, -111.0937, -71.3824, -119.4179, -110.3626, -155.6659, -77.0369, -105.7821]
# Add latitude and longitude columns to the DataFrame
ob_mapdf['Latitude'] = latitude
ob_mapdf['Longitude'] = longitude
#Display the updated DataFrame
#ob_mapdf
# Create the base map using the fixed centre point and zoom level below
my_map = folium.Map(location=[37.0902, -95.7129], zoom_start=4)
# Iterate through each row in the DataFrame and add a marker for each state
for index, row in ob_mapdf.iterrows():
    state = row['State']
    obesity_rate = row['Obesity Rate %']
    obesity_category = row['Obesity Category']
    # Use distinct names for the per-row coordinates. Reusing `latitude` and
    # `longitude` here would overwrite the lists above with single numbers, and
    # re-running the column assignment would then give every row the same point.
    marker_lat = row['Latitude']
    marker_lon = row['Longitude']
    # Tooltip shows the state name, its obesity rate and its category
    tooltip = f"{state}: {obesity_rate}%,<br> {obesity_category}"
    # Add a marker to the map
    folium.CircleMarker(
        [marker_lat, marker_lon],
        radius=5,
        tooltip=tooltip,
        color='red',
        fill=True,
        fill_color='red',
    ).add_to(my_map)
## <folium.vector_layers.CircleMarker object at 0x00000228CD1B1D50>
## <folium.vector_layers.CircleMarker object at 0x00000228CD18B710>
## <folium.vector_layers.CircleMarker object at 0x00000228CE0D32D0>
## <folium.vector_layers.CircleMarker object at 0x00000228C6A2BE50>
## <folium.vector_layers.CircleMarker object at 0x00000228CD1C9B90>
## <folium.vector_layers.CircleMarker object at 0x00000228CB404510>
## <folium.vector_layers.CircleMarker object at 0x00000228CD1BB6D0>
## <folium.vector_layers.CircleMarker object at 0x00000228C4EA6810>
## <folium.vector_layers.CircleMarker object at 0x00000228CD19F8D0>
## <folium.vector_layers.CircleMarker object at 0x00000228CD1C4A50>
## <folium.vector_layers.CircleMarker object at 0x00000228CD1E9C10>
## <folium.vector_layers.CircleMarker object at 0x00000228CD1DFA90>
## <folium.vector_layers.CircleMarker object at 0x00000228CD1D1010>
## <folium.vector_layers.CircleMarker object at 0x00000228CD1D1190>
## <folium.vector_layers.CircleMarker object at 0x00000228CD17EB90>
## <folium.vector_layers.CircleMarker object at 0x00000228CD17EA10>
## <folium.vector_layers.CircleMarker object at 0x00000228CD1E3310>
## <folium.vector_layers.CircleMarker object at 0x00000228CD1E1B10>
## <folium.vector_layers.CircleMarker object at 0x00000228CD1E2A50>
## <folium.vector_layers.CircleMarker object at 0x00000228CD1E3F50>
## <folium.vector_layers.CircleMarker object at 0x00000228CD207010>
## <folium.vector_layers.CircleMarker object at 0x00000228CD204A10>
## <folium.vector_layers.CircleMarker object at 0x00000228CD204AD0>
## <folium.vector_layers.CircleMarker object at 0x00000228CB425890>
## <folium.vector_layers.CircleMarker object at 0x00000228CE139A50>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20CD90>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20CB50>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20D250>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20D3D0>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20D550>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20D6D0>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20D850>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20D9D0>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20DB50>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20DCD0>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20DE50>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20DFD0>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20E150>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20E2D0>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20E450>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20E5D0>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20E750>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20E8D0>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20EA50>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20EBD0>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20ED50>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20EED0>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20F050>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20F1D0>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20F350>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20F4D0>
## <folium.vector_layers.CircleMarker object at 0x00000228CD20F650>
# Write the map to a standalone HTML file in the data folder (`path`).
# Nothing is displayed by this call; open the saved file in a browser to view the map.
my_map.save(path + 'MapofObesityRate.html')
#END
<iframe src="https://drive.google.com/file/d/1dn-hnFaZ0yBbt_LMnJm0Fq4vsAJyczKN/view?usp=drive_link" width="850" height="650" style="border:none;"></iframe>