import os
os.environ['QT_QPA_PLATFORM_PLUGIN_PATH'] = 'D:/Anaconda3/Library/plugins/platforms'
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
# Location of the input data; the later sections re-read other columns from
# this same CSV through `filename`.
path = 'U:/'
filename = os.path.join(path, 'Sleep_Efficiency.csv')

# make subdf with sleep efficiency: one row per person, ordered from the
# highest sleep efficiency to the lowest, re-indexed 0..n-1 after the sort.
df1 = (
    pd.read_csv(filename, usecols=['ID', 'Sleep efficiency'])
    .sort_values('Sleep efficiency', ascending=False)
    .reset_index(drop=True)
)

# plot sleep efficiency numbers for each person, color by relation to average
# One x position per row. Taken from the data instead of a fixed 452 so the
# positions still match the rows if the CSV gains or loses records.
numbers = np.arange(len(df1))
def colorsByMean(indata, column='SleepEfficiency', tolerance=0.01):
    """Return one color name per row, chosen by comparing the value to the mean.

    Args:
        indata: DataFrame holding the values to classify.
        column: Name of the column to classify. Defaults to
            ``'SleepEfficiency'``, the column this function has always read.
        tolerance: Relative half-width of the "about average" band around the
            mean. The default of 0.01 keeps the original 1% band.

    Returns:
        A list of color names in row order: ``'lightcoral'`` for values above
        ``mean * (1 + tolerance)``, ``'green'`` for values below
        ``mean * (1 - tolerance)`` and ``'black'`` for everything in between.
        Missing values fail both comparisons and therefore map to ``'black'``.
    """
    values = indata[column]
    avg = values.mean()
    # Compute the band edges once instead of on every iteration.
    upper = avg * (1 + tolerance)
    lower = avg * (1 - tolerance)
    return [
        'lightcoral' if value > upper
        else 'green' if value < lower
        else 'black'
        for value in values
    ]
# colorsByMean looks the column up under the name SleepEfficiency, so the
# CSV header (which contains a space) is renamed before it is called.
df1.rename(columns={'Sleep efficiency': 'SleepEfficiency'}, inplace=True)
import matplotlib.patches as mpatches

mycolors1 = colorsByMean(df1)
# The mean is needed for the reference line and for its label; compute it once.
mean_efficiency = df1.SleepEfficiency.mean()
# One bar per row of df1, however many rows the CSV holds.
bar_positions = np.arange(len(df1))

plt.figure(figsize=(18, 10))
# Proxy patches so the legend explains the three bar colors.
above = mpatches.Patch(color='lightcoral', label='Above Average')
at = mpatches.Patch(color='black', label='Within 1% of Average')
below = mpatches.Patch(color='green', label='Below Average')
plt.title("Sleep Efficiency Compared to Average Sleep Efficiency", fontsize=18)
plt.axhline(mean_efficiency, color='black', linestyle='dashed')
# Anchor the label 32 bars in from the right edge (x = 420 for 452 rows).
plt.text(max(len(df1) - 32, 0), mean_efficiency + .05,
         'Mean = ' + str(round(mean_efficiency, 3)), fontsize=14)
plt.bar(bar_positions, df1['SleepEfficiency'], color=mycolors1)
## <BarContainer object of 452 artists>
# A single legend call: a second plt.legend() would replace the color key
# with an automatic legend for the bars.
plt.legend(handles=[above, at, below], loc='upper right', fontsize=14)
plt.show()
From the graph it can be inferred that the average sleep efficiency (the share of time in bed spent asleep) is approximately 79%. In other words, people typically spend about 21% of their time in bed awake.
# awakenings by caffeine
# Read the two measures plus the ID, and shorten the caffeine column name so
# it can be used with attribute access later on.
df2 = pd.read_csv(
    filename,
    usecols=['ID', 'Awakenings', 'Caffeine consumption'],
).rename(columns={'Caffeine consumption': 'Caffeine'})

# Treat a missing measurement as zero in both plotted columns.
for measure in ('Caffeine', 'Awakenings'):
    df2[measure] = df2[measure].replace(np.nan, 0)

# Keep only the first 25 rows for the chart.
df3 = df2.iloc[:25]
def autolabel(bars, ax, decimalPlaces, symbol):
    """Write each bar's height as a text label just above the bar.

    Args:
        bars: Iterable of bar objects exposing ``get_height()``, ``get_x()``
            and ``get_width()``.
        ax: Object whose ``text`` method draws the label.
        decimalPlaces: Format spec applied to the height, e.g. ``'.0f'``.
        symbol: String placed in front of the formatted height.
    """
    label_style = dict(fontsize=11, color='purple', ha='center', va='bottom')
    for bar in bars:
        bar_height = bar.get_height()
        # Horizontal middle of the bar; the label is centered on it.
        center_x = bar.get_x() + bar.get_width() / 2
        caption = symbol + format(bar_height, decimalPlaces)
        # 1% above the bar top so the text does not touch the bar.
        ax.text(center_x, bar_height * 1.01, caption, **label_style)
# Grouped bar chart: awakenings on the left y-axis, caffeine on a second
# y-axis sharing the same x positions.
fig = plt.figure(figsize=(18,10))
ax1 = fig.add_subplot(1,1,1)
ax2 = ax1.twinx()
bar_width = 0.4
# One slot per plotted row; taken from df3 so the chart still works when the
# data holds fewer than 25 rows.
x_pos = np.arange(len(df3))
#plot count of awakenings for each person
count_bars = ax1.bar(x_pos-(0.5*bar_width),df3.Awakenings,bar_width,color='gray',edgecolor='black',label='Number of Awakenings')
#plot the intake of caffeine by the person
caff_bars = ax2.bar(x_pos+(0.5*bar_width), df3.Caffeine, bar_width, color='green',edgecolor='black',label='Caffeine Consumption')
ax1.set_xlabel('ID Number', fontsize=18)
ax1.set_ylabel('Count of Awakenings',fontsize=18,labelpad=20)
ax2.set_ylabel('Caffeine Consumption', fontsize =18, rotation=270,labelpad=20)
ax1.tick_params(axis='y',labelsize=14)
ax2.tick_params(axis='y',labelsize=14)
# df3 is the head of the data, not a random sample, so the subtitle says so.
plt.title(f'Number of Awakenings by Caffeine Consumption\n first {len(df3)} individuals', fontsize=18)
ax1.set_xticks(x_pos)
ax1.set_xticklabels(df3.ID, fontsize=14)
# Merge the handles of both axes into one legend drawn on ax1.
count_color, count_label = ax1.get_legend_handles_labels()
caff_color, caff_label = ax2.get_legend_handles_labels()
legend = ax1.legend(count_color + caff_color,
                    count_label + caff_label,
                    loc='upper left',
                    frameon=True,
                    ncol=1,
                    shadow=True,
                    borderpad=1,
                    fontsize=14)
# Extra headroom above the tallest awakenings bar for the legend and labels.
ax1.set_ylim(0,df3.Awakenings.max()*1.5)
## (0.0, 6.0)
autolabel(count_bars,ax1,'.0f','')
autolabel(caff_bars,ax2,'.0f','')
plt.show()
The relationship between the number of awakenings and caffeine consumption is much weaker than anticipated. For most people, caffeine does not appear to have a major impact on their sleep.
# Bar chart of average sleep efficiency by gender / smoking-status group
df4 = pd.read_csv(filename, usecols = ['ID', 'Gender','Sleep efficiency','Smoking status'])
df4.rename(columns={'Smoking status': 'SmokingStatus',
                    'Sleep efficiency': 'SleepEfficiency'}, inplace=True)
#Group categories: Male smoker, Female smoker, male non-smoker, female non-smoker
# .copy() gives every group its own frame, so the column assignments below
# write to that frame rather than to a filtered slice of df4.
ms_df = df4[(df4.Gender=='Male')&(df4.SmokingStatus=='Yes')].copy()
mns_df = df4[(df4.Gender=='Male')&(df4.SmokingStatus=='No')].copy()
fs_df = df4[(df4.Gender=='Female')&(df4.SmokingStatus=='Yes')].copy()
fns_df = df4[(df4.Gender=='Female')&(df4.SmokingStatus=='No')].copy()
# Tag each group and store its mean sleep efficiency on every row of the group.
for group_code, group_df in (('fs', fs_df), ('ms', ms_df), ('mns', mns_df), ('fns', fns_df)):
    group_df['Group'] = group_code
    group_df['AVGSE'] = group_df.SleepEfficiency.mean()
# Every row of a group carries the same AVGSE, so one row per group is enough.
df4x = pd.concat([fs_df.head(1),ms_df.head(1),mns_df.head(1),fns_df.head(1)])
# fillna returns a new frame; keep it instead of discarding the result.
df4x = df4x.fillna(0)
df4x
## ID Gender SleepEfficiency SmokingStatus Group AVGSE
## 0 1 Female 0.88 Yes fs 0.693088
## 1 2 Male 0.66 Yes ms 0.754731
## 4 5 Male 0.76 No mns 0.814741
## 2 3 Female 0.89 No fns 0.828718
df4x=df4x.drop(columns=['ID','SmokingStatus','Gender','SleepEfficiency'])
df4x
## Group AVGSE
## 0 fs 0.693088
## 1 ms 0.754731
## 4 mns 0.814741
## 2 fns 0.828718
# Tick labels in the same order as the rows of df4x (fs, ms, mns, fns).
xlabs=['Female Smokers','Male Smokers','Male Non-Smokers','Female Non-Smokers']
fig = plt.figure(figsize=(18,10))
ax=fig.add_subplot(1,1,1)
df4x.plot(kind='bar',ax=ax)
ax.set_ylabel('Sleep Efficiency %',fontsize=18,labelpad=20)
plt.title('Sleep Efficiency by Gender and Smoking Status',color='purple', fontsize=18)
# Labels must be a sequence: a bare string is iterated character by
# character, which would label the series "S".
ax.legend(['Sleep Efficiency'],loc='upper left')
ax.set_xticklabels(xlabs, fontsize=14,rotation=0)
plt.show()
From the graph it can be inferred that smoking affects women's sleep efficiency more than men's, and that women who do not smoke typically sleep better than men do.
import seaborn as sns

# Caffeine intake and sleep efficiency for every person in the CSV.
df5 = pd.read_csv(
    filename,
    usecols=['ID', 'Caffeine consumption', 'Sleep efficiency'],
)

#boxplot for caff x SE
fig = plt.figure(figsize=(18, 10))
ax1 = fig.add_subplot(1, 1, 1)
# One box per distinct caffeine value, drawn on the axes created above.
sns.boxplot(data=df5, x="Caffeine consumption", y="Sleep efficiency",
            color="cyan", ax=ax1)

# Title and both axis labels share the same text styling.
heading_style = dict(color="purple", fontsize=18)
ax1.set_title("Does caffeine consumption affect sleep?", **heading_style)
ax1.set_xlabel('Caffeine Consumption', **heading_style)
ax1.set_ylabel('Sleep Efficiency', **heading_style)
plt.show()
The box plot indicates that caffeine does not have a major impact on sleep efficiency. Even those with high caffeine consumption have a median sleep efficiency similar to that of those who do not use caffeine. More observations, particularly of people who consume higher doses of caffeine, might provide more insightful results.
# pie chart for general split between sleep levels; one for smokers and one for non-smokers
# The three sleep-stage columns, in the order the pie wedges are drawn.
stage_columns = ['REM sleep percentage', 'Deep sleep percentage', 'Light sleep percentage']
df6 = pd.read_csv(filename, usecols=stage_columns + ['Smoking status'])

# Split on the recorded status value ('Yes' / 'No', the same values the
# gender/smoking section of this file filters on) instead of sorting and
# slicing at fixed row positions, which only works for one exact row count.
smoke_df = df6[df6['Smoking status'] == 'Yes']
nonsmoke_df = df6[df6['Smoking status'] == 'No']

# Average share of each sleep stage per group, in stage_columns order.
smoke_list = smoke_df[stage_columns].mean().tolist()
nonsmoke_list = nonsmoke_df[stage_columns].mean().tolist()
mylabels = ['REM sleep percentage average','Deep sleep percentage average','Light sleep percentage average']

#create two pie charts one for smokers and one for non smokers
fig = plt.figure(figsize=(10,10))
fig.suptitle('Sleep Level Variation by Smoking Status', color="purple",fontsize=18)
ax1 = fig.add_subplot(2,1,1)
# autopct prints each wedge's share of the pie with two decimals.
ax1.pie(smoke_list, labels=mylabels, startangle=90, autopct='%.2f%%')
## ([<matplotlib.patches.Wedge object at 0x0000028824620EE0>, <matplotlib.patches.Wedge object at 0x00000288246201F0>, <matplotlib.patches.Wedge object at 0x00000288245A6D30>], [Text(-0.7263787548813433, 0.8260592620732666, 'REM sleep percentage average'), Text(-0.23908645054094801, -1.0737027843717883, 'Deep sleep percentage average'), Text(0.8885585166888745, 0.6484317715995009, 'Light sleep percentage average')], [Text(-0.39620659357164173, 0.45057777931269083, '22.96%'), Text(-0.13041079120415344, -0.5856560642027936, '47.11%'), Text(0.4846682818302951, 0.3536900572360914, '29.93%')])
ax1.set_title("Smokers", color="purple",fontsize=18)
ax2 = fig.add_subplot(2,1,2)
ax2.pie(nonsmoke_list, labels=mylabels, startangle=90, autopct='%.2f%%')
## ([<matplotlib.patches.Wedge object at 0x0000028825FC0F10>, <matplotlib.patches.Wedge object at 0x0000028825FC0970>, <matplotlib.patches.Wedge object at 0x00000288245DE700>], [Text(-0.7207849315839179, 0.8309446927452916, 'REM sleep percentage average'), Text(0.03661061547501223, -1.0993905870228928, 'Deep sleep percentage average'), Text(0.6927299131153151, 0.8544736786322022, 'Light sleep percentage average')], [Text(-0.3931554172275915, 0.4532425596792499, '22.74%'), Text(0.01996942662273394, -0.5996675929215778, '55.57%'), Text(0.3778526798810809, 0.4660765519812012, '21.68%')])
ax2.set_title("Non-Smokers", color="purple",fontsize=18)
plt.show()
The pie charts indicate that those who do not smoke typically have a higher rate of deep sleep compared to those who do smoke. REM sleep is consistent between both groups.