Start Date: 1 Nov 2024

Report Date: 30 November 2024

Source: Psi Chi R

Welcome to the contest for November! The prompts can be found here, and the data is available here. The contest will run until the last day of November (November 30th), latest time zone on earth. The contest entry form is available here

knitr::opts_chunk$set(echo = T,message = F,warning = F)

setwd("C:/Users/alanh/Documents/R/Psi_Chi_R")

library(tidyverse)

data=read_csv('data.csv')

#access CSV directly through URL; local copy will be overwritten with a different month's data

#data= read.csv('https://osf.io/download/7uc9x/')

Skim and clean

## EDA

names(data) = make.names(colnames(data))

skimr::skim(data)
Data summary
Name data
Number of rows 374
Number of columns 12
_______________________
Column type frequency:
character 4
numeric 8
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
Gender 0 1 4 6 0 2 0
Occupation 0 1 5 20 0 11 0
Blood.Pressure 0 1 6 6 0 25 0
Sleep.Disorder 0 1 4 11 0 3 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Person.ID 0 1 187.50 108.11 1.0 94.25 187.5 280.75 374.0 ▇▇▇▇▇
Age 0 1 42.18 8.67 27.0 35.25 43.0 50.00 59.0 ▆▆▇▃▅
Sleep.Duration 0 1 7.13 0.80 5.8 6.40 7.2 7.80 8.5 ▇▆▇▇▆
Quality.of.Sleep 0 1 7.31 1.20 4.0 6.00 7.0 8.00 9.0 ▁▇▆▇▅
Physical.Activity.Level 0 1 59.17 20.83 30.0 45.00 60.0 75.00 90.0 ▇▇▇▇▇
Stress.Level 0 1 5.52 3.12 3.0 4.00 5.0 7.00 55.0 ▇▁▁▁▁
Heart.Rate 0 1 70.17 4.14 65.0 68.00 70.0 72.00 86.0 ▇▇▂▁▁
Daily.Steps 0 1 6816.84 1617.92 3000.0 5600.00 7000.0 8000.00 10000.0 ▁▅▇▆▂

Data processing (level 1)

Level 1:

Write a script to create a visualization showing the distribution of the ‘Quality of Sleep’ variable. Is ‘quality of sleep’ normally distributed?

#no, Quality of Sleep isn't normally distributed.
hist(data$Quality.of.Sleep)

Write a script to identify any outliers present in the ‘Stress Level’ variable. Are there any outliers present in ‘Stress Level?’

#Yes, there are outliers.
boxplot(data$Stress.Level)
title(ylab='Stress Level')

data %>% 
  ggplot(aes(y=Stress.Level,x=Person.ID))+
  geom_boxplot(outlier.shape= NA)+
  geom_jitter()+
  theme_bw()

Descriptive Statistics (level 2)

Level 2: If you identify any outliers present in the ‘Stress Level’ variable, write a script that removes the outliers.

data %>% 
  group_by(Stress.Level) %>% 
  count() %>% 
  arrange(desc(n))
data1 = data %>% 
  filter(Stress.Level < 55)

boxplot(data1$Stress.Level)
title(main = 'no outliers',ylab='Stress Level')

data1 %>% 
  ggplot(aes(y=Stress.Level,x=Person.ID))+
  geom_boxplot(outlier.shape= NA)+
  geom_jitter()+
  theme_bw()

Find the min, median, max, average, and standard deviation of ‘Stress Level,’ ‘Quality of Sleep,’ and ‘Physical Activity Level.’

desc_stats = function(x){
  c(min = min(x,na.rm=T),
    median = median(x,na.rm=T),
    median = median(x,na.rm=T),
    max = max(x,na.rm=T),
    mean = mean(x,na.rm=T),
    sd = sd(x,na.rm=T))
}

desc_list = list("Stress.Level","Quality.of.Sleep","Physical.Activity.Level")
results= list()

for (x in desc_list){
  results[[x]]=desc_stats(data1[[x]])
}

print(results)
## $Stress.Level
##      min   median   median      max     mean       sd 
## 3.000000 5.000000 5.000000 8.000000 5.386059 1.776798 
## 
## $Quality.of.Sleep
##      min   median   median      max     mean       sd 
## 4.000000 7.000000 7.000000 9.000000 7.313673 1.198454 
## 
## $Physical.Activity.Level
##      min   median   median      max     mean       sd 
## 30.00000 60.00000 60.00000 90.00000 59.22252 20.83502
## using PURRR; stick with base

# results1 = map(desc_list,~desc_stats(data1[[.x]]))
# 
# names(results1) = desc_list
# 
# results1

Data visualization (level 3)

Level 3: Plot the relationship between ‘Age’ and ‘Quality of Sleep.’

data1 %>% 
  ggplot(aes(x=Age,y=Quality.of.Sleep,col=Quality.of.Sleep))+
  geom_point()+
  geom_jitter()+
  theme_bw()+
  labs(col=' ',y='Sleep Quality',
       title = 'Sleeping Beauty')+
  theme(plot.title = element_text(hjust = .5))

data1 %>% 
  ggplot(aes(y=Occupation,x=rev(Quality.of.Sleep),fill=Gender))+
  geom_col()+
  theme_bw()+
  labs(fill=' ',x='Sleep Quality',
       title = 'Sleeping "On the Job"',
       y='Occupation')+
  theme(plot.title = element_text(hjust = .5))

data1 %>% 
  ggplot(aes(x=Age,y=Quality.of.Sleep,fill=Sleep.Disorder))+
  geom_col()+
  theme_bw()+
  labs(fill=' ',y='Sleep Quality',
       title = 'Sleeping Differently')+
  theme(plot.title = element_text(hjust = .5))+
  facet_grid(Gender~Sleep.Disorder)

Create a table to show the average ‘Stress Level’ by ‘Occupation.’

tab1 = data1 %>% 
  group_by(Occupation) %>% 
  summarise(Avg_Stress = mean(Stress.Level,na.rm = T)) %>% 
  arrange(desc(Avg_Stress))

print(tab1)
## # A tibble: 11 × 2
##    Occupation           Avg_Stress
##    <chr>                     <dbl>
##  1 Sales Representative       8   
##  2 Salesperson                7   
##  3 Scientist                  7   
##  4 Doctor                     6.73
##  5 Software Engineer          6   
##  6 Nurse                      5.55
##  7 Lawyer                     5.06
##  8 Manager                    5   
##  9 Accountant                 4.59
## 10 Teacher                    4.51
## 11 Engineer                   3.89
tab1 %>% 
  ggplot(aes(y=fct_inorder(Occupation),x=Avg_Stress,fill=Avg_Stress))+
  geom_col()+
  theme_bw()+
  theme(plot.title = element_text(hjust = .5))+
  labs(title="Who's the 'Meanest' (Stress) of Them All?",x='Mean Stress Levels',fill=" ",y='Occupation')

# dollar format
# dollars = function(x) {
#   paste0("$", format(x,big.mark= ",",scientific=F))
# }

Inferential statistics (level 4)

Level 4: Is there a relationship between ‘Quality of Sleep’ and ‘Stress Level?’

shapiro.test(data1$Quality.of.Sleep)
## 
##  Shapiro-Wilk normality test
## 
## data:  data1$Quality.of.Sleep
## W = 0.89346, p-value = 1.845e-15
shapiro.test(data1$Stress.Level)
## 
##  Shapiro-Wilk normality test
## 
## data:  data1$Stress.Level
## W = 0.89051, p-value = 1.098e-15
hist(data1$Quality.of.Sleep)

hist(data1$Stress.Level)

cor.test(data1$Quality.of.Sleep,data1$Stress.Level,method = "kendall")
## 
##  Kendall's rank correlation tau
## 
## data:  data1$Quality.of.Sleep and data1$Stress.Level
## z = -19.857, p-value < 2.2e-16
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
##        tau 
## -0.8263603
#yes, there's a relationship
LS0tDQp0aXRsZTogIlBzaSBDaGkgUiAtIE5vdmVtYmVyIDIwMjQiDQphdXRob3I6ICJieSBBbGFuIExhbSINCiNkYXRlOiAiYHIgU3lzLkRhdGUoKWAiDQojZGF0ZTogIkRhdGU6IGByIGZvcm1hdChTeXMuRGF0ZSgpLCAnJWQgJUIgJVknKWAiIA0Kb3V0cHV0Og0KICBodG1sX2RvY3VtZW50Og0KICAgIHRoZW1lOiByZWFkYWJsZQ0KICAgIGFsd2F5c19hbGxvd19odG1sOiB5ZXMNCiAgICBkZl9wcmludDogcGFnZWQNCiAgICB0b2M6IHllcw0KICAgIHRvY19mbG9hdDogeWVzDQogICAgbnVtYmVyX3NlY3Rpb25zOiBubw0KICAgIGFuY2hvcl9zZWN0aW9uczogVFJVRQ0KICAgIGNvZGVfZm9sZGluZzogaGlkZQ0KICAgIGNvZGVfZG93bmxvYWQ6IHRydWUNCi0tLQ0KU3RhcnQgRGF0ZTogMSBOb3YgMjAyNA0KDQpSZXBvcnQgRGF0ZTogYHIgZm9ybWF0KFN5cy5EYXRlKCksICclZCAlQiAlWScpYA0KDQpbKipTb3VyY2UqKjogUHNpIENoaSBSXShodHRwczovL29zZi5pby9kaHJhdi8pDQoNCldlbGNvbWUgdG8gdGhlIGNvbnRlc3QgZm9yIE5vdmVtYmVyISBUaGUgcHJvbXB0cyBjYW4gYmUgZm91bmQgaGVyZSwgYW5kIHRoZSBkYXRhIGlzIGF2YWlsYWJsZSBoZXJlLiBUaGUgY29udGVzdCB3aWxsIHJ1biB1bnRpbCB0aGUgbGFzdCBkYXkgb2YgTm92ZW1iZXIgKE5vdmVtYmVyIDMwdGgpLCBsYXRlc3QgdGltZSB6b25lIG9uIGVhcnRoLiBUaGUgWyoqY29udGVzdCBlbnRyeSBmb3JtIGlzIGF2YWlsYWJsZSBoZXJlKipdKGh0dHBzOi8vZm9ybXMuZ2xlL05LU3hKMjJYVmRIQXJVUDdBKQ0KDQpgYGB7ciBzZXR1cCwgd2FybmluZz1GLG1lc3NhZ2U9Rn0NCmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVCxtZXNzYWdlID0gRix3YXJuaW5nID0gRikNCg0Kc2V0d2QoIkM6L1VzZXJzL2FsYW5oL0RvY3VtZW50cy9SL1BzaV9DaGlfUiIpDQoNCmxpYnJhcnkodGlkeXZlcnNlKQ0KDQpkYXRhPXJlYWRfY3N2KCdkYXRhLmNzdicpDQoNCiNhY2Nlc3MgQ1NWIGRpcmVjdGx5IHRocm91Z2ggVVJMOyBsb2NhbCBjb3B5IHdpbGwgYmUgb3ZlcndyaXR0ZW4gd2l0aCBhIGRpZmZlcmVudCBtb250aCdzIGRhdGENCg0KI2RhdGE9IHJlYWQuY3N2KCdodHRwczovL29zZi5pby9kb3dubG9hZC83dWM5eC8nKQ0KYGBgDQoNCiMjIFNraW0gYW5kIGNsZWFuDQpgYGB7cn0NCiMjIEVEQQ0KDQpuYW1lcyhkYXRhKSA9IG1ha2UubmFtZXMoY29sbmFtZXMoZGF0YSkpDQoNCnNraW1yOjpza2ltKGRhdGEpDQoNCg0KYGBgDQoNCg0KIyMgRGF0YSBwcm9jZXNzaW5nIChsZXZlbCAxKQ0KDQpMZXZlbCAxOiANCg0KV3JpdGUgYSBzY3JpcHQgdG8gY3JlYXRlIGEgdmlzdWFsaXphdGlvbiBzaG93aW5nIHRoZSBkaXN0cmlidXRpb24gb2YgdGhlIOKAmFF1YWxpdHkgb2YgU2xlZXDigJkgdmFyaWFibGUuIElzIOKAmHF1YWxpdHkgb2Ygc2xlZXDigJkgbm9ybWFsbHkgZGlzdHJpYnV0ZWQ/DQoNCmBgYHtyfQ0KI25vLCBRdWFsaXR5IG9mIFNsZWVwIGlzbid0IG5vcm1hbGx5IGRpc3RyaWJ1dGVkLg0KaGlzdChkYXRhJFF1YWxpdHkub2YuU2xlZXApDQpgYGANCg0KV3JpdGUgYSBzY3JpcHQgdG8gaWRlbnRpZnkgYW55IG91dGxpZXJzIHByZXNlbnQgaW4gdGhlIOKAmFN0cmVzcyBMZXZlbOKAmSB2YXJpYWJsZS4gQXJlIHRoZXJlIGFueSBvdXRsaWVycyBwcmVzZW50IGluIOKAmFN0cmVzcyBMZXZlbD/igJkNCmBgYHtyfQ0KI1llcywgdGhlcmUgYXJlIG91dGxpZXJzLg0KYm94cGxvdChkYXRhJFN0cmVzcy5MZXZlbCkNCnRpdGxlKHlsYWI9J1N0cmVzcyBMZXZlbCcpDQoNCmRhdGEgJT4lIA0KICBnZ3Bsb3QoYWVzKHk9U3RyZXNzLkxldmVsLHg9UGVyc29uLklEKSkrDQogIGdlb21fYm94cGxvdChvdXRsaWVyLnNoYXBlPSBOQSkrDQogIGdlb21faml0dGVyKCkrDQogIHRoZW1lX2J3KCkNCmBgYA0KDQojIyBEZXNjcmlwdGl2ZSBTdGF0aXN0aWNzIChsZXZlbCAyKQ0KDQpMZXZlbCAyOiBJZiB5b3UgaWRlbnRpZnkgYW55IG91dGxpZXJzIHByZXNlbnQgaW4gdGhlIOKAmFN0cmVzcyBMZXZlbOKAmSB2YXJpYWJsZSwgd3JpdGUgYSBzY3JpcHQgdGhhdCByZW1vdmVzIHRoZSBvdXRsaWVycy4NCg0KYGBge3J9DQpkYXRhICU+JSANCiAgZ3JvdXBfYnkoU3RyZXNzLkxldmVsKSAlPiUgDQogIGNvdW50KCkgJT4lIA0KICBhcnJhbmdlKGRlc2MobikpDQoNCmRhdGExID0gZGF0YSAlPiUgDQogIGZpbHRlcihTdHJlc3MuTGV2ZWwgPCA1NSkNCg0KYm94cGxvdChkYXRhMSRTdHJlc3MuTGV2ZWwpDQp0aXRsZShtYWluID0gJ25vIG91dGxpZXJzJyx5bGFiPSdTdHJlc3MgTGV2ZWwnKQ0KDQpkYXRhMSAlPiUgDQogIGdncGxvdChhZXMoeT1TdHJlc3MuTGV2ZWwseD1QZXJzb24uSUQpKSsNCiAgZ2VvbV9ib3hwbG90KG91dGxpZXIuc2hhcGU9IE5BKSsNCiAgZ2VvbV9qaXR0ZXIoKSsNCiAgdGhlbWVfYncoKQ0KYGBgDQoNCkZpbmQgdGhlIG1pbiwgbWVkaWFuLCBtYXgsIGF2ZXJhZ2UsIGFuZCBzdGFuZGFyZCBkZXZpYXRpb24gb2Yg4oCYU3RyZXNzIExldmVsLOKAmSDigJhRdWFsaXR5IG9mIFNsZWVwLOKAmSBhbmQg4oCYUGh5c2ljYWwgQWN0aXZpdHkgTGV2ZWwu4oCZDQoNCmBgYHtyfQ0KZGVzY19zdGF0cyA9IGZ1bmN0aW9uKHgpew0KICBjKG1pbiA9IG1pbih4LG5hLnJtPVQpLA0KICAgIG1lZGlhbiA9IG1lZGlhbih4LG5hLnJtPVQpLA0KICAgIG1lZGlhbiA9IG1lZGlhbih4LG5hLnJtPVQpLA0KICAgIG1heCA9IG1heCh4LG5hLnJtPVQpLA0KICAgIG1lYW4gPSBtZWFuKHgsbmEucm09VCksDQogICAgc2QgPSBzZCh4LG5hLnJtPVQpKQ0KfQ0KDQpkZXNjX2xpc3QgPSBsaXN0KCJTdHJlc3MuTGV2ZWwiLCJRdWFsaXR5Lm9mLlNsZWVwIiwiUGh5c2ljYWwuQWN0aXZpdHkuTGV2ZWwiKQ0KYGBgDQoNCmBgYHtyfQ0KcmVzdWx0cz0gbGlzdCgpDQoNCmZvciAoeCBpbiBkZXNjX2xpc3Qpew0KICByZXN1bHRzW1t4XV09ZGVzY19zdGF0cyhkYXRhMVtbeF1dKQ0KfQ0KDQpwcmludChyZXN1bHRzKQ0KYGBgDQoNCg0KYGBge3J9DQojIyB1c2luZyBQVVJSUjsgc3RpY2sgd2l0aCBiYXNlDQoNCiMgcmVzdWx0czEgPSBtYXAoZGVzY19saXN0LH5kZXNjX3N0YXRzKGRhdGExW1sueF1dKSkNCiMgDQojIG5hbWVzKHJlc3VsdHMxKSA9IGRlc2NfbGlzdA0KIyANCiMgcmVzdWx0czENCmBgYA0KDQojIyBEYXRhIHZpc3VhbGl6YXRpb24gKGxldmVsIDMpDQoNCkxldmVsIDM6IA0KUGxvdCB0aGUgcmVsYXRpb25zaGlwIGJldHdlZW4g4oCYQWdl4oCZIGFuZCDigJhRdWFsaXR5IG9mIFNsZWVwLuKAmSANCg0KYGBge3J9DQpkYXRhMSAlPiUgDQogIGdncGxvdChhZXMoeD1BZ2UseT1RdWFsaXR5Lm9mLlNsZWVwLGNvbD1RdWFsaXR5Lm9mLlNsZWVwKSkrDQogIGdlb21fcG9pbnQoKSsNCiAgZ2VvbV9qaXR0ZXIoKSsNCiAgdGhlbWVfYncoKSsNCiAgbGFicyhjb2w9JyAnLHk9J1NsZWVwIFF1YWxpdHknLA0KICAgICAgIHRpdGxlID0gJ1NsZWVwaW5nIEJlYXV0eScpKw0KICB0aGVtZShwbG90LnRpdGxlID0gZWxlbWVudF90ZXh0KGhqdXN0ID0gLjUpKQ0KYGBgDQoNCmBgYHtyfQ0KZGF0YTEgJT4lIA0KICBnZ3Bsb3QoYWVzKHk9T2NjdXBhdGlvbix4PXJldihRdWFsaXR5Lm9mLlNsZWVwKSxmaWxsPUdlbmRlcikpKw0KICBnZW9tX2NvbCgpKw0KICB0aGVtZV9idygpKw0KICBsYWJzKGZpbGw9JyAnLHg9J1NsZWVwIFF1YWxpdHknLA0KICAgICAgIHRpdGxlID0gJ1NsZWVwaW5nICJPbiB0aGUgSm9iIicsDQogICAgICAgeT0nT2NjdXBhdGlvbicpKw0KICB0aGVtZShwbG90LnRpdGxlID0gZWxlbWVudF90ZXh0KGhqdXN0ID0gLjUpKQ0KDQpkYXRhMSAlPiUgDQogIGdncGxvdChhZXMoeD1BZ2UseT1RdWFsaXR5Lm9mLlNsZWVwLGZpbGw9U2xlZXAuRGlzb3JkZXIpKSsNCiAgZ2VvbV9jb2woKSsNCiAgdGhlbWVfYncoKSsNCiAgbGFicyhmaWxsPScgJyx5PSdTbGVlcCBRdWFsaXR5JywNCiAgICAgICB0aXRsZSA9ICdTbGVlcGluZyBEaWZmZXJlbnRseScpKw0KICB0aGVtZShwbG90LnRpdGxlID0gZWxlbWVudF90ZXh0KGhqdXN0ID0gLjUpKSsNCiAgZmFjZXRfZ3JpZChHZW5kZXJ+U2xlZXAuRGlzb3JkZXIpDQpgYGANCg0KQ3JlYXRlIGEgdGFibGUgdG8gc2hvdyB0aGUgYXZlcmFnZSDigJhTdHJlc3MgTGV2ZWzigJkgYnkg4oCYT2NjdXBhdGlvbi7igJkNCmBgYHtyfQ0KdGFiMSA9IGRhdGExICU+JSANCiAgZ3JvdXBfYnkoT2NjdXBhdGlvbikgJT4lIA0KICBzdW1tYXJpc2UoQXZnX1N0cmVzcyA9IG1lYW4oU3RyZXNzLkxldmVsLG5hLnJtID0gVCkpICU+JSANCiAgYXJyYW5nZShkZXNjKEF2Z19TdHJlc3MpKQ0KDQpwcmludCh0YWIxKQ0KDQp0YWIxICU+JSANCiAgZ2dwbG90KGFlcyh5PWZjdF9pbm9yZGVyKE9jY3VwYXRpb24pLHg9QXZnX1N0cmVzcyxmaWxsPUF2Z19TdHJlc3MpKSsNCiAgZ2VvbV9jb2woKSsNCiAgdGhlbWVfYncoKSsNCiAgdGhlbWUocGxvdC50aXRsZSA9IGVsZW1lbnRfdGV4dChoanVzdCA9IC41KSkrDQogIGxhYnModGl0bGU9IldobydzIHRoZSAnTWVhbmVzdCcgKFN0cmVzcykgb2YgVGhlbSBBbGw/Iix4PSdNZWFuIFN0cmVzcyBMZXZlbHMnLGZpbGw9IiAiLHk9J09jY3VwYXRpb24nKQ0KYGBgDQoNCg0KYGBge3J9DQojIGRvbGxhciBmb3JtYXQNCiMgZG9sbGFycyA9IGZ1bmN0aW9uKHgpIHsNCiMgICBwYXN0ZTAoIiQiLCBmb3JtYXQoeCxiaWcubWFyaz0gIiwiLHNjaWVudGlmaWM9RikpDQojIH0NCg0KYGBgDQoNCg0KIyMgSW5mZXJlbnRpYWwgc3RhdGlzdGljcyAobGV2ZWwgNCkNCg0KTGV2ZWwgNDogSXMgdGhlcmUgYSByZWxhdGlvbnNoaXAgYmV0d2VlbiDigJhRdWFsaXR5IG9mIFNsZWVw4oCZIGFuZCDigJhTdHJlc3MgTGV2ZWw/4oCZDQpgYGB7cn0NCnNoYXBpcm8udGVzdChkYXRhMSRRdWFsaXR5Lm9mLlNsZWVwKQ0KDQpzaGFwaXJvLnRlc3QoZGF0YTEkU3RyZXNzLkxldmVsKQ0KDQpoaXN0KGRhdGExJFF1YWxpdHkub2YuU2xlZXApDQoNCmhpc3QoZGF0YTEkU3RyZXNzLkxldmVsKQ0KDQpjb3IudGVzdChkYXRhMSRRdWFsaXR5Lm9mLlNsZWVwLGRhdGExJFN0cmVzcy5MZXZlbCxtZXRob2QgPSAia2VuZGFsbCIpDQoNCiN5ZXMsIHRoZXJlJ3MgYSByZWxhdGlvbnNoaXANCmBgYA0KDQoNCg0KYGBge3IgaW5jbHVkZT1GQUxTRX0NCiNiZWVwIHdoZW4gZG9uZQ0KaWYgKHJlcXVpcmUoImJlZXByIixxdWlldGx5ID0gVCkpDQogIGJlZXByOjpiZWVwKDIpDQpgYGANCg==