Start Date: August 30, 2024

Report Date: 30 September 2024

Source: Psi Chi R

According to the National Day Calendar, National Coffee Day is September 29th(plus, with back to school in full swing, it seems appropriate)! This contest won’t keep you working late, we’ll keep it short and sweet (like espresso).

#library(tidyverse)
library(purrr)
library(stringr)
library(dplyr)
library(ggplot2)

#data = read.csv('https://osf.io/download/jma8e/')
data=read.csv('data.csv')
#write.csv(data,'data.csv')
## EDA
#skimer=skimr::skim(data)

data1=data %>% 
  mutate(across(roast, ~ if_else(. == "", "N/A", .)))

Data processing (level 1)

Create a table that shows how many times each type of roast is represented in the data set.

table1_roast=data1 %>% 
  filter(complete.cases(roast)) %>% 
  #mutate(across(roast, ~ if_else(. == "", "N/A", .))) %>% 
  group_by(roast) %>% 
  summarise(COUNT = n()) %>% 
  arrange(-COUNT)

table1_roast

Create a table that shows how many times each roaster is represented in the data set.

#library(stringr)

table2_roaster=data1 %>% 
  filter(complete.cases(roaster)) %>% 
  mutate(roaster = case_when(str_detect(roaster,"Simon Hsieh") ~ "Simon Hsieh", TRUE~roaster))%>%
  group_by(roaster) %>% 
  summarise(COUNT = n()) %>% 
  arrange(-COUNT)

table2_roaster

Descriptive Statistics (level 2)

What is the average cost (USD) for 100 grams of coffee reviewed in the data set?

mean(data1$X100g_USD,na.rm = T)
## [1] 9.323313
# Average cost is $9.32 for 100 grams of coffee

What is the cost of the least and most expensive coffee?

a1=data1 %>% 
  summarize(least = min(X100g_USD,na.rm = T),
            most = max(X100g_USD,na.rm = T)) 
  

a1
# Most expensive is $132.28 and least expensive is $0.12.

What is the average rating of the coffee reviewed in the dataset?

mean(data1$rating,na.rm = T)
## [1] 93.11408
# Average rating is 93.11

What is the most received rating?

data1 %>% 
  group_by(rating) %>% 
  count() %>% 
  arrange(-n) %>% 
  head(5)
# Most received rating is a 93

Data visualization (level 3)

Create a table that shows the roast type and average ratings based on the roaster.

dv1=data1 %>% 
  group_by(Roast_Type=roast,Roaster=roaster) %>% 
  summarise(Average_Rating=mean(rating,na.rm = T)) %>% 
  arrange(-Average_Rating)
## `summarise()` has grouped output by 'Roast_Type'. You can override using the
## `.groups` argument.
dv1

Create a graph that shows the cost (USD) for 100 grams of coffee based on the bean origin (origin 1)

dv2=data1 %>% 
  group_by(origin_1) %>% 
  summarise(Roast_Count = n(),
            Cost=sum(X100g_USD,na.rm = T))

dv2 %>% 
  ggplot(aes(y=Cost,x=Roast_Count))+
  geom_point()+
  theme_bw()+
  labs(x="Roast Types by Count",
       y="Cost for 100 Grams of Coffee",
       title="Espresso Yourself: The Cost of Coffee")+
  theme(plot.title = element_text(hjust = .5))

Inferential statistics (level 4)

Is there a significant difference in the rating between roasts? If so, which one(s)?

table3_sig_diff=data1 %>%
  select(roast,rating) %>% 
  group_by(roast)
## Create DFs for roast
test_dark = table3_sig_diff%>% 
  filter(roast=="Dark")

test_NA = table3_sig_diff%>% 
  filter(roast=="N/A")

test_MD = table3_sig_diff%>% 
  filter(roast=="Medium-Dark")

test_light = table3_sig_diff%>% 
  filter(roast=="Light")

test_med = table3_sig_diff%>% 
  filter(roast=="Medium")

test_ML = table3_sig_diff%>% 
  filter(roast=="Medium-Light")

Normality test

roast_type = c("Dark", "N/A", "Medium-Dark", "Light", "Medium", "Medium-Light")

map(roast_type, ~ {
  filtered =table3_sig_diff %>%
    filter(roast == .x)
  shapiro.test(filtered$rating)
})
## [[1]]
## 
##  Shapiro-Wilk normality test
## 
## data:  filtered$rating
## W = 0.95296, p-value = 0.7583
## 
## 
## [[2]]
## 
##  Shapiro-Wilk normality test
## 
## data:  filtered$rating
## W = 0.7878, p-value = 0.002574
## 
## 
## [[3]]
## 
##  Shapiro-Wilk normality test
## 
## data:  filtered$rating
## W = 0.90034, p-value = 0.002256
## 
## 
## [[4]]
## 
##  Shapiro-Wilk normality test
## 
## data:  filtered$rating
## W = 0.9117, p-value = 5.936e-12
## 
## 
## [[5]]
## 
##  Shapiro-Wilk normality test
## 
## data:  filtered$rating
## W = 0.9497, p-value = 8.715e-08
## 
## 
## [[6]]
## 
##  Shapiro-Wilk normality test
## 
## data:  filtered$rating
## W = 0.94392, p-value < 2.2e-16
LS0tDQp0aXRsZTogIlBzaSBDaGkgUiAtIFNlcHRlbWJlciAyMDI0Ig0KYXV0aG9yOiAiYnkgQWxhbiBMYW0iDQojZGF0ZTogImByIFN5cy5EYXRlKClgIg0KI2RhdGU6ICJEYXRlOiBgciBmb3JtYXQoU3lzLkRhdGUoKSwgJyVkICVCICVZJylgIiANCm91dHB1dDoNCiAgaHRtbF9kb2N1bWVudDoNCiAgICB0aGVtZTogcmVhZGFibGUNCiAgICBhbHdheXNfYWxsb3dfaHRtbDogeWVzDQogICAgZGZfcHJpbnQ6IHBhZ2VkDQogICAgdG9jOiB5ZXMNCiAgICB0b2NfZmxvYXQ6IHllcw0KICAgIG51bWJlcl9zZWN0aW9uczogbm8NCiAgICBhbmNob3Jfc2VjdGlvbnM6IFRSVUUNCiAgICBjb2RlX2ZvbGRpbmc6IGhpZGUNCiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlDQotLS0NClN0YXJ0IERhdGU6IEF1Z3VzdCAzMCwgMjAyNA0KDQpSZXBvcnQgRGF0ZTogYHIgZm9ybWF0KFN5cy5EYXRlKCksICclZCAlQiAlWScpYA0KDQpbKipTb3VyY2UqKjogUHNpIENoaSBSXShodHRwczovL29zZi5pby8zem04dy8pDQoNCkFjY29yZGluZyB0byB0aGUgTmF0aW9uYWwgRGF5IENhbGVuZGFyLCBOYXRpb25hbCBDb2ZmZWUgRGF5IGlzIFNlcHRlbWJlciAyOXRoKHBsdXMsIHdpdGggYmFjayB0byBzY2hvb2wgaW4gZnVsbCBzd2luZywgaXQgc2VlbXMgYXBwcm9wcmlhdGUpISBUaGlzIGNvbnRlc3Qgd29u4oCZdCBrZWVwIHlvdSB3b3JraW5nIGxhdGUsIHdl4oCZbGwga2VlcCBpdCBzaG9ydCBhbmQgc3dlZXQgKGxpa2UgZXNwcmVzc28pLg0KDQpgYGB7ciBzZXR1cCwgd2FybmluZz1GLG1lc3NhZ2U9Rn0NCiNsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkocHVycnIpDQpsaWJyYXJ5KHN0cmluZ3IpDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KDQojZGF0YSA9IHJlYWQuY3N2KCdodHRwczovL29zZi5pby9kb3dubG9hZC9qbWE4ZS8nKQ0KZGF0YT1yZWFkLmNzdignZGF0YS5jc3YnKQ0KI3dyaXRlLmNzdihkYXRhLCdkYXRhLmNzdicpDQpgYGANCg0KDQpgYGB7cn0NCiMjIEVEQQ0KI3NraW1lcj1za2ltcjo6c2tpbShkYXRhKQ0KDQpkYXRhMT1kYXRhICU+JSANCiAgbXV0YXRlKGFjcm9zcyhyb2FzdCwgfiBpZl9lbHNlKC4gPT0gIiIsICJOL0EiLCAuKSkpDQpgYGANCg0KIyMgRGF0YSBwcm9jZXNzaW5nIChsZXZlbCAxKQ0KDQojIyMgQ3JlYXRlIGEgdGFibGUgdGhhdCBzaG93cyBob3cgbWFueSB0aW1lcyBlYWNoIHR5cGUgb2Ygcm9hc3QgaXMgcmVwcmVzZW50ZWQgaW4gdGhlIGRhdGEgc2V0Lg0KYGBge3J9DQoNCnRhYmxlMV9yb2FzdD1kYXRhMSAlPiUgDQogIGZpbHRlcihjb21wbGV0ZS5jYXNlcyhyb2FzdCkpICU+JSANCiAgI211dGF0ZShhY3Jvc3Mocm9hc3QsIH4gaWZfZWxzZSguID09ICIiLCAiTi9BIiwgLikpKSAlPiUgDQogIGdyb3VwX2J5KHJvYXN0KSAlPiUgDQogIHN1bW1hcmlzZShDT1VOVCA9IG4oKSkgJT4lIA0KICBhcnJhbmdlKC1DT1VOVCkNCg0KdGFibGUxX3JvYXN0DQpgYGANCg0KIyMjIENyZWF0ZSBhIHRhYmxlIHRoYXQgc2hvd3MgaG93IG1hbnkgdGltZXMgZWFjaCByb2FzdGVyIGlzIHJlcHJlc2VudGVkIGluIHRoZSBkYXRhIHNldC4NCmBgYHtyfQ0KDQojbGlicmFyeShzdHJpbmdyKQ0KDQp0YWJsZTJfcm9hc3Rlcj1kYXRhMSAlPiUgDQogIGZpbHRlcihjb21wbGV0ZS5jYXNlcyhyb2FzdGVyKSkgJT4lIA0KICBtdXRhdGUocm9hc3RlciA9IGNhc2Vfd2hlbihzdHJfZGV0ZWN0KHJvYXN0ZXIsIlNpbW9uIEhzaWVoIikgfiAiU2ltb24gSHNpZWgiLCBUUlVFfnJvYXN0ZXIpKSU+JQ0KICBncm91cF9ieShyb2FzdGVyKSAlPiUgDQogIHN1bW1hcmlzZShDT1VOVCA9IG4oKSkgJT4lIA0KICBhcnJhbmdlKC1DT1VOVCkNCg0KdGFibGUyX3JvYXN0ZXINCmBgYA0KDQojIyBEZXNjcmlwdGl2ZSBTdGF0aXN0aWNzIChsZXZlbCAyKQ0KDQojIyMgV2hhdCBpcyB0aGUgYXZlcmFnZSBjb3N0IChVU0QpIGZvciAxMDAgZ3JhbXMgb2YgY29mZmVlIHJldmlld2VkIGluIHRoZSBkYXRhIHNldD8NCmBgYHtyfQ0KbWVhbihkYXRhMSRYMTAwZ19VU0QsbmEucm0gPSBUKQ0KIyBBdmVyYWdlIGNvc3QgaXMgJDkuMzIgZm9yIDEwMCBncmFtcyBvZiBjb2ZmZWUNCmBgYA0KDQojIyMgV2hhdCBpcyB0aGUgY29zdCBvZiB0aGUgbGVhc3QgYW5kIG1vc3QgZXhwZW5zaXZlIGNvZmZlZT8NCmBgYHtyfQ0KDQphMT1kYXRhMSAlPiUgDQogIHN1bW1hcml6ZShsZWFzdCA9IG1pbihYMTAwZ19VU0QsbmEucm0gPSBUKSwNCiAgICAgICAgICAgIG1vc3QgPSBtYXgoWDEwMGdfVVNELG5hLnJtID0gVCkpIA0KICANCg0KYTENCiMgTW9zdCBleHBlbnNpdmUgaXMgJDEzMi4yOCBhbmQgbGVhc3QgZXhwZW5zaXZlIGlzICQwLjEyLg0KYGBgDQoNCiMjIyBXaGF0IGlzIHRoZSBhdmVyYWdlIHJhdGluZyBvZiB0aGUgY29mZmVlIHJldmlld2VkIGluIHRoZSBkYXRhc2V0Pw0KYGBge3J9DQoNCm1lYW4oZGF0YTEkcmF0aW5nLG5hLnJtID0gVCkNCiMgQXZlcmFnZSByYXRpbmcgaXMgOTMuMTENCmBgYA0KDQojIyMgV2hhdCBpcyB0aGUgbW9zdCByZWNlaXZlZCByYXRpbmc/DQpgYGB7cn0NCg0KZGF0YTEgJT4lIA0KICBncm91cF9ieShyYXRpbmcpICU+JSANCiAgY291bnQoKSAlPiUgDQogIGFycmFuZ2UoLW4pICU+JSANCiAgaGVhZCg1KQ0KIyBNb3N0IHJlY2VpdmVkIHJhdGluZyBpcyBhIDkzDQpgYGANCg0KIyMgRGF0YSB2aXN1YWxpemF0aW9uIChsZXZlbCAzKQ0KDQojIyMgQ3JlYXRlIGEgdGFibGUgdGhhdCBzaG93cyB0aGUgcm9hc3QgdHlwZSBhbmQgYXZlcmFnZSByYXRpbmdzIGJhc2VkIG9uIHRoZSByb2FzdGVyLg0KYGBge3J9DQpkdjE9ZGF0YTEgJT4lIA0KICBncm91cF9ieShSb2FzdF9UeXBlPXJvYXN0LFJvYXN0ZXI9cm9hc3RlcikgJT4lIA0KICBzdW1tYXJpc2UoQXZlcmFnZV9SYXRpbmc9bWVhbihyYXRpbmcsbmEucm0gPSBUKSkgJT4lIA0KICBhcnJhbmdlKC1BdmVyYWdlX1JhdGluZykNCg0KZHYxDQpgYGANCg0KIyMjIENyZWF0ZSBhIGdyYXBoIHRoYXQgc2hvd3MgdGhlIGNvc3QgKFVTRCkgZm9yIDEwMCBncmFtcyBvZiBjb2ZmZWUgYmFzZWQgb24gdGhlIGJlYW4gb3JpZ2luIChvcmlnaW4gMSkNCmBgYHtyfQ0KZHYyPWRhdGExICU+JSANCiAgZ3JvdXBfYnkob3JpZ2luXzEpICU+JSANCiAgc3VtbWFyaXNlKFJvYXN0X0NvdW50ID0gbigpLA0KICAgICAgICAgICAgQ29zdD1zdW0oWDEwMGdfVVNELG5hLnJtID0gVCkpDQoNCmR2MiAlPiUgDQogIGdncGxvdChhZXMoeT1Db3N0LHg9Um9hc3RfQ291bnQpKSsNCiAgZ2VvbV9wb2ludCgpKw0KICB0aGVtZV9idygpKw0KICBsYWJzKHg9IlJvYXN0IFR5cGVzIGJ5IENvdW50IiwNCiAgICAgICB5PSJDb3N0IGZvciAxMDAgR3JhbXMgb2YgQ29mZmVlIiwNCiAgICAgICB0aXRsZT0iRXNwcmVzc28gWW91cnNlbGY6IFRoZSBDb3N0IG9mIENvZmZlZSIpKw0KICB0aGVtZShwbG90LnRpdGxlID0gZWxlbWVudF90ZXh0KGhqdXN0ID0gLjUpKQ0KYGBgDQoNCiMjIEluZmVyZW50aWFsIHN0YXRpc3RpY3MgKGxldmVsIDQpDQoNCiMjIyBJcyB0aGVyZSBhIHNpZ25pZmljYW50IGRpZmZlcmVuY2UgaW4gdGhlICoqcmF0aW5nKiogYmV0d2VlbiByb2FzdHM/IElmIHNvLCB3aGljaCBvbmUocyk/DQpgYGB7cn0NCnRhYmxlM19zaWdfZGlmZj1kYXRhMSAlPiUNCiAgc2VsZWN0KHJvYXN0LHJhdGluZykgJT4lIA0KICBncm91cF9ieShyb2FzdCkNCg0KYGBgDQoNCg0KYGBge3J9DQojIyBDcmVhdGUgREZzIGZvciByb2FzdA0KdGVzdF9kYXJrID0gdGFibGUzX3NpZ19kaWZmJT4lIA0KICBmaWx0ZXIocm9hc3Q9PSJEYXJrIikNCg0KdGVzdF9OQSA9IHRhYmxlM19zaWdfZGlmZiU+JSANCiAgZmlsdGVyKHJvYXN0PT0iTi9BIikNCg0KdGVzdF9NRCA9IHRhYmxlM19zaWdfZGlmZiU+JSANCiAgZmlsdGVyKHJvYXN0PT0iTWVkaXVtLURhcmsiKQ0KDQp0ZXN0X2xpZ2h0ID0gdGFibGUzX3NpZ19kaWZmJT4lIA0KICBmaWx0ZXIocm9hc3Q9PSJMaWdodCIpDQoNCnRlc3RfbWVkID0gdGFibGUzX3NpZ19kaWZmJT4lIA0KICBmaWx0ZXIocm9hc3Q9PSJNZWRpdW0iKQ0KDQp0ZXN0X01MID0gdGFibGUzX3NpZ19kaWZmJT4lIA0KICBmaWx0ZXIocm9hc3Q9PSJNZWRpdW0tTGlnaHQiKQ0KYGBgDQoNCiMjIE5vcm1hbGl0eSB0ZXN0DQpgYGB7cn0NCg0Kcm9hc3RfdHlwZSA9IGMoIkRhcmsiLCAiTi9BIiwgIk1lZGl1bS1EYXJrIiwgIkxpZ2h0IiwgIk1lZGl1bSIsICJNZWRpdW0tTGlnaHQiKQ0KDQptYXAocm9hc3RfdHlwZSwgfiB7DQogIGZpbHRlcmVkID10YWJsZTNfc2lnX2RpZmYgJT4lDQogICAgZmlsdGVyKHJvYXN0ID09IC54KQ0KICBzaGFwaXJvLnRlc3QoZmlsdGVyZWQkcmF0aW5nKQ0KfSkNCg0KYGBgDQoNCg0KDQpgYGB7ciBpbmNsdWRlPUZBTFNFfQ0KI2JlZXAgd2hlbiBkb25lDQppZiAocmVxdWlyZSgiYmVlcHIiLHF1aWV0bHkgPSBUKSkNCiAgYmVlcHI6OmJlZXAoMikNCmBgYA0K