Start Date: August 30, 2024
Report Date: 30 September 2024
Source: Psi Chi
R
According to the National Day Calendar, National Coffee Day is
September 29th(plus, with back to school in full swing, it seems
appropriate)! This contest won’t keep you working late, we’ll keep it
short and sweet (like espresso).
#library(tidyverse)
library(purrr)
library(stringr)
library(dplyr)
library(ggplot2)
#data = read.csv('https://osf.io/download/jma8e/')
data=read.csv('data.csv')
#write.csv(data,'data.csv')
## EDA
#skimer=skimr::skim(data)
data1=data %>%
mutate(across(roast, ~ if_else(. == "", "N/A", .)))
Data processing (level 1)
Create a table that shows how many times each type
of roast is represented in the data set.
table1_roast=data1 %>%
filter(complete.cases(roast)) %>%
#mutate(across(roast, ~ if_else(. == "", "N/A", .))) %>%
group_by(roast) %>%
summarise(COUNT = n()) %>%
arrange(-COUNT)
table1_roast
Create a table that shows how many times each
roaster is represented in the data set.
#library(stringr)
table2_roaster=data1 %>%
filter(complete.cases(roaster)) %>%
mutate(roaster = case_when(str_detect(roaster,"Simon Hsieh") ~ "Simon Hsieh", TRUE~roaster))%>%
group_by(roaster) %>%
summarise(COUNT = n()) %>%
arrange(-COUNT)
table2_roaster
Descriptive Statistics (level 2)
What is the average cost (USD) for 100 grams of
coffee reviewed in the data set?
mean(data1$X100g_USD,na.rm = T)
## [1] 9.323313
# Average cost is $9.32 for 100 grams of coffee
What is the cost of the least and most expensive
coffee?
a1=data1 %>%
summarize(least = min(X100g_USD,na.rm = T),
most = max(X100g_USD,na.rm = T))
a1
# Most expensive is $132.28 and least expensive is $0.12.
What is the average rating of the coffee reviewed
in the dataset?
mean(data1$rating,na.rm = T)
## [1] 93.11408
# Average rating is 93.11
What is the most received rating?
data1 %>%
group_by(rating) %>%
count() %>%
arrange(-n) %>%
head(5)
# Most received rating is a 93
Data visualization (level 3)
Create a table that shows the roast type and
average ratings based on the roaster.
dv1=data1 %>%
group_by(Roast_Type=roast,Roaster=roaster) %>%
summarise(Average_Rating=mean(rating,na.rm = T)) %>%
arrange(-Average_Rating)
## `summarise()` has grouped output by 'Roast_Type'. You can override using the
## `.groups` argument.
dv1
Create a graph that shows the cost (USD) for 100
grams of coffee based on the bean origin (origin 1)
dv2=data1 %>%
group_by(origin_1) %>%
summarise(Roast_Count = n(),
Cost=sum(X100g_USD,na.rm = T))
dv2 %>%
ggplot(aes(y=Cost,x=Roast_Count))+
geom_point()+
theme_bw()+
labs(x="Roast Types by Count",
y="Cost for 100 Grams of Coffee",
title="Espresso Yourself: The Cost of Coffee")+
theme(plot.title = element_text(hjust = .5))

Inferential statistics (level 4)
Is there a significant difference in the
rating between roasts? If so, which one(s)?
table3_sig_diff=data1 %>%
select(roast,rating) %>%
group_by(roast)
## Create DFs for roast
test_dark = table3_sig_diff%>%
filter(roast=="Dark")
test_NA = table3_sig_diff%>%
filter(roast=="N/A")
test_MD = table3_sig_diff%>%
filter(roast=="Medium-Dark")
test_light = table3_sig_diff%>%
filter(roast=="Light")
test_med = table3_sig_diff%>%
filter(roast=="Medium")
test_ML = table3_sig_diff%>%
filter(roast=="Medium-Light")
Normality test
roast_type = c("Dark", "N/A", "Medium-Dark", "Light", "Medium", "Medium-Light")
map(roast_type, ~ {
filtered =table3_sig_diff %>%
filter(roast == .x)
shapiro.test(filtered$rating)
})
## [[1]]
##
## Shapiro-Wilk normality test
##
## data: filtered$rating
## W = 0.95296, p-value = 0.7583
##
##
## [[2]]
##
## Shapiro-Wilk normality test
##
## data: filtered$rating
## W = 0.7878, p-value = 0.002574
##
##
## [[3]]
##
## Shapiro-Wilk normality test
##
## data: filtered$rating
## W = 0.90034, p-value = 0.002256
##
##
## [[4]]
##
## Shapiro-Wilk normality test
##
## data: filtered$rating
## W = 0.9117, p-value = 5.936e-12
##
##
## [[5]]
##
## Shapiro-Wilk normality test
##
## data: filtered$rating
## W = 0.9497, p-value = 8.715e-08
##
##
## [[6]]
##
## Shapiro-Wilk normality test
##
## data: filtered$rating
## W = 0.94392, p-value < 2.2e-16
LS0tDQp0aXRsZTogIlBzaSBDaGkgUiAtIFNlcHRlbWJlciAyMDI0Ig0KYXV0aG9yOiAiYnkgQWxhbiBMYW0iDQojZGF0ZTogImByIFN5cy5EYXRlKClgIg0KI2RhdGU6ICJEYXRlOiBgciBmb3JtYXQoU3lzLkRhdGUoKSwgJyVkICVCICVZJylgIiANCm91dHB1dDoNCiAgaHRtbF9kb2N1bWVudDoNCiAgICB0aGVtZTogcmVhZGFibGUNCiAgICBhbHdheXNfYWxsb3dfaHRtbDogeWVzDQogICAgZGZfcHJpbnQ6IHBhZ2VkDQogICAgdG9jOiB5ZXMNCiAgICB0b2NfZmxvYXQ6IHllcw0KICAgIG51bWJlcl9zZWN0aW9uczogbm8NCiAgICBhbmNob3Jfc2VjdGlvbnM6IFRSVUUNCiAgICBjb2RlX2ZvbGRpbmc6IGhpZGUNCiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlDQotLS0NClN0YXJ0IERhdGU6IEF1Z3VzdCAzMCwgMjAyNA0KDQpSZXBvcnQgRGF0ZTogYHIgZm9ybWF0KFN5cy5EYXRlKCksICclZCAlQiAlWScpYA0KDQpbKipTb3VyY2UqKjogUHNpIENoaSBSXShodHRwczovL29zZi5pby8zem04dy8pDQoNCkFjY29yZGluZyB0byB0aGUgTmF0aW9uYWwgRGF5IENhbGVuZGFyLCBOYXRpb25hbCBDb2ZmZWUgRGF5IGlzIFNlcHRlbWJlciAyOXRoKHBsdXMsIHdpdGggYmFjayB0byBzY2hvb2wgaW4gZnVsbCBzd2luZywgaXQgc2VlbXMgYXBwcm9wcmlhdGUpISBUaGlzIGNvbnRlc3Qgd29u4oCZdCBrZWVwIHlvdSB3b3JraW5nIGxhdGUsIHdl4oCZbGwga2VlcCBpdCBzaG9ydCBhbmQgc3dlZXQgKGxpa2UgZXNwcmVzc28pLg0KDQpgYGB7ciBzZXR1cCwgd2FybmluZz1GLG1lc3NhZ2U9Rn0NCiNsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkocHVycnIpDQpsaWJyYXJ5KHN0cmluZ3IpDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KDQojZGF0YSA9IHJlYWQuY3N2KCdodHRwczovL29zZi5pby9kb3dubG9hZC9qbWE4ZS8nKQ0KZGF0YT1yZWFkLmNzdignZGF0YS5jc3YnKQ0KI3dyaXRlLmNzdihkYXRhLCdkYXRhLmNzdicpDQpgYGANCg0KDQpgYGB7cn0NCiMjIEVEQQ0KI3NraW1lcj1za2ltcjo6c2tpbShkYXRhKQ0KDQpkYXRhMT1kYXRhICU+JSANCiAgbXV0YXRlKGFjcm9zcyhyb2FzdCwgfiBpZl9lbHNlKC4gPT0gIiIsICJOL0EiLCAuKSkpDQpgYGANCg0KIyMgRGF0YSBwcm9jZXNzaW5nIChsZXZlbCAxKQ0KDQojIyMgQ3JlYXRlIGEgdGFibGUgdGhhdCBzaG93cyBob3cgbWFueSB0aW1lcyBlYWNoIHR5cGUgb2Ygcm9hc3QgaXMgcmVwcmVzZW50ZWQgaW4gdGhlIGRhdGEgc2V0Lg0KYGBge3J9DQoNCnRhYmxlMV9yb2FzdD1kYXRhMSAlPiUgDQogIGZpbHRlcihjb21wbGV0ZS5jYXNlcyhyb2FzdCkpICU+JSANCiAgI211dGF0ZShhY3Jvc3Mocm9hc3QsIH4gaWZfZWxzZSguID09ICIiLCAiTi9BIiwgLikpKSAlPiUgDQogIGdyb3VwX2J5KHJvYXN0KSAlPiUgDQogIHN1bW1hcmlzZShDT1VOVCA9IG4oKSkgJT4lIA0KICBhcnJhbmdlKC1DT1VOVCkNCg0KdGFibGUxX3JvYXN0DQpgYGANCg0KIyMjIENyZWF0ZSBhIHRhYmxlIHRoYXQgc2hvd3MgaG93IG1hbnkgdGltZXMgZWFjaCByb2FzdGVyIGlzIHJlcHJlc2VudGVkIGluIHRoZSBkYXRhIHNldC4NCmBgYHtyfQ0KDQojbGlicmFyeShzdHJpbmdyKQ0KDQp0YWJsZTJfcm9hc3Rlcj1kYXRhMSAlPiUgDQogIGZpbHRlcihjb21wbGV0ZS5jYXNlcyhyb2FzdGVyKSkgJT4lIA0KICBtdXRhdGUocm9hc3RlciA9IGNhc2Vfd2hlbihzdHJfZGV0ZWN0KHJvYXN0ZXIsIlNpbW9uIEhzaWVoIikgfiAiU2ltb24gSHNpZWgiLCBUUlVFfnJvYXN0ZXIpKSU+JQ0KICBncm91cF9ieShyb2FzdGVyKSAlPiUgDQogIHN1bW1hcmlzZShDT1VOVCA9IG4oKSkgJT4lIA0KICBhcnJhbmdlKC1DT1VOVCkNCg0KdGFibGUyX3JvYXN0ZXINCmBgYA0KDQojIyBEZXNjcmlwdGl2ZSBTdGF0aXN0aWNzIChsZXZlbCAyKQ0KDQojIyMgV2hhdCBpcyB0aGUgYXZlcmFnZSBjb3N0IChVU0QpIGZvciAxMDAgZ3JhbXMgb2YgY29mZmVlIHJldmlld2VkIGluIHRoZSBkYXRhIHNldD8NCmBgYHtyfQ0KbWVhbihkYXRhMSRYMTAwZ19VU0QsbmEucm0gPSBUKQ0KIyBBdmVyYWdlIGNvc3QgaXMgJDkuMzIgZm9yIDEwMCBncmFtcyBvZiBjb2ZmZWUNCmBgYA0KDQojIyMgV2hhdCBpcyB0aGUgY29zdCBvZiB0aGUgbGVhc3QgYW5kIG1vc3QgZXhwZW5zaXZlIGNvZmZlZT8NCmBgYHtyfQ0KDQphMT1kYXRhMSAlPiUgDQogIHN1bW1hcml6ZShsZWFzdCA9IG1pbihYMTAwZ19VU0QsbmEucm0gPSBUKSwNCiAgICAgICAgICAgIG1vc3QgPSBtYXgoWDEwMGdfVVNELG5hLnJtID0gVCkpIA0KICANCg0KYTENCiMgTW9zdCBleHBlbnNpdmUgaXMgJDEzMi4yOCBhbmQgbGVhc3QgZXhwZW5zaXZlIGlzICQwLjEyLg0KYGBgDQoNCiMjIyBXaGF0IGlzIHRoZSBhdmVyYWdlIHJhdGluZyBvZiB0aGUgY29mZmVlIHJldmlld2VkIGluIHRoZSBkYXRhc2V0Pw0KYGBge3J9DQoNCm1lYW4oZGF0YTEkcmF0aW5nLG5hLnJtID0gVCkNCiMgQXZlcmFnZSByYXRpbmcgaXMgOTMuMTENCmBgYA0KDQojIyMgV2hhdCBpcyB0aGUgbW9zdCByZWNlaXZlZCByYXRpbmc/DQpgYGB7cn0NCg0KZGF0YTEgJT4lIA0KICBncm91cF9ieShyYXRpbmcpICU+JSANCiAgY291bnQoKSAlPiUgDQogIGFycmFuZ2UoLW4pICU+JSANCiAgaGVhZCg1KQ0KIyBNb3N0IHJlY2VpdmVkIHJhdGluZyBpcyBhIDkzDQpgYGANCg0KIyMgRGF0YSB2aXN1YWxpemF0aW9uIChsZXZlbCAzKQ0KDQojIyMgQ3JlYXRlIGEgdGFibGUgdGhhdCBzaG93cyB0aGUgcm9hc3QgdHlwZSBhbmQgYXZlcmFnZSByYXRpbmdzIGJhc2VkIG9uIHRoZSByb2FzdGVyLg0KYGBge3J9DQpkdjE9ZGF0YTEgJT4lIA0KICBncm91cF9ieShSb2FzdF9UeXBlPXJvYXN0LFJvYXN0ZXI9cm9hc3RlcikgJT4lIA0KICBzdW1tYXJpc2UoQXZlcmFnZV9SYXRpbmc9bWVhbihyYXRpbmcsbmEucm0gPSBUKSkgJT4lIA0KICBhcnJhbmdlKC1BdmVyYWdlX1JhdGluZykNCg0KZHYxDQpgYGANCg0KIyMjIENyZWF0ZSBhIGdyYXBoIHRoYXQgc2hvd3MgdGhlIGNvc3QgKFVTRCkgZm9yIDEwMCBncmFtcyBvZiBjb2ZmZWUgYmFzZWQgb24gdGhlIGJlYW4gb3JpZ2luIChvcmlnaW4gMSkNCmBgYHtyfQ0KZHYyPWRhdGExICU+JSANCiAgZ3JvdXBfYnkob3JpZ2luXzEpICU+JSANCiAgc3VtbWFyaXNlKFJvYXN0X0NvdW50ID0gbigpLA0KICAgICAgICAgICAgQ29zdD1zdW0oWDEwMGdfVVNELG5hLnJtID0gVCkpDQoNCmR2MiAlPiUgDQogIGdncGxvdChhZXMoeT1Db3N0LHg9Um9hc3RfQ291bnQpKSsNCiAgZ2VvbV9wb2ludCgpKw0KICB0aGVtZV9idygpKw0KICBsYWJzKHg9IlJvYXN0IFR5cGVzIGJ5IENvdW50IiwNCiAgICAgICB5PSJDb3N0IGZvciAxMDAgR3JhbXMgb2YgQ29mZmVlIiwNCiAgICAgICB0aXRsZT0iRXNwcmVzc28gWW91cnNlbGY6IFRoZSBDb3N0IG9mIENvZmZlZSIpKw0KICB0aGVtZShwbG90LnRpdGxlID0gZWxlbWVudF90ZXh0KGhqdXN0ID0gLjUpKQ0KYGBgDQoNCiMjIEluZmVyZW50aWFsIHN0YXRpc3RpY3MgKGxldmVsIDQpDQoNCiMjIyBJcyB0aGVyZSBhIHNpZ25pZmljYW50IGRpZmZlcmVuY2UgaW4gdGhlICoqcmF0aW5nKiogYmV0d2VlbiByb2FzdHM/IElmIHNvLCB3aGljaCBvbmUocyk/DQpgYGB7cn0NCnRhYmxlM19zaWdfZGlmZj1kYXRhMSAlPiUNCiAgc2VsZWN0KHJvYXN0LHJhdGluZykgJT4lIA0KICBncm91cF9ieShyb2FzdCkNCg0KYGBgDQoNCg0KYGBge3J9DQojIyBDcmVhdGUgREZzIGZvciByb2FzdA0KdGVzdF9kYXJrID0gdGFibGUzX3NpZ19kaWZmJT4lIA0KICBmaWx0ZXIocm9hc3Q9PSJEYXJrIikNCg0KdGVzdF9OQSA9IHRhYmxlM19zaWdfZGlmZiU+JSANCiAgZmlsdGVyKHJvYXN0PT0iTi9BIikNCg0KdGVzdF9NRCA9IHRhYmxlM19zaWdfZGlmZiU+JSANCiAgZmlsdGVyKHJvYXN0PT0iTWVkaXVtLURhcmsiKQ0KDQp0ZXN0X2xpZ2h0ID0gdGFibGUzX3NpZ19kaWZmJT4lIA0KICBmaWx0ZXIocm9hc3Q9PSJMaWdodCIpDQoNCnRlc3RfbWVkID0gdGFibGUzX3NpZ19kaWZmJT4lIA0KICBmaWx0ZXIocm9hc3Q9PSJNZWRpdW0iKQ0KDQp0ZXN0X01MID0gdGFibGUzX3NpZ19kaWZmJT4lIA0KICBmaWx0ZXIocm9hc3Q9PSJNZWRpdW0tTGlnaHQiKQ0KYGBgDQoNCiMjIE5vcm1hbGl0eSB0ZXN0DQpgYGB7cn0NCg0Kcm9hc3RfdHlwZSA9IGMoIkRhcmsiLCAiTi9BIiwgIk1lZGl1bS1EYXJrIiwgIkxpZ2h0IiwgIk1lZGl1bSIsICJNZWRpdW0tTGlnaHQiKQ0KDQptYXAocm9hc3RfdHlwZSwgfiB7DQogIGZpbHRlcmVkID10YWJsZTNfc2lnX2RpZmYgJT4lDQogICAgZmlsdGVyKHJvYXN0ID09IC54KQ0KICBzaGFwaXJvLnRlc3QoZmlsdGVyZWQkcmF0aW5nKQ0KfSkNCg0KYGBgDQoNCg0KDQpgYGB7ciBpbmNsdWRlPUZBTFNFfQ0KI2JlZXAgd2hlbiBkb25lDQppZiAocmVxdWlyZSgiYmVlcHIiLHF1aWV0bHkgPSBUKSkNCiAgYmVlcHI6OmJlZXAoMikNCmBgYA0K