library(here)
## here() starts at /Users/ying/Desktop
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.2
## Warning: package 'tibble' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'dplyr' was built under R version 4.1.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Cache the TidyTuesday chocolate data under data1/ so that it is downloaded
# only the first time the document is knit, then load it from the cache.
chocolate_rds <- here("data1", "chocolate.RDS")
if (!dir.exists(here("data1"))) {
  dir.create(here("data1"))
}
if (!file.exists(chocolate_rds)) {
  url_csv <- "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-01-18/chocolate.csv"
  chocolate <- readr::read_csv(url_csv)
  # Store the parsed data as an RDS object for later runs.
  saveRDS(chocolate, file = chocolate_rds)
}
chocolate <- readRDS(chocolate_rds)
chocolate %>%
  as_tibble()
## # A tibble: 2,530 × 10
## ref compan…¹ compa…² revie…³ count…⁴ speci…⁵ cocoa…⁶ ingre…⁷ most_…⁸ rating
## <dbl> <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 2454 5150 U.S.A. 2019 Tanzan… Kokoa … 76% 3- B,S… rich c… 3.25
## 2 2458 5150 U.S.A. 2019 Domini… Zorzal… 76% 3- B,S… cocoa,… 3.5
## 3 2454 5150 U.S.A. 2019 Madaga… Bejofo… 76% 3- B,S… cocoa,… 3.75
## 4 2542 5150 U.S.A. 2021 Fiji Matasa… 68% 3- B,S… chewy,… 3
## 5 2546 5150 U.S.A. 2021 Venezu… Sur de… 72% 3- B,S… fatty,… 3
## 6 2546 5150 U.S.A. 2021 Uganda Semuli… 80% 3- B,S… mildly… 3.25
## 7 2542 5150 U.S.A. 2021 India Anamal… 68% 3- B,S… milk b… 3.5
## 8 797 A. Morin France 2012 Bolivia Bolivia 70% 4- B,S… vegeta… 3.5
## 9 797 A. Morin France 2012 Peru Peru 63% 4- B,S… fruity… 3.75
## 10 1011 A. Morin France 2013 Panama Panama 70% 4- B,S… brief … 2.75
## # … with 2,520 more rows, and abbreviated variable names ¹company_manufacturer,
## # ²company_location, ³review_date, ⁴country_of_bean_origin,
## # ⁵specific_bean_origin_or_bar_name, ⁶cocoa_percent, ⁷ingredients,
## # ⁸most_memorable_characteristics
# Histogram of ratings with the default number of bins.
chocolate %>%
  ggplot(mapping = aes(x = rating)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
### change the number of bins to the perfect 25
# Histogram of ratings using 25 bins.
chocolate %>%
  ggplot(mapping = aes(x = rating)) +
  geom_histogram(bins = 25)
When I change the number of bins, the width of each bin changes, and the graph looks more spread out as the number of bins increases. 25 bins works well because the graph looks nice and clean.
# Number of reviewed bars for each country of bean origin.
count(chocolate, country_of_bean_origin)
## # A tibble: 62 × 2
## country_of_bean_origin n
## <chr> <int>
## 1 Australia 3
## 2 Belize 76
## 3 Blend 156
## 4 Bolivia 80
## 5 Brazil 78
## 6 Burma 1
## 7 Cameroon 3
## 8 China 1
## 9 Colombia 79
## 10 Congo 11
## # … with 52 more rows
# Count, mean rating and standard deviation for bars made from Ecuadorian beans.
chocolate %>%
  filter(country_of_bean_origin == "Ecuador") %>%
  summarise(
    total_number = n(),
    average = mean(rating),
    standard_deviation = sd(rating)
  )
## # A tibble: 1 × 3
## total_number average standard_deviation
## <int> <dbl> <dbl>
## 1 219 3.16 0.512
# Mean rating of Ecuadorian-bean bars per manufacturer location, best first.
chocolate %>%
  filter(country_of_bean_origin == "Ecuador") %>%
  group_by(company_location) %>%
  summarise(ave_rating = mean(rating)) %>%
  arrange(desc(ave_rating))
## # A tibble: 25 × 2
## company_location ave_rating
## <chr> <dbl>
## 1 Australia 3.81
## 2 Switzerland 3.75
## 3 New Zealand 3.62
## 4 Hungary 3.5
## 5 Israel 3.5
## 6 Netherlands 3.5
## 7 Singapore 3.5
## 8 South Korea 3.33
## 9 Canada 3.28
## 10 U.S.A. 3.26
## # … with 15 more rows
Thus, on average, companies located in Australia make the highest-rated chocolate from Ecuadorian beans.
# Mean rating per country of bean origin, highest first.
chocolate %>%
  group_by(country_of_bean_origin) %>%
  summarise(average_rating = mean(rating)) %>%
  arrange(desc(average_rating))
## # A tibble: 62 × 2
## country_of_bean_origin average_rating
## <chr> <dbl>
## 1 Tobago 3.62
## 2 China 3.5
## 3 Sao Tome & Principe 3.5
## 4 Solomon Islands 3.45
## 5 Congo 3.32
## 6 Thailand 3.3
## 7 Cuba 3.29
## 8 Vietnam 3.29
## 9 Papua New Guinea 3.28
## 10 Madagascar 3.27
## # … with 52 more rows
So Tobago, China, and Sao Tome & Principe are the top 3 countries of bean origin with the highest average ratings.
# Number of reviewed bars per country of bean origin.
tb <- count(chocolate, country_of_bean_origin, name = "total_bars")

# Attach the per-country bar count to every review.
master <- chocolate %>%
  left_join(tb, by = "country_of_bean_origin")
view(master)

# Mean rating per origin, restricted to origins with at least 10 reviewed bars.
master %>%
  filter(total_bars >= 10) %>%
  group_by(country_of_bean_origin) %>%
  summarise(average_rating = mean(rating)) %>%
  arrange(desc(average_rating))
## # A tibble: 35 × 2
## country_of_bean_origin average_rating
## <chr> <dbl>
## 1 Solomon Islands 3.45
## 2 Congo 3.32
## 3 Cuba 3.29
## 4 Vietnam 3.29
## 5 Papua New Guinea 3.28
## 6 Madagascar 3.27
## 7 Haiti 3.27
## 8 Brazil 3.26
## 9 Guatemala 3.26
## 10 Nicaragua 3.26
## # … with 25 more rows
Therefore, Solomon Islands, Congo, and Cuba are the top 3 countries with the highest average ratings among countries with at least 10 bars reviewed.
# Keep only reviews from origins with more than 50 reviewed bars.
over_50 <- filter(master, total_bars > 50)

# Peek at the first two characters of each cocoa percentage string. The result
# is only printed, not stored.
# NOTE(review): a two-character prefix cannot represent a three-digit
# percentage ("100%" shows up as "10").
substr(over_50$cocoa_percent, 1, 2)
## [1] "76" "76" "76" "72" "70" "63" "70" "70" "70" "70" "70" "70" "70" "63"
## [15] "70" "70" "70" "70" "70" "70" "63" "70" "70" "70" "74" "70" "55" "70"
## [29] "70" "75" "75" "75" "75" "75" "75" "75" "70" "70" "70" "70" "60" "60"
## [43] "60" "80" "60" "60" "60" "60" "60" "60" "70" "70" "70" "70" "70" "70"
## [57] "70" "70" "70" "85" "70" "70" "72" "73" "64" "66" "75" "70" "68" "66"
## [71] "70" "63" "70" "70" "70" "70" "70" "75" "70" "85" "50" "75" "60" "75"
## [85] "75" "75" "75" "75" "72" "75" "75" "75" "75" "70" "70" "68" "70" "75"
## [99] "70" "70" "70" "70" "70" "73" "73" "70" "70" "70" "70" "70" "70" "73"
## [113] "70" "70" "70" "70" "70" "70" "70" "72" "75" "80" "75" "72" "10" "72"
## [127] "72" "72" "72" "72" "75" "70" "72" "68" "70" "70" "70" "70" "70" "80"
## [141] "70" "70" "70" "72" "72" "70" "70" "70" "70" "70" "65" "65" "70" "70"
## [155] "80" "70" "70" "90" "64" "71" "70" "70" "70" "75" "70" "83" "78" "83"
## [169] "72" "72" "74" "74" "73" "55" "70" "74" "72" "64" "88" "72" "72" "76"
## [183] "86" "70" "70" "70" "71" "68" "75" "75" "65" "65" "70" "78" "70" "70"
## [197] "70" "70" "70" "70" "70" "70" "72" "72" "72" "70" "70" "75" "75" "82"
## [211] "75" "70" "10" "75" "75" "75" "75" "75" "75" "75" "75" "75" "75" "75"
## [225] "75" "75" "75" "75" "75" "75" "65" "75" "75" "75" "75" "75" "10" "77"
## [239] "70" "70" "75" "70" "70" "62" "72" "70" "85" "70" "70" "68" "70" "70"
## [253] "70" "75" "80" "60" "80" "80" "60" "70" "72" "70" "72" "70" "68" "70"
## [267] "70" "60" "72" "72" "72" "70" "70" "70" "70" "75" "75" "75" "75" "72"
## [281] "66" "75" "70" "72" "72" "72" "75" "74" "75" "70" "74" "75" "77" "75"
## [295] "75" "70" "70" "69" "70" "64" "70" "72" "71" "74" "72" "65" "66" "72"
## [309] "70" "70" "71" "71" "77" "71" "72" "72" "72" "70" "70" "70" "70" "70"
## [323] "70" "70" "80" "70" "91" "82" "63" "71" "70" "80" "10" "72" "70" "75"
## [337] "73" "70" "75" "70" "70" "70" "70" "70" "77" "77" "77" "55" "55" "55"
## [351] "70" "70" "70" "70" "72" "65" "65" "65" "65" "65" "75" "71" "65" "70"
## [365] "70" "70" "72" "70" "72" "73" "70" "72" "76" "72" "70" "72" "70" "70"
## [379] "70" "72" "70" "70" "70" "70" "72" "70" "70" "72" "74" "68" "70" "75"
## [393] "55" "72" "72" "83" "69" "70" "70" "75" "82" "70" "72" "71" "65" "80"
## [407] "65" "74" "70" "68" "90" "80" "70" "80" "60" "75" "80" "70" "74" "75"
## [421] "80" "82" "68" "72" "70" "69" "80" "70" "72" "70" "76" "70" "77" "65"
## [435] "75" "70" "73" "70" "71" "70" "71" "60" "75" "71" "71" "65" "75" "80"
## [449] "70" "70" "80" "70" "70" "75" "70" "67" "75" "75" "70" "75" "72" "73"
## [463] "72" "72" "70" "68" "61" "77" "10" "80" "75" "73" "70" "73" "70" "68"
## [477] "77" "70" "80" "88" "72" "70" "70" "76" "78" "68" "75" "72" "72" "72"
## [491] "72" "62" "70" "70" "72" "70" "72" "10" "70" "72" "65" "70" "75" "65"
## [505] "85" "73" "70" "70" "70" "70" "75" "70" "73" "72" "70" "70" "65" "72"
## [519] "70" "70" "70" "70" "70" "70" "70" "70" "70" "70" "70" "70" "70" "70"
## [533] "70" "70" "70" "70" "70" "70" "75" "60" "60" "75" "70" "70" "70" "70"
## [547] "82" "72" "72" "70" "72" "70" "70" "70" "70" "70" "75" "58" "70" "70"
## [561] "72" "65" "70" "66" "71" "64" "66" "70" "70" "80" "72" "72" "77" "80"
## [575] "80" "75" "74" "70" "75" "70" "76" "74" "70" "78" "70" "75" "72" "72"
## [589] "73" "72" "72" "72" "85" "70" "88" "70" "70" "70" "75" "60" "70" "70"
## [603] "70" "70" "70" "70" "70" "78" "70" "70" "70" "70" "70" "10" "70" "70"
## [617] "70" "70" "80" "77" "72" "80" "78" "75" "70" "75" "70" "72" "70" "71"
## [631] "65" "71" "72" "70" "76" "70" "80" "70" "70" "82" "70" "70" "70" "70"
## [645] "70" "70" "70" "70" "70" "70" "70" "72" "80" "68" "70" "70" "71" "75"
## [659] "70" "74" "58" "61" "60" "70" "70" "70" "79" "78" "70" "70" "70" "72"
## [673] "80" "80" "80" "70" "74" "72" "74" "81" "70" "65" "70" "55" "70" "62"
## [687] "62" "80" "80" "80" "80" "73" "75" "76" "76" "62" "64" "65" "74" "72"
## [701] "68" "85" "80" "70" "70" "70" "70" "75" "75" "75" "75" "70" "70" "72"
## [715] "70" "80" "70" "75" "67" "72" "70" "70" "70" "70" "70" "70" "70" "70"
## [729] "60" "70" "70" "70" "60" "70" "85" "70" "72" "65" "80" "75" "70" "65"
## [743] "81" "70" "66" "66" "68" "73" "70" "65" "74" "74" "72" "76" "72" "70"
## [757] "74" "72" "72" "70" "70" "70" "70" "70" "70" "70" "70" "70" "72" "72"
## [771] "74" "74" "70" "70" "70" "68" "70" "70" "70" "70" "70" "70" "70" "60"
## [785] "70" "70" "70" "68" "70" "70" "70" "70" "70" "70" "70" "70" "70" "70"
## [799] "70" "70" "70" "70" "70" "60" "85" "70" "75" "70" "76" "74" "85" "74"
## [813] "77" "68" "72" "70" "70" "70" "65" "70" "80" "75" "75" "72" "75" "66"
## [827] "70" "77" "77" "77" "70" "73" "70" "70" "70" "70" "72" "72" "70" "75"
## [841] "73" "70" "70" "73" "64" "64" "76" "64" "70" "65" "65" "61" "65" "65"
## [855] "61" "72" "65" "64" "65" "91" "70" "72" "61" "72" "65" "65" "70" "10"
## [869] "74" "65" "70" "80" "75" "70" "77" "55" "70" "70" "67" "77" "74" "85"
## [883] "75" "70" "65" "72" "70" "70" "64" "71" "71" "70" "67" "67" "67" "70"
## [897] "77" "68" "72" "74" "70" "70" "70" "74" "70" "73" "70" "70" "73" "74"
## [911] "70" "71" "70" "72" "75" "66" "68" "85" "80" "80" "72" "58" "58" "70"
## [925] "70" "75" "70" "70" "70" "70" "72" "75" "80" "75" "70" "70" "70" "80"
## [939] "66" "10" "10" "10" "90" "72" "80" "82" "70" "70" "70" "70" "70" "70"
## [953] "70" "70" "72" "72" "72" "72" "72" "74" "70" "72" "72" "72" "72" "72"
## [967] "72" "72" "72" "72" "72" "70" "70" "72" "75" "72" "65" "70" "68" "78"
## [981] "73" "63" "75" "82" "55" "62" "70" "68" "70" "67" "70" "72" "71" "68"
## [995] "65" "75" "75" "75" "64" "85" "75" "75" "85" "70" "85" "85" "70" "70"
## [1009] "80" "70" "72" "70" "60" "70" "70" "70" "70" "70" "70" "72" "72" "72"
## [1023] "72" "90" "72" "72" "70" "70" "70" "72" "72" "72" "68" "64" "71" "74"
## [1037] "74" "74" "68" "70" "70" "70" "70" "70" "70" "75" "70" "70" "70" "70"
## [1051] "72" "74" "60" "69" "60" "66" "74" "56" "72" "60" "70" "70" "70" "68"
## [1065] "75" "75" "75" "75" "75" "70" "75" "75" "75" "72" "80" "71" "73" "74"
## [1079] "70" "70" "70" "70" "70" "70" "70" "70" "70" "71" "70" "75" "74" "65"
## [1093] "68" "68" "68" "85" "72" "70" "75" "75" "75" "80" "78" "81" "70" "70"
## [1107] "46" "60" "58" "65" "73" "75" "55" "70" "67" "75" "70" "63" "70" "72"
## [1121] "70" "70" "70" "75" "70" "70" "70" "75" "73" "70" "74" "74" "74" "74"
## [1135] "70" "72" "70" "70" "70" "85" "70" "70" "70" "75" "72" "70" "62" "70"
## [1149] "70" "68" "75" "73" "72" "72" "72" "70" "80" "70" "72" "72" "72" "75"
## [1163] "80" "76" "72" "70" "78" "74" "75" "85" "68" "70" "75" "74" "89" "76"
## [1177] "74" "81" "75" "70" "81" "76" "70" "75" "73" "72" "70" "74" "72" "73"
## [1191] "73" "75" "76" "74" "70" "68" "70" "70" "82" "63" "75" "65" "72" "70"
## [1205] "70" "70" "70" "70" "70" "70" "85" "70" "70" "80" "72" "70" "80" "75"
## [1219] "70" "72" "70" "70" "60" "85" "99" "67" "65" "66" "66" "69" "70" "70"
## [1233] "65" "75" "75" "80" "75" "80" "70" "75" "75" "70" "70" "70" "70" "70"
## [1247] "70" "76" "90" "70" "70" "70" "70" "77" "67" "72" "65" "72" "70" "70"
## [1261] "75" "65" "67" "72" "73" "72" "70" "72" "70" "70" "70" "70" "70" "70"
## [1275] "70" "72" "70" "72" "75" "70" "63" "85" "70" "70" "80" "70" "71" "68"
## [1289] "68" "80" "70" "78" "75" "70" "70" "67" "72" "70" "73" "70" "73" "71"
## [1303] "73" "72" "72" "72" "72" "72" "75" "75" "72" "70" "72" "72" "72" "72"
## [1317] "75" "75" "70" "70" "75" "70" "80" "70" "70" "70" "70" "70" "70" "70"
## [1331] "70" "75" "70" "70" "70" "70" "70" "10" "80" "70" "70" "70" "70" "78"
## [1345] "67" "75" "72" "76" "76" "85" "85" "66" "70" "73" "70" "70" "70" "75"
## [1359] "70" "75" "75" "74" "72" "72" "72" "72" "75" "68" "66" "80" "65" "72"
## [1373] "10" "60" "72" "65" "70" "70" "85" "70" "70" "70" "70" "70" "70" "72"
## [1387] "72" "75" "75" "78" "70" "72" "72" "70" "72" "72" "75" "70" "76" "75"
## [1401] "76" "76" "71" "65" "70" "70" "70" "70" "75" "75" "75" "55" "70" "70"
## [1415] "67" "70" "75" "70" "67" "73" "64" "82" "70" "72" "72" "85" "72" "70"
## [1429] "72" "72" "72" "75" "85" "76" "70" "70" "73" "73" "73" "62" "80" "80"
## [1443] "80" "80" "70" "70" "70" "76" "82" "76" "70" "70" "70" "70" "70" "70"
## [1457] "75" "75" "75" "80" "75" "10" "75" "75" "75" "75" "75" "75" "75" "75"
## [1471] "75" "75" "75" "75" "70" "75" "85" "75" "72" "74" "77" "73" "73" "75"
## [1485] "65" "60" "65" "55" "75" "80" "85" "70" "70" "70" "70" "70" "70" "74"
## [1499] "85" "75" "70" "70" "75" "70" "80" "73" "70" "75" "72" "72" "72" "72"
## [1513] "72" "70" "75" "67" "75" "75" "75" "75" "75" "75" "75" "85" "70" "75"
## [1527] "75" "70" "75" "72" "70" "75" "68" "70" "70" "70" "75" "75" "75" "70"
## [1541] "70" "75" "80" "75" "70" "71" "73" "70" "76" "84" "72" "73" "72" "70"
## [1555] "72" "10" "70" "64" "72" "72" "83" "69" "70" "70" "70" "70" "70" "53"
## [1569] "65" "70" "75" "70" "82" "68" "62" "70" "72" "62" "75" "75" "72" "68"
## [1583] "72" "65" "78" "70" "70" "70" "70" "70" "70" "70" "70" "75" "72" "77"
## [1597] "74" "75" "70" "70" "67" "68" "60" "67" "73" "73" "10" "10" "73" "73"
## [1611] "73" "73" "73" "73" "73" "72" "72" "67" "65" "70" "67" "70" "72" "70"
## [1625] "70" "70" "70" "70" "70" "68" "68" "72" "70" "85" "68" "70" "70" "70"
## [1639] "70" "70" "70" "70" "70" "70" "70" "70" "70" "70" "70" "70" "70" "64"
## [1653] "70" "70" "70" "70" "70" "70" "70" "70" "77" "70" "88" "70" "67" "70"
## [1667] "70" "70" "80" "70" "70" "75" "70" "62" "70" "70" "70" "75" "70" "70"
## [1681] "70" "70" "62" "70" "70" "70" "70" "70" "75" "70" "75" "80" "70" "80"
## [1695] "75" "70" "70" "70" "70" "70" "70" "70" "70" "70" "84" "70" "70" "70"
## [1709] "70" "70" "70" "68" "84" "70" "70" "67" "78" "61" "71" "70" "60" "74"
## [1723] "72" "70" "75" "70" "70" "75" "74" "68" "70" "70" "70" "70" "70" "70"
## [1737] "70" "70" "70" "70" "70" "70" "70" "70" "70" "70" "70" "60" "70" "75"
## [1751] "75" "75" "75" "75" "72" "70" "70" "75" "87" "68" "70" "70" "70" "60"
## [1765] "68" "99" "62" "70" "70" "70" "70" "70" "70" "80" "70" "70" "70" "67"
## [1779] "70" "70" "77" "67" "70" "70" "70" "70" "70" "70" "77" "77" "70" "91"
## [1793] "75" "65" "70" "70" "70" "70" "72" "71" "75" "81" "72" "74" "77" "70"
## [1807] "70" "70" "70" "72" "70" "70" "70" "70" "73" "70" "70" "70" "72" "73"
## [1821] "75" "68" "75" "75" "75" "75" "60" "63" "75" "75" "75" "74" "70" "72"
## [1835] "72" "65" "70" "70" "74" "66" "64" "64" "70" "85" "85" "71" "72" "64"
## [1849] "65" "64" "64" "66" "56" "64" "69" "70" "64" "63" "60" "72" "76" "80"
## [1863] "70" "68" "64" "70" "70" "70" "70" "70" "70" "90" "90" "75" "75" "75"
## [1877] "70" "75" "70" "75" "10" "90" "75" "65" "77" "77" "77" "77" "77" "70"
## [1891] "70" "70" "70" "70" "70" "68" "72" "75" "89" "75" "75" "70" "72" "71"
## [1905] "72" "88" "70" "74" "76" "68" "70" "70" "70" "70" "70" "70" "70" "70"
## [1919] "70" "66" "73" "70" "72" "70" "70" "70" "70" "70" "60" "70" "70" "70"
## [1933] "70" "70" "85" "70" "70" "68" "72" "65" "72" "75" "70" "65" "70" "70"
## [1947] "75" "90" "65" "70" "58" "62" "70" "80" "75" "75" "72"
# Bin each bar by cocoa percentage:
#   "1": < 60%, "2": 60% to < 70%, "3": 70% to < 90%, "4": >= 90%.
# `cocoa_percent` is a character column (e.g. "76%"), so it has to be parsed
# to a number first; comparing the raw strings with numbers is a lexical
# comparison, which puts "100%" into group "1".
over_50 <- over_50 %>%
  mutate(
    cocoa_pct = readr::parse_number(cocoa_percent),
    percentage_group = case_when(
      cocoa_pct < 60 ~ "1",
      cocoa_pct >= 60 & cocoa_pct < 70 ~ "2",
      cocoa_pct >= 70 & cocoa_pct < 90 ~ "3",
      cocoa_pct >= 90 ~ "4"
    ),
    # Drop the helper column so `over_50` keeps its original set of columns.
    cocoa_pct = NULL
  ) %>%
  arrange(percentage_group)
# Mean rating for every (origin, cocoa percentage group) combination.
# `.groups = "drop"` returns an ungrouped tibble directly, instead of leaving
# the result grouped by country and stripping the grouping afterwards.
over_50_1 <- over_50 %>%
  group_by(country_of_bean_origin, percentage_group) %>%
  summarise(ave_rating = mean(rating), .groups = "drop")
# Bar chart of mean rating per cocoa percentage group, one panel per origin.
over_50_1 %>%
  ggplot(aes(x = percentage_group, y = ave_rating, fill = percentage_group)) +
  geom_col() +
  facet_grid(cols = vars(country_of_bean_origin)) +
  labs(title = "Rating and Cocoa Percent")
The faceted graph shows that, on average, chocolate with 60%–70% cocoa is the most highly rated, although the pattern differs among countries.
library(gapminder)
# One row per gapminder country, with the country name copied into a column
# named like the join key used in the chocolate data.
gm <- gapminder %>%
  mutate(country_of_bean_origin = country) %>%
  distinct(continent, country_of_bean_origin, .keep_all = TRUE)

# Add the continent to every review. Columns are selected by name (all of
# `master` plus `continent`) rather than by position, so the result does not
# depend on how many columns either table happens to have.
master2 <- master %>%
  left_join(gm, by = "country_of_bean_origin") %>%
  select(all_of(names(master)), continent)
# Keep origins with at least 10 reviewed bars and drop the "Blend" category.
master3 <- filter(
  master2,
  total_bars >= 10,
  country_of_bean_origin != "Blend"
)

# Manual continent lookup for origins supplied by hand.
missing_continent <- tribble(
  ~country_of_bean_origin, ~continent,
  "Fiji",                  "Oceania",
  "Papua New Guinea",      "Oceania",
  "Sao Tome",              "Africa",
  "Vanuatu",               "Oceania",
  "Trinidad",              "Americas",
  "Belize",                "Americas",
  "Grenada",               "Americas",
  "Congo",                 "Africa",
  "Solomon Islands",       "Oceania",
  "St. Lucia",             "Americas",
  "U.S.A.",                "Americas"
)
# Fill in continents that the gapminder join left missing, using the manual
# lookup. The join produces `continent.x` (from master3) and `continent.y`
# (from the lookup); they are merged into a single `continent` column and the
# two suffixed columns are dropped. This works on the joined table itself
# instead of splicing `master3` back in through transmute().
master4 <- master3 %>%
  left_join(missing_continent, by = "country_of_bean_origin") %>%
  mutate(continent = coalesce(continent.x, continent.y)) %>%
  select(-continent.x, -continent.y)

# Sanity check: row indices that still have no continent (expected: none).
which(is.na(master4$continent))
## integer(0)
# Distribution of ratings per continent of bean origin.
master4 %>%
  ggplot(aes(x = continent, y = rating)) +
  geom_violin()
## Solution to Part 3
library(stringr)
# Add twelve numeric 0/1 indicator columns (positions 11 to 22): six for
# ingredient codes found in `ingredients` and six for words found in
# `most_memorable_characteristics`. grepl() returns FALSE for missing values,
# so rows with an NA string get 0.
# NOTE(review): the "S" pattern also matches every string that matches the
# "Sa" pattern used for `salt` -- confirm this is the intended definition of
# `sugar`.
# The column name `letchin` is kept as is so that downstream output keeps the
# same feature names.
new <- chocolate %>%
  mutate(
    beans = as.numeric(grepl("B", ingredients)),
    sugar = as.numeric(grepl("S", ingredients)),
    cocoa_butter = as.numeric(grepl("C", ingredients)),
    vanilla = as.numeric(grepl("V", ingredients)),
    letchin = as.numeric(grepl("L", ingredients)),
    salt = as.numeric(grepl("Sa", ingredients)),
    char_cocoa = as.numeric(grepl("cocoa", most_memorable_characteristics)),
    char_sweet = as.numeric(grepl("sweet", most_memorable_characteristics)),
    char_nutty = as.numeric(grepl("nutty", most_memorable_characteristics)),
    char_creamy = as.numeric(grepl("creamy", most_memorable_characteristics)),
    char_roasty = as.numeric(grepl("roasty", most_memorable_characteristics)),
    char_earthy = as.numeric(grepl("earthy", most_memorable_characteristics))
  )
# Mean of every indicator column per review year. The indicator columns are
# selected by name (`beans` through `char_earthy`) rather than by position.
# `review_date` is stored as character, as before.
new1 <- new %>%
  group_by(review_date) %>%
  summarise(across(beans:char_earthy, mean)) %>%
  mutate(review_date = as.character(review_date))

# Long format: one row per (review year, feature) with its mean score.
new2 <- new1 %>%
  pivot_longer(
    cols = -review_date,
    names_to = "features",
    values_to = "mean_score"
  )
view(new2)
# Scatter plot of the yearly mean score of each feature, with a smoother.
# `mean_score` is the mean of a 0/1 indicator, so the labels describe feature
# scores rather than chocolate ratings, and each feature gets its own panel
# so that the smoother is not fitted across unrelated features.
basic <- ggplot(new2, aes(x = as.numeric(review_date), y = mean_score)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(vars(features)) +
  labs(
    title = "Chocolate features and Time",
    subtitle = "Mean score of each ingredient and tasting-note feature by review year",
    caption = "Created by Ying Zhang",
    x = "Year",
    y = "Mean score"
  )
basic
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Deliberately minimal plot: cocoa percent against review year, all defaults.
worst <- ggplot(
  data = master4,
  mapping = aes(x = review_date, y = cocoa_percent)
) +
  geom_point()
worst
### Why is this the worst?
* I should have changed the intervals on the y-axis. Right now it looks really cluttered.
* Years should be factors, but I didn’t change that, so the x-axis is missing labels for some years.
* I left the x-axis and y-axis labels at the ggplot2 defaults. They are not informative at all.
* I didn’t add a title to the graph, so readers will be lost looking at it.
* The graph is monotonous. I should have used more color to make it visually appealing.
* This graph doesn’t tell a story. I should have used the geom functions to at least show some interesting trends in the data.
* The graph is potentially misleading since I didn’t adjust the y-axis, which starts from 50%.
Anyway, lesson learned: improve the graph instead of doing the bare minimum and leaving it as it is.
# Violin plot of ratings per review year (year treated as a factor), with a
# red point marking each year's mean rating.
ggplot(chocolate, aes(x = as.factor(review_date), y = rating)) +
  geom_violin(trim = FALSE) +
  stat_summary(fun = "mean", geom = "point", size = 2, color = "red") +
  labs(title = "Chocolate ratings and year", x = "Year", y = "Rating") +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(
      size = 20,
      face = "bold",
      margin = margin(10, 0, 10, 0)
    ),
    axis.text.x = element_text(angle = 90)
  )
To make the graph look better, I did the following:
* I added a title to give more information about the graph. I also made the title larger than usual.
* I corrected the x-axis and y-axis labels. This way readers can tell what the two axes represent.
* I removed the legend since it is not really useful. The information it would provide can be read from the x-axis.
* I rotated the x-axis labels so the years are clear to read.
* I got rid of the color and used the theme_minimal() function. The graph looks less distracting and more visually appealing than before (I guess?).
* I used trim = FALSE, which didn’t necessarily make the graph look better, but did avoid cutting off the tails of the violins.
* I added red dots that indicate the mean rating in each year. Readers will be able to see the trend from them.