Project 37

# Questions
# 1. Which authors are most successful: who is most prolific, who has the highest average ratings or popularity, and do top authors specialize by cuisine, ingredient, or recipe length?
# 2. Is there a relationship between prep/cook time and average rating?
# 3. Which recipe categories or cuisines tend to have the highest average ratings and review counts?
# 4. Which recipes are the most “actionable” — high rating with low total time?

# Load packages and download the data

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(tidytuesdayR)
# tidytuesdayR is attached above, so tt_load() can be called unqualified.
# Fetches the TidyTuesday files for week 37 of 2025.
tuesdata <- tt_load(2025, week = 37)

## ---- Compiling #TidyTuesday Information for 2025-09-16 ----
## --- There are 2 files available ---
## 
## 
## ── Downloading files ───────────────────────────────────────────────────────────
## 
##   1 of 2: "all_recipes.csv"
##   2 of 2: "cuisines.csv"

# Two data sets are available in this release (all_recipes and cuisines);
# this analysis uses only cuisines.
cuisines <- tuesdata$cuisines

#To understand the data set

# Inspect the full table to get a feel for the columns and their values.
cuisines |>
  view()

#Who is most prolific?

# Number of recipes per author, largest first.
cuisines |>
  count(author) |>
  arrange(desc(n))

## # A tibble: 1,635 × 2
##    author                 n
##    <chr>              <int>
##  1 John Mitzewich       130
##  2 Allrecipes Member     35
##  3 Nicole McLaughlin     22
##  4 lola                  20
##  5 Diana Moutsopoulos    19
##  6 Brenda Venable        17
##  7 Allrecipes            16
##  8 Buckwheat Queen       12
##  9 Soup Loving Nicole    12
## 10 Pat Bernitt           10
## # ℹ 1,625 more rows

#Who has the highest average ratings or popularity?

# Mean of avg_rating per author, highest first.
# na.rm = TRUE: without it, a single recipe with a missing avg_rating turns
# that author's whole average into NA and drops them to the bottom of the
# ranking, regardless of how their other recipes are rated.
cuisines |>
  group_by(author) |>
  summarise(review = mean(avg_rating, na.rm = TRUE)) |>
  arrange(desc(review))

## # A tibble: 1,635 × 2
##    author               review
##    <chr>                 <dbl>
##  1 2CHAE                     5
##  2 Andrew Currin-Chodur      5
##  3 Andrew Holness            5
##  4 Aron Bartram              5
##  5 Ashley B                  5
##  6 Avon- status quo PRO      5
##  7 Barbara Kahian            5
##  8 Bevin                     5
##  9 BigDaddy                  5
## 10 Boitumelo                 5
## # ℹ 1,625 more rows

# Which recipe has the most reviews? Which recipe categories or cuisines tend to have the highest average ratings and review counts?

# Put the review count right after the author so it is visible in the
# printed tibble, then rank recipes from most to fewest reviews.
cuisines |>
  relocate(reviews, .after = author) |>
  arrange(desc(reviews))

## # A tibble: 2,218 × 17
##    name   country url   author reviews date_published ingredients calories   fat
##    <chr>  <chr>   <chr> <chr>    <dbl> <date>         <chr>          <dbl> <dbl>
##  1 Garli… Soul F… http… TANAQ…     975 2022-11-10     2 teaspoon…      391    11
##  2 Easy … Indian  http… mn         967 2024-05-06     1 cup butt…      880    82
##  3 Old C… Southe… http… bersk…     965 2024-11-14     3 cups wat…      618    44
##  4 Russi… Russian http… DTERE…     963 2024-11-09     5 tablespo…      167     8
##  5 Best … Cajun … http… Terri      950 2025-02-23     2 tablespo…      465    20
##  6 Real … Greek   http… ROYHO…     932 2025-01-28     2 cloves g…       54     3
##  7 South… Southe… http… QUEEN…     928 2024-10-11     2 cups shr…      208    20
##  8 Sweet… Chinese http… PAM_1      915 2024-11-12     1 pound po…      663    35
##  9 Black… Cajun … http… JEFF …     893 2025-03-05     2 tablespo…      511    38
## 10 Cajun… Cajun … http… Star …     890 2025-02-03     1 pound dr…      695    37
## # ℹ 2,208 more rows
## # ℹ 8 more variables: carbs <dbl>, protein <dbl>, avg_rating <dbl>,
## #   total_ratings <dbl>, prep_time <dbl>, cook_time <dbl>, total_time <dbl>,
## #   servings <dbl>

#Do top authors specialize by cuisine?

# Every unique author/cuisine pairing that occurs in the data.
cuisines |>
  select(author, country) |>
  distinct()

## # A tibble: 1,921 × 2
##    author             country                     
##    <chr>              <chr>                       
##  1 John Mitzewich     Greek                       
##  2 John Mitzewich     Jewish                      
##  3 CHIPPENDALE        Australian and New Zealander
##  4 Heidi              Chilean                     
##  5 Ann                Tex-Mex                     
##  6 MomWhoCooks        Canadian                    
##  7 Buckwheat Queen    Italian                     
##  8 TheOtherJuliaGulia Danish                      
##  9 Laura Sandahl      Amish and Mennonite         
## 10 Luis Luna          Spanish                     
## # ℹ 1,911 more rows

#Is there a relationship between prep/cook time and average rating?

# Scatter plot of total time against average rating.
# Time is the explanatory variable in the question being asked, so it goes on
# the x-axis and the rating (the response) on the y-axis.
cuisines |>
  ggplot(aes(x = total_time, y = avg_rating)) +
  # Semi-transparent points keep overlapping recipes distinguishable.
  geom_point(alpha = 0.3) +
  labs(
    title = "Recipe rating vs. total time",
    x = "Total time",
    y = "Average rating"
  )

## Warning: Removed 97 rows containing missing values or values outside the scale range
## (`geom_point()`).

#Which recipes are the most “actionable” — high rating with low total time?

# Best-rated recipes first; among equally rated recipes, the quickest first.
# The two ranking columns are moved next to the author so they show up in the
# printed tibble, and only the first six rows are kept.
cuisines |>
  relocate(avg_rating, total_time, .after = author) |>
  arrange(desc(avg_rating), total_time) |>
  slice_head(n = 6)

## # A tibble: 6 × 17
##   name     country url   author avg_rating total_time date_published ingredients
##   <chr>    <chr>   <chr> <chr>       <dbl>      <dbl> <date>         <chr>      
## 1 Kaisers… Austri… http… John …          5          0 2023-04-24     2 large eg…
## 2 Vietnam… Vietna… http… Yoly            5          5 2021-02-09     1 large eg…
## 3 Coconut… Brazil… http… Nicol…          5          5 2024-06-13     2 limes, 2…
## 4 Italian… Italian http… Nicol…          5          5 2025-07-26     2 fresh ch…
## 5 Lebanes… Lebane… http… BigDa…          5          5 2018-11-30     1 tablespo…
## 6 Easy Mo… Cuban   http… Jenni…          5          5 2025-05-12     12 leaves …
## # ℹ 9 more variables: calories <dbl>, fat <dbl>, carbs <dbl>, protein <dbl>,
## #   total_ratings <dbl>, reviews <dbl>, prep_time <dbl>, cook_time <dbl>,
## #   servings <dbl>

#Not satisfied with this work. Need to improve the graph. And I might be missing other ways of looking at the data.

Project 37

AlearnsR

2025-09-22