The purpose of this analysis is to complete the Peer Graded Assignment from Developing Data Products by Johns Hopkins University on Coursera.
2026-05-22
The purpose of this analysis is to complete the Peer Graded Assignment from Developing Data Products by Johns Hopkins University on Coursera.
In this presentation, we’ll be covering instant ramen reviews, taken from The Big List data set published by The Ramen Rater (https://www.theramenrater.com/resources-2/the-list/).
From this data, we’ll be looking at plotly graphs on Ramen Star Ratings to observe:
Countries and their instant ramen ratings
Packaging styles and their instant ramen ratings
# Download the workbook only when no local copy exists, so re-running the
# analysis does not need the network every time. Delete the local file to
# force a fresh download.
url <- "https://www.theramenrater.com/wp-content/uploads/2025/11/11212025The-Ramen-Rater.xlsx"
destfile <- "RamenRater.xlsx"
if (!file.exists(destfile)) {
  # Fetch into a temporary file first so that a failed or interrupted
  # transfer never leaves a truncated workbook under the final name.
  # mode = "wb" writes the .xlsx as binary.
  tmpfile <- tempfile(fileext = ".xlsx")
  download.file(url, destfile = tmpfile, mode = "wb")
  file.copy(tmpfile, destfile)
  unlink(tmpfile)
}

# Read the first sheet of the workbook
suppressPackageStartupMessages(library(readxl))
ramendata <- read_excel(destfile, sheet = 1)
# filtering corrupted data
# Keep only the number at the beginning of each rating, e.g. "3.5/5" -> "3.5".
# The trailing ".*" consumes the remainder of the string so the replacement
# actually discards it; values that do not start with a digit or "." do not
# match and are left unchanged.
ramendata$Stars <- sub("^([0-9.]+).*", "\\1", ramendata$Stars)
# deleting T column (addressed by position: the 7th column)
ramendata <- ramendata[, -7]
# tidying Country column
# correcting typos and harmonising alternative names
# Lookup table: names are the values found in the data, elements are the
# spellings they are replaced with.
country_fixes <- c(
  "THailand" = "Thailand",
  "Souh Korea" = "South Korea",
  "UK" = "United Kingdom",
  "Sarawak" = "Malaysia",
  "Phlippines" = "Philippines",
  "Phillippines" = "Philippines",
  "Russian Federation" = "Russia",
  "Yaiwan" = "Taiwan",
  "USA" = "United States"
)
# %in% never returns NA, so missing countries are simply left untouched
fix_country <- ramendata$Country %in% names(country_fixes)
ramendata$Country[fix_country] <-
  unname(country_fixes[ramendata$Country[fix_country]])

# correcting typos in the Style column
style_fixes <- c("Boowl" = "Bowl", "Trray" = "Tray")
fix_style <- ramendata$Style %in% names(style_fixes)
ramendata$Style[fix_style] <- unname(style_fixes[ramendata$Style[fix_style]])
# remove text entries
non_ratings <- c(
  "Not reviewed", "NR", "NS", "Unrated",
  "Detail of the lid (click to enlarge).", "-"
)
ramendata <- ramendata[!(ramendata$Stars %in% non_ratings), ]
# keep only the first number before "/"
ramendata$Stars <- sub("/.*", "", ramendata$Stars)
# convert to numeric; anything that is still not a number becomes NA
ramendata$Stars <- as.numeric(ramendata$Stars)
# remove impossible values
# NA ratings are excluded explicitly: an NA in a logical row index does not
# drop the row, it yields a row filled with NA in every column.
valid_stars <- !is.na(ramendata$Stars) &
  ramendata$Stars >= 0 & ramendata$Stars <= 5
ramendata <- ramendata[valid_stars, ]
# looking at country
suppressPackageStartupMessages(library(dplyr))

# One row per country: number of reviews, mean rating and summed rating
country_summary <- ramendata %>%
  group_by(Country) %>%
  summarise(
    total_reviews = n(),
    average_stars = mean(Stars, na.rm = TRUE),
    total_stars = sum(Stars, na.rm = TRUE)
  )

# Store the ordering (best average first) so that head() below shows the
# top-rated countries rather than the unsorted summary
country_summary <- country_summary %>%
  arrange(desc(average_stars))

# Drop summary rows that contain a missing value (e.g. a missing Country)
country_summary <- na.omit(country_summary)
head(country_summary)
library(plotly)
# Country view: one marker per country, placed by number of reviews (x) and
# average star rating (y); marker size also follows the number of reviews.
# Hover label shown for each marker instead of plotly's default hover info.
country_hover <- ~paste(
  "Country:", Country,
  "<br>Reviews:", total_reviews,
  "<br>Average Stars:", round(average_stars,2)
)

p <- plot_ly(
  data = country_summary,
  type = "scatter",
  mode = "markers",
  x = ~total_reviews,
  y = ~average_stars,
  color = ~as.factor(Country),
  size = ~total_reviews,
  text = country_hover,
  hoverinfo = "text"
)
p
# based on packaging style
# Violin plot of star ratings, one violin per packaging style, each with an
# embedded box plot and a mean line.
q <- plot_ly(
  ramendata,
  y = ~Stars,
  color = ~Style,
  type = "violin",
  box = list(visible = TRUE),
  meanline = list(visible = TRUE)
)
q