2026-05-22

Purpose of Analysis

The purpose of this analysis is to complete the peer-graded assignment for the Developing Data Products course by Johns Hopkins University on Coursera.

Ramen Data

In this presentation, we’ll be covering ramen reviews taken from The Big List, the review data set published by The Ramen Rater (https://www.theramenrater.com/resources-2/the-list/).

From this data, we’ll be looking at plotly graphs on Ramen Star Ratings to observe:

  • Countries and their instant ramen ratings

  • Packaging styles and their instant ramen ratings

# Attach the Excel reader first, muting its start-up banner.
suppressPackageStartupMessages(library(readxl))

# Fetch the workbook from The Ramen Rater; mode = "wb" writes it as binary.
url <- "https://www.theramenrater.com/wp-content/uploads/2025/11/11212025The-Ramen-Rater.xlsx"
download.file(url = url, destfile = "RamenRater.xlsx", mode = "wb")

# Load the first worksheet of the downloaded workbook.
ramendata <- read_excel(path = "RamenRater.xlsx", sheet = 1)

Cleaning the data

# Filter corrupted ratings: keep only the leading number of each Stars entry.
# The pattern has to consume the remainder of the string as well (".*$");
# matching the leading number alone would replace it with itself and leave
# every value unchanged. Entries that do not start with a digit or "." do not
# match and are left as they are for the later clean-up steps.
ramendata$Stars <- sub("^([0-9.]+).*$", "\\1", ramendata$Stars)

# Drop the seventh column (the "T" column); it is selected by position.
ramendata <- ramendata[, -7]

Cleaning the data

# Tidy the Country column: every known misspelling or alternate name is mapped
# to its canonical spelling through a single lookup table.
ramendata$Country <- local({
  canonical <- c(
    "THailand" = "Thailand",
    "Souh Korea" = "South Korea",
    "UK" = "United Kingdom",
    "Sarawak" = "Malaysia",
    "Phlippines" = "Philippines",
    "Phillippines" = "Philippines",
    "Russian Federation" = "Russia",
    "Yaiwan" = "Taiwan",
    "USA" = "United States"
  )
  country <- ramendata$Country
  # %in% is FALSE for NA, so missing countries are left untouched.
  needs_fix <- country %in% names(canonical)
  country[needs_fix] <- unname(canonical[country[needs_fix]])
  country
})

# Same approach for the misspelled packaging styles.
ramendata$Style <- local({
  canonical <- c("Boowl" = "Bowl", "Trray" = "Tray")
  style <- ramendata$Style
  needs_fix <- style %in% names(canonical)
  style[needs_fix] <- unname(canonical[style[needs_fix]])
  style
})

Cleaning the data

# Remove rows whose Stars entry is a placeholder or stray text, not a rating.
ramendata <- ramendata[
  !(ramendata$Stars %in% c("Not reviewed", "NR", "NS", "Unrated", "Detail of the lid (click to enlarge).", "-")), ]

# Keep only the first number before "/" (e.g. "3.5/5" becomes "3.5").
ramendata$Stars <- sub("/.*", "", ramendata$Stars)

# Convert to numeric; anything that is still not a number becomes NA.
ramendata$Stars <- as.numeric(ramendata$Stars)

# Keep only ratings on the 0-5 scale. NA has to be excluded explicitly: an NA
# in a logical row index does not drop the row, it returns a row made up
# entirely of NA values, which would then leak into the summaries and plots.
ramendata <- ramendata[
  !is.na(ramendata$Stars) & ramendata$Stars >= 0 & ramendata$Stars <= 5, ]

Summarising ratings by country

# Summarise the ratings per country.
suppressPackageStartupMessages(library(dplyr))

# One row per country: number of reviews, mean rating and summed rating.
# Rows containing NA (e.g. a missing country) are dropped, and the table is
# stored sorted from best to worst average so that head() shows the
# best-rated countries rather than whatever order the grouping produced.
country_summary <- ramendata %>%
  group_by(Country) %>%
  summarise(
    total_reviews = n(),
    average_stars = mean(Stars, na.rm = TRUE),
    total_stars = sum(Stars, na.rm = TRUE)
  ) %>%
  na.omit() %>%
  arrange(desc(average_stars))

head(country_summary)

Countries and instant ramen ratings

# Attach plotly quietly, like the other packages in this deck, so that its
# start-up and masking messages do not end up on the slide.
suppressPackageStartupMessages(library(plotly))

# Bubble chart based on country: one marker per country, placed by number of
# reviews (x) and average rating (y), coloured by country and sized by the
# number of reviews.
p <- plot_ly(
  country_summary,
  x = ~total_reviews,
  y = ~average_stars,
  type = "scatter",
  mode = "markers",
  color = ~as.factor(Country),
  size = ~total_reviews,
  # Custom hover label; hoverinfo = "text" shows only this label.
  text = ~paste(
    "Country:", Country,
    "<br>Reviews:", total_reviews,
    "<br>Average Stars:", round(average_stars, 2)
  ),
  hoverinfo = "text"
)

# Readable axis titles instead of the raw column names.
p <- layout(
  p,
  xaxis = list(title = "Total reviews"),
  yaxis = list(title = "Average stars")
)

p

Countries and instant ramen ratings

Packaging style and instant ramen ratings

# Distribution of star ratings for each packaging style, drawn as one violin
# per style with an embedded box plot and a mean line.
q <- plot_ly(
  data = ramendata,
  type = "violin",
  y = ~Stars,
  color = ~Style,
  meanline = list(visible = TRUE),
  box = list(visible = TRUE)
)

# Display the chart.
q

Packaging style and instant ramen ratings