#| include: false
library(readr)
library(ggplot2)
library(tidyverse)
# Read the Wish item listings straight from the shared CSV link.
# NOTE(review): `download=1` presumably makes the share link return the raw
# file instead of a viewer page - confirm the link still resolves.
wish_items <- read_csv(
  "https://myxavier-my.sharepoint.com/:x:/g/personal/delffso_xavier_edu/EY7nT0JhnOFAsLS9nBs8gxkBm58cgbu0bkKJDd3RCmyMmQ?download=1"
)

In Class Activity
What is the most common rating on an item?
On wish.com, you can rate an item from one to five stars. The data set records how many ratings of each star level every item received, so I will plot the totals to see the distribution of ratings.
# Star levels in the order they should appear on the x axis.
starLabels <- c("One Star", "Two Star", "Three Star", "Four Star", "Five Star")

# Total number of ratings at each star level, summed over every item.
# na.rm = TRUE keeps a single missing count from turning a total into NA.
rateVector <- c(
  sum(wish_items$rating_one_count, na.rm = TRUE),
  sum(wish_items$rating_two_count, na.rm = TRUE),
  sum(wish_items$rating_three_count, na.rm = TRUE),
  sum(wish_items$rating_four_count, na.rm = TRUE),
  sum(wish_items$rating_five_count, na.rm = TRUE)
)

# Build the label factor with explicit levels so the bars follow star order.
# Without `levels =`, factor()/as.factor() sorts the levels alphabetically,
# and assigning to levels() afterwards only RENAMES the existing levels; it
# does not reorder them, so each total would end up under the wrong label.
rateLabel <- factor(starLabels, levels = starLabels)

# Kept under its original name in case later code refers to it.
rateLevels <- rateLabel

# One row per star level: its label and its total count.
rateFrame <- data.frame(rateLabel, rateVector)

# The totals are already computed, so geom_col() plots them as-is; no summary
# statistic is needed.
rateFrame %>%
  ggplot(aes(x = rateLabel, y = rateVector)) +
  geom_col() +
  labs(title = "Rating Distribution", x = "Level", y = "frequency")

# NOTE(review): the earlier `levels(rateFrame$rateLabel) <- rateLevels`
# assignment relabelled the factor, which put the five-star total under the
# "One Star" label. Re-check the interpretation below against this corrected
# plot before relying on it.

The bar for one star has the highest count, which means it is the most common rating given to an item. This makes sense because wish.com sells very cheap items that are not usually the best quality.