library(readr)
# Load the Sephora product table from the zipped CSV in the Downloads folder.
product_info <- readr::read_csv(file = "~/Downloads/product_info.csv.zip")
# Show the table's size as (number of rows, number of columns).
c(nrow(product_info), ncol(product_info))
## [1] 8494 27
We’re analyzing a lot of Sephora products: the dataset has 8,494 rows and 27 columns!
# List every column available in the product table.
colnames(product_info)
## [1] "product_id" "product_name" "brand_id"
## [4] "brand_name" "loves_count" "rating"
## [7] "reviews" "size" "variation_type"
## [10] "variation_value" "variation_desc" "ingredients"
## [13] "price_usd" "value_price_usd" "sale_price_usd"
## [16] "limited_edition" "new" "online_only"
## [19] "out_of_stock" "sephora_exclusive" "highlights"
## [22] "primary_category" "secondary_category" "tertiary_category"
## [25] "child_count" "child_max_price" "child_min_price"
# Highest rating present in the data; products without a rating are ignored.
max(product_info[["rating"]], na.rm = TRUE)
## [1] 5
# Keep only the products whose rating equals 5, the maximum found above.
# which() skips rows with a missing rating, matching what subset() does.
highratedprod <- product_info[which(product_info$rating == 5), ]
highratedprod
## # A tibble: 256 × 27
## product_id product_name brand_id brand_name loves_count rating reviews size
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <chr>
## 1 P460438 Aperitivo In… 5847 Acqua di … 423 5 1 180 …
## 2 P480150 Liquid Gold … 6283 Alpha-H 3928 5 16 1.69…
## 3 P504779 Desert Date … 7103 Ami Colé 2858 5 4 0.19…
## 4 P501442 Good Vibes O… 6004 amika 29742 5 17 <NA>
## 5 P500697 Coming Up Ro… 5746 Anastasia… 9864 5 4 <NA>
## 6 P503247 Gioia Perfum… 1517 Armani Be… 2841 5 2 <NA>
## 7 P503243 My Way Perfu… 1517 Armani Be… 2726 5 3 <NA>
## 8 P503244 Sì Passione … 1517 Armani Be… 1700 5 2 <NA>
## 9 P504018 The Skin Ren… 6356 Augustinu… 2331 5 2 <NA>
## 10 P503552 GLOfacial Cl… 6272 BeautyBio 1715 5 8 1.7 …
## # ℹ 246 more rows
## # ℹ 19 more variables: variation_type <chr>, variation_value <chr>,
## # variation_desc <chr>, ingredients <chr>, price_usd <dbl>,
## # value_price_usd <dbl>, sale_price_usd <dbl>, limited_edition <dbl>,
## # new <dbl>, online_only <dbl>, out_of_stock <dbl>, sephora_exclusive <dbl>,
## # highlights <chr>, primary_category <chr>, secondary_category <chr>,
## # tertiary_category <chr>, child_count <dbl>, child_max_price <dbl>, …
# Count the products that have a rating of 5.
dim(highratedprod)[1]
## [1] 256
# Average price_usd across those products, skipping missing prices.
mean(highratedprod[["price_usd"]], na.rm = TRUE)
## [1] 61.46676
# Lowest rating present in the data; products without a rating are ignored.
min(product_info[["rating"]], na.rm = TRUE)
## [1] 1
# Products at the minimum rating of 1; unrated rows are dropped by subset().
lowestrated <- subset(product_info, subset = rating == 1)
lowestrated
## # A tibble: 12 × 27
## product_id product_name brand_id brand_name loves_count rating reviews size
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <chr>
## 1 P479695 BLEU DE CHAN… 1065 CHANEL 1040 1 1 3.4 …
## 2 P502603 Beautiful Sk… 6236 Charlotte… 1327 1 1 <NA>
## 3 P501952 Vivid Tones … 6503 Curlsmith 228 1 1 12 o…
## 4 P460622 25 Years of … 4372 Dr. Brand… 274 1 1 <NA>
## 5 P476849 Platinum Lip… 6318 Dr. Lara … 839 1 2 0.17…
## 6 P505242 Mini Blossom… 6202 Jo Malone… 556 1 2 <NA>
## 7 P500131 Hydro Ungrip… 6134 MILK MAKE… 5101 1 3 <NA>
## 8 P500464 NuBODY - Bod… 6001 NuFACE 1312 1 2 <NA>
## 9 P453277 Mini Grandio… 6159 Rossano F… 435 1 1 1.7 …
## 10 P502509 Cozy Vibes M… 3902 SEPHORA C… 4175 1 4 <NA>
## 11 P500157 Smoothing Co… 5780 T3 599 1 1 <NA>
## 12 P455853 Mini Healthy… 6304 Virtue 888 1 2 1.8 …
## # ℹ 19 more variables: variation_type <chr>, variation_value <chr>,
## # variation_desc <chr>, ingredients <chr>, price_usd <dbl>,
## # value_price_usd <dbl>, sale_price_usd <dbl>, limited_edition <dbl>,
## # new <dbl>, online_only <dbl>, out_of_stock <dbl>, sephora_exclusive <dbl>,
## # highlights <chr>, primary_category <chr>, secondary_category <chr>,
## # tertiary_category <chr>, child_count <dbl>, child_max_price <dbl>,
## # child_min_price <dbl>
# Count the products that have a rating of 1.
NROW(lowestrated)
## [1] 12
Only 12 of the 8,494 products have a rating of 1, and none of them has more than 4 reviews!
# Correlation between price and rating, computed only on the rows where
# both values are present ("complete.obs").
with(product_info, cor(price_usd, rating, use = "complete.obs"))
## [1] 0.05673784
A correlation of about 0.06 shows that higher prices are not strongly correlated with higher ratings — price and rating are nearly uncorrelated!
# Typical product price in USD: the mean and the median. Both calls drop
# missing prices so that an NA in price_usd cannot turn either summary into NA.
mean(product_info$price_usd, na.rm = TRUE)
## [1] 51.65559
median(product_info$price_usd, na.rm = TRUE)
## [1] 35
# Budget picks: every product priced strictly below 5 USD.
# which() leaves out rows with a missing price, just as subset() would.
under5prods <- product_info[which(product_info$price_usd < 5), ]
under5prods
## # A tibble: 13 × 27
## product_id product_name brand_id brand_name loves_count rating reviews size
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <chr>
## 1 P421291 Artist Color… 4028 MAKE UP F… 57439 4.73 263 <NA>
## 2 P409800 Cleansing & … 3902 SEPHORA C… 266116 4.34 3837 10 W…
## 3 P446621 Organic Cott… 3902 SEPHORA C… 47223 4.49 267 50 P…
## 4 P460703 Organic Cott… 3902 SEPHORA C… 21081 4.46 93 200 …
## 5 P460856 Clean Eye Ma… 3902 SEPHORA C… 94157 4.17 371 1 Ma…
## 6 P467138 Beauty on th… 3902 SEPHORA C… 16473 4.7 60 <NA>
## 7 P461522 Clean Charco… 3902 SEPHORA C… 25388 3.03 183 1 Ma…
## 8 P464804 Hand Sanitiz… 3902 SEPHORA C… 9852 4.15 33 1.0 …
## 9 P460724 Mini Clean C… 3902 SEPHORA C… 6569 3.20 51 10 W…
## 10 P483151 Vitamin Eye … 3902 SEPHORA C… 26055 3.91 34 1 Pa…
## 11 P482314 Mattifying B… 3902 SEPHORA C… 6314 2.87 15 <NA>
## 12 P444237 Mini Cleansi… 3902 SEPHORA C… 265 4.3 10 10 W…
## 13 P504514 Bath Bomb Ba… 3902 SEPHORA C… 1194 2.11 19 1.7 …
## # ℹ 19 more variables: variation_type <chr>, variation_value <chr>,
## # variation_desc <chr>, ingredients <chr>, price_usd <dbl>,
## # value_price_usd <dbl>, sale_price_usd <dbl>, limited_edition <dbl>,
## # new <dbl>, online_only <dbl>, out_of_stock <dbl>, sephora_exclusive <dbl>,
## # highlights <chr>, primary_category <chr>, secondary_category <chr>,
## # tertiary_category <chr>, child_count <dbl>, child_max_price <dbl>,
## # child_min_price <dbl>
# How many products cost less than 5 USD?
dim(under5prods)[1]
## [1] 13
# Mean rating across them, ignoring products that have no rating.
mean(under5prods[["rating"]], na.rm = TRUE)
## [1] 3.881046
The average rating of these 13 products priced under $5 is 3.88 out of 5! Not bad.

# Beauty on a Budget!