library(readr)
# Load the product table from the zipped CSV in the local Downloads folder.
product_info <- read_csv(file = "~/Downloads/product_info.csv.zip")

How large is our data set?

# Number of rows followed by number of columns.
c(nrow(product_info), ncol(product_info))
## [1] 8494   27

That’s a lot of Sephora products to analyze: the data set has 8,494 rows and 27 columns!

What are the column names?

# List every column available in the product table.
colnames(product_info)
##  [1] "product_id"         "product_name"       "brand_id"          
##  [4] "brand_name"         "loves_count"        "rating"            
##  [7] "reviews"            "size"               "variation_type"    
## [10] "variation_value"    "variation_desc"     "ingredients"       
## [13] "price_usd"          "value_price_usd"    "sale_price_usd"    
## [16] "limited_edition"    "new"                "online_only"       
## [19] "out_of_stock"       "sephora_exclusive"  "highlights"        
## [22] "primary_category"   "secondary_category" "tertiary_category" 
## [25] "child_count"        "child_max_price"    "child_min_price"

What is the highest rating available?

# Largest rating in the table, ignoring products with a missing rating.
max(product_info[["rating"]], na.rm = TRUE)
## [1] 5

Which products have a rating of 5?

# Keep only the products whose rating is exactly 5 (the maximum found above);
# subset() also drops rows where the rating is NA.
highratedprod <- subset(product_info, rating == 5)
highratedprod
## # A tibble: 256 × 27
##    product_id product_name  brand_id brand_name loves_count rating reviews size 
##    <chr>      <chr>            <dbl> <chr>            <dbl>  <dbl>   <dbl> <chr>
##  1 P460438    Aperitivo In…     5847 Acqua di …         423      5       1 180 …
##  2 P480150    Liquid Gold …     6283 Alpha-H           3928      5      16 1.69…
##  3 P504779    Desert Date …     7103 Ami Colé          2858      5       4 0.19…
##  4 P501442    Good Vibes O…     6004 amika            29742      5      17 <NA> 
##  5 P500697    Coming Up Ro…     5746 Anastasia…        9864      5       4 <NA> 
##  6 P503247    Gioia Perfum…     1517 Armani Be…        2841      5       2 <NA> 
##  7 P503243    My Way Perfu…     1517 Armani Be…        2726      5       3 <NA> 
##  8 P503244    Sì Passione …     1517 Armani Be…        1700      5       2 <NA> 
##  9 P504018    The Skin Ren…     6356 Augustinu…        2331      5       2 <NA> 
## 10 P503552    GLOfacial Cl…     6272 BeautyBio         1715      5       8 1.7 …
## # ℹ 246 more rows
## # ℹ 19 more variables: variation_type <chr>, variation_value <chr>,
## #   variation_desc <chr>, ingredients <chr>, price_usd <dbl>,
## #   value_price_usd <dbl>, sale_price_usd <dbl>, limited_edition <dbl>,
## #   new <dbl>, online_only <dbl>, out_of_stock <dbl>, sephora_exclusive <dbl>,
## #   highlights <chr>, primary_category <chr>, secondary_category <chr>,
## #   tertiary_category <chr>, child_count <dbl>, child_max_price <dbl>, …

How many products have a rating of 5?

# Row count of the 5-star subset (first element of its dimensions).
dim(highratedprod)[1L]
## [1] 256

Of the products that have a rating of 5, what’s their average price?

# Average USD price across the 5-star products, skipping missing prices.
mean(highratedprod[["price_usd"]], na.rm = TRUE)
## [1] 61.46676

What is the lowest rating?

# Smallest rating in the table, ignoring products with a missing rating.
min(product_info[["rating"]], na.rm = TRUE)
## [1] 1

Which products have the lowest rating of 1?

# Keep only the products rated 1 (the minimum found above). The column is
# referenced by its bare name because subset() evaluates the condition inside
# product_info; rows with an NA rating are dropped.
lowestrated <- subset(product_info, rating == 1)
lowestrated
## # A tibble: 12 × 27
##    product_id product_name  brand_id brand_name loves_count rating reviews size 
##    <chr>      <chr>            <dbl> <chr>            <dbl>  <dbl>   <dbl> <chr>
##  1 P479695    BLEU DE CHAN…     1065 CHANEL            1040      1       1 3.4 …
##  2 P502603    Beautiful Sk…     6236 Charlotte…        1327      1       1 <NA> 
##  3 P501952    Vivid Tones …     6503 Curlsmith          228      1       1 12 o…
##  4 P460622    25 Years of …     4372 Dr. Brand…         274      1       1 <NA> 
##  5 P476849    Platinum Lip…     6318 Dr. Lara …         839      1       2 0.17…
##  6 P505242    Mini Blossom…     6202 Jo Malone…         556      1       2 <NA> 
##  7 P500131    Hydro Ungrip…     6134 MILK MAKE…        5101      1       3 <NA> 
##  8 P500464    NuBODY - Bod…     6001 NuFACE            1312      1       2 <NA> 
##  9 P453277    Mini Grandio…     6159 Rossano F…         435      1       1 1.7 …
## 10 P502509    Cozy Vibes M…     3902 SEPHORA C…        4175      1       4 <NA> 
## 11 P500157    Smoothing Co…     5780 T3                 599      1       1 <NA> 
## 12 P455853    Mini Healthy…     6304 Virtue             888      1       2 1.8 …
## # ℹ 19 more variables: variation_type <chr>, variation_value <chr>,
## #   variation_desc <chr>, ingredients <chr>, price_usd <dbl>,
## #   value_price_usd <dbl>, sale_price_usd <dbl>, limited_edition <dbl>,
## #   new <dbl>, online_only <dbl>, out_of_stock <dbl>, sephora_exclusive <dbl>,
## #   highlights <chr>, primary_category <chr>, secondary_category <chr>,
## #   tertiary_category <chr>, child_count <dbl>, child_max_price <dbl>,
## #   child_min_price <dbl>

How many products have a rating of 1?

# Row count of the 1-star subset (first element of its dimensions).
dim(lowestrated)[1L]
## [1] 12

Only 12 items have a rating of 1!

What’s the correlation between USD price and rating?

# Pearson correlation between price and rating, computed only on rows where
# both values are present.
with(product_info, cor(price_usd, rating, use = "complete.obs"))
## [1] 0.05673784

A correlation of about 0.06 indicates almost no linear relationship between price and rating: more expensive products are not rated noticeably higher!

What’s the average price for all the products?

# Average USD price over all products, skipping missing prices.
mean(product_info[["price_usd"]], na.rm = TRUE)
## [1] 51.65559

What’s the median price for all items?

# Median USD price over all products. na.rm = TRUE matches the other
# aggregates in this analysis and keeps the result from becoming NA if any
# price is missing.
median(product_info$price_usd, na.rm = TRUE)
## [1] 35

Which products cost less than $5?

# Keep only the products priced strictly below 5 USD. The column is referenced
# by its bare name because subset() evaluates the condition inside
# product_info; rows with an NA price are dropped.
under5prods <- subset(product_info, price_usd < 5)
under5prods
## # A tibble: 13 × 27
##    product_id product_name  brand_id brand_name loves_count rating reviews size 
##    <chr>      <chr>            <dbl> <chr>            <dbl>  <dbl>   <dbl> <chr>
##  1 P421291    Artist Color…     4028 MAKE UP F…       57439   4.73     263 <NA> 
##  2 P409800    Cleansing & …     3902 SEPHORA C…      266116   4.34    3837 10 W…
##  3 P446621    Organic Cott…     3902 SEPHORA C…       47223   4.49     267 50 P…
##  4 P460703    Organic Cott…     3902 SEPHORA C…       21081   4.46      93 200 …
##  5 P460856    Clean Eye Ma…     3902 SEPHORA C…       94157   4.17     371 1 Ma…
##  6 P467138    Beauty on th…     3902 SEPHORA C…       16473   4.7       60 <NA> 
##  7 P461522    Clean Charco…     3902 SEPHORA C…       25388   3.03     183 1 Ma…
##  8 P464804    Hand Sanitiz…     3902 SEPHORA C…        9852   4.15      33 1.0 …
##  9 P460724    Mini Clean C…     3902 SEPHORA C…        6569   3.20      51 10 W…
## 10 P483151    Vitamin Eye …     3902 SEPHORA C…       26055   3.91      34 1 Pa…
## 11 P482314    Mattifying B…     3902 SEPHORA C…        6314   2.87      15 <NA> 
## 12 P444237    Mini Cleansi…     3902 SEPHORA C…         265   4.3       10 10 W…
## 13 P504514    Bath Bomb Ba…     3902 SEPHORA C…        1194   2.11      19 1.7 …
## # ℹ 19 more variables: variation_type <chr>, variation_value <chr>,
## #   variation_desc <chr>, ingredients <chr>, price_usd <dbl>,
## #   value_price_usd <dbl>, sale_price_usd <dbl>, limited_edition <dbl>,
## #   new <dbl>, online_only <dbl>, out_of_stock <dbl>, sephora_exclusive <dbl>,
## #   highlights <chr>, primary_category <chr>, secondary_category <chr>,
## #   tertiary_category <chr>, child_count <dbl>, child_max_price <dbl>,
## #   child_min_price <dbl>
# Row count of the under-$5 subset (first element of its dimensions).
dim(under5prods)[1L]
## [1] 13

What’s the average rating of these cheap products?

# Average rating of the under-$5 products, skipping missing ratings.
mean(under5prods[["rating"]], na.rm = TRUE)
## [1] 3.881046

The average rating of these cheap products is 3.88 out of 5. Not bad: beauty on a budget!