Chocolate Dataset

library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(tinytex)
library(viridis)

## Loading required package: viridisLite

# Load the chocolate ratings CSV into a tibble.
# The file is read by its full path rather than after setwd(): changing the
# working directory is a global side effect that silently alters how every
# later relative path resolves, and it is not needed to locate the file.
# NOTE(review): this directory is machine-specific -- consider a path relative
# to the project so the document can be knitted on other machines.
data_dir <- "C:/Users/gru_e/OneDrive/Desktop/DATA110/R Projects"
chocolate <- read_csv(file.path(data_dir, "chocolate.csv"))

## New names:
## * `` -> ...1

## Rows: 2224 Columns: 21

## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (15): company, company_location, country_of_bean_origin, specific_bean_o...
## dbl  (6): ...1, ref, review_date, cocoa_percent, rating, counts_of_ingredients

## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Print the tibble to preview its first rows and column types.
chocolate

## # A tibble: 2,224 x 21
##     ...1   ref company  company_location review_date country_of_bean_origin
##    <dbl> <dbl> <chr>    <chr>                  <dbl> <chr>                 
##  1     0  2454 5150     U.S.A                   2019 Madagascar            
##  2     1  2458 5150     U.S.A                   2019 Dominican republic    
##  3     2  2454 5150     U.S.A                   2019 Tanzania              
##  4     3   797 A. Morin France                  2012 Peru                  
##  5     4   797 A. Morin France                  2012 Bolivia               
##  6     5  1015 A. Morin France                  2013 Venezuela             
##  7     6  1019 A. Morin France                  2013 Peru                  
##  8     7  1011 A. Morin France                  2013 Ecuador               
##  9     8  1019 A. Morin France                  2013 Peru                  
## 10     9  1011 A. Morin France                  2013 Brazil                
## # ... with 2,214 more rows, and 15 more variables:
## #   specific_bean_origin_or_bar_name <chr>, cocoa_percent <dbl>, rating <dbl>,
## #   counts_of_ingredients <dbl>, beans <chr>, cocoa_butter <chr>,
## #   vanilla <chr>, lecithin <chr>, salt <chr>, sugar <chr>,
## #   sweetener_without_sugar <chr>, first_taste <chr>, second_taste <chr>,
## #   third_taste <chr>, fourth_taste <chr>

Clean up the data: convert the column names to lowercase and remove any spaces from them.

# Normalise the column names in a single pass: lower-case them first, then
# strip every literal space character.
names(chocolate) <- gsub(" ", "", tolower(names(chocolate)), fixed = TRUE)
# Show the structure of the data frame (column names, types, leading values).
str(chocolate)

## spec_tbl_df [2,224 x 21] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ ...1                            : num [1:2224] 0 1 2 3 4 5 6 7 8 9 ...
##  $ ref                             : num [1:2224] 2454 2458 2454 797 797 ...
##  $ company                         : chr [1:2224] "5150" "5150" "5150" "A. Morin" ...
##  $ company_location                : chr [1:2224] "U.S.A" "U.S.A" "U.S.A" "France" ...
##  $ review_date                     : num [1:2224] 2019 2019 2019 2012 2012 ...
##  $ country_of_bean_origin          : chr [1:2224] "Madagascar" "Dominican republic" "Tanzania" "Peru" ...
##  $ specific_bean_origin_or_bar_name: chr [1:2224] "Bejofo Estate, batch 1" "Zorzal, batch 1" "Kokoa Kamili, batch 1" "Peru" ...
##  $ cocoa_percent                   : num [1:2224] 76 76 76 63 70 70 63 70 70 70 ...
##  $ rating                          : num [1:2224] 3.75 3.5 3.25 3.75 3.5 4 4 3.75 3.5 3.25 ...
##  $ counts_of_ingredients           : num [1:2224] 3 3 3 4 4 4 3 4 4 4 ...
##  $ beans                           : chr [1:2224] "have_bean" "have_bean" "have_bean" "have_bean" ...
##  $ cocoa_butter                    : chr [1:2224] "have_cocoa_butter" "have_cocoa_butter" "have_cocoa_butter" "have_cocoa_butter" ...
##  $ vanilla                         : chr [1:2224] "have_not_vanila" "have_not_vanila" "have_not_vanila" "have_not_vanila" ...
##  $ lecithin                        : chr [1:2224] "have_not_lecithin" "have_not_lecithin" "have_not_lecithin" "have_lecithin" ...
##  $ salt                            : chr [1:2224] "have_not_salt" "have_not_salt" "have_not_salt" "have_not_salt" ...
##  $ sugar                           : chr [1:2224] "have_sugar" "have_sugar" "have_sugar" "have_sugar" ...
##  $ sweetener_without_sugar         : chr [1:2224] "have_not_sweetener_without_sugar" "have_not_sweetener_without_sugar" "have_not_sweetener_without_sugar" "have_not_sweetener_without_sugar" ...
##  $ first_taste                     : chr [1:2224] "cocoa" "cocoa" "rich cocoa" "fruity" ...
##  $ second_taste                    : chr [1:2224] "blackberry" "vegetal" "fatty" "melon" ...
##  $ third_taste                     : chr [1:2224] "full body" "savory" "bready" "roasty" ...
##  $ fourth_taste                    : chr [1:2224] NA NA NA NA ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   ...1 = col_double(),
##   ..   ref = col_double(),
##   ..   company = col_character(),
##   ..   company_location = col_character(),
##   ..   review_date = col_double(),
##   ..   country_of_bean_origin = col_character(),
##   ..   specific_bean_origin_or_bar_name = col_character(),
##   ..   cocoa_percent = col_double(),
##   ..   rating = col_double(),
##   ..   counts_of_ingredients = col_double(),
##   ..   beans = col_character(),
##   ..   cocoa_butter = col_character(),
##   ..   vanilla = col_character(),
##   ..   lecithin = col_character(),
##   ..   salt = col_character(),
##   ..   sugar = col_character(),
##   ..   sweetener_without_sugar = col_character(),
##   ..   first_taste = col_character(),
##   ..   second_taste = col_character(),
##   ..   third_taste = col_character(),
##   ..   fourth_taste = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>

Chocolate Dataset

Kanitta Srichan Elder

10/10/2021

Clean up the data: