library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(tinytex)
library(viridis)
## Loading required package: viridisLite
# Switch the session's working directory to the project folder so the relative
# file name below resolves there.
# NOTE(review): this absolute path is machine-specific; the script will stop
# here on any other computer -- confirm or replace before sharing.
setwd(dir = "C:/Users/gru_e/OneDrive/Desktop/DATA110/R Projects")

# Read the chocolate CSV; column types are guessed by readr.
chocolate <- read_csv(file = "chocolate.csv")
## New names:
## * `` -> ...1
## Rows: 2224 Columns: 21
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (15): company, company_location, country_of_bean_origin, specific_bean_o...
## dbl (6): ...1, ref, review_date, cocoa_percent, rating, counts_of_ingredients
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the data set to preview its first rows and column types.
chocolate
## # A tibble: 2,224 x 21
## ...1 ref company company_location review_date country_of_bean_origin
## <dbl> <dbl> <chr> <chr> <dbl> <chr>
## 1 0 2454 5150 U.S.A 2019 Madagascar
## 2 1 2458 5150 U.S.A 2019 Dominican republic
## 3 2 2454 5150 U.S.A 2019 Tanzania
## 4 3 797 A. Morin France 2012 Peru
## 5 4 797 A. Morin France 2012 Bolivia
## 6 5 1015 A. Morin France 2013 Venezuela
## 7 6 1019 A. Morin France 2013 Peru
## 8 7 1011 A. Morin France 2013 Ecuador
## 9 8 1019 A. Morin France 2013 Peru
## 10 9 1011 A. Morin France 2013 Brazil
## # ... with 2,214 more rows, and 15 more variables:
## # specific_bean_origin_or_bar_name <chr>, cocoa_percent <dbl>, rating <dbl>,
## # counts_of_ingredients <dbl>, beans <chr>, cocoa_butter <chr>,
## # vanilla <chr>, lecithin <chr>, salt <chr>, sugar <chr>,
## # sweetener_without_sugar <chr>, first_taste <chr>, second_taste <chr>,
## # third_taste <chr>, fourth_taste <chr>
Clean up the column names (lowercase, no spaces):
# Normalise the column names in one pass: lowercase first, then drop every
# space character.
names(chocolate) <- names(chocolate) %>%
  tolower() %>%
  gsub(pattern = " ", replacement = "")

# Show the structure (column names, types, leading values) after renaming.
str(chocolate)
## spec_tbl_df [2,224 x 21] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ ...1 : num [1:2224] 0 1 2 3 4 5 6 7 8 9 ...
## $ ref : num [1:2224] 2454 2458 2454 797 797 ...
## $ company : chr [1:2224] "5150" "5150" "5150" "A. Morin" ...
## $ company_location : chr [1:2224] "U.S.A" "U.S.A" "U.S.A" "France" ...
## $ review_date : num [1:2224] 2019 2019 2019 2012 2012 ...
## $ country_of_bean_origin : chr [1:2224] "Madagascar" "Dominican republic" "Tanzania" "Peru" ...
## $ specific_bean_origin_or_bar_name: chr [1:2224] "Bejofo Estate, batch 1" "Zorzal, batch 1" "Kokoa Kamili, batch 1" "Peru" ...
## $ cocoa_percent : num [1:2224] 76 76 76 63 70 70 63 70 70 70 ...
## $ rating : num [1:2224] 3.75 3.5 3.25 3.75 3.5 4 4 3.75 3.5 3.25 ...
## $ counts_of_ingredients : num [1:2224] 3 3 3 4 4 4 3 4 4 4 ...
## $ beans : chr [1:2224] "have_bean" "have_bean" "have_bean" "have_bean" ...
## $ cocoa_butter : chr [1:2224] "have_cocoa_butter" "have_cocoa_butter" "have_cocoa_butter" "have_cocoa_butter" ...
## $ vanilla : chr [1:2224] "have_not_vanila" "have_not_vanila" "have_not_vanila" "have_not_vanila" ...
## $ lecithin : chr [1:2224] "have_not_lecithin" "have_not_lecithin" "have_not_lecithin" "have_lecithin" ...
## $ salt : chr [1:2224] "have_not_salt" "have_not_salt" "have_not_salt" "have_not_salt" ...
## $ sugar : chr [1:2224] "have_sugar" "have_sugar" "have_sugar" "have_sugar" ...
## $ sweetener_without_sugar : chr [1:2224] "have_not_sweetener_without_sugar" "have_not_sweetener_without_sugar" "have_not_sweetener_without_sugar" "have_not_sweetener_without_sugar" ...
## $ first_taste : chr [1:2224] "cocoa" "cocoa" "rich cocoa" "fruity" ...
## $ second_taste : chr [1:2224] "blackberry" "vegetal" "fatty" "melon" ...
## $ third_taste : chr [1:2224] "full body" "savory" "bready" "roasty" ...
## $ fourth_taste : chr [1:2224] NA NA NA NA ...
## - attr(*, "spec")=
## .. cols(
## .. ...1 = col_double(),
## .. ref = col_double(),
## .. company = col_character(),
## .. company_location = col_character(),
## .. review_date = col_double(),
## .. country_of_bean_origin = col_character(),
## .. specific_bean_origin_or_bar_name = col_character(),
## .. cocoa_percent = col_double(),
## .. rating = col_double(),
## .. counts_of_ingredients = col_double(),
## .. beans = col_character(),
## .. cocoa_butter = col_character(),
## .. vanilla = col_character(),
## .. lecithin = col_character(),
## .. salt = col_character(),
## .. sugar = col_character(),
## .. sweetener_without_sugar = col_character(),
## .. first_taste = col_character(),
## .. second_taste = col_character(),
## .. third_taste = col_character(),
## .. fourth_taste = col_character()
## .. )
## - attr(*, "problems")=<externalptr>