Import your data
data <- read_excel("../01_module4/data/MyData.xlsx")
data
## # A tibble: 2,657 × 10
## REF Compan…¹ Compa…² Revie…³ Count…⁴ Speci…⁵ Cocoa…⁶ Ingre…⁷ Most.…⁸ Rating
## <dbl> <chr> <chr> <dbl> <chr> <chr> <dbl> <chr> <chr> <dbl>
## 1 2454 5150 U.S.A. 2019 Tanzan… Kokoa … 0.76 3- B,S… rich c… 3.25
## 2 2458 5150 U.S.A. 2019 Domini… Zorzal… 0.76 3- B,S… cocoa,… 3.5
## 3 2454 5150 U.S.A. 2019 Madaga… Bejofo… 0.76 3- B,S… cocoa,… 3.75
## 4 2542 5150 U.S.A. 2021 Fiji Matasa… 0.68 3- B,S… chewy,… 3
## 5 2546 5150 U.S.A. 2021 Venezu… Sur de… 0.72 3- B,S… fatty,… 3
## 6 2546 5150 U.S.A. 2021 Uganda Semuli… 0.8 3- B,S… mildly… 3.25
## 7 2542 5150 U.S.A. 2021 India Anamal… 0.68 3- B,S… milk b… 3.5
## 8 2808 20N | 2… France 2022 Venezu… Chuao,… 0.78 2- B,S sandy,… 2.75
## 9 2808 20N | 2… France 2022 Venezu… Chuao,… 0.78 2- B,S sl. dr… 3
## 10 797 A. Morin France 2012 Bolivia Bolivia 0.7 4- B,S… vegeta… 3.5
## # … with 2,647 more rows, and abbreviated variable names ¹Company.Manufacturer,
## # ²Company.Location, ³Review.Date, ⁴Country.of.Bean.Origin,
## # ⁵Specific.Bean.Origin.or.Bar.Name, ⁶Cocoa.Percent, ⁷Ingredients,
## # ⁸Most.Memorable.Characteristics
data_small <- data %>%
sample_n(10) %>%
select(Rating,Cocoa.Percent, Review.Date)
data_small
## # A tibble: 10 × 3
## Rating Cocoa.Percent Review.Date
## <dbl> <dbl> <dbl>
## 1 3.25 0.7 2018
## 2 3 0.7 2016
## 3 3.75 0.76 2014
## 4 3 0.72 2021
## 5 3.5 0.7 2012
## 6 2 0.85 2006
## 7 2.75 0.74 2022
## 8 3 0.8 2015
## 9 4 0.72 2022
## 10 3 0.72 2020
Separating and Uniting
Separate a column
data_small %>%
separate(Review.Date, into = c("century", "year"), sep = 2)
## # A tibble: 10 × 4
## Rating Cocoa.Percent century year
## <dbl> <dbl> <chr> <chr>
## 1 3.25 0.7 20 18
## 2 3 0.7 20 16
## 3 3.75 0.76 20 14
## 4 3 0.72 20 21
## 5 3.5 0.7 20 12
## 6 2 0.85 20 06
## 7 2.75 0.74 20 22
## 8 3 0.8 20 15
## 9 4 0.72 20 22
## 10 3 0.72 20 20
Unite two columns
data %>%
unite(col = "Company.Location_Manufacturer", c(Company.Location, Company.Manufacturer), sep = "/")
## # A tibble: 2,657 × 9
## REF Company.Locati…¹ Revie…² Count…³ Speci…⁴ Cocoa…⁵ Ingre…⁶ Most.…⁷ Rating
## <dbl> <chr> <dbl> <chr> <chr> <dbl> <chr> <chr> <dbl>
## 1 2454 U.S.A./5150 2019 Tanzan… Kokoa … 0.76 3- B,S… rich c… 3.25
## 2 2458 U.S.A./5150 2019 Domini… Zorzal… 0.76 3- B,S… cocoa,… 3.5
## 3 2454 U.S.A./5150 2019 Madaga… Bejofo… 0.76 3- B,S… cocoa,… 3.75
## 4 2542 U.S.A./5150 2021 Fiji Matasa… 0.68 3- B,S… chewy,… 3
## 5 2546 U.S.A./5150 2021 Venezu… Sur de… 0.72 3- B,S… fatty,… 3
## 6 2546 U.S.A./5150 2021 Uganda Semuli… 0.8 3- B,S… mildly… 3.25
## 7 2542 U.S.A./5150 2021 India Anamal… 0.68 3- B,S… milk b… 3.5
## 8 2808 France/20N | 20S 2022 Venezu… Chuao,… 0.78 2- B,S sandy,… 2.75
## 9 2808 France/20N | 20S 2022 Venezu… Chuao,… 0.78 2- B,S sl. dr… 3
## 10 797 France/A. Morin 2012 Bolivia Bolivia 0.7 4- B,S… vegeta… 3.5
## # … with 2,647 more rows, and abbreviated variable names
## # ¹Company.Location_Manufacturer, ²Review.Date, ³Country.of.Bean.Origin,
## # ⁴Specific.Bean.Origin.or.Bar.Name, ⁵Cocoa.Percent, ⁶Ingredients,
## # ⁷Most.Memorable.Characteristics
Missing Values
data_small %>%
complete(Rating, Review.Date)
## # A tibble: 63 × 3
## Rating Review.Date Cocoa.Percent
## <dbl> <dbl> <dbl>
## 1 2 2006 0.85
## 2 2 2012 NA
## 3 2 2014 NA
## 4 2 2015 NA
## 5 2 2016 NA
## 6 2 2018 NA
## 7 2 2020 NA
## 8 2 2021 NA
## 9 2 2022 NA
## 10 2.75 2006 NA
## # … with 53 more rows