Import data

# excel file
data <- read_excel("../01_module4/data/MyData.xlsx")
data
## # A tibble: 2,657 × 10
##      REF Compan…¹ Compa…² Revie…³ Count…⁴ Speci…⁵ Cocoa…⁶ Ingre…⁷ Most.…⁸ Rating
##    <dbl> <chr>    <chr>     <dbl> <chr>   <chr>     <dbl> <chr>   <chr>    <dbl>
##  1  2454 5150     U.S.A.     2019 Tanzan… Kokoa …    0.76 3- B,S… rich c…   3.25
##  2  2458 5150     U.S.A.     2019 Domini… Zorzal…    0.76 3- B,S… cocoa,…   3.5 
##  3  2454 5150     U.S.A.     2019 Madaga… Bejofo…    0.76 3- B,S… cocoa,…   3.75
##  4  2542 5150     U.S.A.     2021 Fiji    Matasa…    0.68 3- B,S… chewy,…   3   
##  5  2546 5150     U.S.A.     2021 Venezu… Sur de…    0.72 3- B,S… fatty,…   3   
##  6  2546 5150     U.S.A.     2021 Uganda  Semuli…    0.8  3- B,S… mildly…   3.25
##  7  2542 5150     U.S.A.     2021 India   Anamal…    0.68 3- B,S… milk b…   3.5 
##  8  2808 20N | 2… France     2022 Venezu… Chuao,…    0.78 2- B,S  sandy,…   2.75
##  9  2808 20N | 2… France     2022 Venezu… Chuao,…    0.78 2- B,S  sl. dr…   3   
## 10   797 A. Morin France     2012 Bolivia Bolivia    0.7  4- B,S… vegeta…   3.5 
## # … with 2,647 more rows, and abbreviated variable names ¹​Company.Manufacturer,
## #   ²​Company.Location, ³​Review.Date, ⁴​Country.of.Bean.Origin,
## #   ⁵​Specific.Bean.Origin.or.Bar.Name, ⁶​Cocoa.Percent, ⁷​Ingredients,
## #   ⁸​Most.Memorable.Characteristics

Apply the following dplyr verbs to your data

Filter rows

filter(data, Review.Date == 2013, Rating == 3)
## # A tibble: 27 × 10
##      REF Compan…¹ Compa…² Revie…³ Count…⁴ Speci…⁵ Cocoa…⁶ Ingre…⁷ Most.…⁸ Rating
##    <dbl> <chr>    <chr>     <dbl> <chr>   <chr>     <dbl> <chr>   <chr>    <dbl>
##  1  1011 A. Morin France     2013 Madaga… Madaga…    0.7  4- B,S… sticky…      3
##  2  1015 A. Morin France     2013 Burma   Birman…    0.7  4- B,S… sticky…      3
##  3  1125 Altus a… U.S.A.     2013 Domini… Conaca…    0.6  4- B,S… sandy,…      3
##  4  1133 Altus a… U.S.A.     2013 Bolivia Bolivi…    0.6  4- B,S… grit, …      3
##  5  1125 Brassto… U.S.A.     2013 Domini… Coopro…    0.72 5- B,S… oily, …      3
##  6  1149 Breeze … U.S.A.     2013 Jamaica Jamaica    0.7  4- B,S… chalky…      3
##  7  1101 Castron… U.S.A.     2013 Bolivia Bolivi…    0.7  3- B,S… sandy,…      3
##  8  1153 Castron… U.S.A.     2013 Venezu… Guania…    0.72 3- B,S… mild m…      3
##  9  1109 Domori   Italy      2013 Venezu… IL100,…    1    <NA>    smooth…      3
## 10  1185 Fruition U.S.A.     2013 Ecuador Camino…    0.75 3- B,S… sandy,…      3
## # … with 17 more rows, and abbreviated variable names ¹​Company.Manufacturer,
## #   ²​Company.Location, ³​Review.Date, ⁴​Country.of.Bean.Origin,
## #   ⁵​Specific.Bean.Origin.or.Bar.Name, ⁶​Cocoa.Percent, ⁷​Ingredients,
## #   ⁸​Most.Memorable.Characteristics

Arrange rows

arrange(data, desc(Review.Date), desc(Rating))
## # A tibble: 2,657 × 10
##      REF Compan…¹ Compa…² Revie…³ Count…⁴ Speci…⁵ Cocoa…⁶ Ingre…⁷ Most.…⁸ Rating
##    <dbl> <chr>    <chr>     <dbl> <chr>   <chr>     <dbl> <chr>   <chr>    <dbl>
##  1  2828 Aroko    Italy      2023 Venezu… Porcel…    0.72 3- B,S… strong…    3.5
##  2  2832 Aroko    Italy      2023 Venezu… Chuao      0.7  3- B,S… mild f…    3.5
##  3  2836 Ducrey   U.S.A.     2023 Peru    Piura,…    0.7  2- B,S  bold, …    3.5
##  4  2820 Kasama   Canada     2023 Ecuador Costa …    0.7  3- B,S… hay, f…    3.5
##  5  2820 Kasama   Canada     2023 Peru    Chunch…    0.75 3- B,S… pungen…    3.5
##  6  2836 Lydgate… U.S.A.     2023 U.S.A.  Wailua…    0.7  3- B,S… distin…    3.5
##  7  2824 McGuire  Canada     2023 Tanzan… Kokoa …    0.7  3- B,S… sl. Nu…    3.5
##  8  2820 Schoki   Canada     2023 Domini… Zorzal…    0.72 3- B,S… bold, …    3.5
##  9  2820 Schoki   Canada     2023 Tanzan… Kokoa …    0.72 3- B,S… rich, …    3.5
## 10  2816 Spinnak… U.S.A.     2023 Tanzan… Kokoa …    0.7  2- B,S  smooth…    3.5
## # … with 2,647 more rows, and abbreviated variable names ¹​Company.Manufacturer,
## #   ²​Company.Location, ³​Review.Date, ⁴​Country.of.Bean.Origin,
## #   ⁵​Specific.Bean.Origin.or.Bar.Name, ⁶​Cocoa.Percent, ⁷​Ingredients,
## #   ⁸​Most.Memorable.Characteristics

Select columns

select(data, Rating:Company.Location)
## # A tibble: 2,657 × 8
##    Rating Most.Memorable.Chara…¹ Ingre…² Cocoa…³ Speci…⁴ Count…⁵ Revie…⁶ Compa…⁷
##     <dbl> <chr>                  <chr>     <dbl> <chr>   <chr>     <dbl> <chr>  
##  1   3.25 rich cocoa, fatty, br… 3- B,S…    0.76 Kokoa … Tanzan…    2019 U.S.A. 
##  2   3.5  cocoa, vegetal, savory 3- B,S…    0.76 Zorzal… Domini…    2019 U.S.A. 
##  3   3.75 cocoa, blackberry, fu… 3- B,S…    0.76 Bejofo… Madaga…    2019 U.S.A. 
##  4   3    chewy, off, rubbery    3- B,S…    0.68 Matasa… Fiji       2021 U.S.A. 
##  5   3    fatty, earthy, moss, … 3- B,S…    0.72 Sur de… Venezu…    2021 U.S.A. 
##  6   3.25 mildly bitter, basic … 3- B,S…    0.8  Semuli… Uganda     2021 U.S.A. 
##  7   3.5  milk brownie, macadam… 3- B,S…    0.68 Anamal… India      2021 U.S.A. 
##  8   2.75 sandy, astringent, so… 2- B,S     0.78 Chuao,… Venezu…    2022 France 
##  9   3    sl. dry, fruit, cocoa… 2- B,S     0.78 Chuao,… Venezu…    2022 France 
## 10   3.5  vegetal, nutty         4- B,S…    0.7  Bolivia Bolivia    2012 France 
## # … with 2,647 more rows, and abbreviated variable names
## #   ¹​Most.Memorable.Characteristics, ²​Ingredients, ³​Cocoa.Percent,
## #   ⁴​Specific.Bean.Origin.or.Bar.Name, ⁵​Country.of.Bean.Origin, ⁶​Review.Date,
## #   ⁷​Company.Location

Add columns

mutate(data,
       gain = Cocoa.Percent - Rating)
## # A tibble: 2,657 × 11
##      REF Compan…¹ Compa…² Revie…³ Count…⁴ Speci…⁵ Cocoa…⁶ Ingre…⁷ Most.…⁸ Rating
##    <dbl> <chr>    <chr>     <dbl> <chr>   <chr>     <dbl> <chr>   <chr>    <dbl>
##  1  2454 5150     U.S.A.     2019 Tanzan… Kokoa …    0.76 3- B,S… rich c…   3.25
##  2  2458 5150     U.S.A.     2019 Domini… Zorzal…    0.76 3- B,S… cocoa,…   3.5 
##  3  2454 5150     U.S.A.     2019 Madaga… Bejofo…    0.76 3- B,S… cocoa,…   3.75
##  4  2542 5150     U.S.A.     2021 Fiji    Matasa…    0.68 3- B,S… chewy,…   3   
##  5  2546 5150     U.S.A.     2021 Venezu… Sur de…    0.72 3- B,S… fatty,…   3   
##  6  2546 5150     U.S.A.     2021 Uganda  Semuli…    0.8  3- B,S… mildly…   3.25
##  7  2542 5150     U.S.A.     2021 India   Anamal…    0.68 3- B,S… milk b…   3.5 
##  8  2808 20N | 2… France     2022 Venezu… Chuao,…    0.78 2- B,S  sandy,…   2.75
##  9  2808 20N | 2… France     2022 Venezu… Chuao,…    0.78 2- B,S  sl. dr…   3   
## 10   797 A. Morin France     2012 Bolivia Bolivia    0.7  4- B,S… vegeta…   3.5 
## # … with 2,647 more rows, 1 more variable: gain <dbl>, and abbreviated variable
## #   names ¹​Company.Manufacturer, ²​Company.Location, ³​Review.Date,
## #   ⁴​Country.of.Bean.Origin, ⁵​Specific.Bean.Origin.or.Bar.Name, ⁶​Cocoa.Percent,
## #   ⁷​Ingredients, ⁸​Most.Memorable.Characteristics

Summarize by groups

data %>%
    
    # Group by Company.Manufacturer
    group_by(Company.Manufacturer) %>%
    
    # Calculate average Company.Manufacturer with Review.Data
    summarise(Review.Date = mean(Review.Date, na.rm = TRUE)) %>%
    
    # Sort 
    arrange(Company.Manufacturer)
## # A tibble: 606 × 2
##    Company.Manufacturer          Review.Date
##    <chr>                               <dbl>
##  1 20N | 20S                           2022 
##  2 5150                                2020.
##  3 A. Morin                            2015.
##  4 AMMA                                2011.
##  5 Acalli                              2018.
##  6 Adi aka Fijiana (Easy In Ltd)       2011 
##  7 Aelan                               2019 
##  8 Aequare (Gianduja)                  2009 
##  9 Ah Cacao                            2009 
## 10 Akesson's (Pralus)                  2010.
## # … with 596 more rows