Load packages and dataset.

library(tidyverse)

transactionLines <- read_csv("Online_Retail.csv", locale = locale(encoding = "Windows-1252")) |>  #locate used to deal with British pound symbol found in some descripitons
  mutate(
    InvoiceDate = mdy_hm(InvoiceDate),     # parses str datetime to datetime obj
    Month = month(InvoiceDate),
    Hour = hour(InvoiceDate),
    DayOfWeek = wday(InvoiceDate, label = TRUE),
    TransactionValue = Quantity * UnitPrice, # adding in total column to calculate the line total
    IsCancellation = str_detect(InvoiceNo, "^[Cc]"),
    IsAdjustment = !IsCancellation & Quantity < 0 & UnitPrice == 0
  )

# Build header level transactions table 

transactions <- transactionLines %>%
  group_by(InvoiceNo) %>%
  summarise(
    InvoiceDate = min(InvoiceDate, na.rm = TRUE),  # min to deal with any minute drift between time stamps
    Month = first(Month),                          # constant within invoice
    Hour = first(Hour),                            # constant within invoice
    DayOfWeel = first(DayOfWeek),                  # constant within invoice
    CustomerID  = first(CustomerID),               # constant within invoice
    Country     = first(Country),                  # constant within invoice
    IsCancellation = first(IsCancellation),        # constant within invoice
    IsAdjustment = first(IsAdjustment),            # constant within invoice
    LineItems   = n(),                             # line count from retail dataframe
    Units       = sum(Quantity, na.rm = TRUE),     # sum of all units sold on the transaction
    TransactionValue       = sum(TransactionValue, na.rm = TRUE),        # total transaction value 
    .groups = "drop"
  )
  1. Show the breakdown of the number of transactions by countries i.e., how many transactions are in the dataset for each country (consider all records including cancelled transactions). Show this in total number and also in percentage. Show only countries accounting for more than 1% of the total transactions.
transactions %>%
  group_by(Country) %>%
  summarise(Transactions = n(), .groups = "drop") %>%
  mutate(Pct = round(Transactions / sum(Transactions) * 100, 2)) %>%
  filter(Pct > 1) %>%
  arrange(desc(Pct))
NA
LS0tCnRpdGxlOiAiQXNzaWdubWVudCDigJMgT25saW5lIFJldGFpbCBBbmFseXRpY3MiCm91dHB1dDoKICBodG1sX25vdGVib29rOiBkZWZhdWx0CiAgd29yZF9kb2N1bWVudDogZGVmYXVsdAogIHBkZl9kb2N1bWVudDogZGVmYXVsdAotLS0KCkxvYWQgcGFja2FnZXMgYW5kIGRhdGFzZXQuCgpgYGB7cn0KbGlicmFyeSh0aWR5dmVyc2UpCgp0cmFuc2FjdGlvbkxpbmVzIDwtIHJlYWRfY3N2KCJPbmxpbmVfUmV0YWlsLmNzdiIsIGxvY2FsZSA9IGxvY2FsZShlbmNvZGluZyA9ICJXaW5kb3dzLTEyNTIiKSkgfD4gICNsb2NhdGUgdXNlZCB0byBkZWFsIHdpdGggQnJpdGlzaCBwb3VuZCBzeW1ib2wgZm91bmQgaW4gc29tZSBkZXNjcmlwaXRvbnMKICBtdXRhdGUoCiAgICBJbnZvaWNlRGF0ZSA9IG1keV9obShJbnZvaWNlRGF0ZSksICAgICAjIHBhcnNlcyBzdHIgZGF0ZXRpbWUgdG8gZGF0ZXRpbWUgb2JqCiAgICBNb250aCA9IG1vbnRoKEludm9pY2VEYXRlKSwKICAgIEhvdXIgPSBob3VyKEludm9pY2VEYXRlKSwKICAgIERheU9mV2VlayA9IHdkYXkoSW52b2ljZURhdGUsIGxhYmVsID0gVFJVRSksCiAgICBUcmFuc2FjdGlvblZhbHVlID0gUXVhbnRpdHkgKiBVbml0UHJpY2UsICMgYWRkaW5nIGluIHRvdGFsIGNvbHVtbiB0byBjYWxjdWxhdGUgdGhlIGxpbmUgdG90YWwKICAgIElzQ2FuY2VsbGF0aW9uID0gc3RyX2RldGVjdChJbnZvaWNlTm8sICJeW0NjXSIpLAogICAgSXNBZGp1c3RtZW50ID0gIUlzQ2FuY2VsbGF0aW9uICYgUXVhbnRpdHkgPCAwICYgVW5pdFByaWNlID09IDAKICApCgojIEJ1aWxkIGhlYWRlciBsZXZlbCB0cmFuc2FjdGlvbnMgdGFibGUgCgp0cmFuc2FjdGlvbnMgPC0gdHJhbnNhY3Rpb25MaW5lcyAlPiUKICBncm91cF9ieShJbnZvaWNlTm8pICU+JQogIHN1bW1hcmlzZSgKICAgIEludm9pY2VEYXRlID0gbWluKEludm9pY2VEYXRlLCBuYS5ybSA9IFRSVUUpLCAgIyBtaW4gdG8gZGVhbCB3aXRoIGFueSBtaW51dGUgZHJpZnQgYmV0d2VlbiB0aW1lIHN0YW1wcwogICAgTW9udGggPSBmaXJzdChNb250aCksICAgICAgICAgICAgICAgICAgICAgICAgICAjIGNvbnN0YW50IHdpdGhpbiBpbnZvaWNlCiAgICBIb3VyID0gZmlyc3QoSG91ciksICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgY29uc3RhbnQgd2l0aGluIGludm9pY2UKICAgIERheU9mV2VlbCA9IGZpcnN0KERheU9mV2VlayksICAgICAgICAgICAgICAgICAgIyBjb25zdGFudCB3aXRoaW4gaW52b2ljZQogICAgQ3VzdG9tZXJJRCAgPSBmaXJzdChDdXN0b21lcklEKSwgICAgICAgICAgICAgICAjIGNvbnN0YW50IHdpdGhpbiBpbnZvaWNlCiAgICBDb3VudHJ5ICAgICA9IGZpcnN0KENvdW50cnkpLCAgICAgICAgICAgICAgICAgICMgY29uc3RhbnQgd2l0aGluIGludm9pY2UKICAgIElzQ2FuY2VsbGF0aW9uID0gZmlyc3QoSXNDYW5jZWxsYXRpb24pLCAgICAgICAgIyBjb25zdGFudCB3aXRoaW4gaW52b2ljZQogICAgSXNBZGp1c3RtZW50ID0gZmlyc3QoSXNBZGp1c3RtZW50KSwgICAgICAgICAgICAjIGNvbnN0YW50IHdpdGhpbiBpbnZvaWNlCiAgICBMaW5lSXRlbXMgICA9IG4oKSwgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgbGluZSBjb3VudCBmcm9tIHJldGFpbCBkYXRhZnJhbWUKICAgIFVuaXRzICAgICAgID0gc3VtKFF1YW50aXR5LCBuYS5ybSA9IFRSVUUpLCAgICAgIyBzdW0gb2YgYWxsIHVuaXRzIHNvbGQgb24gdGhlIHRyYW5zYWN0aW9uCiAgICBUcmFuc2FjdGlvblZhbHVlICAgICAgID0gc3VtKFRyYW5zYWN0aW9uVmFsdWUsIG5hLnJtID0gVFJVRSksICAgICAgICAjIHRvdGFsIHRyYW5zYWN0aW9uIHZhbHVlIAogICAgLmdyb3VwcyA9ICJkcm9wIgogICkKYGBgCjEuIFNob3cgdGhlIGJyZWFrZG93biBvZiB0aGUgbnVtYmVyIG9mIHRyYW5zYWN0aW9ucyBieSBjb3VudHJpZXMgaS5lLiwgaG93IG1hbnkgdHJhbnNhY3Rpb25zIGFyZSBpbiB0aGUgZGF0YXNldCBmb3IgZWFjaCBjb3VudHJ5IChjb25zaWRlciBhbGwgcmVjb3JkcyBpbmNsdWRpbmcgY2FuY2VsbGVkIHRyYW5zYWN0aW9ucykuIFNob3cgdGhpcyBpbiB0b3RhbCBudW1iZXIgYW5kIGFsc28gaW4gcGVyY2VudGFnZS4gU2hvdyBvbmx5IGNvdW50cmllcyBhY2NvdW50aW5nIGZvciBtb3JlIHRoYW4gMSUgb2YgdGhlIHRvdGFsIHRyYW5zYWN0aW9ucy4KCmBgYHtyfQp0cmFuc2FjdGlvbnMgJT4lCiAgZ3JvdXBfYnkoQ291bnRyeSkgJT4lCiAgc3VtbWFyaXNlKFRyYW5zYWN0aW9ucyA9IG4oKSwgLmdyb3VwcyA9ICJkcm9wIikgJT4lCiAgbXV0YXRlKFBjdCA9IHJvdW5kKFRyYW5zYWN0aW9ucyAvIHN1bShUcmFuc2FjdGlvbnMpICogMTAwLCAyKSkgJT4lCiAgZmlsdGVyKFBjdCA+IDEpICU+JQogIGFycmFuZ2UoZGVzYyhQY3QpKQoKYGBgCgo=