Load packages and dataset.
library(tidyverse)
transactionLines <- read_csv("Online_Retail.csv", locale = locale(encoding = "Windows-1252")) |> #locate used to deal with British pound symbol found in some descripitons
mutate(
InvoiceDate = mdy_hm(InvoiceDate), # parses str datetime to datetime obj
Month = month(InvoiceDate),
Hour = hour(InvoiceDate),
DayOfWeek = wday(InvoiceDate, label = TRUE),
TransactionValue = Quantity * UnitPrice, # adding in total column to calculate the line total
IsCancellation = str_detect(InvoiceNo, "^[Cc]"),
IsAdjustment = !IsCancellation & Quantity < 0 & UnitPrice == 0
)
# Build header level transactions table
transactions <- transactionLines %>%
group_by(InvoiceNo) %>%
summarise(
InvoiceDate = min(InvoiceDate, na.rm = TRUE), # min to deal with any minute drift between time stamps
Month = first(Month), # constant within invoice
Hour = first(Hour), # constant within invoice
DayOfWeel = first(DayOfWeek), # constant within invoice
CustomerID = first(CustomerID), # constant within invoice
Country = first(Country), # constant within invoice
IsCancellation = first(IsCancellation), # constant within invoice
IsAdjustment = first(IsAdjustment), # constant within invoice
LineItems = n(), # line count from retail dataframe
Units = sum(Quantity, na.rm = TRUE), # sum of all units sold on the transaction
TransactionValue = sum(TransactionValue, na.rm = TRUE), # total transaction value
.groups = "drop"
)
- Show the breakdown of the number of transactions by countries i.e.,
how many transactions are in the dataset for each country (consider all
records including cancelled transactions). Show this in total number and
also in percentage. Show only countries accounting for more than 1% of
the total transactions.
transactions %>%
group_by(Country) %>%
summarise(Transactions = n(), .groups = "drop") %>%
mutate(Pct = round(Transactions / sum(Transactions) * 100, 2)) %>%
filter(Pct > 1) %>%
arrange(desc(Pct))
NA
LS0tCnRpdGxlOiAiQXNzaWdubWVudCDigJMgT25saW5lIFJldGFpbCBBbmFseXRpY3MiCm91dHB1dDoKICBodG1sX25vdGVib29rOiBkZWZhdWx0CiAgd29yZF9kb2N1bWVudDogZGVmYXVsdAogIHBkZl9kb2N1bWVudDogZGVmYXVsdAotLS0KCkxvYWQgcGFja2FnZXMgYW5kIGRhdGFzZXQuCgpgYGB7cn0KbGlicmFyeSh0aWR5dmVyc2UpCgp0cmFuc2FjdGlvbkxpbmVzIDwtIHJlYWRfY3N2KCJPbmxpbmVfUmV0YWlsLmNzdiIsIGxvY2FsZSA9IGxvY2FsZShlbmNvZGluZyA9ICJXaW5kb3dzLTEyNTIiKSkgfD4gICNsb2NhdGUgdXNlZCB0byBkZWFsIHdpdGggQnJpdGlzaCBwb3VuZCBzeW1ib2wgZm91bmQgaW4gc29tZSBkZXNjcmlwaXRvbnMKICBtdXRhdGUoCiAgICBJbnZvaWNlRGF0ZSA9IG1keV9obShJbnZvaWNlRGF0ZSksICAgICAjIHBhcnNlcyBzdHIgZGF0ZXRpbWUgdG8gZGF0ZXRpbWUgb2JqCiAgICBNb250aCA9IG1vbnRoKEludm9pY2VEYXRlKSwKICAgIEhvdXIgPSBob3VyKEludm9pY2VEYXRlKSwKICAgIERheU9mV2VlayA9IHdkYXkoSW52b2ljZURhdGUsIGxhYmVsID0gVFJVRSksCiAgICBUcmFuc2FjdGlvblZhbHVlID0gUXVhbnRpdHkgKiBVbml0UHJpY2UsICMgYWRkaW5nIGluIHRvdGFsIGNvbHVtbiB0byBjYWxjdWxhdGUgdGhlIGxpbmUgdG90YWwKICAgIElzQ2FuY2VsbGF0aW9uID0gc3RyX2RldGVjdChJbnZvaWNlTm8sICJeW0NjXSIpLAogICAgSXNBZGp1c3RtZW50ID0gIUlzQ2FuY2VsbGF0aW9uICYgUXVhbnRpdHkgPCAwICYgVW5pdFByaWNlID09IDAKICApCgojIEJ1aWxkIGhlYWRlciBsZXZlbCB0cmFuc2FjdGlvbnMgdGFibGUgCgp0cmFuc2FjdGlvbnMgPC0gdHJhbnNhY3Rpb25MaW5lcyAlPiUKICBncm91cF9ieShJbnZvaWNlTm8pICU+JQogIHN1bW1hcmlzZSgKICAgIEludm9pY2VEYXRlID0gbWluKEludm9pY2VEYXRlLCBuYS5ybSA9IFRSVUUpLCAgIyBtaW4gdG8gZGVhbCB3aXRoIGFueSBtaW51dGUgZHJpZnQgYmV0d2VlbiB0aW1lIHN0YW1wcwogICAgTW9udGggPSBmaXJzdChNb250aCksICAgICAgICAgICAgICAgICAgICAgICAgICAjIGNvbnN0YW50IHdpdGhpbiBpbnZvaWNlCiAgICBIb3VyID0gZmlyc3QoSG91ciksICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgY29uc3RhbnQgd2l0aGluIGludm9pY2UKICAgIERheU9mV2VlbCA9IGZpcnN0KERheU9mV2VlayksICAgICAgICAgICAgICAgICAgIyBjb25zdGFudCB3aXRoaW4gaW52b2ljZQogICAgQ3VzdG9tZXJJRCAgPSBmaXJzdChDdXN0b21lcklEKSwgICAgICAgICAgICAgICAjIGNvbnN0YW50IHdpdGhpbiBpbnZvaWNlCiAgICBDb3VudHJ5ICAgICA9IGZpcnN0KENvdW50cnkpLCAgICAgICAgICAgICAgICAgICMgY29uc3RhbnQgd2l0aGluIGludm9pY2UKICAgIElzQ2FuY2VsbGF0aW9uID0gZmlyc3QoSXNDYW5jZWxsYXRpb24pLCAgICAgICAgIyBjb25zdGFudCB3aXRoaW4gaW52b2ljZQogICAgSXNBZGp1c3RtZW50ID0gZmlyc3QoSXNBZGp1c3RtZW50KSwgICAgICAgICAgICAjIGNvbnN0YW50IHdpdGhpbiBpbnZvaWNlCiAgICBMaW5lSXRlbXMgICA9IG4oKSwgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgbGluZSBjb3VudCBmcm9tIHJldGFpbCBkYXRhZnJhbWUKICAgIFVuaXRzICAgICAgID0gc3VtKFF1YW50aXR5LCBuYS5ybSA9IFRSVUUpLCAgICAgIyBzdW0gb2YgYWxsIHVuaXRzIHNvbGQgb24gdGhlIHRyYW5zYWN0aW9uCiAgICBUcmFuc2FjdGlvblZhbHVlICAgICAgID0gc3VtKFRyYW5zYWN0aW9uVmFsdWUsIG5hLnJtID0gVFJVRSksICAgICAgICAjIHRvdGFsIHRyYW5zYWN0aW9uIHZhbHVlIAogICAgLmdyb3VwcyA9ICJkcm9wIgogICkKYGBgCjEuIFNob3cgdGhlIGJyZWFrZG93biBvZiB0aGUgbnVtYmVyIG9mIHRyYW5zYWN0aW9ucyBieSBjb3VudHJpZXMgaS5lLiwgaG93IG1hbnkgdHJhbnNhY3Rpb25zIGFyZSBpbiB0aGUgZGF0YXNldCBmb3IgZWFjaCBjb3VudHJ5IChjb25zaWRlciBhbGwgcmVjb3JkcyBpbmNsdWRpbmcgY2FuY2VsbGVkIHRyYW5zYWN0aW9ucykuIFNob3cgdGhpcyBpbiB0b3RhbCBudW1iZXIgYW5kIGFsc28gaW4gcGVyY2VudGFnZS4gU2hvdyBvbmx5IGNvdW50cmllcyBhY2NvdW50aW5nIGZvciBtb3JlIHRoYW4gMSUgb2YgdGhlIHRvdGFsIHRyYW5zYWN0aW9ucy4KCmBgYHtyfQp0cmFuc2FjdGlvbnMgJT4lCiAgZ3JvdXBfYnkoQ291bnRyeSkgJT4lCiAgc3VtbWFyaXNlKFRyYW5zYWN0aW9ucyA9IG4oKSwgLmdyb3VwcyA9ICJkcm9wIikgJT4lCiAgbXV0YXRlKFBjdCA9IHJvdW5kKFRyYW5zYWN0aW9ucyAvIHN1bShUcmFuc2FjdGlvbnMpICogMTAwLCAyKSkgJT4lCiAgZmlsdGVyKFBjdCA+IDEpICU+JQogIGFycmFuZ2UoZGVzYyhQY3QpKQoKYGBgCgo=