Load packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Import your data
my_data <- read_csv("https://drive.google.com/uc?id=107Sqmt1sk6oGcKL_TwNVqz5wMJBavsoq&export=download")
## Rows: 100 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): Name, Description, Brand, Category, Currency, EAN, Color, Size, Ava...
## dbl (4): Index, Price, Stock, Internal ID
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
structure and column names
glimpse(my_data)
## Rows: 100
## Columns: 13
## $ Index <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1…
## $ Name <chr> "Compact Printer Air Advanced Digital", "Tablet", "Smart…
## $ Description <chr> "Situation organization these memory much off.", "Discus…
## $ Brand <chr> "Garner, Boyle and Flynn", "Mueller Inc", "Lawson, Kelle…
## $ Category <chr> "Books & Stationery", "Shoes & Footwear", "Kitchen Appli…
## $ Price <dbl> 265, 502, 227, 121, 1, 426, 68, 159, 454, 845, 257, 221,…
## $ Currency <chr> "USD", "USD", "USD", "USD", "USD", "USD", "USD", "USD", …
## $ Stock <dbl> 774, 81, 726, 896, 925, 549, 870, 584, 499, 564, 168, 67…
## $ EAN <chr> "2091465262179", "5286196620740", "1282898648918", "3879…
## $ Color <chr> "ForestGreen", "Black", "SlateGray", "PaleGreen", "SeaSh…
## $ Size <chr> "Large", "8x10 in", "XS", "L", "100x200 mm", "30x40 cm",…
## $ Availability <chr> "pre_order", "in_stock", "in_stock", "discontinued", "di…
## $ `Internal ID` <dbl> 56, 29, 70, 31, 10, 60, 86, 50, 88, 88, 30, 32, 57, 66, …
colnames(my_data)
## [1] "Index" "Name" "Description" "Brand" "Category"
## [6] "Price" "Currency" "Stock" "EAN" "Color"
## [11] "Size" "Availability" "Internal ID"
Pivoting
numeric_cols <- my_data %>% select(where(is.numeric)) %>% names()
Pivot longer
long_data <- my_data %>%
pivot_longer(
cols = all_of(numeric_cols),
names_to = "variable",
values_to = "value"
)
long_data
## # A tibble: 400 × 11
## Name Description Brand Category Currency EAN Color Size Availability
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Compact P… Situation … Garn… Books &… USD 2091… Fore… Large pre_order
## 2 Compact P… Situation … Garn… Books &… USD 2091… Fore… Large pre_order
## 3 Compact P… Situation … Garn… Books &… USD 2091… Fore… Large pre_order
## 4 Compact P… Situation … Garn… Books &… USD 2091… Fore… Large pre_order
## 5 Tablet Discussion… Muel… Shoes &… USD 5286… Black 8x10… in_stock
## 6 Tablet Discussion… Muel… Shoes &… USD 5286… Black 8x10… in_stock
## 7 Tablet Discussion… Muel… Shoes &… USD 5286… Black 8x10… in_stock
## 8 Tablet Discussion… Muel… Shoes &… USD 5286… Black 8x10… in_stock
## 9 Smart Ble… No situati… Laws… Kitchen… USD 1282… Slat… XS in_stock
## 10 Smart Ble… No situati… Laws… Kitchen… USD 1282… Slat… XS in_stock
## # ℹ 390 more rows
## # ℹ 2 more variables: variable <chr>, value <dbl>
Pivot back to wide
wide_data <- long_data %>%
pivot_wider(
names_from = variable,
values_from = value
)
wide_data
## # A tibble: 100 × 13
## Name Description Brand Category Currency EAN Color Size Availability
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Compact P… Situation … Garn… Books &… USD 2091… Fore… Large pre_order
## 2 Tablet Discussion… Muel… Shoes &… USD 5286… Black 8x10… in_stock
## 3 Smart Ble… No situati… Laws… Kitchen… USD 1282… Slat… XS in_stock
## 4 Advanced … For force … Gall… Kitchen… USD 3879… Pale… L discontinued
## 5 Portable … Feeling ba… Irwi… Kids' C… USD 9055… SeaS… 100x… discontinued
## 6 Radio Character … Benj… Skincare USD 1150… Corn… 30x4… pre_order
## 7 Ultra Pro… Pattern po… Mcco… Laptops… USD 5029… Purp… S discontinued
## 8 Webcam St… Deep area … Morr… Automot… USD 9883… Medi… 8x10… pre_order
## 9 Eco Radio Know fathe… Edwa… Skincare USD 1773… DimG… Medi… pre_order
## 10 Ultra Pow… Meeting ad… Bren… Fitness… USD 4877… Silv… Extr… discontinued
## # ℹ 90 more rows
## # ℹ 4 more variables: Index <dbl>, Price <dbl>, Stock <dbl>,
## # `Internal ID` <dbl>
Pivot again
long_again <- wide_data %>%
pivot_longer(
cols = all_of(numeric_cols),
names_to = "variable",
values_to = "value"
)
long_again
## # A tibble: 400 × 11
## Name Description Brand Category Currency EAN Color Size Availability
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Compact P… Situation … Garn… Books &… USD 2091… Fore… Large pre_order
## 2 Compact P… Situation … Garn… Books &… USD 2091… Fore… Large pre_order
## 3 Compact P… Situation … Garn… Books &… USD 2091… Fore… Large pre_order
## 4 Compact P… Situation … Garn… Books &… USD 2091… Fore… Large pre_order
## 5 Tablet Discussion… Muel… Shoes &… USD 5286… Black 8x10… in_stock
## 6 Tablet Discussion… Muel… Shoes &… USD 5286… Black 8x10… in_stock
## 7 Tablet Discussion… Muel… Shoes &… USD 5286… Black 8x10… in_stock
## 8 Tablet Discussion… Muel… Shoes &… USD 5286… Black 8x10… in_stock
## 9 Smart Ble… No situati… Laws… Kitchen… USD 1282… Slat… XS in_stock
## 10 Smart Ble… No situati… Laws… Kitchen… USD 1282… Slat… XS in_stock
## # ℹ 390 more rows
## # ℹ 2 more variables: variable <chr>, value <dbl>
Separate
supplier_col <- colnames(my_data)[str_detect(colnames(my_data), regex("supplier|brand", ignore_case = TRUE))][1]
Select and separate
demo <- my_data %>%
select(Supplier = all_of(supplier_col))
demo_separated <- demo %>%
separate(Supplier, into = c("Last", "First"), sep = ", ", extra = "merge", fill = "right")
demo_separated
## # A tibble: 100 × 2
## Last First
## <chr> <chr>
## 1 Garner Boyle and Flynn
## 2 Mueller Inc <NA>
## 3 Lawson Keller and Winters
## 4 Gallagher and Sons <NA>
## 5 Irwin LLC <NA>
## 6 Benjamin Nelson and Hancock
## 7 Mccoy Waters and Rose
## 8 Morrow and Sons <NA>
## 9 Edwards Odonnell and Conley
## 10 Brennan Archer and Rosales
## # ℹ 90 more rows
Unite separated columns
demo_united <- demo_separated %>%
unite("Supplier_Full", First, Last, sep = " ")
demo_united
## # A tibble: 100 × 1
## Supplier_Full
## <chr>
## 1 Boyle and Flynn Garner
## 2 NA Mueller Inc
## 3 Keller and Winters Lawson
## 4 NA Gallagher and Sons
## 5 NA Irwin LLC
## 6 Nelson and Hancock Benjamin
## 7 Waters and Rose Mccoy
## 8 NA Morrow and Sons
## 9 Odonnell and Conley Edwards
## 10 Archer and Rosales Brennan
## # ℹ 90 more rows