Load packages

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Import your data

my_data <- read_csv("https://drive.google.com/uc?id=107Sqmt1sk6oGcKL_TwNVqz5wMJBavsoq&export=download")

## Rows: 100 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): Name, Description, Brand, Category, Currency, EAN, Color, Size, Ava...
## dbl (4): Index, Price, Stock, Internal ID
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

structure and column names

glimpse(my_data)

## Rows: 100
## Columns: 13
## $ Index         <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1…
## $ Name          <chr> "Compact Printer Air Advanced Digital", "Tablet", "Smart…
## $ Description   <chr> "Situation organization these memory much off.", "Discus…
## $ Brand         <chr> "Garner, Boyle and Flynn", "Mueller Inc", "Lawson, Kelle…
## $ Category      <chr> "Books & Stationery", "Shoes & Footwear", "Kitchen Appli…
## $ Price         <dbl> 265, 502, 227, 121, 1, 426, 68, 159, 454, 845, 257, 221,…
## $ Currency      <chr> "USD", "USD", "USD", "USD", "USD", "USD", "USD", "USD", …
## $ Stock         <dbl> 774, 81, 726, 896, 925, 549, 870, 584, 499, 564, 168, 67…
## $ EAN           <chr> "2091465262179", "5286196620740", "1282898648918", "3879…
## $ Color         <chr> "ForestGreen", "Black", "SlateGray", "PaleGreen", "SeaSh…
## $ Size          <chr> "Large", "8x10 in", "XS", "L", "100x200 mm", "30x40 cm",…
## $ Availability  <chr> "pre_order", "in_stock", "in_stock", "discontinued", "di…
## $ `Internal ID` <dbl> 56, 29, 70, 31, 10, 60, 86, 50, 88, 88, 30, 32, 57, 66, …

colnames(my_data)

##  [1] "Index"        "Name"         "Description"  "Brand"        "Category"    
##  [6] "Price"        "Currency"     "Stock"        "EAN"          "Color"       
## [11] "Size"         "Availability" "Internal ID"

Pivoting

numeric_cols <- my_data %>% select(where(is.numeric)) %>% names()

Pivot longer

long_data <- my_data %>%
  pivot_longer(
    cols = all_of(numeric_cols),
    names_to = "variable",
    values_to = "value"
  )
long_data

## # A tibble: 400 × 11
##    Name       Description Brand Category Currency EAN   Color Size  Availability
##    <chr>      <chr>       <chr> <chr>    <chr>    <chr> <chr> <chr> <chr>       
##  1 Compact P… Situation … Garn… Books &… USD      2091… Fore… Large pre_order   
##  2 Compact P… Situation … Garn… Books &… USD      2091… Fore… Large pre_order   
##  3 Compact P… Situation … Garn… Books &… USD      2091… Fore… Large pre_order   
##  4 Compact P… Situation … Garn… Books &… USD      2091… Fore… Large pre_order   
##  5 Tablet     Discussion… Muel… Shoes &… USD      5286… Black 8x10… in_stock    
##  6 Tablet     Discussion… Muel… Shoes &… USD      5286… Black 8x10… in_stock    
##  7 Tablet     Discussion… Muel… Shoes &… USD      5286… Black 8x10… in_stock    
##  8 Tablet     Discussion… Muel… Shoes &… USD      5286… Black 8x10… in_stock    
##  9 Smart Ble… No situati… Laws… Kitchen… USD      1282… Slat… XS    in_stock    
## 10 Smart Ble… No situati… Laws… Kitchen… USD      1282… Slat… XS    in_stock    
## # ℹ 390 more rows
## # ℹ 2 more variables: variable <chr>, value <dbl>

Pivot back to wide

wide_data <- long_data %>%
  pivot_wider(
    names_from = variable,
    values_from = value
  )
wide_data

## # A tibble: 100 × 13
##    Name       Description Brand Category Currency EAN   Color Size  Availability
##    <chr>      <chr>       <chr> <chr>    <chr>    <chr> <chr> <chr> <chr>       
##  1 Compact P… Situation … Garn… Books &… USD      2091… Fore… Large pre_order   
##  2 Tablet     Discussion… Muel… Shoes &… USD      5286… Black 8x10… in_stock    
##  3 Smart Ble… No situati… Laws… Kitchen… USD      1282… Slat… XS    in_stock    
##  4 Advanced … For force … Gall… Kitchen… USD      3879… Pale… L     discontinued
##  5 Portable … Feeling ba… Irwi… Kids' C… USD      9055… SeaS… 100x… discontinued
##  6 Radio      Character … Benj… Skincare USD      1150… Corn… 30x4… pre_order   
##  7 Ultra Pro… Pattern po… Mcco… Laptops… USD      5029… Purp… S     discontinued
##  8 Webcam St… Deep area … Morr… Automot… USD      9883… Medi… 8x10… pre_order   
##  9 Eco Radio  Know fathe… Edwa… Skincare USD      1773… DimG… Medi… pre_order   
## 10 Ultra Pow… Meeting ad… Bren… Fitness… USD      4877… Silv… Extr… discontinued
## # ℹ 90 more rows
## # ℹ 4 more variables: Index <dbl>, Price <dbl>, Stock <dbl>,
## #   `Internal ID` <dbl>

Pivot again

long_again <- wide_data %>%
  pivot_longer(
    cols = all_of(numeric_cols),
    names_to = "variable",
    values_to = "value"
  )
long_again

## # A tibble: 400 × 11
##    Name       Description Brand Category Currency EAN   Color Size  Availability
##    <chr>      <chr>       <chr> <chr>    <chr>    <chr> <chr> <chr> <chr>       
##  1 Compact P… Situation … Garn… Books &… USD      2091… Fore… Large pre_order   
##  2 Compact P… Situation … Garn… Books &… USD      2091… Fore… Large pre_order   
##  3 Compact P… Situation … Garn… Books &… USD      2091… Fore… Large pre_order   
##  4 Compact P… Situation … Garn… Books &… USD      2091… Fore… Large pre_order   
##  5 Tablet     Discussion… Muel… Shoes &… USD      5286… Black 8x10… in_stock    
##  6 Tablet     Discussion… Muel… Shoes &… USD      5286… Black 8x10… in_stock    
##  7 Tablet     Discussion… Muel… Shoes &… USD      5286… Black 8x10… in_stock    
##  8 Tablet     Discussion… Muel… Shoes &… USD      5286… Black 8x10… in_stock    
##  9 Smart Ble… No situati… Laws… Kitchen… USD      1282… Slat… XS    in_stock    
## 10 Smart Ble… No situati… Laws… Kitchen… USD      1282… Slat… XS    in_stock    
## # ℹ 390 more rows
## # ℹ 2 more variables: variable <chr>, value <dbl>

Separate

supplier_col <- colnames(my_data)[str_detect(colnames(my_data), regex("supplier|brand", ignore_case = TRUE))][1]

Select and separate

demo <- my_data %>%
  select(Supplier = all_of(supplier_col))

demo_separated <- demo %>%
  separate(Supplier, into = c("Last", "First"), sep = ", ", extra = "merge", fill = "right")
demo_separated

## # A tibble: 100 × 2
##    Last               First              
##    <chr>              <chr>              
##  1 Garner             Boyle and Flynn    
##  2 Mueller Inc        <NA>               
##  3 Lawson             Keller and Winters 
##  4 Gallagher and Sons <NA>               
##  5 Irwin LLC          <NA>               
##  6 Benjamin           Nelson and Hancock 
##  7 Mccoy              Waters and Rose    
##  8 Morrow and Sons    <NA>               
##  9 Edwards            Odonnell and Conley
## 10 Brennan            Archer and Rosales 
## # ℹ 90 more rows

Unite separated columns

demo_united <- demo_separated %>%
  unite("Supplier_Full", First, Last, sep = " ")
demo_united

## # A tibble: 100 × 1
##    Supplier_Full              
##    <chr>                      
##  1 Boyle and Flynn Garner     
##  2 NA Mueller Inc             
##  3 Keller and Winters Lawson  
##  4 NA Gallagher and Sons      
##  5 NA Irwin LLC               
##  6 Nelson and Hancock Benjamin
##  7 Waters and Rose Mccoy      
##  8 NA Morrow and Sons         
##  9 Odonnell and Conley Edwards
## 10 Archer and Rosales Brennan 
## # ℹ 90 more rows

Week 8: Apply it to your data 7

Caroline Coleman

2022-10-05

Load packages

Import your data

structure and column names

Pivoting

Pivot longer

Pivot back to wide

Pivot again

Separate

Select and separate

Unite separated columns