---
title: "PPPP"
author: "Temuulen SUkhbat"
date: "2025-09-23"
output: html_document
---

# 1. Install packages

```{r}
# Packages this document relies on. Installation only needs to happen once,
# so install just the ones that are not already available instead of
# re-installing on every run/knit.
required_pkgs <- c("readxl", "tidyverse", "writexl")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

# Attach the packages used by the code below.
library(tidyverse)
library(dplyr)
library(readxl)
library(writexl)
```

# 2. Data import

Relative paths: `./` refers to the current working directory and `../` to its parent directory.

```{r}
# Show the working directory that the relative paths below are resolved
# against.
getwd()

# Read the three Excel workbooks.
# (RStudio keyboard shortcut for the assignment operator `<-`: Alt + -)
# NOTE(review): the original path was "./bikes" with no extension; ".xlsx" is
# assumed here to match the sibling files -- confirm the actual file name.
bikes_tbl <- read_excel("./bikes.xlsx")
bikeshops_tbl <- read_excel("./bikeshops.xlsx")
orderlines_tbl <- read_excel("./orderlines.xlsx")
```

Examine data:

```{r}
# Print the whole object first, then only its leading rows.
bikes_tbl
head(bikes_tbl)
```

Import CSV file:

```{r}
# Read the order lines from the CSV file in the working directory.
bike_orderlines_tbl <- read_csv("./bike_orderlines.csv")
```

Joining data:

```{r}
# Step 1: add the bike columns to every order line by matching
# orderlines_tbl$product.id against bikes_tbl$bike.id. A left join keeps all
# order lines, including those without a matching bike.
orderlines_bikes_tbl <- left_join(
  orderlines_tbl,
  bikes_tbl,
  by = c("product.id" = "bike.id")
)

# Step 2: add the bike shop columns by matching customer.id against
# bikeshops_tbl$bikeshop.id.
bike_orderlines_bikeshops_joined <- left_join(
  orderlines_bikes_tbl,
  bikeshops_tbl,
  by = c("customer.id" = "bikeshop.id")
)
```

`%>%` is called the pipe operator. Keyboard shortcut: Ctrl + Shift + M.

```{r}
# Same two joins as a single pipeline: the result of the first left_join()
# is piped in as the first argument of the second one, so no intermediate
# table is needed.
bike_orderlines_bikeshops_joined <-
  left_join(orderlines_tbl, bikes_tbl, by = c("product.id" = "bike.id")) %>%
  left_join(bikeshops_tbl, by = c("customer.id" = "bikeshop.id"))
```

Wrangling data: decompose `description` into three columns (`category.1`, `category.2` and `frame.material`) and split `location` into `city` and `state`.

```{r}
bike_orderlines_wrangled_tbl <- bike_orderlines_bikeshops_joined %>%
  # Split `description` on the literal " - " delimiter into three columns;
  # the original `description` column is dropped (default remove = TRUE).
  separate(
    description,
    into = c("category.1", "category.2", "frame.material"),
    sep = " - "
  ) %>%
  # Split `location` into `city` and `state`, keeping the original column
  # (remove = FALSE). `sep` is a regular expression: the comma plus any
  # whitespace after it is consumed so `state` does not start with a stray
  # space. NOTE(review): assumes values look like "City, ST" -- confirm
  # against the data.
  separate(
    location,
    into = c("city", "state"),
    sep = ",\\s*",
    remove = FALSE
  )
```