remove(list = ls())
Merging
Setup + Importing
<- read.csv("~/Downloads/sales.csv")
sales <- read.csv("~/Downloads/products (1).csv") products
Checking ID
?uniquelength(unique(sales$Sale_ID))
[1] 829262
Sale_ID is unique in sales df
Product_ID is unique in products df
Cannot merge sales data with products data based on their unique ID’s
Thus we use product ID
Merging Data
<- merge(
sales_products x = sales,
y = products,
by = "Product_ID",
all.x = TRUE
)
Viewing Data
glimpse(sales_products)
Rows: 829,262
Columns: 9
$ Product_ID <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ Sale_ID <int> 209473, 818753, 1956, 443121, 561047, 147861, 800529,…
$ Date <chr> "2017-07-11", "2018-09-23", "2017-01-02", "2018-01-16…
$ Store_ID <int> 46, 2, 10, 17, 34, 41, 4, 43, 45, 36, 9, 22, 39, 17, …
$ Units <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ Product_Name <chr> "Action Figure", "Action Figure", "Action Figure", "A…
$ Product_Category <chr> "Toys", "Toys", "Toys", "Toys", "Toys", "Toys", "Toys…
$ Product_Cost <chr> "$9.99 ", "$9.99 ", "$9.99 ", "$9.99 ", "$9.99 ", "$9…
$ Product_Price <chr> "$15.99 ", "$15.99 ", "$15.99 ", "$15.99 ", "$15.99 "…
Importing 2
<- read.csv("~/Downloads/stores.csv")
stores <- read.csv("~/Downloads/inventory.csv") inventory
Merging Data
# Merging Data 2
<- merge(
sales_products_stores x = sales_products,
y = stores,
by = "Store_ID", # <- was Product_ID before, now correct
all.x = TRUE
)
# Bring in inventory (by both store and product)
<- merge(
sales_products_stores_inventory x = sales_products_stores,
y = inventory,
by = c("Store_ID", "Product_ID"),
all.x = TRUE
)remove(sales)
remove(inventory)
remove(stores)
remove(products)
remove(sales_products)
remove(sales_products_stores)
Charts
library(ggplot2)
ggplot(sales_products_stores_inventory, aes(x = Stock_On_Hand)) +
geom_density()
Warning: Removed 6232 rows containing non-finite outside the scale range
(`stat_density()`).