Merging_Maven_Toys

Author

AS

SetUp

# Start from an empty workspace: drop every object that ls() reports.
# Attached packages and options are not affected by this call.
rm(list = ls())
library(visdat)

Import Data

# Load the sales table from the shared course folder.
sales <- read.csv(
  file = "~/Library/CloudStorage/Dropbox/WCAS/BCE_Summer/Data Analysis/Summer 2025/shared/Day 4/Maven+Toys+Data/sales.csv"
)

# Load the products table from the same folder.
products <- read.csv(
  file = "~/Library/CloudStorage/Dropbox/WCAS/BCE_Summer/Data Analysis/Summer 2025/shared/Day 4/Maven+Toys+Data/products.csv"
)

Explore

# Column names of the sales table.
names(sales)
[1] "Sale_ID"    "Date"       "Store_ID"   "Product_ID" "Units"     
# Column names of the products table; Product_ID is the only name it shares with sales.
names(products)
[1] "Product_ID"       "Product_Name"     "Product_Category" "Product_Cost"    
[5] "Product_Price"   
# Visualise column types and missing values for the first 1000 sales rows only
# (presumably to keep the plot fast on a large table -- confirm).
vis_dat(sales[1:1000,])

# The products table is plotted in full.
vis_dat(products)

Confirm the uniqueness of each ID variable.

# Open the help page for unique().
?unique()
# Number of distinct Sale_ID values; this demonstrates uniqueness only if it equals nrow(sales).
length(unique(sales$Sale_ID))
[1] 829262
# Number of distinct Product_ID values; compare against nrow(products) in the same way.
length(unique(products$Product_ID))
[1] 35
  • Sale_ID is unique in sales dataframe

  • Product_ID is unique in products dataframe

Note

Cannot merge the sales data with products data based on their unique ids.

  • No Sale_ID in the products table => have to use Product_ID for the merge
?merge

# Attach product attributes to every sale. Product_ID is unique in products
# and repeated in sales, so each product row matches many sales rows.
# merge() keeps only rows whose Product_ID appears in both tables (its default).
product_sales <- merge(products, sales, by = "Product_ID")

# The two inputs are no longer needed once they have been merged.
rm(products, sales)
# Load the inventory table from the shared course folder.
inventory <- read.csv(
  file = "~/Library/CloudStorage/Dropbox/WCAS/BCE_Summer/Data Analysis/Summer 2025/shared/Day 4/Maven+Toys+Data/inventory.csv"
)

# Load the stores table from the same folder.
stores <- read.csv(
  file = "~/Library/CloudStorage/Dropbox/WCAS/BCE_Summer/Data Analysis/Summer 2025/shared/Day 4/Maven+Toys+Data/stores.csv"
)
# Add store attributes to each sale by matching on Store_ID.
# Many sales rows map to one stores row (many to one).
store_product_sales <- merge(product_sales, stores, by = "Store_ID")

# store_product_sales - 829262 rows, 13 vars

# Drop the inputs now that the merged table exists.
rm(stores, product_sales)
  • In inventory, unique id is Store_ID-Product_ID

  • In store_product_sales, Store_ID-Product_ID is not unique.

# Add inventory information to each sale by matching on the
# Store_ID-Product_ID pair: many sales rows map to one inventory row.
inventory_store_product_sales <- merge(
  store_product_sales,
  inventory,
  by = c("Store_ID", "Product_ID")
)

# inventory_store_product_sales - 829262 rows, 14 vars

# Drop the inputs now that the merged table exists.
rm(store_product_sales, inventory)

# Visualise column types and missing values for the first 1000 merged rows.
vis_dat(inventory_store_product_sales[seq_len(1000), ])

library(Amelia)
Loading required package: Rcpp
## 
## Amelia II: Multiple Imputation
## (Version 1.8.3, built: 2024-11-07)
## Copyright (C) 2005-2025 James Honaker, Gary King and Matthew Blackwell
## Refer to http://gking.harvard.edu/amelia/ for more information
## 
?Amelia
?missmap
# Draw Amelia's missingness map for the fully merged table.
missmap(inventory_store_product_sales)