remove(list=ls())
library(visdat)
Merging_Maven_Toys
SetUp
Import Data
sales <- read.csv("~/Library/CloudStorage/Dropbox/WCAS/BCE_Summer/Data Analysis/Summer 2025/shared/Day 4/Maven+Toys+Data/sales.csv")
products <- read.csv("~/Library/CloudStorage/Dropbox/WCAS/BCE_Summer/Data Analysis/Summer 2025/shared/Day 4/Maven+Toys+Data/products.csv")
Explore
names(sales)
[1] "Sale_ID" "Date" "Store_ID" "Product_ID" "Units"
names(products)
[1] "Product_ID" "Product_Name" "Product_Category" "Product_Cost"
[5] "Product_Price"
vis_dat(sales[1:1000,])
vis_dat(products)
Confirm uniqueness of the ID variable.
?unique()
length(unique(sales$Sale_ID))
[1] 829262
length(unique(products$Product_ID))
[1] 35
`Sale_ID` is unique in the `sales` dataframe.
`Product_ID` is unique in the `products` dataframe.
Note
Cannot merge the sales data with the products data based on their unique IDs.
- There is no `Sale_ID` in the `products` table => have to use `Product_ID` for the merge.
?merge
# Attach product attributes to each sale by joining on Product_ID.
# Product_ID is unique in products, so every sale row matches one product row.
# merge() defaults to an inner join: only IDs present in both tables are kept.
product_sales <- merge(products, sales, by = "Product_ID")
# The two source tables are no longer needed once product_sales exists.
remove(products)
remove(sales)

# Load the remaining tables. inventory is only used by a later merge.
inventory <- read.csv("~/Library/CloudStorage/Dropbox/WCAS/BCE_Summer/Data Analysis/Summer 2025/shared/Day 4/Maven+Toys+Data/inventory.csv")
stores <- read.csv("~/Library/CloudStorage/Dropbox/WCAS/BCE_Summer/Data Analysis/Summer 2025/shared/Day 4/Maven+Toys+Data/stores.csv")

# Add the store attributes to every sale row (inner join on Store_ID).
store_product_sales <-
  merge(x = product_sales,
        y = stores,
        by = "Store_ID") # many to one
# store_product_sales - 829262 rows, 13 vars
remove(stores)
remove(product_sales)
In `inventory`, the unique ID is the `Store_ID`-`Product_ID` pair.
In `store_product_sales`, `Store_ID`-`Product_ID` is not unique.
# Bring in the inventory columns. Many sale rows map onto the single
# inventory row for their (Store_ID, Product_ID) pair (many to one).
inventory_store_product_sales <- merge(
  store_product_sales,
  inventory,
  by = c("Store_ID", "Product_ID")
)
# inventory_store_product_sales - 829262 rows, 14 vars
# Drop both inputs now that the combined table exists.
remove(store_product_sales, inventory)
vis_dat(inventory_store_product_sales[1:1000,])
library(Amelia)
Loading required package: Rcpp
##
## Amelia II: Multiple Imputation
## (Version 1.8.3, built: 2024-11-07)
## Copyright (C) 2005-2025 James Honaker, Gary King and Matthew Blackwell
## Refer to http://gking.harvard.edu/amelia/ for more information
##
# Look up the help topics for Amelia and for missmap().
?Amelia
?missmap
# Draw a missingness map of the fully merged table.
missmap(obj = inventory_store_product_sales)