# Clear every object from the workspace so the walkthrough starts clean,
# then attach visdat, which supplies vis_dat() used further down.
rm(list = ls())
library("visdat")
Merging_Maven_Toys
SetUp
Import Data
# Import the sales and products tables from the Maven Toys data folder.
# Each read.csv() result is bound to the data-frame name used by the rest of
# the walkthrough.
sales <- read.csv("~/Library/CloudStorage/Dropbox/WCAS/BCE_Summer/Data Analysis/Summer 2025/shared/Day 4/Maven+Toys+Data/sales.csv")
products <- read.csv("~/Library/CloudStorage/Dropbox/WCAS/BCE_Summer/Data Analysis/Summer 2025/shared/Day 4/Maven+Toys+Data/products.csv")
Explore
# List the columns of each table. Lines starting with `##` reproduce the
# console output of the original run.
names(sales)
## [1] "Sale_ID" "Date" "Store_ID" "Product_ID" "Units"
names(products)
## [1] "Product_ID" "Product_Name" "Product_Category" "Product_Cost"
## [5] "Product_Price"

# Visual overview of each table with vis_dat(). Only the first 1000 sales rows
# are drawn; head() is used so a shorter table is not padded with NA rows.
vis_dat(head(sales, 1000))
vis_dat(products)
Confirm uniqueness of the ID variable.
# Count the distinct ids in each table. Lines starting with `##` reproduce the
# console output of the original run.
# NOTE(review): the help lookup was garbled in the source; reconstructed as
# `?unique` - confirm against the original notebook.
?unique
length(unique(sales$Sale_ID))
## [1] 829262
length(unique(products$Product_ID))
## [1] 35
`Sale_ID` is unique in the `sales` dataframe.
`Product_ID` is unique in the `products` dataframe.
Note: Cannot merge the `sales` data with the `products` data based on their unique ids.
- No `Sale_ID` in the `products` table => have to use `Product_ID` for the merge.
?merge

# Attach the product columns to every sale, joining on the shared
# `Product_ID` column. The key has the same name in both tables, so a single
# `by` is equivalent to giving identical `by.x` and `by.y`.
product_sales <- merge(
  x = products,
  y = sales,
  by = "Product_ID"
)

# Drop the two source tables now that they have been merged.
rm(products, sales)
# Import the inventory and stores tables from the Maven Toys data folder.
# Each read.csv() result is bound to the data-frame name used by the merges
# that follow.
inventory <- read.csv("~/Library/CloudStorage/Dropbox/WCAS/BCE_Summer/Data Analysis/Summer 2025/shared/Day 4/Maven+Toys+Data/inventory.csv")
stores <- read.csv("~/Library/CloudStorage/Dropbox/WCAS/BCE_Summer/Data Analysis/Summer 2025/shared/Day 4/Maven+Toys+Data/stores.csv")
# Attach the store columns to every sale row, joining on `Store_ID`.
store_product_sales <- merge(
  x = product_sales,
  y = stores,
  by = "Store_ID" # many to one
)
# store_product_sales - 829262 rows, 13 vars

# Drop the inputs now that they have been merged.
rm(stores, product_sales)
In `inventory`, the unique id is `Store_ID`-`Product_ID`.
In `store_product_sales`, `Store_ID`-`Product_ID` is not unique.
# Attach the inventory columns to every sale row, joining on the composite
# key (`Store_ID`, `Product_ID`).
inventory_store_product_sales <- merge(
  x = store_product_sales, # many
  y = inventory, # unique
  by = c("Store_ID", "Product_ID") # many to one
)
# inventory_store_product_sales - 829262 rows, 14 vars
# NOTE(review): the original comment named store_product_sales here; 14 vars
# only fits the newly merged frame - confirm.

# Drop the inputs now that they have been merged.
rm(store_product_sales, inventory)
# Visual overview of the first 1000 rows of the fully merged table; head() is
# used so a shorter table is not padded with NA rows.
vis_dat(head(inventory_store_product_sales, 1000))

# Load Amelia for missmap(). Lines starting with `##` reproduce the console
# output of the original run.
library(Amelia)
## Loading required package: Rcpp
##
## Amelia II: Multiple Imputation
## (Version 1.8.3, built: 2024-11-07)
## Copyright (C) 2005-2025 James Honaker, Gary King and Matthew Blackwell
## Refer to http://gking.harvard.edu/amelia/ for more information
##
?Amelia
?missmap

# Draw the missingness map for the merged table.
missmap(obj = inventory_store_product_sales)