Questions we are attempting to answer
Question 1: What month was the best for sales? How much was earned that month?
Question 2: What city did we have the best sales in?
Question 3: What product did we sell the most of?
Question 4: What time can we best utilize advertisements?
Cleaning the data
# Clean data
options(scipen = 999)# Removes scientific notation
setwd("/cloud/project/data")
file_list <- list.files(path="/cloud/project/data")
# Create the data frame and concatenate several months worth of sales data into one
dataset <- data.frame()
for (i in 1:length(file_list)){
temp_data <- read_csv(file_list[i]) #each file will be read in, specify which columns you need read in to avoid any errors
temp_data$Class <- sapply(strsplit(gsub(".csv", "", file_list[i]), "_"), function(x){x[2]}) #clean the data as needed, in this case I am creating a new column that indicates which file each row of data came from
dataset <- rbind(dataset, temp_data) #for each iteration, bind the new data to the building dataset
}
# Fixing the data type
dataset$`Quantity Ordered` <- as.numeric(dataset$`Quantity Ordered`)
dataset$`Price Each` <- as.numeric(dataset$`Price Each`)
# Removing NA values
dataset <- na.omit(dataset)
# Fixing date format for the Order date column
dataset$`Order Date` <- as.POSIXct(dataset$`Order Date`, format="%m/%d/%Y %H:%M")
#Fixing naming conventions
dataset<- rename(dataset, order_id = `Order ID`)
dataset<-rename(dataset, quantity_ordered = `Quantity Ordered`)
dataset<-rename(dataset, price_each = `Price Each`)
dataset<-rename(dataset, order_date = `Order Date`)
dataset<-rename(dataset, purchase_address = `Purchase Address`)
glimpse(dataset)
## Rows: 185,950
## Columns: 7
## $ order_id <chr> "176558", "176559", "176560", "176560", "176561", "17…
## $ Product <chr> "USB-C Charging Cable", "Bose SoundSport Headphones",…
## $ quantity_ordered <dbl> 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ price_each <dbl> 11.95, 99.99, 600.00, 11.99, 11.99, 11.95, 99.99, 11.…
## $ order_date <dttm> 19-04-19 08:46:00, 19-04-07 22:30:00, 19-04-12 14:38…
## $ purchase_address <chr> "917 1st St, Dallas, TX 75001", "682 Chestnut St, Bos…
## $ Class <chr> "April", "April", "April", "April", "April", "April",…