# Set the file path (adjust if your file is in a different location)
file_path <- "C:/Users/User/OneDrive/Desktop/R_Output/Premiere Data File.xlsx"
# Read each sheet into a data frame
sales_rep <- read_excel(file_path, sheet = "REP")
customer <- read_excel(file_path, sheet = "CUSTOMER")
product <- read_excel(file_path, sheet = "PART")
order_data <- read_excel(file_path, sheet = "ORDERS")
## Warning: Coercing text to numeric in A9002 / R9002C1: '21617'
## Warning: Coercing text to numeric in A9003 / R9003C1: '21619'
## Warning: Coercing text to numeric in A9004 / R9004C1: '21623'
## Warning: Coercing text to numeric in A9005 / R9005C1: '21613'
## Warning: Coercing text to numeric in A9006 / R9006C1: '21614'
## Warning: Coercing text to numeric in A9007 / R9007C1: '21608'
## Warning: Coercing text to numeric in A9008 / R9008C1: '21610'
order_item <- read_excel(file_path, sheet = "ORDER_LINE")
# Display the first few rows of each data frame to verify
cat("SalesRep Table:\n")
## SalesRep Table:
print(head(sales_rep))
## # A tibble: 3 × 9
## REP_NUM LAST_NAME FIRST_NAME STREET CITY STATE ZIP COMMISSION RATE
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 20 Kaiser Valerie 624 Randall Grove FL 33321 20542. 0.05
## 2 35 Hull Richard 532 Jackson Sheldon FL 33553 39216 0.07
## 3 65 Perez Juan 1626 Taylor Fillmore FL 33336 23487 0.05
cat("\nCustomer Table:\n")
##
## Customer Table:
print(head(customer))
## # A tibble: 6 × 9
## CUSTOMER_NUM CUSTOMER_NAME STREET CITY STATE ZIP BALANCE CREDIT_LIMIT
## <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 148 Al's Appliance and… 2837 … Fill… FL 33336 6550 7500
## 2 282 Brookings Direct 3827 … Grove FL 33321 432. 10000
## 3 356 Ferguson's 382 W… Nort… FL 33146 5785 7500
## 4 408 The Everything Shop 1828 … Crys… FL 33503 5285. 5000
## 5 462 Bargains Galore 3829 … Grove FL 33321 3412 10000
## 6 524 Kline's 838 R… Fill… FL 33336 12762 15000
## # ℹ 1 more variable: REP_NUM <chr>
cat("\nProduct Table:\n")
##
## Product Table:
print(head(product))
## # A tibble: 6 × 6
## PART_NUM DESCRIPTION ON_HAND CLASS WAREHOUSE PRICE
## <chr> <chr> <dbl> <chr> <chr> <dbl>
## 1 AT94 Iron 50 HW 3 25.0
## 2 BV06 Home Gym 45 SG 2 795.
## 3 CD52 Microwave Oven 32 AP 1 165
## 4 DL71 Cordless Drill 21 HW 3 130.
## 5 DR93 Gas Range 8 AP 2 495
## 6 DW11 Washer 12 AP 3 400.
cat("\nOrder Table:\n")
##
## Order Table:
print(head(order_data))
## # A tibble: 6 × 3
## ORDER_NUM ORDER_DATE CUSTOMER_NUM
## <dbl> <dttm> <chr>
## 1 4339 2019-10-12 00:00:00 524
## 2 7338 2019-10-12 00:00:00 687
## 3 8892 2019-10-12 00:00:00 282
## 4 8922 2019-10-12 00:00:00 356
## 5 9187 2019-10-12 00:00:00 462
## 6 9247 2019-10-12 00:00:00 687
cat("\nOrderItem Table:\n")
##
## OrderItem Table:
print(head(order_item))
## # A tibble: 6 × 4
## ORDER_NUM PART_NUM NUM_ORDERED QUOTED_PRICE
## <chr> <chr> <dbl> <dbl>
## 1 21608 AT94 11 22.0
## 2 21610 DR93 1 495
## 3 21610 DW11 1 400.
## 4 21613 KL62 4 330.
## 5 21614 KT03 2 595
## 6 21617 BV06 2 795.
order_item <- order_item %>%
mutate(REVENUE = NUM_ORDERED * QUOTED_PRICE)
knitr::opts_chunk$set(echo = TRUE)
library(readxl)
library(dplyr)
library(tidyr)
library(ggplot2)
file_path <- "C:/Users/User/OneDrive/Desktop/R_Output/Premiere Data File.xlsx"
sales_rep <- read_excel(file_path, sheet = "REP")
customer <- read_excel(file_path, sheet = "CUSTOMER")
customer <- read_excel(file_path, sheet = "CUSTOMER")
order_data <- read_excel(file_path, sheet = "ORDERS")
## Warning: Coercing text to numeric in A9002 / R9002C1: '21617'
## Warning: Coercing text to numeric in A9003 / R9003C1: '21619'
## Warning: Coercing text to numeric in A9004 / R9004C1: '21623'
## Warning: Coercing text to numeric in A9005 / R9005C1: '21613'
## Warning: Coercing text to numeric in A9006 / R9006C1: '21614'
## Warning: Coercing text to numeric in A9007 / R9007C1: '21608'
## Warning: Coercing text to numeric in A9008 / R9008C1: '21610'
order_item <- read_excel(file_path, sheet = "ORDER_LINE")
cat("SalesRep Table:\n")
## SalesRep Table:
print(head(sales_rep))
## # A tibble: 3 × 9
## REP_NUM LAST_NAME FIRST_NAME STREET CITY STATE ZIP COMMISSION RATE
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 20 Kaiser Valerie 624 Randall Grove FL 33321 20542. 0.05
## 2 35 Hull Richard 532 Jackson Sheldon FL 33553 39216 0.07
## 3 65 Perez Juan 1626 Taylor Fillmore FL 33336 23487 0.05
cat("\nCustomer Table:\n")
##
## Customer Table:
print(head(customer))
## # A tibble: 6 × 9
## CUSTOMER_NUM CUSTOMER_NAME STREET CITY STATE ZIP BALANCE CREDIT_LIMIT
## <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 148 Al's Appliance and… 2837 … Fill… FL 33336 6550 7500
## 2 282 Brookings Direct 3827 … Grove FL 33321 432. 10000
## 3 356 Ferguson's 382 W… Nort… FL 33146 5785 7500
## 4 408 The Everything Shop 1828 … Crys… FL 33503 5285. 5000
## 5 462 Bargains Galore 3829 … Grove FL 33321 3412 10000
## 6 524 Kline's 838 R… Fill… FL 33336 12762 15000
## # ℹ 1 more variable: REP_NUM <chr>
cat("\nProduct Table:\n")
##
## Product Table:
print(head(product))
## # A tibble: 6 × 6
## PART_NUM DESCRIPTION ON_HAND CLASS WAREHOUSE PRICE
## <chr> <chr> <dbl> <chr> <chr> <dbl>
## 1 AT94 Iron 50 HW 3 25.0
## 2 BV06 Home Gym 45 SG 2 795.
## 3 CD52 Microwave Oven 32 AP 1 165
## 4 DL71 Cordless Drill 21 HW 3 130.
## 5 DR93 Gas Range 8 AP 2 495
## 6 DW11 Washer 12 AP 3 400.
cat("\nOrder Table:\n")
##
## Order Table:
print(head(order_data))
## # A tibble: 6 × 3
## ORDER_NUM ORDER_DATE CUSTOMER_NUM
## <dbl> <dttm> <chr>
## 1 4339 2019-10-12 00:00:00 524
## 2 7338 2019-10-12 00:00:00 687
## 3 8892 2019-10-12 00:00:00 282
## 4 8922 2019-10-12 00:00:00 356
## 5 9187 2019-10-12 00:00:00 462
## 6 9247 2019-10-12 00:00:00 687
cat("\nOrderItem Table:\n")
##
## OrderItem Table:
print(head(order_item))
## # A tibble: 6 × 4
## ORDER_NUM PART_NUM NUM_ORDERED QUOTED_PRICE
## <chr> <chr> <dbl> <dbl>
## 1 21608 AT94 11 22.0
## 2 21610 DR93 1 495
## 3 21610 DW11 1 400.
## 4 21613 KL62 4 330.
## 5 21614 KT03 2 595
## 6 21617 BV06 2 795.
cat("\n--- sales_rep Structure Verification ---\n")
##
## --- sales_rep Structure Verification ---
str(sales_rep)
## tibble [3 × 9] (S3: tbl_df/tbl/data.frame)
## $ REP_NUM : chr [1:3] "20" "35" "65"
## $ LAST_NAME : chr [1:3] "Kaiser" "Hull" "Perez"
## $ FIRST_NAME: chr [1:3] "Valerie" "Richard" "Juan"
## $ STREET : chr [1:3] "624 Randall" "532 Jackson" "1626 Taylor"
## $ CITY : chr [1:3] "Grove" "Sheldon" "Fillmore"
## $ STATE : chr [1:3] "FL" "FL" "FL"
## $ ZIP : chr [1:3] "33321" "33553" "33336"
## $ COMMISSION: num [1:3] 20543 39216 23487
## $ RATE : num [1:3] 0.05 0.07 0.05
order_data <- order_data %>%
mutate(ORDER_NUM = as.character(ORDER_NUM))
order_item <- order_item %>%
mutate(ORDER_NUM = as.character(ORDER_NUM))
joined_data <- order_item %>%
inner_join(order_data, by = "ORDER_NUM")
print(head(joined_data))
## # A tibble: 6 × 6
## ORDER_NUM PART_NUM NUM_ORDERED QUOTED_PRICE ORDER_DATE CUSTOMER_NUM
## <chr> <chr> <dbl> <dbl> <dttm> <chr>
## 1 21608 AT94 11 22.0 2013-10-20 00:00:00 148
## 2 21610 DR93 1 495 2013-10-20 00:00:00 356
## 3 21610 DW11 1 400. 2013-10-20 00:00:00 356
## 4 21613 KL62 4 330. 2013-10-21 00:00:00 408
## 5 21614 KT03 2 595 2013-10-21 00:00:00 282
## 6 21617 BV06 2 795. 2013-10-23 00:00:00 608
joined_data <- joined_data %>%
mutate(REVENUE = NUM_ORDERED * QUOTED_PRICE)
print(head(joined_data))
## # A tibble: 6 × 7
## ORDER_NUM PART_NUM NUM_ORDERED QUOTED_PRICE ORDER_DATE CUSTOMER_NUM
## <chr> <chr> <dbl> <dbl> <dttm> <chr>
## 1 21608 AT94 11 22.0 2013-10-20 00:00:00 148
## 2 21610 DR93 1 495 2013-10-20 00:00:00 356
## 3 21610 DW11 1 400. 2013-10-20 00:00:00 356
## 4 21613 KL62 4 330. 2013-10-21 00:00:00 408
## 5 21614 KT03 2 595 2013-10-21 00:00:00 282
## 6 21617 BV06 2 795. 2013-10-23 00:00:00 608
## # ℹ 1 more variable: REVENUE <dbl>
total_revenue <- sum(joined_data$REVENUE)
cat("Total Sales Revenue: ", total_revenue, "\n")
## Total Sales Revenue: 644131756
# Join order_item, order_data, customer, and sales_rep
rep_sales_data <- order_item %>%
inner_join(order_data, by = "ORDER_NUM") %>%
inner_join(customer, by = "CUSTOMER_NUM") %>%
inner_join(sales_rep, by = "REP_NUM")
# Calculate revenue (if not already calculated)
rep_sales_data <- rep_sales_data %>%
mutate(REVENUE = NUM_ORDERED * QUOTED_PRICE)
# Aggregate revenue by sales representative
sales_by_rep <- rep_sales_data %>%
group_by(REP_NUM, LAST_NAME, FIRST_NAME) %>%
summarise(TOTAL_SALES = sum(REVENUE), .groups = "drop") %>% # Added .groups
arrange(desc(TOTAL_SALES)) # Arrange in descending order of sales
# Print the sales by representative
cat("\nSales Revenue by Sales Representative:\n")
##
## Sales Revenue by Sales Representative:
print(sales_by_rep)
## # A tibble: 3 × 4
## REP_NUM LAST_NAME FIRST_NAME TOTAL_SALES
## <chr> <chr> <chr> <dbl>
## 1 35 Hull Richard 258897303.
## 2 65 Perez Juan 196062041.
## 3 20 Kaiser Valerie 189172412.
# Join order_item and product tables
product_sales_data <- order_item %>%
inner_join(product, by = "PART_NUM")
# Calculate revenue
product_sales_data <- product_sales_data %>%
mutate(REVENUE = NUM_ORDERED * QUOTED_PRICE)
# Aggregate revenue by product
sales_by_product <- product_sales_data %>%
group_by(PART_NUM, DESCRIPTION) %>%
summarise(TOTAL_SALES = sum(REVENUE)) %>%
arrange(desc(TOTAL_SALES))
## `summarise()` has grouped output by 'PART_NUM'. You can override using the
## `.groups` argument.
# Print the top selling products
cat("\nTop Selling Products:\n")
##
## Top Selling Products:
print(sales_by_product)
## # A tibble: 10 × 3
## # Groups: PART_NUM [10]
## PART_NUM DESCRIPTION TOTAL_SALES
## <chr> <chr> <dbl>
## 1 BV06 Home Gym 66585304.
## 2 FD21 Stand Mixer 65275882.
## 3 DR93 Gas Range 65144288.
## 4 KL62 Dryer 65051331.
## 5 KV29 Treadmill 64765776.
## 6 DL71 Cordless Drill 64662941.
## 7 CD52 Microwave Oven 63639282.
## 8 KT03 Dishwasher 63596383.
## 9 DW11 Washer 62869405.
## 10 AT94 Iron 62541162.
# Ensure total_revenue was calculated in the 'calculate_total_revenue' chunk
cat("## Total Sales Revenue: $", format(total_revenue, big.mark = ",", digits = 2), "\n")
## ## Total Sales Revenue: $ 6.4e+08
``` r
# Ensure the sales_by_rep data frame is available
# (It was created in the calculate_sales_by_rep chunk)
ggplot(sales_by_rep, aes(x = reorder(paste(FIRST_NAME, LAST_NAME), TOTAL_SALES), y = TOTAL_SALES)) +
geom_bar(stat = "identity", fill = "steelblue") +
coord_flip() + # Flip the axes for easier readability of names
labs(title = "Sales Revenue by Sales Representative",
x = "Sales Representative",
y = "Total Sales Revenue") +
theme_minimal()
# Let's display the top 10 products for better visualization
top_n_products <- sales_by_product %>%
top_n(10, TOTAL_SALES)
ggplot(top_n_products, aes(x = reorder(DESCRIPTION, TOTAL_SALES), y = TOTAL_SALES)) +
geom_bar(stat = "identity", fill = "darkgreen") +
coord_flip() + # Flip the axes for easier readability of product descriptions
labs(title = "Top 10 Selling Products by Revenue",
x = "Product Description",
y = "Total Sales Revenue") +
theme_minimal()
## Premiere Products
# Ensure total_revenue was calculated in the 'calculate_total_revenue' chunk
cat("## Total Sales Revenue: $", format(total_revenue, big.mark = ",", digits = 2), "\n")
## ## Total Sales Revenue: $ 6.4e+08
``` r
## Premiere Products
## ## Total Sales Revenue: $ 6.4e+08
## ### Average Order Value: $ 71,515
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.