##Group 3 Team Project R Codes
#Package Installation
#install.packages(c("readxl", "tidyverse", "lubridate", "forecast", "ggplot2"))
library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(forecast)
library(ggplot2)
#Dataset Setup
setwd("~/Desktop/Retail Analytics 450.740.81_Jan152026/Retail Analysis R file")
df <- read_excel("Jewelry 2025 Jan - Dec Monthlysalestransaction.xlsx",
sheet = "Data Clean")
#View(df)
##Time-series Analysis
#Transform Date column to Date format
df <- df %>% mutate(Date = as.Date(Date))
#Create monthly summary
monthly_sales <- df %>% mutate(Month = floor_date(Date, "month")) %>% group_by(Month) %>%
summarise(
Total_Sales = sum(total, na.rm = TRUE),
Bracelet = sum(Bracelet_D, na.rm = TRUE),
Earrings = sum(Earrings_D, na.rm = TRUE),
Necklace = sum(Necklace_D, na.rm = TRUE),
Ring = sum(Ring_D, na.rm = TRUE)
)
#Time-series plot for Total Sales
monthly_sales %>%
mutate(MonthName = lubridate::month(Month, label = TRUE)) %>%
ggplot(aes(x = MonthName, y = Total_Sales, group = 1)) +
geom_line() +
geom_point() +
labs(title = "Total Sales by Month in 2025",
x = "Month", y = "Total Sales")

monthly_sales
## # A tibble: 12 × 6
## Month Total_Sales Bracelet Earrings Necklace Ring
## <date> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2025-01-01 5309. 119 240 3 0
## 2 2025-02-01 5565. 116 280 0 0
## 3 2025-03-01 4293. 81 262 1 1
## 4 2025-04-01 7913. 168 379 0 0
## 5 2025-05-01 9748. 231 416 0 0
## 6 2025-06-01 8114. 195 381 1 0
## 7 2025-07-01 8887. 221 413 30 0
## 8 2025-08-01 10593. 218 421 95 2
## 9 2025-09-01 12529. 247 439 118 0
## 10 2025-10-01 12118. 201 449 157 0
## 11 2025-11-01 10454. 307 462 5 0
## 12 2025-12-01 28377. 471 544 539 0
#Time-series plot for Bracelet Sales Volume
monthly_sales %>%
mutate(MonthName = lubridate::month(Month, label = TRUE)) %>%
ggplot(aes(x = MonthName, y = Bracelet, group = 1)) +
geom_line() +
geom_point() +
labs(title = "Sales Volume of Bracelet by Month in 2025",
x = "Month", y = "Sales Volume")

#Time-series plot for Earrings Sales Volume
monthly_sales %>%
mutate(MonthName = lubridate::month(Month, label = TRUE)) %>%
ggplot(aes(x = MonthName, y = Earrings, group = 1)) +
geom_line() +
geom_point() +
labs(title = "Sales Volume of Earrings by Month in 2025",
x = "Month", y = "Sales Volume")

#Time-series plot for Necklace Sales Volume
monthly_sales %>%
mutate(MonthName = lubridate::month(Month, label = TRUE)) %>%
ggplot(aes(x = MonthName, y = Necklace, group = 1)) +
geom_line() +
geom_point() +
labs(title = "Sales Volume of Necklace by Month in 2025",
x = "Month", y = "Sales Volume")

#Time-series plot for Ring Sales Volume
monthly_sales %>%
mutate(MonthName = lubridate::month(Month, label = TRUE)) %>%
ggplot(aes(x = MonthName, y = Ring, group = 1)) +
geom_line() +
geom_point() +
labs(title = "Sales Volume of Ring by Month in 2025",
x = "Month", y = "Sales Volume")

#Best 5 months summary table
best_months <- monthly_sales %>%
mutate(MonthName = lubridate::month(Month, label = TRUE)) %>%
select(Month, MonthName, Total_Sales, Bracelet, Earrings, Necklace, Ring)
best_months %>%
arrange(desc(Total_Sales)) %>%
head(5)
## # A tibble: 5 × 7
## Month MonthName Total_Sales Bracelet Earrings Necklace Ring
## <date> <ord> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2025-12-01 Dec 28377. 471 544 539 0
## 2 2025-09-01 Sep 12529. 247 439 118 0
## 3 2025-10-01 Oct 12118. 201 449 157 0
## 4 2025-08-01 Aug 10593. 218 421 95 2
## 5 2025-11-01 Nov 10454. 307 462 5 0
##demand model
df$Month <- month(df$Date)
df_reg <- df %>%
filter(total > 0 & quantity > 0)
df_state_month <- df_reg %>%
group_by(order_state, Month) %>%
summarise(total_sales = sum(total),
total_quantity = sum(quantity), groups = "drop")
## `summarise()` has grouped output by 'order_state'. You can override using the
## `.groups` argument.
df_state_month$log_quantity <- log(df_state_month$total_quantity)
model3 <- lm(log_quantity ~ factor(order_state)
+ factor(Month), data = df_state_month)
summary(model3)
##
## Call:
## lm(formula = log_quantity ~ factor(order_state) + factor(Month),
## data = df_state_month)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.49766 -0.20924 0.01373 0.24597 2.28431
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.70358 0.47774 -1.473 0.141480
## factor(order_state)AL 2.27161 0.49097 4.627 4.78e-06 ***
## factor(order_state)AR 1.56639 0.49097 3.190 0.001514 **
## factor(order_state)AZ 2.72193 0.49097 5.544 4.89e-08 ***
## factor(order_state)CA 4.22337 0.49097 8.602 < 2e-16 ***
## factor(order_state)CO 2.29262 0.49097 4.670 3.92e-06 ***
## factor(order_state)CT 1.88301 0.49097 3.835 0.000142 ***
## factor(order_state)DC 1.05833 0.50881 2.080 0.038055 *
## factor(order_state)DE 0.95615 0.49262 1.941 0.052849 .
## factor(order_state)FL 4.42252 0.49097 9.008 < 2e-16 ***
## factor(order_state)GA 3.35436 0.49097 6.832 2.54e-11 ***
## factor(order_state)HI 0.54116 0.50883 1.064 0.288078
## factor(order_state)IA 1.50242 0.49261 3.050 0.002416 **
## factor(order_state)ID 1.27038 0.49459 2.569 0.010513 *
## factor(order_state)IL 3.24175 0.49097 6.603 1.07e-10 ***
## factor(order_state)IN 2.59778 0.49097 5.291 1.85e-07 ***
## factor(order_state)KS 1.24521 0.49097 2.536 0.011521 *
## factor(order_state)KY 1.90432 0.49097 3.879 0.000120 ***
## factor(order_state)LA 2.14701 0.49097 4.373 1.50e-05 ***
## factor(order_state)MA 2.54995 0.49097 5.194 3.05e-07 ***
## factor(order_state)MD 2.52698 0.49097 5.147 3.87e-07 ***
## factor(order_state)ME 0.67666 0.49997 1.353 0.176567
## factor(order_state)MI 3.26354 0.49097 6.647 8.14e-11 ***
## factor(order_state)MN 1.87167 0.49097 3.812 0.000156 ***
## factor(order_state)MO 2.14111 0.49097 4.361 1.59e-05 ***
## factor(order_state)MS 1.68674 0.49097 3.436 0.000643 ***
## factor(order_state)MT 0.32695 0.50377 0.649 0.516644
## factor(order_state)NC 3.27723 0.49097 6.675 6.84e-11 ***
## factor(order_state)ND 0.86573 0.51756 1.673 0.095036 .
## factor(order_state)NE 1.45532 0.49097 2.964 0.003186 **
## factor(order_state)NH 0.90135 0.49459 1.822 0.069016 .
## factor(order_state)NJ 3.31514 0.49097 6.752 4.22e-11 ***
## factor(order_state)NM 0.97335 0.49261 1.976 0.048738 *
## factor(order_state)NV 1.82843 0.49097 3.724 0.000219 ***
## factor(order_state)NY 3.84286 0.49097 7.827 3.22e-14 ***
## factor(order_state)OH 3.26526 0.49097 6.651 7.96e-11 ***
## factor(order_state)OK 1.91006 0.49097 3.890 0.000114 ***
## factor(order_state)OR 1.98397 0.49097 4.041 6.20e-05 ***
## factor(order_state)PA 3.24128 0.49097 6.602 1.08e-10 ***
## factor(order_state)RI 0.86557 0.49459 1.750 0.080743 .
## factor(order_state)SC 2.46334 0.49097 5.017 7.39e-07 ***
## factor(order_state)SD 0.46608 0.50377 0.925 0.355333
## factor(order_state)TN 2.56754 0.49097 5.230 2.54e-07 ***
## factor(order_state)TX 4.18286 0.49097 8.520 < 2e-16 ***
## factor(order_state)UT 1.53555 0.49097 3.128 0.001869 **
## factor(order_state)VA 2.91969 0.49097 5.947 5.27e-09 ***
## factor(order_state)VT 0.24009 0.52828 0.454 0.649697
## factor(order_state)WA 2.28127 0.49097 4.646 4.37e-06 ***
## factor(order_state)WI 2.16364 0.49097 4.407 1.29e-05 ***
## factor(order_state)WV 1.17764 0.49097 2.399 0.016839 *
## factor(order_state)WY 0.39277 0.54273 0.724 0.469598
## factor(Month)2 0.10151 0.10141 1.001 0.317318
## factor(Month)3 -0.01308 0.10264 -0.127 0.898618
## factor(Month)4 0.32957 0.09961 3.309 0.001008 **
## factor(Month)5 0.60394 0.09974 6.055 2.84e-09 ***
## factor(Month)6 0.44999 0.09890 4.550 6.80e-06 ***
## factor(Month)7 0.64959 0.09939 6.536 1.62e-10 ***
## factor(Month)8 0.70358 0.09820 7.165 2.95e-12 ***
## factor(Month)9 0.75792 0.09821 7.717 6.96e-14 ***
## factor(Month)10 0.85722 0.09987 8.583 < 2e-16 ***
## factor(Month)11 0.66579 0.09918 6.713 5.38e-11 ***
## factor(Month)12 1.44756 0.09830 14.727 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4675 on 480 degrees of freedom
## Multiple R-squared: 0.8559, Adjusted R-squared: 0.8376
## F-statistic: 46.73 on 61 and 480 DF, p-value: < 2.2e-16
#Month 12 = 4.25 times of Month 1
exp(coef(model3)["factor(Month)12"])
## factor(Month)12
## 4.252731
#Month 12 was 325% higher than Month 1
(exp(coef(model3)["factor(Month)12"]) - 1) * 100
## factor(Month)12
## 325.2731
#Sales of FL = 83.3 times sales of AK
exp(coef(model3)["factor(order_state)FL"])
## factor(order_state)FL
## 83.30594
(exp(coef(model3)["factor(order_state)FL"]) - 1) * 100
## factor(order_state)FL
## 8230.594
#Extract State Effect Ranking
coef_table <- summary(model3)$coefficients
state_effects <- coef_table[grep("order_state", rownames(coef_table)), ]
state_effects_df <- data.frame(
State = gsub("factor\\(order_state\\)", "", rownames(state_effects)),
Estimate = state_effects[,1]
)
state_effects_df$Relative_to_AK <- exp(state_effects_df$Estimate)
state_effects_df <- state_effects_df %>%
arrange(desc(Relative_to_AK))
state_effects_df
## State Estimate Relative_to_AK
## factor(order_state)FL FL 4.4225199 83.305941
## factor(order_state)CA CA 4.2233725 68.263314
## factor(order_state)TX TX 4.1828603 65.553085
## factor(order_state)NY NY 3.8428592 46.658691
## factor(order_state)GA GA 3.3543617 28.627326
## factor(order_state)NJ NJ 3.3151427 27.526321
## factor(order_state)NC NC 3.2772258 26.502148
## factor(order_state)OH OH 3.2652633 26.187006
## factor(order_state)MI MI 3.2635415 26.141956
## factor(order_state)IL IL 3.2417479 25.578391
## factor(order_state)PA PA 3.2412791 25.566403
## factor(order_state)VA VA 2.9196904 18.535547
## factor(order_state)AZ AZ 2.7219257 15.209583
## factor(order_state)IN IN 2.5977799 13.433881
## factor(order_state)TN TN 2.5675359 13.033669
## factor(order_state)MA MA 2.5499534 12.806507
## factor(order_state)MD MD 2.5269797 12.515648
## factor(order_state)SC SC 2.4633448 11.744028
## factor(order_state)CO CO 2.2926179 9.900823
## factor(order_state)WA WA 2.2812735 9.789139
## factor(order_state)AL AL 2.2716069 9.694968
## factor(order_state)WI WI 2.1636392 8.702751
## factor(order_state)LA LA 2.1470089 8.559218
## factor(order_state)MO MO 2.1411057 8.508840
## factor(order_state)OR OR 1.9839674 7.271535
## factor(order_state)OK OK 1.9100624 6.753510
## factor(order_state)KY KY 1.9043215 6.714850
## factor(order_state)CT CT 1.8830074 6.573244
## factor(order_state)MN MN 1.8716728 6.499159
## factor(order_state)NV NV 1.8284303 6.224109
## factor(order_state)MS MS 1.6867391 5.401837
## factor(order_state)AR AR 1.5663860 4.789308
## factor(order_state)UT UT 1.5355531 4.643893
## factor(order_state)IA IA 1.5024196 4.492546
## factor(order_state)NE NE 1.4553189 4.285850
## factor(order_state)ID ID 1.2703811 3.562210
## factor(order_state)KS KS 1.2452127 3.473674
## factor(order_state)WV WV 1.1776352 3.246687
## factor(order_state)DC DC 1.0583313 2.881558
## factor(order_state)NM NM 0.9733549 2.646809
## factor(order_state)DE DE 0.9561501 2.601661
## factor(order_state)NH NH 0.9013475 2.462920
## factor(order_state)ND ND 0.8657258 2.376730
## factor(order_state)RI RI 0.8655729 2.376367
## factor(order_state)ME ME 0.6766550 1.967286
## factor(order_state)HI HI 0.5411588 1.717997
## factor(order_state)SD SD 0.4660799 1.593734
## factor(order_state)WY WY 0.3927745 1.481084
## factor(order_state)MT MT 0.3269524 1.386735
## factor(order_state)VT VT 0.2400869 1.271360
#Top 10 states by Net Sales
state_summary <- df %>%
group_by(order_state) %>%
summarise(
Net_Sales = sum(total, na.rm = TRUE),
Units = sum(quantity, na.rm = TRUE),
.groups = "drop"
)
total_annual_sales <- sum(state_summary$Net_Sales)
state_summary <- state_summary %>%
mutate(
Share_of_Annual_Net_Sales = Net_Sales / total_annual_sales
)
top10_states <- state_summary %>%
arrange(desc(Net_Sales)) %>%
slice(1:10)
top10_states <- top10_states %>%
mutate(
Net_Sales = round(Net_Sales, 2),
Units = as.integer(Units),
Share_of_Annual_Net_Sales = round(Share_of_Annual_Net_Sales * 100, 1),
Share_of_Annual_Net_Sales = paste0(Share_of_Annual_Net_Sales, "%")
) %>%
rename(State = order_state)
top10_states
## # A tibble: 10 × 4
## State Net_Sales Units Share_of_Annual_Net_Sales
## <chr> <dbl> <int> <chr>
## 1 FL 14618. 938 11.8%
## 2 TX 13101. 780 10.6%
## 3 CA 11636. 799 9.4%
## 4 NY 7823. 540 6.3%
## 5 NJ 4965. 328 4%
## 6 GA 4557. 309 3.7%
## 7 NC 4529. 301 3.7%
## 8 MI 4502. 306 3.6%
## 9 IL 4441. 292 3.6%
## 10 OH 4131. 291 3.3%