##Group 3 Team Project R Codes

#Package Installation
#install.packages(c("readxl", "tidyverse", "lubridate", "forecast", "ggplot2"))

library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(forecast)
library(ggplot2)

#Dataset Setup
setwd("~/Desktop/Retail Analytics 450.740.81_Jan152026/Retail Analysis R file")
df <- read_excel("Jewelry 2025 Jan - Dec Monthlysalestransaction.xlsx",
                 sheet = "Data Clean")
#View(df)

##Time-series Analysis

#Transform Date column to Date format
df <- df %>% mutate(Date = as.Date(Date))

#Create monthly summary
monthly_sales <- df %>% mutate(Month = floor_date(Date, "month")) %>% group_by(Month) %>%
  summarise(
    Total_Sales = sum(total, na.rm = TRUE),
    Bracelet = sum(Bracelet_D, na.rm = TRUE),
    Earrings = sum(Earrings_D, na.rm = TRUE),
    Necklace = sum(Necklace_D, na.rm = TRUE),
    Ring = sum(Ring_D, na.rm = TRUE)
  )

#Time-series plot for Total Sales
monthly_sales %>%
  mutate(MonthName = lubridate::month(Month, label = TRUE)) %>%
  ggplot(aes(x = MonthName, y = Total_Sales, group = 1)) +
  geom_line() +
  geom_point() +
  labs(title = "Total Sales by Month in 2025",
       x = "Month", y = "Total Sales")

monthly_sales
## # A tibble: 12 × 6
##    Month      Total_Sales Bracelet Earrings Necklace  Ring
##    <date>           <dbl>    <dbl>    <dbl>    <dbl> <dbl>
##  1 2025-01-01       5309.      119      240        3     0
##  2 2025-02-01       5565.      116      280        0     0
##  3 2025-03-01       4293.       81      262        1     1
##  4 2025-04-01       7913.      168      379        0     0
##  5 2025-05-01       9748.      231      416        0     0
##  6 2025-06-01       8114.      195      381        1     0
##  7 2025-07-01       8887.      221      413       30     0
##  8 2025-08-01      10593.      218      421       95     2
##  9 2025-09-01      12529.      247      439      118     0
## 10 2025-10-01      12118.      201      449      157     0
## 11 2025-11-01      10454.      307      462        5     0
## 12 2025-12-01      28377.      471      544      539     0
#Time-series plot for Bracelet Sales Volume
monthly_sales %>%
  mutate(MonthName = lubridate::month(Month, label = TRUE)) %>%
  ggplot(aes(x = MonthName, y = Bracelet, group = 1)) +
  geom_line() +
  geom_point() +
  labs(title = "Sales Volume of Bracelet by Month in 2025",
       x = "Month", y = "Sales Volume")

#Time-series plot for Earrings Sales Volume
monthly_sales %>%
  mutate(MonthName = lubridate::month(Month, label = TRUE)) %>%
  ggplot(aes(x = MonthName, y = Earrings, group = 1)) +
  geom_line() +
  geom_point() +
  labs(title = "Sales Volume of Earrings by Month in 2025",
       x = "Month", y = "Sales Volume")

#Time-series plot for Necklace Sales Volume
monthly_sales %>%
  mutate(MonthName = lubridate::month(Month, label = TRUE)) %>%
  ggplot(aes(x = MonthName, y = Necklace, group = 1)) +
  geom_line() +
  geom_point() +
  labs(title = "Sales Volume of Necklace by Month in 2025",
       x = "Month", y = "Sales Volume")

#Time-series plot for Ring Sales Volume
monthly_sales %>%
  mutate(MonthName = lubridate::month(Month, label = TRUE)) %>%
  ggplot(aes(x = MonthName, y = Ring, group = 1)) +
  geom_line() +
  geom_point() +
  labs(title = "Sales Volume of Ring by Month in 2025",
       x = "Month", y = "Sales Volume")

#Best 5 months summary table
best_months <- monthly_sales %>%
  mutate(MonthName = lubridate::month(Month, label = TRUE)) %>%
  select(Month, MonthName, Total_Sales, Bracelet, Earrings, Necklace, Ring)

best_months %>%
  arrange(desc(Total_Sales)) %>%
  head(5)
## # A tibble: 5 × 7
##   Month      MonthName Total_Sales Bracelet Earrings Necklace  Ring
##   <date>     <ord>           <dbl>    <dbl>    <dbl>    <dbl> <dbl>
## 1 2025-12-01 Dec            28377.      471      544      539     0
## 2 2025-09-01 Sep            12529.      247      439      118     0
## 3 2025-10-01 Oct            12118.      201      449      157     0
## 4 2025-08-01 Aug            10593.      218      421       95     2
## 5 2025-11-01 Nov            10454.      307      462        5     0
##demand model
df$Month <- month(df$Date)

df_reg <- df %>%
 filter(total > 0 & quantity > 0)

df_state_month <- df_reg %>%
  group_by(order_state, Month) %>% 
  summarise(total_sales = sum(total), 
            total_quantity = sum(quantity), groups = "drop")
## `summarise()` has grouped output by 'order_state'. You can override using the
## `.groups` argument.
df_state_month$log_quantity <- log(df_state_month$total_quantity)

model3 <- lm(log_quantity ~ factor(order_state) 
             + factor(Month), data = df_state_month)

summary(model3)
## 
## Call:
## lm(formula = log_quantity ~ factor(order_state) + factor(Month), 
##     data = df_state_month)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.49766 -0.20924  0.01373  0.24597  2.28431 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -0.70358    0.47774  -1.473 0.141480    
## factor(order_state)AL  2.27161    0.49097   4.627 4.78e-06 ***
## factor(order_state)AR  1.56639    0.49097   3.190 0.001514 ** 
## factor(order_state)AZ  2.72193    0.49097   5.544 4.89e-08 ***
## factor(order_state)CA  4.22337    0.49097   8.602  < 2e-16 ***
## factor(order_state)CO  2.29262    0.49097   4.670 3.92e-06 ***
## factor(order_state)CT  1.88301    0.49097   3.835 0.000142 ***
## factor(order_state)DC  1.05833    0.50881   2.080 0.038055 *  
## factor(order_state)DE  0.95615    0.49262   1.941 0.052849 .  
## factor(order_state)FL  4.42252    0.49097   9.008  < 2e-16 ***
## factor(order_state)GA  3.35436    0.49097   6.832 2.54e-11 ***
## factor(order_state)HI  0.54116    0.50883   1.064 0.288078    
## factor(order_state)IA  1.50242    0.49261   3.050 0.002416 ** 
## factor(order_state)ID  1.27038    0.49459   2.569 0.010513 *  
## factor(order_state)IL  3.24175    0.49097   6.603 1.07e-10 ***
## factor(order_state)IN  2.59778    0.49097   5.291 1.85e-07 ***
## factor(order_state)KS  1.24521    0.49097   2.536 0.011521 *  
## factor(order_state)KY  1.90432    0.49097   3.879 0.000120 ***
## factor(order_state)LA  2.14701    0.49097   4.373 1.50e-05 ***
## factor(order_state)MA  2.54995    0.49097   5.194 3.05e-07 ***
## factor(order_state)MD  2.52698    0.49097   5.147 3.87e-07 ***
## factor(order_state)ME  0.67666    0.49997   1.353 0.176567    
## factor(order_state)MI  3.26354    0.49097   6.647 8.14e-11 ***
## factor(order_state)MN  1.87167    0.49097   3.812 0.000156 ***
## factor(order_state)MO  2.14111    0.49097   4.361 1.59e-05 ***
## factor(order_state)MS  1.68674    0.49097   3.436 0.000643 ***
## factor(order_state)MT  0.32695    0.50377   0.649 0.516644    
## factor(order_state)NC  3.27723    0.49097   6.675 6.84e-11 ***
## factor(order_state)ND  0.86573    0.51756   1.673 0.095036 .  
## factor(order_state)NE  1.45532    0.49097   2.964 0.003186 ** 
## factor(order_state)NH  0.90135    0.49459   1.822 0.069016 .  
## factor(order_state)NJ  3.31514    0.49097   6.752 4.22e-11 ***
## factor(order_state)NM  0.97335    0.49261   1.976 0.048738 *  
## factor(order_state)NV  1.82843    0.49097   3.724 0.000219 ***
## factor(order_state)NY  3.84286    0.49097   7.827 3.22e-14 ***
## factor(order_state)OH  3.26526    0.49097   6.651 7.96e-11 ***
## factor(order_state)OK  1.91006    0.49097   3.890 0.000114 ***
## factor(order_state)OR  1.98397    0.49097   4.041 6.20e-05 ***
## factor(order_state)PA  3.24128    0.49097   6.602 1.08e-10 ***
## factor(order_state)RI  0.86557    0.49459   1.750 0.080743 .  
## factor(order_state)SC  2.46334    0.49097   5.017 7.39e-07 ***
## factor(order_state)SD  0.46608    0.50377   0.925 0.355333    
## factor(order_state)TN  2.56754    0.49097   5.230 2.54e-07 ***
## factor(order_state)TX  4.18286    0.49097   8.520  < 2e-16 ***
## factor(order_state)UT  1.53555    0.49097   3.128 0.001869 ** 
## factor(order_state)VA  2.91969    0.49097   5.947 5.27e-09 ***
## factor(order_state)VT  0.24009    0.52828   0.454 0.649697    
## factor(order_state)WA  2.28127    0.49097   4.646 4.37e-06 ***
## factor(order_state)WI  2.16364    0.49097   4.407 1.29e-05 ***
## factor(order_state)WV  1.17764    0.49097   2.399 0.016839 *  
## factor(order_state)WY  0.39277    0.54273   0.724 0.469598    
## factor(Month)2         0.10151    0.10141   1.001 0.317318    
## factor(Month)3        -0.01308    0.10264  -0.127 0.898618    
## factor(Month)4         0.32957    0.09961   3.309 0.001008 ** 
## factor(Month)5         0.60394    0.09974   6.055 2.84e-09 ***
## factor(Month)6         0.44999    0.09890   4.550 6.80e-06 ***
## factor(Month)7         0.64959    0.09939   6.536 1.62e-10 ***
## factor(Month)8         0.70358    0.09820   7.165 2.95e-12 ***
## factor(Month)9         0.75792    0.09821   7.717 6.96e-14 ***
## factor(Month)10        0.85722    0.09987   8.583  < 2e-16 ***
## factor(Month)11        0.66579    0.09918   6.713 5.38e-11 ***
## factor(Month)12        1.44756    0.09830  14.727  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4675 on 480 degrees of freedom
## Multiple R-squared:  0.8559, Adjusted R-squared:  0.8376 
## F-statistic: 46.73 on 61 and 480 DF,  p-value: < 2.2e-16
#Month 12 = 4.25 times of Month 1
exp(coef(model3)["factor(Month)12"])
## factor(Month)12 
##        4.252731
#Month 12 was 325% higher than Month 1
(exp(coef(model3)["factor(Month)12"]) - 1) * 100
## factor(Month)12 
##        325.2731
#Sales of FL = 83.3 times sales of AK
exp(coef(model3)["factor(order_state)FL"])
## factor(order_state)FL 
##              83.30594
(exp(coef(model3)["factor(order_state)FL"]) - 1) * 100
## factor(order_state)FL 
##              8230.594
#Extract State Effect Ranking
coef_table <- summary(model3)$coefficients

state_effects <- coef_table[grep("order_state", rownames(coef_table)), ]

state_effects_df <- data.frame(
  State = gsub("factor\\(order_state\\)", "", rownames(state_effects)),
  Estimate = state_effects[,1]
)

state_effects_df$Relative_to_AK <- exp(state_effects_df$Estimate)

state_effects_df <- state_effects_df %>%
  arrange(desc(Relative_to_AK))

state_effects_df
##                       State  Estimate Relative_to_AK
## factor(order_state)FL    FL 4.4225199      83.305941
## factor(order_state)CA    CA 4.2233725      68.263314
## factor(order_state)TX    TX 4.1828603      65.553085
## factor(order_state)NY    NY 3.8428592      46.658691
## factor(order_state)GA    GA 3.3543617      28.627326
## factor(order_state)NJ    NJ 3.3151427      27.526321
## factor(order_state)NC    NC 3.2772258      26.502148
## factor(order_state)OH    OH 3.2652633      26.187006
## factor(order_state)MI    MI 3.2635415      26.141956
## factor(order_state)IL    IL 3.2417479      25.578391
## factor(order_state)PA    PA 3.2412791      25.566403
## factor(order_state)VA    VA 2.9196904      18.535547
## factor(order_state)AZ    AZ 2.7219257      15.209583
## factor(order_state)IN    IN 2.5977799      13.433881
## factor(order_state)TN    TN 2.5675359      13.033669
## factor(order_state)MA    MA 2.5499534      12.806507
## factor(order_state)MD    MD 2.5269797      12.515648
## factor(order_state)SC    SC 2.4633448      11.744028
## factor(order_state)CO    CO 2.2926179       9.900823
## factor(order_state)WA    WA 2.2812735       9.789139
## factor(order_state)AL    AL 2.2716069       9.694968
## factor(order_state)WI    WI 2.1636392       8.702751
## factor(order_state)LA    LA 2.1470089       8.559218
## factor(order_state)MO    MO 2.1411057       8.508840
## factor(order_state)OR    OR 1.9839674       7.271535
## factor(order_state)OK    OK 1.9100624       6.753510
## factor(order_state)KY    KY 1.9043215       6.714850
## factor(order_state)CT    CT 1.8830074       6.573244
## factor(order_state)MN    MN 1.8716728       6.499159
## factor(order_state)NV    NV 1.8284303       6.224109
## factor(order_state)MS    MS 1.6867391       5.401837
## factor(order_state)AR    AR 1.5663860       4.789308
## factor(order_state)UT    UT 1.5355531       4.643893
## factor(order_state)IA    IA 1.5024196       4.492546
## factor(order_state)NE    NE 1.4553189       4.285850
## factor(order_state)ID    ID 1.2703811       3.562210
## factor(order_state)KS    KS 1.2452127       3.473674
## factor(order_state)WV    WV 1.1776352       3.246687
## factor(order_state)DC    DC 1.0583313       2.881558
## factor(order_state)NM    NM 0.9733549       2.646809
## factor(order_state)DE    DE 0.9561501       2.601661
## factor(order_state)NH    NH 0.9013475       2.462920
## factor(order_state)ND    ND 0.8657258       2.376730
## factor(order_state)RI    RI 0.8655729       2.376367
## factor(order_state)ME    ME 0.6766550       1.967286
## factor(order_state)HI    HI 0.5411588       1.717997
## factor(order_state)SD    SD 0.4660799       1.593734
## factor(order_state)WY    WY 0.3927745       1.481084
## factor(order_state)MT    MT 0.3269524       1.386735
## factor(order_state)VT    VT 0.2400869       1.271360
#Top 10 states by Net Sales
state_summary <- df %>%
  group_by(order_state) %>%
  summarise(
    Net_Sales = sum(total, na.rm = TRUE),
    Units = sum(quantity, na.rm = TRUE),
    .groups = "drop"
  )

total_annual_sales <- sum(state_summary$Net_Sales)

state_summary <- state_summary %>%
  mutate(
    Share_of_Annual_Net_Sales = Net_Sales / total_annual_sales
  )

top10_states <- state_summary %>%
  arrange(desc(Net_Sales)) %>%
  slice(1:10)

top10_states <- top10_states %>%
  mutate(
    Net_Sales = round(Net_Sales, 2),
    Units = as.integer(Units),
    Share_of_Annual_Net_Sales = round(Share_of_Annual_Net_Sales * 100, 1),
    Share_of_Annual_Net_Sales = paste0(Share_of_Annual_Net_Sales, "%")
  ) %>%
  rename(State = order_state)

top10_states
## # A tibble: 10 × 4
##    State Net_Sales Units Share_of_Annual_Net_Sales
##    <chr>     <dbl> <int> <chr>                    
##  1 FL       14618.   938 11.8%                    
##  2 TX       13101.   780 10.6%                    
##  3 CA       11636.   799 9.4%                     
##  4 NY        7823.   540 6.3%                     
##  5 NJ        4965.   328 4%                       
##  6 GA        4557.   309 3.7%                     
##  7 NC        4529.   301 3.7%                     
##  8 MI        4502.   306 3.6%                     
##  9 IL        4441.   292 3.6%                     
## 10 OH        4131.   291 3.3%