library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(ggplot2)
library(scales)
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
library(readr)
# File paths
file_paths <- c("D:/office/DATASETS/data (do not edit)/NOV.csv", 
                "D:/office/DATASETS/data (do not edit)/DEC.csv",
                "D:/office/DATASETS/data (do not edit)/JAN.csv",
                "D:/office/DATASETS/data (do not edit)/FEB.csv")
all_data <- file_paths %>%
  map_dfr(read_csv)  
## Rows: 76 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): DATE, NAME, ADDRESS, COLOUR
## dbl (7): SR NO, QUANTITY, GI, BASIC AMOUNT, CARTAGE, GST, TOTAL AMOUNT
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 54 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): DATE, NAME, ADDRESS, COLOUR
## dbl (7): SR NO, QUANTITY, GI, BASIC AMOUNT, CARTAGE, GST, TOTAL AMOUNT
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 71 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): DATE, NAME, ADDRESS, COLOUR, DEAL
## dbl (8): SR NO, QUANTITY, GI, RATE, BASIC AMOUNT, CARTAGE, GST, TOTAL AMOUNT
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 38 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): DATE, NAME, ADDRESS, COLOUR, DEAL BY
## dbl (8): SR NO, QUANTITY, GI, RATE, BASIC AMOUNT, CARTAGE, GST, TOTAL AMOUNT
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(all_data)
## # A tibble: 6 × 14
##   `SR NO` DATE      NAME    ADDRESS COLOUR QUANTITY    GI `BASIC AMOUNT` CARTAGE
##     <dbl> <chr>     <chr>   <chr>   <chr>     <dbl> <dbl>          <dbl>   <dbl>
## 1       1 5.11.2024 ALU TE… UP      WALNUT    351.     NA          95634      NA
## 2       2 5.11.2024 ECO SU… HR      WHITE     430.     NA          55853      NA
## 3       3 6.11.2024 SHREE … RJ      WALNUT    615.     NA         209046      NA
## 4       4 6.11.2024 JN GRO… UP      WHITE    1074.     NA         150375      NA
## 5       5 7.11.2024 CHAND … HR      WHITE     159.     NA          22620      NA
## 6       6 7.11.2024 KHAN F… UK      WHITE      63.3    NA           8867      NA
## # ℹ 5 more variables: GST <dbl>, `TOTAL AMOUNT` <dbl>, DEAL <chr>, RATE <dbl>,
## #   `DEAL BY` <chr>
all_data$DATE <- dmy(all_data$DATE)


all_data$`TOTAL AMOUNT` <- as.integer(all_data$`TOTAL AMOUNT`)
summary_data <- all_data %>%
  group_by(ADDRESS) %>%
  summarise(TOTAL_AMOUNT = sum(`TOTAL AMOUNT`, na.rm = TRUE)) %>%
  arrange(desc(TOTAL_AMOUNT))
ggplot(summary_data, aes(x = reorder(ADDRESS, -TOTAL_AMOUNT), y = TOTAL_AMOUNT, fill = ADDRESS)) +
  geom_col(show.legend = FALSE) +  
  labs(title = "Total Amount by Address (All Months Combined)", 
       x = "Address", 
       y = "Total Amount") +
  scale_y_continuous(labels = comma) + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_fill_viridis_d() 

top_3_states <- summary_data %>% slice_head(n = 3)
# Display results
print("Top 3 States with Highest AMOUNT:")
## [1] "Top 3 States with Highest AMOUNT:"
print(top_3_states)
## # A tibble: 3 × 2
##   ADDRESS TOTAL_AMOUNT
##   <chr>          <int>
## 1 UP          15858841
## 2 RJ           5647055
## 3 BH           5274292
state_summary <- all_data %>%
  group_by(ADDRESS) %>%
  summarise(TOTAL_AMOUNT = sum(`TOTAL AMOUNT`, na.rm = TRUE)) %>%
  arrange(TOTAL_AMOUNT)  # Ascending order

bottom_3_states <- state_summary %>% slice_head(n = 3)

# Display results
print("Bottom 3 States with Lowest AMOUNT:")
## [1] "Bottom 3 States with Lowest AMOUNT:"
print(bottom_3_states)
## # A tibble: 3 × 2
##   ADDRESS TOTAL_AMOUNT
##   <chr>          <int>
## 1 MH             91181
## 2 MP            235664
## 3 WB            524942
#QUANTITY
all_data$`QUANTITY` <- as.integer(all_data$`QUANTITY`)


summary_data <- all_data %>%
  group_by(ADDRESS) %>%
  summarise(QUANTITY = sum(`QUANTITY`, na.rm = TRUE)) %>%
  arrange(desc(QUANTITY))

ggplot(summary_data, aes(x = reorder(ADDRESS, -QUANTITY), y = QUANTITY, fill = ADDRESS)) +
  geom_col(show.legend = FALSE) +  
  labs(title = "Total QUANTITY by Address (All Months Combined)", 
       x = "Address", 
       y = "QUANTITY") +
  scale_y_continuous(labels = comma) + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_fill_viridis_d() 

# Calculate total quantity per state
state_summary <- all_data %>%
  group_by(ADDRESS) %>%
  summarise(Total_Quantity = sum(QUANTITY, na.rm = TRUE)) %>%
  arrange(desc(Total_Quantity))

# Find the highest and lowest quantity state
highest_quantity_state <- state_summary %>% slice(1)
lowest_quantity_state <- state_summary %>% slice(n())
# Display results
print("State with Highest Quantity:")
## [1] "State with Highest Quantity:"
print(highest_quantity_state)
## # A tibble: 1 × 2
##   ADDRESS Total_Quantity
##   <chr>            <int>
## 1 UP               96953
print("State with Lowest Quantity:")
## [1] "State with Lowest Quantity:"
print(lowest_quantity_state)
## # A tibble: 1 × 2
##   ADDRESS Total_Quantity
##   <chr>            <int>
## 1 MH                 220
# Find the top 3 highest quantity states
top_3_states <- state_summary %>% slice_head(n = 3)
# Display results
print("Top 3 States with Highest Quantity:")
## [1] "Top 3 States with Highest Quantity:"
print(top_3_states)
## # A tibble: 3 × 2
##   ADDRESS Total_Quantity
##   <chr>            <int>
## 1 UP               96953
## 2 BH               36852
## 3 RJ               35912
state_summary <- all_data %>%
  group_by(ADDRESS) %>%
  summarise(Total_Quantity = sum(QUANTITY, na.rm = TRUE)) %>%
  arrange(Total_Quantity)  # Ascending order

bottom_3_states <- state_summary %>% slice_head(n = 3)

# Display results
print("Bottom 3 States with Lowest Quantity:")
## [1] "Bottom 3 States with Lowest Quantity:"
print(bottom_3_states)
## # A tibble: 3 × 2
##   ADDRESS Total_Quantity
##   <chr>            <int>
## 1 MH                 220
## 2 MP                1339
## 3 WB                1610
all_data$DATE <- as.Date(all_data$DATE, format="%Y-%m-%d")

monthly_dispatch <- all_data %>%
  mutate(YearMonth = format(DATE, "%Y-%m")) %>%
  group_by(YearMonth) %>%
  summarise(
    Total_Quantity = sum(QUANTITY, na.rm = TRUE),
    Total_Amount = sum(`TOTAL AMOUNT`, na.rm = TRUE)
  ) %>%
  arrange(YearMonth)


# Calculate month-over-month growth
monthly_dispatch <- monthly_dispatch %>%
  mutate(
    Quantity_Growth = (Total_Quantity / lag(Total_Quantity) - 1) * 100,
    Revenue_Growth = (Total_Amount / lag(Total_Amount) - 1) * 100
  )
# Print results
print(monthly_dispatch)
## # A tibble: 4 × 5
##   YearMonth Total_Quantity Total_Amount Quantity_Growth Revenue_Growth
##   <chr>              <int>        <int>           <dbl>          <dbl>
## 1 2024-11            65539     11024210           NA             NA   
## 2 2024-12            63761     10851635           -2.71          -1.57
## 3 2025-01            69982     11519689            9.76           6.16
## 4 2025-02            38501      6112255          -45.0          -46.9
# Plot Monthly Dispatch Trend (Quantity)
ggplot(monthly_dispatch, aes(x = YearMonth, y = Total_Quantity, group = 1)) +
  geom_line(color = "blue", size = 1) + 
  geom_point(color = "red", size = 3) +
  labs(title = "Monthly Dispatch Trend", x = "Month", y = "Total Quantity") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Plot Monthly Revenue Trend (Total Amount) with Adjusted Scale
ggplot(monthly_dispatch, aes(x = YearMonth, y = Total_Amount, group = 1)) +
  geom_line(color = "purple", size = 1) + 
  geom_point(color = "orange", size = 3) +
  scale_y_continuous(labels = scales::comma, limits = c(min(monthly_dispatch$Total_Amount) * 0.9, 
                                                        max(monthly_dispatch$Total_Amount) * 1.1)) +
  labs(title = "Monthly Revenue Trend", x = "Month", y = "Total Amount (₹)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))