library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(ggplot2)
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
library(readr)
# File paths
file_paths <- c("D:/office/DATASETS/data (do not edit)/NOV.csv",
"D:/office/DATASETS/data (do not edit)/DEC.csv",
"D:/office/DATASETS/data (do not edit)/JAN.csv",
"D:/office/DATASETS/data (do not edit)/FEB.csv")
all_data <- file_paths %>%
map_dfr(read_csv)
## Rows: 76 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): DATE, NAME, ADDRESS, COLOUR
## dbl (7): SR NO, QUANTITY, GI, BASIC AMOUNT, CARTAGE, GST, TOTAL AMOUNT
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 54 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): DATE, NAME, ADDRESS, COLOUR
## dbl (7): SR NO, QUANTITY, GI, BASIC AMOUNT, CARTAGE, GST, TOTAL AMOUNT
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 71 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): DATE, NAME, ADDRESS, COLOUR, DEAL
## dbl (8): SR NO, QUANTITY, GI, RATE, BASIC AMOUNT, CARTAGE, GST, TOTAL AMOUNT
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 38 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): DATE, NAME, ADDRESS, COLOUR, DEAL BY
## dbl (8): SR NO, QUANTITY, GI, RATE, BASIC AMOUNT, CARTAGE, GST, TOTAL AMOUNT
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(all_data)
## # A tibble: 6 × 14
## `SR NO` DATE NAME ADDRESS COLOUR QUANTITY GI `BASIC AMOUNT` CARTAGE
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 5.11.2024 ALU TE… UP WALNUT 351. NA 95634 NA
## 2 2 5.11.2024 ECO SU… HR WHITE 430. NA 55853 NA
## 3 3 6.11.2024 SHREE … RJ WALNUT 615. NA 209046 NA
## 4 4 6.11.2024 JN GRO… UP WHITE 1074. NA 150375 NA
## 5 5 7.11.2024 CHAND … HR WHITE 159. NA 22620 NA
## 6 6 7.11.2024 KHAN F… UK WHITE 63.3 NA 8867 NA
## # ℹ 5 more variables: GST <dbl>, `TOTAL AMOUNT` <dbl>, DEAL <chr>, RATE <dbl>,
## # `DEAL BY` <chr>
all_data$DATE <- dmy(all_data$DATE)
all_data$`TOTAL AMOUNT` <- as.integer(all_data$`TOTAL AMOUNT`)
summary_data <- all_data %>%
group_by(ADDRESS) %>%
summarise(TOTAL_AMOUNT = sum(`TOTAL AMOUNT`, na.rm = TRUE)) %>%
arrange(desc(TOTAL_AMOUNT))
ggplot(summary_data, aes(x = reorder(ADDRESS, -TOTAL_AMOUNT), y = TOTAL_AMOUNT, fill = ADDRESS)) +
geom_col(show.legend = FALSE) +
labs(title = "Total Amount by Address (All Months Combined)",
x = "Address",
y = "Total Amount") +
scale_y_continuous(labels = comma) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_fill_viridis_d()

top_3_states <- summary_data %>% slice_head(n = 3)
# Display results
print("Top 3 States with Highest AMOUNT:")
## [1] "Top 3 States with Highest AMOUNT:"
print(top_3_states)
## # A tibble: 3 × 2
## ADDRESS TOTAL_AMOUNT
## <chr> <int>
## 1 UP 15858841
## 2 RJ 5647055
## 3 BH 5274292
state_summary <- all_data %>%
group_by(ADDRESS) %>%
summarise(TOTAL_AMOUNT = sum(`TOTAL AMOUNT`, na.rm = TRUE)) %>%
arrange(TOTAL_AMOUNT) # Ascending order
bottom_3_states <- state_summary %>% slice_head(n = 3)
# Display results
print("Bottom 3 States with Lowest AMOUNT:")
## [1] "Bottom 3 States with Lowest AMOUNT:"
print(bottom_3_states)
## # A tibble: 3 × 2
## ADDRESS TOTAL_AMOUNT
## <chr> <int>
## 1 MH 91181
## 2 MP 235664
## 3 WB 524942
#QUANTITY
all_data$`QUANTITY` <- as.integer(all_data$`QUANTITY`)
summary_data <- all_data %>%
group_by(ADDRESS) %>%
summarise(QUANTITY = sum(`QUANTITY`, na.rm = TRUE)) %>%
arrange(desc(QUANTITY))
ggplot(summary_data, aes(x = reorder(ADDRESS, -QUANTITY), y = QUANTITY, fill = ADDRESS)) +
geom_col(show.legend = FALSE) +
labs(title = "Total QUANTITY by Address (All Months Combined)",
x = "Address",
y = "QUANTITY") +
scale_y_continuous(labels = comma) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_fill_viridis_d()

# Calculate total quantity per state
state_summary <- all_data %>%
group_by(ADDRESS) %>%
summarise(Total_Quantity = sum(QUANTITY, na.rm = TRUE)) %>%
arrange(desc(Total_Quantity))
# Find the highest and lowest quantity state
highest_quantity_state <- state_summary %>% slice(1)
lowest_quantity_state <- state_summary %>% slice(n())
# Display results
print("State with Highest Quantity:")
## [1] "State with Highest Quantity:"
print(highest_quantity_state)
## # A tibble: 1 × 2
## ADDRESS Total_Quantity
## <chr> <int>
## 1 UP 96953
print("State with Lowest Quantity:")
## [1] "State with Lowest Quantity:"
print(lowest_quantity_state)
## # A tibble: 1 × 2
## ADDRESS Total_Quantity
## <chr> <int>
## 1 MH 220
# Find the top 3 highest quantity states
top_3_states <- state_summary %>% slice_head(n = 3)
# Display results
print("Top 3 States with Highest Quantity:")
## [1] "Top 3 States with Highest Quantity:"
print(top_3_states)
## # A tibble: 3 × 2
## ADDRESS Total_Quantity
## <chr> <int>
## 1 UP 96953
## 2 BH 36852
## 3 RJ 35912
state_summary <- all_data %>%
group_by(ADDRESS) %>%
summarise(Total_Quantity = sum(QUANTITY, na.rm = TRUE)) %>%
arrange(Total_Quantity) # Ascending order
bottom_3_states <- state_summary %>% slice_head(n = 3)
# Display results
print("Bottom 3 States with Lowest Quantity:")
## [1] "Bottom 3 States with Lowest Quantity:"
print(bottom_3_states)
## # A tibble: 3 × 2
## ADDRESS Total_Quantity
## <chr> <int>
## 1 MH 220
## 2 MP 1339
## 3 WB 1610
all_data$DATE <- as.Date(all_data$DATE, format="%Y-%m-%d")
monthly_dispatch <- all_data %>%
mutate(YearMonth = format(DATE, "%Y-%m")) %>%
group_by(YearMonth) %>%
summarise(
Total_Quantity = sum(QUANTITY, na.rm = TRUE),
Total_Amount = sum(`TOTAL AMOUNT`, na.rm = TRUE)
) %>%
arrange(YearMonth)
# Calculate month-over-month growth
monthly_dispatch <- monthly_dispatch %>%
mutate(
Quantity_Growth = (Total_Quantity / lag(Total_Quantity) - 1) * 100,
Revenue_Growth = (Total_Amount / lag(Total_Amount) - 1) * 100
)
# Print results
print(monthly_dispatch)
## # A tibble: 4 × 5
## YearMonth Total_Quantity Total_Amount Quantity_Growth Revenue_Growth
## <chr> <int> <int> <dbl> <dbl>
## 1 2024-11 65539 11024210 NA NA
## 2 2024-12 63761 10851635 -2.71 -1.57
## 3 2025-01 69982 11519689 9.76 6.16
## 4 2025-02 38501 6112255 -45.0 -46.9
# Plot Monthly Dispatch Trend (Quantity)
ggplot(monthly_dispatch, aes(x = YearMonth, y = Total_Quantity, group = 1)) +
geom_line(color = "blue", size = 1) +
geom_point(color = "red", size = 3) +
labs(title = "Monthly Dispatch Trend", x = "Month", y = "Total Quantity") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Plot Monthly Revenue Trend (Total Amount) with Adjusted Scale
ggplot(monthly_dispatch, aes(x = YearMonth, y = Total_Amount, group = 1)) +
geom_line(color = "purple", size = 1) +
geom_point(color = "orange", size = 3) +
scale_y_continuous(labels = scales::comma, limits = c(min(monthly_dispatch$Total_Amount) * 0.9,
max(monthly_dispatch$Total_Amount) * 1.1)) +
labs(title = "Monthly Revenue Trend", x = "Month", y = "Total Amount (₹)") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
