# Dataset Summary
# Rows: 20000
# Columns: 20
# Includes:
#   - Transaction Amount
#   - Date/Time
#   - Sender/Receiver Banks
#   - City
#   - Gender
#   - Transaction Type/Status
#   - Device Type
#   - Merchant
#   - Purpose
#   - Age
#   - Payment Mode/Currency
######################################
# 1. Load Libraries ----
# Install only the packages that are not already present, so re-running the
# script does not reinstall everything on each run.
required_packages <- c(
  "readxl", "dplyr", "ggplot2", "plotly", "lubridate", "scales", "hms", "tidyr"
)
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# One library() call per line; the attach order is kept as in the original
# because it determines which package masks same-named functions.
library(readxl)
library(dplyr)
library(ggplot2)
library(plotly)
library(lubridate)
library(scales)
library(hms)
library(tidyr)
# 2. Import Dataset ----
# Read the workbook from the current working directory into `upi`.
# The path must use straight double quotes; typographic quotes are not valid R.
upi <- read_excel("UPI Transactions.xlsx")
# 3. Data Cleaning ----
# Inspect the structure and summary statistics of the raw import.
str(upi)
summary(upi)

# Coerce the date and time columns to proper Date / hms types.
upi$TransactionDate <- as.Date(upi$TransactionDate)
upi$TransactionTime <- as_hms(upi$TransactionTime)

# Drop fully duplicated rows.
upi <- distinct(upi)

# Report the number of missing values per column (rows are not removed here).
colSums(is.na(upi))
# 4. Feature Engineering ----
# Derive three helper columns used by the plots below:
#   Month    - month of TransactionDate as a labelled factor
#   Hour     - hour component of TransactionTime
#   AgeGroup - age bucket derived from CustomerAge
upi <- upi %>%
  mutate(
    Month = month(TransactionDate, label = TRUE),
    Hour = hour(TransactionTime),
    AgeGroup = case_when(
      # A missing age must stay missing; without this guard the comparisons
      # below yield NA and the row falls through to the "45+" catch-all.
      is.na(CustomerAge) ~ NA_character_,
      # Note: any age below 25 (including under 18) lands in this bucket.
      CustomerAge < 25 ~ "18-24",
      CustomerAge < 35 ~ "25-34",
      CustomerAge < 45 ~ "35-44",
      TRUE ~ "45+"
    )
  )
# Bar chart: number of transactions per month.
ggplot(upi, aes(Month)) +
  geom_bar(fill = "#1f77b4") +
  labs(
    title = "Monthly UPI Transaction Volume",
    x = "Month",
    y = "Number of Transactions"
  ) +
  theme_minimal()
# Histogram: distribution of transaction amounts across 30 bins.
ggplot(upi, aes(Amount)) +
  geom_histogram(fill = "#2ca02c", bins = 30) +
  labs(
    title = "Distribution of UPI Transaction Amounts",
    x = "Transaction Amount",
    y = "Frequency"
  ) +
  theme_minimal()
# Box plot: transaction amount for each payment mode.
ggplot(upi, aes(PaymentMode, Amount, fill = PaymentMode)) +
  geom_boxplot() +
  labs(
    title = "Transaction Amount by Payment Mode",
    x = "Payment Mode",
    y = "Amount"
  ) +
  theme_minimal()
# Line chart: number of transactions per hour of the day.
upi %>%
  count(Hour) %>%
  ggplot(aes(Hour, n)) +
  geom_line(color = "red", linewidth = 1.2) +
  geom_point() +
  labs(
    title = "Peak UPI Transaction Hours",
    x = "Hour of Day",
    y = "Transactions"
  ) +
  theme_minimal()
# Stacked horizontal bars: transaction counts per sending bank, split by status.
upi %>%
  count(BankNameSent, Status) %>%
  ggplot(aes(BankNameSent, n, fill = Status)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Transaction Status by Sending Bank",
    x = "Bank",
    y = "Transactions"
  ) +
  theme_minimal()
# Horizontal bars: the ten merchants with the highest summed Amount.
upi %>%
  group_by(MerchantName) %>%
  summarise(TotalRevenue = sum(Amount)) %>%
  # with_ties = FALSE keeps exactly 10 rows; the default would return extra
  # merchants when several tie at the cut-off, contradicting the "Top 10" title.
  slice_max(TotalRevenue, n = 10, with_ties = FALSE) %>%
  # reorder() sorts the bars by revenue instead of alphabetically.
  ggplot(aes(reorder(MerchantName, TotalRevenue), TotalRevenue)) +
  geom_col(fill = "purple") +
  coord_flip() +
  labs(
    title = "Top 10 Merchants by Revenue",
    x = "Merchant",
    y = "Revenue"
  ) +
  theme_minimal()
# Heatmap: transaction count for each City/Status combination.
upi %>%
  count(City, Status) %>%
  ggplot(aes(City, Status, fill = n)) +
  geom_tile() +
  labs(
    title = "Transaction Status by City",
    x = "City",
    y = "Status"
  ) +
  theme_minimal()
# Scatter plot of customer age against transaction amount, coloured by gender.
# The static ggplot is stored in `p` and then converted to an interactive
# plotly widget.
p <- ggplot(upi, aes(CustomerAge, Amount, color = Gender)) +
  geom_point(alpha = 0.5) +
  labs(
    title = "Interactive Age vs Transaction Amount",
    x = "Customer Age",
    y = "Transaction Amount"
  ) +
  theme_minimal()
ggplotly(p)
# Bar chart: mean transaction amount for each age group, one bar per group.
upi %>%
  group_by(AgeGroup) %>%
  summarise(AvgAmount = mean(Amount)) %>%
  ggplot(mapping = aes(x = AgeGroup, y = AvgAmount, fill = AgeGroup)) +
  geom_col()
# Unlabelled heatmap of transaction counts per City/Status pair.
ggplot(
  data = count(upi, City, Status),
  mapping = aes(x = City, y = Status, fill = n)
) +
  geom_tile()
# Print the summed Amount per merchant, sorted from highest to lowest.
upi %>%
  group_by(MerchantName) %>%
  summarise(TotalRevenue = sum(Amount)) %>%
  arrange(desc(TotalRevenue))