# load packages
install.packages("tidyverse")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# load dataset
digital_wallet <- read.csv("/cloud/project/Digital/digital_wallet_transactions.csv")
colnames(digital_wallet)
## [1] "idx" "transaction_id" "user_id"
## [4] "transaction_date" "product_category" "product_name"
## [7] "merchant_name" "product_amount" "transaction_fee"
## [10] "cashback" "loyalty_points" "payment_method"
## [13] "transaction_status" "merchant_id" "device_type"
## [16] "location"
## total revenue generated by each product category over a given time period
library(dplyr)
# aggregate product amount per category over time
category_revenue <- digital_wallet %>%
group_by (product_category,transaction_date)%>%
summarise (total_revenue = sum(product_amount,na.rm = TRUE),.groups='drop')%>%
arrange(desc(total_revenue))%>%
slice(1:10)
# print results
print(category_revenue)
## # A tibble: 10 × 3
## product_category transaction_date total_revenue
## <chr> <chr> <dbl>
## 1 Flight Booking 2024-08-16 15:41 9997.
## 2 Gift Card 2023-11-19 13:40 9995.
## 3 Flight Booking 2024-06-19 23:36 9994.
## 4 Internet Bill 2024-07-20 15:29 9992.
## 5 Bus Ticket 2023-11-16 00:56 9992.
## 6 Education Fee 2024-04-05 12:43 9989.
## 7 Movie Ticket 2024-06-29 02:26 9987.
## 8 Streaming Service 2024-02-23 05:34 9983.
## 9 Rent Payment 2024-08-04 09:32 9980.
## 10 Food Delivery 2023-10-07 13:29 9980.
# How many txns are completed by device type, & what is the total revenue generated by each device type
install.packages ("dplyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(dplyr)
device_txs <- digital_wallet %>%
group_by (device_type)%>%
summarise(
transaction_count = n(),
total_fees = sum (transaction_fee),.groups='drop'
)
# print results
print(device_txs)
## # A tibble: 3 × 3
## device_type transaction_count total_fees
## <chr> <int> <dbl>
## 1 Android 2995 75584.
## 2 Web 479 12496.
## 3 iOS 1526 37865.
# visualization
install.packages("ggplot2")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(ggplot2)
ggplot(data=device_txs, mapping=aes(x=device_type, y=total_fees, fill=transaction_count))+geom_col(color='black')+
labs(title='Transactions completed by device Type')

# Merchant that generates the highest transaction volume and what is the total revenue for each merchant?
install.packages("dplyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(dplyr)
top_merchant <- digital_wallet %>%
group_by (merchant_name) %>%
summarise (transaction_count = n()) %>%
arrange(desc(transaction_count))%>%
slice(1:10)
# print results
print(top_merchant)
## # A tibble: 10 × 2
## merchant_name transaction_count
## <chr> <int>
## 1 Airbnb 106
## 2 Flipkart 102
## 3 MakeMyTrip 85
## 4 Netflix 72
## 5 Unacademy 69
## 6 Mumbai Water 66
## 7 Carnival Cinemas 65
## 8 Hotstar 63
## 9 Max Life 63
## 10 Udemy 62
# visualization
install.packages("ggplot2")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(ggplot2)
ggplot(data=top_merchant, mapping=aes(x=merchant_name, y=transaction_count, fill=merchant_name))+
geom_col(color='black') + labs (title='Transaction volume by Merchant')

# % of transactions that result in cashback or loyalty points, and how it varies by product category
txn_category <- digital_wallet %>%
group_by(product_category)%>%
summarise(
transaction_count=n(),
cash_back_total=sum(cashback),
loyalty_total=sum(loyalty_points),.groups='drop'
) %>%
arrange(desc(transaction_count))
print(txn_category)
## # A tibble: 20 × 4
## product_category transaction_count cash_back_total loyalty_total
## <chr> <int> <dbl> <int>
## 1 Streaming Service 299 15352. 154187
## 2 Education Fee 286 14380. 135073
## 3 Hotel Booking 274 13641. 138772
## 4 Water Bill 273 13821. 137217
## 5 Movie Ticket 272 13814. 127701
## 6 Food Delivery 259 13578. 130077
## 7 Taxi Fare 256 13252. 131527
## 8 Electricity Bill 252 13203. 127312
## 9 Rent Payment 251 12996. 126527
## 10 Gas Bill 250 12183. 119866
## 11 Loan Repayment 245 12005. 125202
## 12 Online Shopping 243 11996. 120849
## 13 Mobile Recharge 241 12222. 119676
## 14 Grocery Shopping 238 12093. 119898
## 15 Bus Ticket 235 11850. 115844
## 16 Internet Bill 233 11751. 111868
## 17 Gaming Credits 231 11212. 118792
## 18 Insurance Premium 225 11089. 116995
## 19 Gift Card 221 11551. 104768
## 20 Flight Booking 216 11306. 111801
# What is the distribution of transaction statuses across different product categories?
txn_distribution <- digital_wallet %>%
group_by (product_category,transaction_status )%>%
summarise(status_count = n(),.groups='drop')%>%
slice(1:20)
# print results
print(txn_distribution)
## # A tibble: 20 × 3
## product_category transaction_status status_count
## <chr> <chr> <int>
## 1 Bus Ticket Failed 5
## 2 Bus Ticket Pending 3
## 3 Bus Ticket Successful 227
## 4 Education Fee Failed 10
## 5 Education Fee Successful 276
## 6 Electricity Bill Failed 10
## 7 Electricity Bill Pending 2
## 8 Electricity Bill Successful 240
## 9 Flight Booking Failed 6
## 10 Flight Booking Pending 3
## 11 Flight Booking Successful 207
## 12 Food Delivery Failed 6
## 13 Food Delivery Pending 4
## 14 Food Delivery Successful 249
## 15 Gaming Credits Failed 11
## 16 Gaming Credits Pending 6
## 17 Gaming Credits Successful 214
## 18 Gas Bill Failed 5
## 19 Gas Bill Pending 6
## 20 Gas Bill Successful 239
# How do transaction volumes vary by location?
city_txns <- digital_wallet %>%
group_by (location)%>%
summarise(transaction_count = n(),.groups='drop')%>%
arrange(desc(transaction_count))%>%
slice(1:10)
#print results
print (city_txns)
## # A tibble: 3 × 2
## location transaction_count
## <chr> <int>
## 1 Urban 3485
## 2 Suburban 1017
## 3 Rural 498
# visualization
ggplot(data=city_txns, mapping=aes(x=location, y=transaction_count, fill=location))+geom_col(color='black')

# Who is the most active merchant in terms of product distribution
top_merchant <- digital_wallet %>%
group_by (merchant_name, product_name)%>%
summarise(product_count = n(),.groups='drop')%>%
arrange(desc(product_count))%>%
slice(1:10)
# print results
print (top_merchant)
## # A tibble: 10 × 3
## merchant_name product_name product_count
## <chr> <chr> <int>
## 1 Netflix Basic Plan 21
## 2 Coursera Python Programming Course 20
## 3 Cinepolis PK 19
## 4 BSNL Postpaid Plan 599 18
## 5 Carnival Cinemas Baahubali 2 18
## 6 Flipkart Laptop Backpack 18
## 7 Jugnoo Share Ride 18
## 8 Unacademy Python Programming Course 18
## 9 Airbnb Villa 17
## 10 Cinepolis 3 Idiots 17
# visualization
ggplot(data=top_merchant, mapping=aes(x=product_name, y=product_count, fill=merchant_name))+
geom_col(color='black')+labs(title='Top merchants by product distribution') +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
