# load packages
install.packages("tidyverse")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# load dataset
digital_wallet <- read.csv("/cloud/project/Digital/digital_wallet_transactions.csv")
colnames(digital_wallet)
##  [1] "idx"                "transaction_id"     "user_id"           
##  [4] "transaction_date"   "product_category"   "product_name"      
##  [7] "merchant_name"      "product_amount"     "transaction_fee"   
## [10] "cashback"           "loyalty_points"     "payment_method"    
## [13] "transaction_status" "merchant_id"        "device_type"       
## [16] "location"
## total revenue generated by each product category over a given time period

library(dplyr)

# aggregate product amount per category over time
category_revenue <- digital_wallet %>%
  group_by (product_category,transaction_date)%>%
  summarise (total_revenue = sum(product_amount,na.rm = TRUE),.groups='drop')%>%
  arrange(desc(total_revenue))%>%
  slice(1:10)

# print results
print(category_revenue)
## # A tibble: 10 × 3
##    product_category  transaction_date total_revenue
##    <chr>             <chr>                    <dbl>
##  1 Flight Booking    2024-08-16 15:41         9997.
##  2 Gift Card         2023-11-19 13:40         9995.
##  3 Flight Booking    2024-06-19 23:36         9994.
##  4 Internet Bill     2024-07-20 15:29         9992.
##  5 Bus Ticket        2023-11-16 00:56         9992.
##  6 Education Fee     2024-04-05 12:43         9989.
##  7 Movie Ticket      2024-06-29 02:26         9987.
##  8 Streaming Service 2024-02-23 05:34         9983.
##  9 Rent Payment      2024-08-04 09:32         9980.
## 10 Food Delivery     2023-10-07 13:29         9980.
# How many txns are completed by device type, & what is the total revenue generated by each device type

install.packages ("dplyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(dplyr)

device_txs <- digital_wallet %>%
  group_by (device_type)%>%
  summarise(
    transaction_count = n(),
    total_fees = sum (transaction_fee),.groups='drop'
  )
# print results
print(device_txs)
## # A tibble: 3 × 3
##   device_type transaction_count total_fees
##   <chr>                   <int>      <dbl>
## 1 Android                  2995     75584.
## 2 Web                       479     12496.
## 3 iOS                      1526     37865.
# visualization

install.packages("ggplot2")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(ggplot2)

ggplot(data=device_txs, mapping=aes(x=device_type, y=total_fees, fill=transaction_count))+geom_col(color='black')+
  labs(title='Transactions completed by device Type')

 # Merchant that generates the highest transaction volume and what is the total revenue for each merchant?

install.packages("dplyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(dplyr)

top_merchant <- digital_wallet %>%
  group_by (merchant_name) %>%
  summarise (transaction_count = n()) %>%
  arrange(desc(transaction_count))%>%
  slice(1:10)

# print results
print(top_merchant)
## # A tibble: 10 × 2
##    merchant_name    transaction_count
##    <chr>                        <int>
##  1 Airbnb                         106
##  2 Flipkart                       102
##  3 MakeMyTrip                      85
##  4 Netflix                         72
##  5 Unacademy                       69
##  6 Mumbai Water                    66
##  7 Carnival Cinemas                65
##  8 Hotstar                         63
##  9 Max Life                        63
## 10 Udemy                           62
# visualization

install.packages("ggplot2")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(ggplot2)

ggplot(data=top_merchant, mapping=aes(x=merchant_name, y=transaction_count, fill=merchant_name))+
  geom_col(color='black') + labs (title='Transaction volume by Merchant')

# % of transactions that result in cashback or loyalty points, and how it varies by product category

txn_category <- digital_wallet %>%
  group_by(product_category)%>%
  summarise(
    transaction_count=n(),
    cash_back_total=sum(cashback),
    loyalty_total=sum(loyalty_points),.groups='drop'
  ) %>%
  arrange(desc(transaction_count))
print(txn_category)
## # A tibble: 20 × 4
##    product_category  transaction_count cash_back_total loyalty_total
##    <chr>                         <int>           <dbl>         <int>
##  1 Streaming Service               299          15352.        154187
##  2 Education Fee                   286          14380.        135073
##  3 Hotel Booking                   274          13641.        138772
##  4 Water Bill                      273          13821.        137217
##  5 Movie Ticket                    272          13814.        127701
##  6 Food Delivery                   259          13578.        130077
##  7 Taxi Fare                       256          13252.        131527
##  8 Electricity Bill                252          13203.        127312
##  9 Rent Payment                    251          12996.        126527
## 10 Gas Bill                        250          12183.        119866
## 11 Loan Repayment                  245          12005.        125202
## 12 Online Shopping                 243          11996.        120849
## 13 Mobile Recharge                 241          12222.        119676
## 14 Grocery Shopping                238          12093.        119898
## 15 Bus Ticket                      235          11850.        115844
## 16 Internet Bill                   233          11751.        111868
## 17 Gaming Credits                  231          11212.        118792
## 18 Insurance Premium               225          11089.        116995
## 19 Gift Card                       221          11551.        104768
## 20 Flight Booking                  216          11306.        111801
# What is the distribution of transaction statuses across different product categories?

txn_distribution <- digital_wallet %>%
  group_by (product_category,transaction_status )%>%
  summarise(status_count = n(),.groups='drop')%>%
  slice(1:20)

# print results
print(txn_distribution)
## # A tibble: 20 × 3
##    product_category transaction_status status_count
##    <chr>            <chr>                     <int>
##  1 Bus Ticket       Failed                        5
##  2 Bus Ticket       Pending                       3
##  3 Bus Ticket       Successful                  227
##  4 Education Fee    Failed                       10
##  5 Education Fee    Successful                  276
##  6 Electricity Bill Failed                       10
##  7 Electricity Bill Pending                       2
##  8 Electricity Bill Successful                  240
##  9 Flight Booking   Failed                        6
## 10 Flight Booking   Pending                       3
## 11 Flight Booking   Successful                  207
## 12 Food Delivery    Failed                        6
## 13 Food Delivery    Pending                       4
## 14 Food Delivery    Successful                  249
## 15 Gaming Credits   Failed                       11
## 16 Gaming Credits   Pending                       6
## 17 Gaming Credits   Successful                  214
## 18 Gas Bill         Failed                        5
## 19 Gas Bill         Pending                       6
## 20 Gas Bill         Successful                  239
# How do transaction volumes vary by location?

city_txns <- digital_wallet %>%
  group_by (location)%>%
  summarise(transaction_count = n(),.groups='drop')%>%
  arrange(desc(transaction_count))%>%
  slice(1:10)

#print results
print (city_txns)
## # A tibble: 3 × 2
##   location transaction_count
##   <chr>                <int>
## 1 Urban                 3485
## 2 Suburban              1017
## 3 Rural                  498
# visualization
ggplot(data=city_txns, mapping=aes(x=location, y=transaction_count, fill=location))+geom_col(color='black')

# Who is the most active merchant in terms of product distribution

top_merchant <- digital_wallet %>%
  group_by (merchant_name, product_name)%>%
  summarise(product_count = n(),.groups='drop')%>%
  arrange(desc(product_count))%>%
  slice(1:10)

# print results
print (top_merchant)
## # A tibble: 10 × 3
##    merchant_name    product_name              product_count
##    <chr>            <chr>                             <int>
##  1 Netflix          Basic Plan                           21
##  2 Coursera         Python Programming Course            20
##  3 Cinepolis        PK                                   19
##  4 BSNL             Postpaid Plan 599                    18
##  5 Carnival Cinemas Baahubali 2                          18
##  6 Flipkart         Laptop Backpack                      18
##  7 Jugnoo           Share Ride                           18
##  8 Unacademy        Python Programming Course            18
##  9 Airbnb           Villa                                17
## 10 Cinepolis        3 Idiots                             17
# visualization
ggplot(data=top_merchant, mapping=aes(x=product_name, y=product_count, fill=merchant_name))+
  geom_col(color='black')+labs(title='Top merchants by product distribution') +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))