R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Load libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)

# Load data
df <- read_csv("superstore.csv")
## Rows: 9994 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (16): Order ID, Order Date, Ship Date, Ship Mode, Customer ID, Customer ...
## dbl  (5): Row ID, Sales, Quantity, Discount, Profit
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Quick check of column names
colnames(df)
##  [1] "Row ID"        "Order ID"      "Order Date"    "Ship Date"    
##  [5] "Ship Mode"     "Customer ID"   "Customer Name" "Segment"      
##  [9] "Country"       "City"          "State"         "Postal Code"  
## [13] "Region"        "Product ID"    "Category"      "Sub-Category" 
## [17] "Product Name"  "Sales"         "Quantity"      "Discount"     
## [21] "Profit"
# Create the Risk Column
df_audit <- df %>%
  mutate(Transaction_Risk = case_when(
    Discount > 0.2 & Profit < 0 ~ "High Risk: Leakage",
    TRUE ~ "Normal"
  ))

# Count how many high risk transactions found
table(df_audit$Transaction_Risk)
## 
## High Risk: Leakage             Normal 
##               1348               8646
# Visualize the Leakage
ggplot(data = df_audit, aes(x = Discount, y = Profit, color = Transaction_Risk)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black") + # Zero profit line
  labs(
    title = "Audit Finding: Discount Abuse Driving Revenue Leakage",
    subtitle = "Transactions with >20% discount consistently result in negative profit",
    x = "Discount Applied",
    y = "Profit ($)"
  ) +
  theme_minimal()