title: “Central Clinic Financial Analysis Report” author: “Oyan Naivest” date: “2025-01-26” output: flexdashboard::flex_dashboard: orientation: rows vertical_layout: fill toc: true # Table of Contents toc_float: true # Floating Table of Contents # Set global options for the document knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE) —

Introduction

This report presents an analysis of the Central Clinic’s financial data for the year 2024. The dataset includes daily revenue and expenses for both day and night operations. The goal of this analysis is to assess the clinic’s financial health, identify trends, and provide actionable insights to improve operational efficiency.

Data Overview

The dataset contains the following columns:

Preview of the data:

# Install and load required packages
if (!require(pacman)) install.packages("pacman")
## Loading required package: pacman
pacman::p_load(
  readxl,     # For reading Excel files
  dplyr,      # For data manipulation
  ggplot2,    # For data visualization
  lubridate,  # For date handling
  here,       # For file path management
  knitr,      # For knitting the report
  rmarkdown,   # For rendering R Markdown documents
  tidyverse,
  flextable,
  flexdashboard,
  janitor,
  esquisse,
  plotly,
  viridis,
  tinytex
)
# Correct path to the data folder and file
file_path <- here::here("data", "CASHFLOWS.xlsx")

# Verify the file exists
if (!file.exists(file_path)) {
  stop(paste("File does not exist at:", file_path))
}

# Import the dataset
clinic_data <- read_excel(file_path)

# Preview the data
head(clinic_data)
## # A tibble: 6 × 13
##   Date                `D-Received` `D-Expenses` `D-Profit After`
##   <dttm>                     <dbl>        <dbl>            <dbl>
## 1 2024-01-01 00:00:00        32500        36504             4050
## 2 2024-01-02 00:00:00        22000        43108             1200
## 3 2024-01-03 00:00:00        28250        31152             1900
## 4 2024-01-04 00:00:00        33300        31871             1800
## 5 2024-01-05 00:00:00        23400        28224             5704
## 6 2024-01-06 00:00:00        21350        20415              850
## # ℹ 9 more variables: `D-Accounts Held` <dbl>, `N-Received` <dbl>,
## #   `N-Expenses` <dbl>, `N-Profit After` <dbl>, `N-Accounts Held` <dbl>,
## #   `Total-Received` <dbl>, `Total-Expenses` <dbl>, `Total-Profit After` <dbl>,
## #   `Total-Accounts Held` <dbl>
#cleaning
colnames(clinic_data)
##  [1] "Date"                "D-Received"          "D-Expenses"         
##  [4] "D-Profit After"      "D-Accounts Held"     "N-Received"         
##  [7] "N-Expenses"          "N-Profit After"      "N-Accounts Held"    
## [10] "Total-Received"      "Total-Expenses"      "Total-Profit After" 
## [13] "Total-Accounts Held"
clinic_data <- clinic_data %>%
  janitor::clean_names()  # Cleans column names like D-Received to d_received

clinic_data <- clinic_data %>%
  mutate(
    d_received = as.numeric(d_received),
    d_expenses = as.numeric(d_expenses),
    n_received = as.numeric(n_received),
    n_expenses = as.numeric(n_expenses)
  )
# Summary statistics for day and night business
summary(clinic_data[, c("d_received", "d_expenses", "d_profit_after", "n_received", "n_expenses", "n_profit_after")])
##    d_received      d_expenses    d_profit_after    n_received   
##  Min.   : 4250   Min.   : 2164   Min.   :    0   Min.   :    0  
##  1st Qu.:16413   1st Qu.:18369   1st Qu.:  300   1st Qu.: 4962  
##  Median :20650   Median :23473   Median : 1390   Median : 6900  
##  Mean   :21252   Mean   :23750   Mean   : 2077   Mean   : 7377  
##  3rd Qu.:25588   3rd Qu.:28405   3rd Qu.: 3138   3rd Qu.: 9358  
##  Max.   :46750   Max.   :47893   Max.   :17461   Max.   :33250  
##    n_expenses    n_profit_after 
##  Min.   :    0   Min.   :    0  
##  1st Qu.: 4150   1st Qu.:    0  
##  Median : 5950   Median :  300  
##  Mean   : 6575   Mean   : 1282  
##  3rd Qu.: 8434   3rd Qu.: 1574  
##  Max.   :28883   Max.   :15600
# Check the range of dates
range(clinic_data$date)
## [1] "2024-01-01 UTC" "2024-12-31 UTC"
# Summarize total day and night revenue, expenses, and profit for the entire year
day_night_summary <- clinic_data %>%
  summarize(
    total_day_revenue = sum(d_received, na.rm = TRUE),
    total_night_revenue = sum(n_received, na.rm = TRUE),
    total_day_expenses = sum(d_expenses, na.rm = TRUE),
    total_night_expenses = sum(n_expenses, na.rm = TRUE),
    total_day_profit = sum(d_profit_after, na.rm = TRUE),
    total_night_profit = sum(n_profit_after, na.rm = TRUE)
  )

# Create a comparison plot for revenue, expenses, and profit
library(ggplot2)

comparison_plot <- clinic_data %>%
  gather(key = "category", value = "value", d_received, n_received, d_expenses, n_expenses, d_profit_after, n_profit_after) %>%
  mutate(
    category = factor(category, levels = c("d_received", "n_received", "d_expenses", "n_expenses", "d_profit_after", "n_profit_after"))
  ) %>%
  ggplot(aes(x = date, y = value, color = category)) +
  geom_line() +
  labs(title = "Comparison of Day and Night Revenue, Expenses, and Profit",
       x = "Date", y = "Amount (KES)") +
  theme_minimal()

print(comparison_plot)

# Calculate monthly sums for day and night business
clinic_data$date <- as.Date(clinic_data$date)  # Ensure the date is in the correct format
clinic_data$month <- format(clinic_data$date, "%Y-%m")

monthly_summary <- clinic_data %>%
  group_by(month) %>%
  summarize(
    month_day_revenue = sum(d_received, na.rm = TRUE),
    month_night_revenue = sum(n_received, na.rm = TRUE),
    month_day_expenses = sum(d_expenses, na.rm = TRUE),
    month_night_expenses = sum(n_expenses, na.rm = TRUE),
    month_day_profit = sum(d_profit_after, na.rm = TRUE),
    month_night_profit = sum(n_profit_after, na.rm = TRUE)
  )

# Plot monthly trends
monthly_trend_plot <- monthly_summary %>%
  gather(key = "category", value = "value", month_day_revenue, month_night_revenue, month_day_expenses, month_night_expenses, month_day_profit, month_night_profit) %>%
  mutate(
    category = factor(category, levels = c("month_day_revenue", "month_night_revenue", "month_day_expenses", "month_night_expenses", "month_day_profit", "month_night_profit"))
  ) %>%
  ggplot(aes(x = month, y = value, color = category, group = category)) +
  geom_line() +
  labs(title = "Monthly Trend of Day and Night Business",
       x = "Month", y = "Amount (KES)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

print(monthly_trend_plot)

# Calculate profit margin for day and night
clinic_data <- clinic_data %>%
  mutate(
    day_profit_margin = d_profit_after / d_received * 100,
    night_profit_margin = n_profit_after / n_received * 100
  )

# Summary of profit margins
day_night_profit_margin_summary <- clinic_data %>%
  summarize(
    avg_day_profit_margin = mean(day_profit_margin, na.rm = TRUE),
    avg_night_profit_margin = mean(night_profit_margin, na.rm = TRUE)
  )

# Display the average profit margins
print(day_night_profit_margin_summary)
## # A tibble: 1 × 2
##   avg_day_profit_margin avg_night_profit_margin
##                   <dbl>                   <dbl>
## 1                  10.7                     Inf
# Stacked bar chart for Day vs Night Revenue, Expenses, and Profit

day_night_comparison <- clinic_data %>%
  gather(key = "category", value = "value", d_received, n_received, d_expenses, n_expenses, d_profit_after, n_profit_after) %>%
  mutate(
    category = factor(category, levels = c("d_received", "n_received", "d_expenses", "n_expenses", "d_profit_after", "n_profit_after"))
  )

# Stacked bar plot
ggplot(day_night_comparison, aes(x = date, y = value, fill = category)) +
  geom_bar(stat = "identity", position = "stack") +
  labs(title = "Day vs Night Business Comparison (Revenue, Expenses, Profit)",
       x = "Date", y = "Amount (KES)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Line plot for Monthly Trends of Revenue, Expenses, and Profit (Day vs Night)
monthly_summary %>%
  gather(key = "category", value = "value", month_day_revenue, month_night_revenue, month_day_expenses, month_night_expenses, month_day_profit, month_night_profit) %>%
  mutate(
    category = factor(category, levels = c("month_day_revenue", "month_night_revenue", "month_day_expenses", "month_night_expenses", "month_day_profit", "month_night_profit"))
  ) %>%
  ggplot(aes(x = month, y = value, color = category, group = category)) +
  geom_line(size = 1) +
  labs(title = "Monthly Trends of Day vs Night Business Performance",
       x = "Month", y = "Amount (KES)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Boxplot to compare Day vs Night Profit Margins
ggplot(clinic_data, aes(x = factor(1), y = day_profit_margin, fill = "Day")) +
  geom_boxplot() +
  geom_boxplot(aes(y = night_profit_margin, fill = "Night")) +
  labs(title = "Day vs Night Profit Margins Comparison",
       x = "Time of Day", y = "Profit Margin (%)") +
  scale_x_discrete(labels = c("Day & Night")) +
  scale_fill_manual(values = c("Day" = "blue", "Night" = "green")) +
  theme_minimal()
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Create a daily profitability metric
clinic_data <- clinic_data %>%
  mutate(
    day_profit = d_received - d_expenses,
    night_profit = n_received - n_expenses
  )

# Heatmap for daily profitability

ggplot(clinic_data, aes(x = date, y = 1, fill = day_profit)) +
  geom_tile() +
  scale_fill_viridis(option = "C") +
  labs(title = "Heatmap of Daily Profitability (Day Business)",
       x = "Date", y = "Day Business", fill = "Profit (KES)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Calculate cumulative revenue and expenses over time
clinic_data <- clinic_data %>%
  arrange(date) %>%
  mutate(
    cumulative_day_revenue = cumsum(d_received),
    cumulative_night_revenue = cumsum(n_received),
    cumulative_day_expenses = cumsum(d_expenses),
    cumulative_night_expenses = cumsum(n_expenses)
  )

# Plot cumulative growth in revenue and expenses
ggplot(clinic_data) +
  geom_line(aes(x = date, y = cumulative_day_revenue, color = "Day Revenue"), size = 1) +
  geom_line(aes(x = date, y = cumulative_night_revenue, color = "Night Revenue"), size = 1) +
  geom_line(aes(x = date, y = cumulative_day_expenses, color = "Day Expenses"), size = 1, linetype = "dashed") +
  geom_line(aes(x = date, y = cumulative_night_expenses, color = "Night Expenses"), size = 1, linetype = "dashed") +
  labs(title = "Cumulative Growth of Revenue and Expenses",
       x = "Date", y = "Cumulative Amount (KES)") +
  scale_color_manual(values = c("Day Revenue" = "blue", "Night Revenue" = "green", "Day Expenses" = "red", "Night Expenses" = "orange")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Ensure the date is in the correct format and extract the month
clinic_data$date <- as.Date(clinic_data$date)  # Ensure the date is in the correct format
clinic_data$month <- format(clinic_data$date, "%Y-%m")  # Extract month and year

# Aggregate data monthly for day and night revenue and expenses
monthly_data <- clinic_data %>%
  group_by(month) %>%
  summarize(
    month_day_received = sum(d_received, na.rm = TRUE),
    month_day_expenses = sum(d_expenses, na.rm = TRUE),
    month_night_received = sum(n_received, na.rm = TRUE),
    month_night_expenses = sum(n_expenses, na.rm = TRUE)
  )

# Plot histograms for monthly data
library(ggplot2)

# Histogram for Day Received
ggplot(monthly_data, aes(x = month_day_received)) +
  geom_histogram(binwidth = 5000, fill = "skyblue", color = "black", alpha = 0.7) +
  labs(title = "Histogram of Monthly Day Revenue", x = "Day Revenue (KES)", y = "Frequency") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Histogram for Day Expenses
ggplot(monthly_data, aes(x = month_day_expenses)) +
  geom_histogram(binwidth = 5000, fill = "orange", color = "black", alpha = 0.7) +
  labs(title = "Histogram of Monthly Day Expenses", x = "Day Expenses (KES)", y = "Frequency") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Histogram for Night Received
ggplot(monthly_data, aes(x = month_night_received)) +
  geom_histogram(binwidth = 5000, fill = "green", color = "black", alpha = 0.7) +
  labs(title = "Histogram of Monthly Night Revenue", x = "Night Revenue (KES)", y = "Frequency") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Histogram for Night Expenses
ggplot(monthly_data, aes(x = month_night_expenses)) +
  geom_histogram(binwidth = 5000, fill = "red", color = "black", alpha = 0.7) +
  labs(title = "Histogram of Monthly Night Expenses", x = "Night Expenses (KES)", y = "Frequency") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Calculate the profitability for day and night
clinic_data <- clinic_data %>%
  mutate(
    day_profit = d_received - d_expenses,
    night_profit = n_received - n_expenses
  )

# Stacked area plot for day and night profit comparison
ggplot(clinic_data, aes(x = date)) +
  geom_area(aes(y = day_profit, fill = "Day Profit"), alpha = 0.5) +
  geom_area(aes(y = night_profit, fill = "Night Profit"), alpha = 0.5) +
  labs(title = "Profitability Comparison: Day vs Night Business",
       x = "Date", y = "Profit (KES)") +
  scale_fill_manual(values = c("Day Profit" = "blue", "Night Profit" = "green")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Calculate Cost-to-Revenue Ratio
clinic_data <- clinic_data %>%
  mutate(
    day_cost_to_revenue = d_expenses / d_received,
    night_cost_to_revenue = n_expenses / n_received
  )

# Plot the Cost-to-Revenue Ratio over time
ggplot(clinic_data) +
  geom_line(aes(x = date, y = day_cost_to_revenue, color = "Day Cost-to-Revenue"), size = 1) +
  geom_line(aes(x = date, y = night_cost_to_revenue, color = "Night Cost-to-Revenue"), size = 1) +
  labs(title = "Cost-to-Revenue Ratio: Day vs Night Business",
       x = "Date", y = "Cost-to-Revenue Ratio") +
  scale_color_manual(values = c("Day Cost-to-Revenue" = "blue", "Night Cost-to-Revenue" = "green")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Using K-means clustering to segment the days based on profitability
set.seed(123)  # For reproducibility
profitability_data <- clinic_data %>%
  select(date, d_received, d_expenses, d_profit_after, n_received, n_expenses, n_profit_after)

# Normalize data before clustering
profitability_data_norm <- scale(profitability_data[, -1])  # Remove date column for clustering

# Perform K-means clustering with 3 clusters
kmeans_result <- kmeans(profitability_data_norm, centers = 3)

# Add cluster labels to the original data
clinic_data$cluster <- kmeans_result$cluster

# Plot the clusters
ggplot(clinic_data, aes(x = date, y = d_profit_after, color = as.factor(cluster))) +
  geom_point() +
  labs(title = "Customer Segmentation Based on Day Profitability",
       x = "Date", y = "Day Profit (KES)", color = "Cluster") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))