title: “Central Clinic Financial Analysis Report” author: “Oyan Naivest” date: “2025-01-26” output: flexdashboard::flex_dashboard: orientation: rows vertical_layout: fill toc: true # Table of Contents toc_float: true # Floating Table of Contents # Set global options for the document knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE) —
This report presents an analysis of the Central Clinic’s financial data for the year 2024. The dataset includes daily revenue and expenses for both day and night operations. The goal of this analysis is to assess the clinic’s financial health, identify trends, and provide actionable insights to improve operational efficiency.
The dataset contains the following columns:
date
: Date of transaction.d_received
: Revenue generated during the day.d_expenses
: Expenses incurred during the day.d_profit_after
: Profit after expenses during the
day.n_received
: Revenue generated at night.n_expenses
: Expenses incurred at night.n_profit_after
: Profit after expenses during the
night.# Install and load required packages
if (!require(pacman)) install.packages("pacman")
## Loading required package: pacman
pacman::p_load(
readxl, # For reading Excel files
dplyr, # For data manipulation
ggplot2, # For data visualization
lubridate, # For date handling
here, # For file path management
knitr, # For knitting the report
rmarkdown, # For rendering R Markdown documents
tidyverse,
flextable,
flexdashboard,
janitor,
esquisse,
plotly,
viridis,
tinytex
)
# Correct path to the data folder and file
file_path <- here::here("data", "CASHFLOWS.xlsx")
# Verify the file exists
if (!file.exists(file_path)) {
stop(paste("File does not exist at:", file_path))
}
# Import the dataset
clinic_data <- read_excel(file_path)
# Preview the data
head(clinic_data)
## # A tibble: 6 × 13
## Date `D-Received` `D-Expenses` `D-Profit After`
## <dttm> <dbl> <dbl> <dbl>
## 1 2024-01-01 00:00:00 32500 36504 4050
## 2 2024-01-02 00:00:00 22000 43108 1200
## 3 2024-01-03 00:00:00 28250 31152 1900
## 4 2024-01-04 00:00:00 33300 31871 1800
## 5 2024-01-05 00:00:00 23400 28224 5704
## 6 2024-01-06 00:00:00 21350 20415 850
## # ℹ 9 more variables: `D-Accounts Held` <dbl>, `N-Received` <dbl>,
## # `N-Expenses` <dbl>, `N-Profit After` <dbl>, `N-Accounts Held` <dbl>,
## # `Total-Received` <dbl>, `Total-Expenses` <dbl>, `Total-Profit After` <dbl>,
## # `Total-Accounts Held` <dbl>
#cleaning
colnames(clinic_data)
## [1] "Date" "D-Received" "D-Expenses"
## [4] "D-Profit After" "D-Accounts Held" "N-Received"
## [7] "N-Expenses" "N-Profit After" "N-Accounts Held"
## [10] "Total-Received" "Total-Expenses" "Total-Profit After"
## [13] "Total-Accounts Held"
clinic_data <- clinic_data %>%
janitor::clean_names() # Cleans column names like D-Received to d_received
clinic_data <- clinic_data %>%
mutate(
d_received = as.numeric(d_received),
d_expenses = as.numeric(d_expenses),
n_received = as.numeric(n_received),
n_expenses = as.numeric(n_expenses)
)
# Summary statistics for day and night business
summary(clinic_data[, c("d_received", "d_expenses", "d_profit_after", "n_received", "n_expenses", "n_profit_after")])
## d_received d_expenses d_profit_after n_received
## Min. : 4250 Min. : 2164 Min. : 0 Min. : 0
## 1st Qu.:16413 1st Qu.:18369 1st Qu.: 300 1st Qu.: 4962
## Median :20650 Median :23473 Median : 1390 Median : 6900
## Mean :21252 Mean :23750 Mean : 2077 Mean : 7377
## 3rd Qu.:25588 3rd Qu.:28405 3rd Qu.: 3138 3rd Qu.: 9358
## Max. :46750 Max. :47893 Max. :17461 Max. :33250
## n_expenses n_profit_after
## Min. : 0 Min. : 0
## 1st Qu.: 4150 1st Qu.: 0
## Median : 5950 Median : 300
## Mean : 6575 Mean : 1282
## 3rd Qu.: 8434 3rd Qu.: 1574
## Max. :28883 Max. :15600
# Check the range of dates
range(clinic_data$date)
## [1] "2024-01-01 UTC" "2024-12-31 UTC"
# Summarize total day and night revenue, expenses, and profit for the entire year
day_night_summary <- clinic_data %>%
summarize(
total_day_revenue = sum(d_received, na.rm = TRUE),
total_night_revenue = sum(n_received, na.rm = TRUE),
total_day_expenses = sum(d_expenses, na.rm = TRUE),
total_night_expenses = sum(n_expenses, na.rm = TRUE),
total_day_profit = sum(d_profit_after, na.rm = TRUE),
total_night_profit = sum(n_profit_after, na.rm = TRUE)
)
# Create a comparison plot for revenue, expenses, and profit
library(ggplot2)
comparison_plot <- clinic_data %>%
gather(key = "category", value = "value", d_received, n_received, d_expenses, n_expenses, d_profit_after, n_profit_after) %>%
mutate(
category = factor(category, levels = c("d_received", "n_received", "d_expenses", "n_expenses", "d_profit_after", "n_profit_after"))
) %>%
ggplot(aes(x = date, y = value, color = category)) +
geom_line() +
labs(title = "Comparison of Day and Night Revenue, Expenses, and Profit",
x = "Date", y = "Amount (KES)") +
theme_minimal()
print(comparison_plot)
# Calculate monthly sums for day and night business
clinic_data$date <- as.Date(clinic_data$date) # Ensure the date is in the correct format
clinic_data$month <- format(clinic_data$date, "%Y-%m")
monthly_summary <- clinic_data %>%
group_by(month) %>%
summarize(
month_day_revenue = sum(d_received, na.rm = TRUE),
month_night_revenue = sum(n_received, na.rm = TRUE),
month_day_expenses = sum(d_expenses, na.rm = TRUE),
month_night_expenses = sum(n_expenses, na.rm = TRUE),
month_day_profit = sum(d_profit_after, na.rm = TRUE),
month_night_profit = sum(n_profit_after, na.rm = TRUE)
)
# Plot monthly trends
monthly_trend_plot <- monthly_summary %>%
gather(key = "category", value = "value", month_day_revenue, month_night_revenue, month_day_expenses, month_night_expenses, month_day_profit, month_night_profit) %>%
mutate(
category = factor(category, levels = c("month_day_revenue", "month_night_revenue", "month_day_expenses", "month_night_expenses", "month_day_profit", "month_night_profit"))
) %>%
ggplot(aes(x = month, y = value, color = category, group = category)) +
geom_line() +
labs(title = "Monthly Trend of Day and Night Business",
x = "Month", y = "Amount (KES)") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(monthly_trend_plot)
# Calculate profit margin for day and night
clinic_data <- clinic_data %>%
mutate(
day_profit_margin = d_profit_after / d_received * 100,
night_profit_margin = n_profit_after / n_received * 100
)
# Summary of profit margins
day_night_profit_margin_summary <- clinic_data %>%
summarize(
avg_day_profit_margin = mean(day_profit_margin, na.rm = TRUE),
avg_night_profit_margin = mean(night_profit_margin, na.rm = TRUE)
)
# Display the average profit margins
print(day_night_profit_margin_summary)
## # A tibble: 1 × 2
## avg_day_profit_margin avg_night_profit_margin
## <dbl> <dbl>
## 1 10.7 Inf
# Stacked bar chart for Day vs Night Revenue, Expenses, and Profit
day_night_comparison <- clinic_data %>%
gather(key = "category", value = "value", d_received, n_received, d_expenses, n_expenses, d_profit_after, n_profit_after) %>%
mutate(
category = factor(category, levels = c("d_received", "n_received", "d_expenses", "n_expenses", "d_profit_after", "n_profit_after"))
)
# Stacked bar plot
ggplot(day_night_comparison, aes(x = date, y = value, fill = category)) +
geom_bar(stat = "identity", position = "stack") +
labs(title = "Day vs Night Business Comparison (Revenue, Expenses, Profit)",
x = "Date", y = "Amount (KES)") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Line plot for Monthly Trends of Revenue, Expenses, and Profit (Day vs Night)
monthly_summary %>%
gather(key = "category", value = "value", month_day_revenue, month_night_revenue, month_day_expenses, month_night_expenses, month_day_profit, month_night_profit) %>%
mutate(
category = factor(category, levels = c("month_day_revenue", "month_night_revenue", "month_day_expenses", "month_night_expenses", "month_day_profit", "month_night_profit"))
) %>%
ggplot(aes(x = month, y = value, color = category, group = category)) +
geom_line(size = 1) +
labs(title = "Monthly Trends of Day vs Night Business Performance",
x = "Month", y = "Amount (KES)") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Boxplot to compare Day vs Night Profit Margins
ggplot(clinic_data, aes(x = factor(1), y = day_profit_margin, fill = "Day")) +
geom_boxplot() +
geom_boxplot(aes(y = night_profit_margin, fill = "Night")) +
labs(title = "Day vs Night Profit Margins Comparison",
x = "Time of Day", y = "Profit Margin (%)") +
scale_x_discrete(labels = c("Day & Night")) +
scale_fill_manual(values = c("Day" = "blue", "Night" = "green")) +
theme_minimal()
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Create a daily profitability metric
clinic_data <- clinic_data %>%
mutate(
day_profit = d_received - d_expenses,
night_profit = n_received - n_expenses
)
# Heatmap for daily profitability
ggplot(clinic_data, aes(x = date, y = 1, fill = day_profit)) +
geom_tile() +
scale_fill_viridis(option = "C") +
labs(title = "Heatmap of Daily Profitability (Day Business)",
x = "Date", y = "Day Business", fill = "Profit (KES)") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Calculate cumulative revenue and expenses over time
clinic_data <- clinic_data %>%
arrange(date) %>%
mutate(
cumulative_day_revenue = cumsum(d_received),
cumulative_night_revenue = cumsum(n_received),
cumulative_day_expenses = cumsum(d_expenses),
cumulative_night_expenses = cumsum(n_expenses)
)
# Plot cumulative growth in revenue and expenses
ggplot(clinic_data) +
geom_line(aes(x = date, y = cumulative_day_revenue, color = "Day Revenue"), size = 1) +
geom_line(aes(x = date, y = cumulative_night_revenue, color = "Night Revenue"), size = 1) +
geom_line(aes(x = date, y = cumulative_day_expenses, color = "Day Expenses"), size = 1, linetype = "dashed") +
geom_line(aes(x = date, y = cumulative_night_expenses, color = "Night Expenses"), size = 1, linetype = "dashed") +
labs(title = "Cumulative Growth of Revenue and Expenses",
x = "Date", y = "Cumulative Amount (KES)") +
scale_color_manual(values = c("Day Revenue" = "blue", "Night Revenue" = "green", "Day Expenses" = "red", "Night Expenses" = "orange")) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Ensure the date is in the correct format and extract the month
clinic_data$date <- as.Date(clinic_data$date) # Ensure the date is in the correct format
clinic_data$month <- format(clinic_data$date, "%Y-%m") # Extract month and year
# Aggregate data monthly for day and night revenue and expenses
monthly_data <- clinic_data %>%
group_by(month) %>%
summarize(
month_day_received = sum(d_received, na.rm = TRUE),
month_day_expenses = sum(d_expenses, na.rm = TRUE),
month_night_received = sum(n_received, na.rm = TRUE),
month_night_expenses = sum(n_expenses, na.rm = TRUE)
)
# Plot histograms for monthly data
library(ggplot2)
# Histogram for Day Received
ggplot(monthly_data, aes(x = month_day_received)) +
geom_histogram(binwidth = 5000, fill = "skyblue", color = "black", alpha = 0.7) +
labs(title = "Histogram of Monthly Day Revenue", x = "Day Revenue (KES)", y = "Frequency") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Histogram for Day Expenses
ggplot(monthly_data, aes(x = month_day_expenses)) +
geom_histogram(binwidth = 5000, fill = "orange", color = "black", alpha = 0.7) +
labs(title = "Histogram of Monthly Day Expenses", x = "Day Expenses (KES)", y = "Frequency") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Histogram for Night Received
ggplot(monthly_data, aes(x = month_night_received)) +
geom_histogram(binwidth = 5000, fill = "green", color = "black", alpha = 0.7) +
labs(title = "Histogram of Monthly Night Revenue", x = "Night Revenue (KES)", y = "Frequency") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Histogram for Night Expenses
ggplot(monthly_data, aes(x = month_night_expenses)) +
geom_histogram(binwidth = 5000, fill = "red", color = "black", alpha = 0.7) +
labs(title = "Histogram of Monthly Night Expenses", x = "Night Expenses (KES)", y = "Frequency") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Calculate the profitability for day and night
clinic_data <- clinic_data %>%
mutate(
day_profit = d_received - d_expenses,
night_profit = n_received - n_expenses
)
# Stacked area plot for day and night profit comparison
ggplot(clinic_data, aes(x = date)) +
geom_area(aes(y = day_profit, fill = "Day Profit"), alpha = 0.5) +
geom_area(aes(y = night_profit, fill = "Night Profit"), alpha = 0.5) +
labs(title = "Profitability Comparison: Day vs Night Business",
x = "Date", y = "Profit (KES)") +
scale_fill_manual(values = c("Day Profit" = "blue", "Night Profit" = "green")) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Calculate Cost-to-Revenue Ratio
clinic_data <- clinic_data %>%
mutate(
day_cost_to_revenue = d_expenses / d_received,
night_cost_to_revenue = n_expenses / n_received
)
# Plot the Cost-to-Revenue Ratio over time
ggplot(clinic_data) +
geom_line(aes(x = date, y = day_cost_to_revenue, color = "Day Cost-to-Revenue"), size = 1) +
geom_line(aes(x = date, y = night_cost_to_revenue, color = "Night Cost-to-Revenue"), size = 1) +
labs(title = "Cost-to-Revenue Ratio: Day vs Night Business",
x = "Date", y = "Cost-to-Revenue Ratio") +
scale_color_manual(values = c("Day Cost-to-Revenue" = "blue", "Night Cost-to-Revenue" = "green")) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Using K-means clustering to segment the days based on profitability
set.seed(123) # For reproducibility
profitability_data <- clinic_data %>%
select(date, d_received, d_expenses, d_profit_after, n_received, n_expenses, n_profit_after)
# Normalize data before clustering
profitability_data_norm <- scale(profitability_data[, -1]) # Remove date column for clustering
# Perform K-means clustering with 3 clusters
kmeans_result <- kmeans(profitability_data_norm, centers = 3)
# Add cluster labels to the original data
clinic_data$cluster <- kmeans_result$cluster
# Plot the clusters
ggplot(clinic_data, aes(x = date, y = d_profit_after, color = as.factor(cluster))) +
geom_point() +
labs(title = "Customer Segmentation Based on Day Profitability",
x = "Date", y = "Day Profit (KES)", color = "Cluster") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))