SET MY WORKING DIRECTORY AND LOAD THE REQUIRED PACKAGES

setwd ("C:/Users/Administrator/Desktop/R Studio Scripts")

library (tidyverse)
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3

IMPORT & LOAD THE SAMPLE DATA

Household_Expenses_Sample <- read_excel("Household_Expenses_Sample.xlsx") #import the data
View(Household_Expenses_Sample) #view the data

BROWSE THROUGH THE DATA

names(Household_Expenses_Sample) #view our variable names
## [1] "Date"           "Category"       "Description"    "Amount_KES"    
## [5] "Payment_Method"
head(Household_Expenses_Sample) #view the first 5 observations
## # A tibble: 6 × 5
##   Date                Category      Description       Amount_KES Payment_Method
##   <dttm>              <chr>         <chr>                  <dbl> <chr>         
## 1 2025-01-01 00:00:00 Gas           Refill                 22275 Bank Transfer 
## 2 2025-01-02 00:00:00 Internet      One-time purchase      17564 Card          
## 3 2025-01-02 00:00:00 Toiletries    Monthly bill           28891 Bank Transfer 
## 4 2025-01-02 00:00:00 Entertainment Service fee            19390 M-Pesa        
## 5 2025-01-02 00:00:00 Utilities     Refill                 21672 M-Pesa        
## 6 2025-01-03 00:00:00 Groceries     Refill                 24390 Card
tail(Household_Expenses_Sample) #view the last 5 observations
## # A tibble: 6 × 5
##   Date                Category      Description       Amount_KES Payment_Method
##   <dttm>              <chr>         <chr>                  <dbl> <chr>         
## 1 2025-03-30 00:00:00 Groceries     Refill                 27436 Card          
## 2 2025-03-30 00:00:00 Toiletries    Refill                  2016 M-Pesa        
## 3 2025-03-30 00:00:00 Internet      Misc.                  10107 Bank Transfer 
## 4 2025-03-30 00:00:00 Internet      Service fee             7760 Bank Transfer 
## 5 2025-03-31 00:00:00 Toiletries    Refill                 25747 M-Pesa        
## 6 2025-03-31 00:00:00 Entertainment One-time purchase      22180 M-Pesa
attach(Household_Expenses_Sample)

CLEAN A VARIABLE

Household_Expenses_Sample<-Household_Expenses_Sample %>%
  mutate(Date = as.Date(Date),
         Month= lubridate::floor_date(Date,"month"))

SUMMARY STATISTICS

Household_Expenses_Sample %>%
  group_by(Month, Category) %>%
  summarise(Total = sum(Amount_KES), .groups = "drop")
## # A tibble: 24 × 3
##    Month      Category      Total
##    <date>     <chr>         <dbl>
##  1 2025-01-01 Entertainment 63250
##  2 2025-01-01 Gas           84333
##  3 2025-01-01 Groceries     95148
##  4 2025-01-01 Internet      85618
##  5 2025-01-01 Rent          52198
##  6 2025-01-01 Toiletries    74927
##  7 2025-01-01 Transport     42691
##  8 2025-01-01 Utilities     73999
##  9 2025-02-01 Entertainment 61135
## 10 2025-02-01 Gas           71177
## # ℹ 14 more rows

VISUALIZATION

ggplot(Household_Expenses_Sample, aes(x = Category, y = Amount_KES, fill = Payment_Method)) +
  geom_boxplot() +
  coord_flip() +
  labs(title = "Distribution of Household Expenses by Category",
       x = "Category", y = "Amount (KES)") +
  theme_minimal()