SET MY WORKING DIRECTORY AND LOAD THE REQUIRED PACKAGES
setwd ("C:/Users/Administrator/Desktop/R Studio Scripts")
library (tidyverse)
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
IMPORT & LOAD THE SAMPLE DATA
Household_Expenses_Sample <- read_excel("Household_Expenses_Sample.xlsx") #import the data
View(Household_Expenses_Sample) #view the data
BROWSE THROUGH THE DATA
names(Household_Expenses_Sample) #view our variable names
## [1] "Date" "Category" "Description" "Amount_KES"
## [5] "Payment_Method"
head(Household_Expenses_Sample) #view the first 5 observations
## # A tibble: 6 × 5
## Date Category Description Amount_KES Payment_Method
## <dttm> <chr> <chr> <dbl> <chr>
## 1 2025-01-01 00:00:00 Gas Refill 22275 Bank Transfer
## 2 2025-01-02 00:00:00 Internet One-time purchase 17564 Card
## 3 2025-01-02 00:00:00 Toiletries Monthly bill 28891 Bank Transfer
## 4 2025-01-02 00:00:00 Entertainment Service fee 19390 M-Pesa
## 5 2025-01-02 00:00:00 Utilities Refill 21672 M-Pesa
## 6 2025-01-03 00:00:00 Groceries Refill 24390 Card
tail(Household_Expenses_Sample) #view the last 5 observations
## # A tibble: 6 × 5
## Date Category Description Amount_KES Payment_Method
## <dttm> <chr> <chr> <dbl> <chr>
## 1 2025-03-30 00:00:00 Groceries Refill 27436 Card
## 2 2025-03-30 00:00:00 Toiletries Refill 2016 M-Pesa
## 3 2025-03-30 00:00:00 Internet Misc. 10107 Bank Transfer
## 4 2025-03-30 00:00:00 Internet Service fee 7760 Bank Transfer
## 5 2025-03-31 00:00:00 Toiletries Refill 25747 M-Pesa
## 6 2025-03-31 00:00:00 Entertainment One-time purchase 22180 M-Pesa
attach(Household_Expenses_Sample)
CLEAN A VARIABLE
Household_Expenses_Sample<-Household_Expenses_Sample %>%
mutate(Date = as.Date(Date),
Month= lubridate::floor_date(Date,"month"))
SUMMARY STATISTICS
Household_Expenses_Sample %>%
group_by(Month, Category) %>%
summarise(Total = sum(Amount_KES), .groups = "drop")
## # A tibble: 24 × 3
## Month Category Total
## <date> <chr> <dbl>
## 1 2025-01-01 Entertainment 63250
## 2 2025-01-01 Gas 84333
## 3 2025-01-01 Groceries 95148
## 4 2025-01-01 Internet 85618
## 5 2025-01-01 Rent 52198
## 6 2025-01-01 Toiletries 74927
## 7 2025-01-01 Transport 42691
## 8 2025-01-01 Utilities 73999
## 9 2025-02-01 Entertainment 61135
## 10 2025-02-01 Gas 71177
## # ℹ 14 more rows
VISUALIZATION
ggplot(Household_Expenses_Sample, aes(x = Category, y = Amount_KES, fill = Payment_Method)) +
geom_boxplot() +
coord_flip() +
labs(title = "Distribution of Household Expenses by Category",
x = "Category", y = "Amount (KES)") +
theme_minimal()
