Objectives

Reconcile BS, IS with TB. See how to leverage poewerful R in financial audit.

Sample company_BS long dataframe

Procedures

You should do a bit cleanups as follows; 1. Copy and paste (values) to a new workbook. 2. Remove unnecessary rows and columns such as key, checks, description and total. 3. Add row names such as subtotals. 4. Change months to character. 5. Modify the layout of workbook a bit, such as column 1 and 2.

Import data to R. as_cell() were not used as it produced a dataframe with fct coulmn, instead of chr column. xlsx_cells () and behead () were used to produce a dataframe with chr and numberic coulmns. For more, refer to unpivotr package.

library(tidyxl)
library(tidyverse)
library(purrr)
library(unpivotr)

path <- "C:/Users/Stewart Li/Dropbox/0. Stewart publication_Always updated/0. Stewart UC/8. Stewart R book_Compilation/Chapter_Excel/accounting_sample.xlsx"
bstokens <- xlsx_cells(path, sheet = "BS") %>%
  behead("N", header) %>%
  select(row, col, data_type, character, numeric) 

Extract main_headers, sub_headers and month cloumns.

main_headers <- c(
    "ASSETS",
    "EQUITY & LIABILITIES")

main_header_rows <- bstokens %>%
  filter(character %in% main_headers) %>%
  pull(row)

tokens_summary <- bstokens %>%
  filter(row %in% c(1, main_header_rows)) # main headers inside of token_summary

tokens_main <- bstokens %>%
  filter(!(row %in% main_header_rows)) # sub headers and month

Change sub_headers and month to rows and columns.

row_headers <- tokens_main %>% 
  filter(col <= 2) %>% 
  rename(header = character) %>% 
  split(.$col) %>% 
  set_names(c("main_headers", "sub_headers"))

col_headers <- tokens_main %>%
  filter(row == 4) %>%
  rename(header = character)

Produce a tidy dataframe.

tokens_main_tidy <- tokens_main %>%
  enhead(col_headers, "N") %>% 
  enhead(row_headers$sub_headers, "W") %>% 
  enhead(row_headers$main_headers, "WNW") %>% 
  rename(main_header = header, month = header.data, sub_header = header.header)

Produce a tidy dataframe summary.

row_headers_summary <- tokens_summary %>%
  filter(col == 1) %>%
  rename(header = character)

col_headers_summary <- col_headers

tokens_summary_tidy <- tokens_summary %>%
  enhead(row_headers_summary, "W") %>%
  enhead(col_headers_summary, "N") %>%
  rename(main_header = header.data, month = header.header)

Reorder months as factor. Check the number of level of main headers. Write a function to apply walk and plot for all sub-headers.

ordered_months <- c("Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", 
                    "Oct",   "Nov", "Dec",  "Jan") # must be same variable names

main_headers <- unique(tokens_main_tidy$main_header)
yearly_plot <- function(x, header) {
  p <- x %>%
    filter(main_header == header) %>%
    ggplot(aes(x = month, y = numeric.data, group = sub_header)) +
    geom_line() +
    facet_wrap(~sub_header, ncol = 3) +
    labs(title = header) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  print(p)
}
tokens_plot_ready <- tokens_main_tidy %>%
  mutate(month = factor(month, levels = ordered_months, ordered = TRUE), 
         numeric.data = as.numeric(numeric.data))

walk(main_headers, ~yearly_plot(tokens_plot_ready, .x))

Select relevant variables and export to csv.

colnames(tokens_plot_ready)[5] <- "amount"

bsdf <- tokens_plot_ready %>% 
  select(main_header, sub_header, month, amount) 
head(bsdf, 6)  
## # A tibble: 6 x 4
##   main_header        sub_header                         month  amount
##   <chr>              <chr>                              <ord>   <dbl>
## 1 Non-Current Assets Property, Plant & Equipment - Cost Mar   183750.
## 2 Non-Current Assets Property, Plant & Equipment - Cost Apr   199200.
## 3 Non-Current Assets Property, Plant & Equipment - Cost May   199200.
## 4 Non-Current Assets Property, Plant & Equipment - Cost Jun   199200.
## 5 Non-Current Assets Property, Plant & Equipment - Cost Jul   199200.
## 6 Non-Current Assets Property, Plant & Equipment - Cost Aug   199200.
write.csv(bsdf, file = "tidybs.csv")

Alternative way to produce BS long.

library(xlsx)
library(reshape2)
library(tidyverse)

bs1 <- read.xlsx("accounting_sample.xlsx", sheetIndex = "BS", startRow=3)

bs1 <- bs1 %>% rename(mainheader = NA., subheader = NA..1) %>% 
  fill(c(mainheader), .direction = "down") %>% 
  filter(subheader != "") 
  
bs1 <- melt(bs1, id = c("mainheader", "subheader")) 
bs1 <- bs1 %>% rename(month = variable) %>% 
  rename(amount = value)

write.csv(bs1, file = "tidybs1.csv")

Produce balance sheet as at the year end by summarizing.

bscf <- bsdf %>% filter(month == "Feb") %>% 
  select(-c("main_header", "month")) %>% 
  rename(cf = amount)

Sample company_IS long dataframe

This way will result in order change in income statement. For instance, sales amount will be on the bottom.

library(reshape2)
library(tidyverse)

is <- read.xlsx("accounting_sample.xlsx", sheetIndex = "IS", startRow=3)

is <- is %>% rename(main_header = NA., sub_header = NA..1) %>% 
  fill(c(main_header), .direction = "down") %>% 
  filter(sub_header != "") 
  
is <- melt(is, id = c("main_header", "sub_header")) 
is <- is %>% rename(month = variable) %>% 
  rename(amount = value)

write.csv(is, file = "tidyis.csv")

Plot for each subheaders. Walk function is applied to an object. Types of expenses should be seperately presented in more detail in order to plot well.

yearly_plot_is <- function(x, header) {
  p <- x %>%
    filter(main_header == header) %>%
    ggplot(aes(x = month, y = amount, group = sub_header)) +
    geom_line() +
    facet_wrap(~sub_header, ncol = 3) +
    labs(title = header) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  print(p)
}

mainheader <- unique(is$main_header)

walk(mainheader, ~yearly_plot_is(is, .x))

is %>% filter(sub_header == "Software Sales") %>% 
ggplot(aes(amount)) + geom_density()

Make a income statement by summarizing.

iscf <- is %>% group_by(sub_header) %>% 
  summarise(cf = round(sum(amount),2)) %>% 
  ungroup()

Combine BS and IS. -$ is preserved in R. It is easier to perform casting by exporting csv.file. Thereafter, tb should agree to JV listing and roginal TB.

tb <- rbind(bscf, iscf)
write.csv(tb, file = "tidytb.csv")

Compute Z-Score.

X1 <- (tb$cf[[16]] - tb$cf[[31]]) / tb$cf[[17]]
X2 <- (tb$cf[[20]] + tb$cf[[21]]) / tb$cf[[17]]
X3 <- tb$cf[[53]] / tb$cf[[17]]
X4 <- tb$cf[[22]] / (tb$cf[[24]] + tb$cf[[31]])
X5 <- tb$cf[[59]] / tb$cf[[17]]
Z <- 1.2*X1 + 1.4*X2 + 3.3*X3 + 0.6*X4 + 0.999*X5
Z < 1.81
## [1] FALSE
Z > 2.99
## [1] TRUE
data.frame(item = c("X1", "X2", "X3", "X4", "X5", "Z"), score = round(c(X1, X2, X3, X4, X5, Z),2))
##   item score
## 1   X1  0.39
## 2   X2  0.62
## 3   X3  0.30
## 4   X4  1.64
## 5   X5  1.90
## 6    Z  5.19

Compute M-Score.

Conclusions

This method mainly can be used for; 1. Quickly plot each accounts over the period. 2. Easily compute Z and M score.

References

Data can be obtained from https://www.excel-skills.com/templates/basic_accounting_sample.xls

Know more about the approach by referring to https://github.com/DavisVaughan/tidying-excel-cashflows-blog-companion/blob/master/img/2018-02-16-tidying-excel-cash-flow-spreadsheets-using-R/highlight-problems.png

copyright @ Stewart Li stewardli8@msn.com