Reconcile BS, IS with TB. See how to leverage poewerful R in financial audit.
You should do a bit cleanups as follows; 1. Copy and paste (values) to a new workbook. 2. Remove unnecessary rows and columns such as key, checks, description and total. 3. Add row names such as subtotals. 4. Change months to character. 5. Modify the layout of workbook a bit, such as column 1 and 2.
Import data to R. as_cell() were not used as it produced a dataframe with fct coulmn, instead of chr column. xlsx_cells () and behead () were used to produce a dataframe with chr and numberic coulmns. For more, refer to unpivotr package.
library(tidyxl)
library(tidyverse)
library(purrr)
library(unpivotr)
path <- "C:/Users/Stewart Li/Dropbox/0. Stewart publication_Always updated/0. Stewart UC/8. Stewart R book_Compilation/Chapter_Excel/accounting_sample.xlsx"
bstokens <- xlsx_cells(path, sheet = "BS") %>%
behead("N", header) %>%
select(row, col, data_type, character, numeric)
Extract main_headers, sub_headers and month cloumns.
main_headers <- c(
"ASSETS",
"EQUITY & LIABILITIES")
main_header_rows <- bstokens %>%
filter(character %in% main_headers) %>%
pull(row)
tokens_summary <- bstokens %>%
filter(row %in% c(1, main_header_rows)) # main headers inside of token_summary
tokens_main <- bstokens %>%
filter(!(row %in% main_header_rows)) # sub headers and month
Change sub_headers and month to rows and columns.
row_headers <- tokens_main %>%
filter(col <= 2) %>%
rename(header = character) %>%
split(.$col) %>%
set_names(c("main_headers", "sub_headers"))
col_headers <- tokens_main %>%
filter(row == 4) %>%
rename(header = character)
Produce a tidy dataframe.
tokens_main_tidy <- tokens_main %>%
enhead(col_headers, "N") %>%
enhead(row_headers$sub_headers, "W") %>%
enhead(row_headers$main_headers, "WNW") %>%
rename(main_header = header, month = header.data, sub_header = header.header)
Produce a tidy dataframe summary.
row_headers_summary <- tokens_summary %>%
filter(col == 1) %>%
rename(header = character)
col_headers_summary <- col_headers
tokens_summary_tidy <- tokens_summary %>%
enhead(row_headers_summary, "W") %>%
enhead(col_headers_summary, "N") %>%
rename(main_header = header.data, month = header.header)
Reorder months as factor. Check the number of level of main headers. Write a function to apply walk and plot for all sub-headers.
ordered_months <- c("Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep",
"Oct", "Nov", "Dec", "Jan") # must be same variable names
main_headers <- unique(tokens_main_tidy$main_header)
yearly_plot <- function(x, header) {
p <- x %>%
filter(main_header == header) %>%
ggplot(aes(x = month, y = numeric.data, group = sub_header)) +
geom_line() +
facet_wrap(~sub_header, ncol = 3) +
labs(title = header) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(p)
}
tokens_plot_ready <- tokens_main_tidy %>%
mutate(month = factor(month, levels = ordered_months, ordered = TRUE),
numeric.data = as.numeric(numeric.data))
walk(main_headers, ~yearly_plot(tokens_plot_ready, .x))
Select relevant variables and export to csv.
colnames(tokens_plot_ready)[5] <- "amount"
bsdf <- tokens_plot_ready %>%
select(main_header, sub_header, month, amount)
head(bsdf, 6)
## # A tibble: 6 x 4
## main_header sub_header month amount
## <chr> <chr> <ord> <dbl>
## 1 Non-Current Assets Property, Plant & Equipment - Cost Mar 183750.
## 2 Non-Current Assets Property, Plant & Equipment - Cost Apr 199200.
## 3 Non-Current Assets Property, Plant & Equipment - Cost May 199200.
## 4 Non-Current Assets Property, Plant & Equipment - Cost Jun 199200.
## 5 Non-Current Assets Property, Plant & Equipment - Cost Jul 199200.
## 6 Non-Current Assets Property, Plant & Equipment - Cost Aug 199200.
write.csv(bsdf, file = "tidybs.csv")
Alternative way to produce BS long.
library(xlsx)
library(reshape2)
library(tidyverse)
bs1 <- read.xlsx("accounting_sample.xlsx", sheetIndex = "BS", startRow=3)
bs1 <- bs1 %>% rename(mainheader = NA., subheader = NA..1) %>%
fill(c(mainheader), .direction = "down") %>%
filter(subheader != "")
bs1 <- melt(bs1, id = c("mainheader", "subheader"))
bs1 <- bs1 %>% rename(month = variable) %>%
rename(amount = value)
write.csv(bs1, file = "tidybs1.csv")
Produce balance sheet as at the year end by summarizing.
bscf <- bsdf %>% filter(month == "Feb") %>%
select(-c("main_header", "month")) %>%
rename(cf = amount)
This way will result in order change in income statement. For instance, sales amount will be on the bottom.
library(reshape2)
library(tidyverse)
is <- read.xlsx("accounting_sample.xlsx", sheetIndex = "IS", startRow=3)
is <- is %>% rename(main_header = NA., sub_header = NA..1) %>%
fill(c(main_header), .direction = "down") %>%
filter(sub_header != "")
is <- melt(is, id = c("main_header", "sub_header"))
is <- is %>% rename(month = variable) %>%
rename(amount = value)
write.csv(is, file = "tidyis.csv")
Plot for each subheaders. Walk function is applied to an object. Types of expenses should be seperately presented in more detail in order to plot well.
yearly_plot_is <- function(x, header) {
p <- x %>%
filter(main_header == header) %>%
ggplot(aes(x = month, y = amount, group = sub_header)) +
geom_line() +
facet_wrap(~sub_header, ncol = 3) +
labs(title = header) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(p)
}
mainheader <- unique(is$main_header)
walk(mainheader, ~yearly_plot_is(is, .x))
is %>% filter(sub_header == "Software Sales") %>%
ggplot(aes(amount)) + geom_density()
Make a income statement by summarizing.
iscf <- is %>% group_by(sub_header) %>%
summarise(cf = round(sum(amount),2)) %>%
ungroup()
Combine BS and IS. -$ is preserved in R. It is easier to perform casting by exporting csv.file. Thereafter, tb should agree to JV listing and roginal TB.
tb <- rbind(bscf, iscf)
write.csv(tb, file = "tidytb.csv")
Compute Z-Score.
X1 <- (tb$cf[[16]] - tb$cf[[31]]) / tb$cf[[17]]
X2 <- (tb$cf[[20]] + tb$cf[[21]]) / tb$cf[[17]]
X3 <- tb$cf[[53]] / tb$cf[[17]]
X4 <- tb$cf[[22]] / (tb$cf[[24]] + tb$cf[[31]])
X5 <- tb$cf[[59]] / tb$cf[[17]]
Z <- 1.2*X1 + 1.4*X2 + 3.3*X3 + 0.6*X4 + 0.999*X5
Z < 1.81
## [1] FALSE
Z > 2.99
## [1] TRUE
data.frame(item = c("X1", "X2", "X3", "X4", "X5", "Z"), score = round(c(X1, X2, X3, X4, X5, Z),2))
## item score
## 1 X1 0.39
## 2 X2 0.62
## 3 X3 0.30
## 4 X4 1.64
## 5 X5 1.90
## 6 Z 5.19
Compute M-Score.
This method mainly can be used for; 1. Quickly plot each accounts over the period. 2. Easily compute Z and M score.
Data can be obtained from https://www.excel-skills.com/templates/basic_accounting_sample.xls
Know more about the approach by referring to https://github.com/DavisVaughan/tidying-excel-cashflows-blog-companion/blob/master/img/2018-02-16-tidying-excel-cash-flow-spreadsheets-using-R/highlight-problems.png
copyright @ Stewart Li stewardli8@msn.com