1. Data Exploratory Analysis
library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
df_treasury <- read_xlsx("C:/Users/Anhuynh/Desktop/Work Projects/Treasury Data Exercise.xlsx", sheet = "Dataset_Manipulated")
# 1.1. Check missing data
mis_dt <- function(x) {sum(is.na(x))/length(x)*100}
apply(df_treasury, 2, mis_dt)
## Flow Transaction date Account
## 0.0000000 0.0000000 0.0000000
## Value date Flow cur. Flow amount
## 0.0000000 0.0000000 0.0000000
## Rate against U.S. Dollar Acct cur. Account amount
## 0.0000000 0.0000000 0.0000000
## Flow status Budget code Budget description
## 0.0000000 0.4904518 0.4904518
## Description Reference Number
## 0.0000000 0.0000000 0.0000000
## Fee amount Accounting date Update date
## 0.0000000 0.0000000 0.0000000
## Update time Origin Account description
## 0.0000000 0.0000000 0.0000000
## Flow description
## 0.0000000
options(scipen = 999) # turn off scientific notation for numbers
# Recalculate Flow/Account Amount
df_treasury$Flow_amt_usd <- df_treasury$`Rate against U.S. Dollar` * df_treasury$`Flow amount`
# 1.2. Check Outliers using Percentiles method
lower_bound <- quantile(df_treasury$Flow_amt_usd, 0.025)
upper_bound <- quantile(df_treasury$Flow_amt_usd, 0.975)
outlier_ind <- which(df_treasury$Flow_amt_usd < lower_bound | df_treasury$Flow_amt_usd > upper_bound)
## Print Outliers
df_treasury[outlier_ind, ]
## # A tibble: 917 × 23
## Flow `Transaction date` Account `Value date` `Flow cur.`
## <chr> <dttm> <dbl> <dttm> <chr>
## 1 -NOCASH 2020-02-06 00:00:00 2 2020-02-06 00:00:00 EUR
## 2 +195 2020-02-18 00:00:00 4 2020-02-18 00:00:00 MXN
## 3 +195 2020-04-14 00:00:00 4 2020-04-14 00:00:00 MXN
## 4 -698 2020-01-20 00:00:00 4 2020-01-20 00:00:00 MXN
## 5 -698 2020-02-20 00:00:00 4 2020-02-20 00:00:00 MXN
## 6 -698 2020-03-20 00:00:00 4 2020-03-20 00:00:00 MXN
## 7 -698 2020-04-20 00:00:00 4 2020-04-20 00:00:00 MXN
## 8 -698 2020-05-20 00:00:00 4 2020-05-20 00:00:00 MXN
## 9 -699 2020-01-20 00:00:00 4 2020-01-20 00:00:00 MXN
## 10 -699 2020-02-20 00:00:00 4 2020-02-20 00:00:00 MXN
## # ℹ 907 more rows
## # ℹ 18 more variables: `Flow amount` <dbl>, `Rate against U.S. Dollar` <dbl>,
## # `Acct cur.` <chr>, `Account amount` <dbl>, `Flow status` <chr>,
## # `Budget code` <chr>, `Budget description` <chr>, Description <chr>,
## # Reference <dbl>, Number <dbl>, `Fee amount` <dbl>,
## # `Accounting date` <dttm>, `Update date` <dttm>, `Update time` <dttm>,
## # Origin <chr>, `Account description` <chr>, `Flow description` <chr>, …
## Outlier Visualization
### Box plot
require(ggplot2)
ggplot(df_treasury) +
aes(x = "", y = Flow_amt_usd) +
geom_boxplot(fill = "#0c4c8a") +
theme_minimal()

### Histogram
hist(df_treasury$Flow_amt_usd, col = "green")
rug(df_treasury$Flow_amt_usd)
abline(v = median(df_treasury$Flow_amt_usd), col = "magenta", lwd = 4)

# 1.3. Standardization
# standardize the data using scale() function
library(dplyr)
df_treasury_stan <- df_treasury %>%
mutate(Flow_amt_usd_sta = scale(df_treasury$Flow_amt_usd))
# 1.4. Feature Engineering
## Create a "Time_lag" feature of time difference between Transaction date & Value date
df_treasury_stan <- df_treasury_stan %>%
mutate(Time_lag = difftime(df_treasury_stan$`Value date`, df_treasury_stan$`Transaction date`, units = "days"))
## Data Viz
df_timelag <- data.frame(Time_Lag = c("Same day", "Before or after 01 day", "Before or after more than 02 days"), Transaction_Volume = c(18819, 235, 112))
ggplot(data=df_timelag, aes(x = Time_Lag, y = Transaction_Volume, fill = Time_Lag)) +
geom_bar(stat="identity", width=0.8) +
geom_text(aes(y=df_timelag$Transaction_Volume, label= df_timelag$Transaction_Volume), vjust=1, color="black", size=3) +
labs(title="Time Lag (Days) Between Value Date & Transaction Date") +
theme(legend.position="none")
## Warning: Use of `df_timelag$Transaction_Volume` is discouraged.
## ℹ Use `Transaction_Volume` instead.
## Use of `df_timelag$Transaction_Volume` is discouraged.
## ℹ Use `Transaction_Volume` instead.

# 1.5. Correlation Of Variables
## data preparation
df_treasury_stan$`Flow status` <- as.factor(df_treasury_stan$`Flow status`)
df_treasury_stan$`Budget code` <- as.factor(df_treasury_stan$`Budget code`)
## examining correlation with the most popular Budget codes
df_treasury_stan_top_budget <- df_treasury_stan[which(df_treasury_stan$`Budget code` %in% c("+COLLECT", "-AP_VEND", "-BANKFEE", "-FX", "-INTERCO", "-MISC", "-PAYROL", "-TAXES", "-TRLEXP", "-ZBA", "-ZNOCODE", "+INTERCO", "+STOCK", "+ZBA")),]
df_treasury_top_cor <- df_treasury_stan_top_budget[, c(10,11,24,25)]
library(ggcorrplot) ## apply to multiple data types
model.matrix(~0+., data = df_treasury_top_cor) %>%
cor(use="pairwise.complete.obs") %>%
ggcorrplot(show.diag=FALSE, type="lower", lab=TRUE, lab_size=2)
## Warning in cor(., use = "pairwise.complete.obs"): the standard deviation is
## zero

## examining correlation with the least popular Budget codes
df_treasury_stan_bot_budget <- df_treasury_stan[which(df_treasury_stan$`Budget code` %in% c("-AP_RENT", "-BENEFIT", "-DEBT", "-ESPP", "-IC_LOAN", "-ICD", "-INT_ICL", "-INT_INC", "-INVEST", "-PAYFEE", "-PAYFID", "-PAYTAX", "-POOL", "-SALETAX", "-SWEEP", "-VDI", "+ESPP", "+FX", "+IC_LOAN", "+ICD", "+INT_ICL", "+INT_INC", "+MISC", "+SWEEP", "+TAXES", "+ZNOCODE")),]
df_treasury_bot_cor <- df_treasury_stan_bot_budget[, c(10,11,24,25)]
library(ggcorrplot) ## apply to multiple data types
model.matrix(~0+., data = df_treasury_bot_cor) %>%
cor(use="pairwise.complete.obs") %>%
ggcorrplot(show.diag=FALSE, type="lower", lab=TRUE, lab_size=2)
## Warning in cor(., use = "pairwise.complete.obs"): the standard deviation is
## zero

df_treasury_stan[which(df_treasury_stan$`Budget code`=="-SWEEP"),]
## # A tibble: 6 × 25
## Flow `Transaction date` Account `Value date` `Flow cur.`
## <chr> <dttm> <dbl> <dttm> <chr>
## 1 -MISC 2019-02-26 00:00:00 60 2020-02-21 00:00:00 BRL
## 2 -MISC 2020-02-11 00:00:00 60 2020-02-10 00:00:00 BRL
## 3 -MISC 2020-03-17 00:00:00 60 2020-03-16 00:00:00 BRL
## 4 -MISC 2020-03-20 00:00:00 60 2020-03-19 00:00:00 BRL
## 5 -MISC 2020-03-27 00:00:00 60 2020-03-26 00:00:00 BRL
## 6 -MISC 2020-05-19 00:00:00 60 2020-05-18 00:00:00 BRL
## # ℹ 20 more variables: `Flow amount` <dbl>, `Rate against U.S. Dollar` <dbl>,
## # `Acct cur.` <chr>, `Account amount` <dbl>, `Flow status` <fct>,
## # `Budget code` <fct>, `Budget description` <chr>, Description <chr>,
## # Reference <dbl>, Number <dbl>, `Fee amount` <dbl>,
## # `Accounting date` <dttm>, `Update date` <dttm>, `Update time` <dttm>,
## # Origin <chr>, `Account description` <chr>, `Flow description` <chr>,
## # Flow_amt_usd <dbl>, Flow_amt_usd_sta <dbl[,1]>, Time_lag <drtn>
# 1.6. Investigate Relationship between Budget codes & Flow amounts
## as observed from overall correlation matrix, choosing the most affected budget codes
df_treasury_stan_chosen_budget <- df_treasury_stan[which(df_treasury_stan$`Budget code` %in% c("-AP_VEND", "-ZBA", "+ZBA", "+ICD", "-ICD") & df_treasury_stan$Flow %in% c("+277", "+275", "+206", "+195", "-577", "-575", "-495")),]
df_treasury_chosen_cor <- df_treasury_stan_chosen_budget[, c(1,10,11,24)]
library(ggcorrplot) ## apply to multiple data types
model.matrix(~0+., data = df_treasury_chosen_cor) %>%
cor(use="pairwise.complete.obs") %>%
ggcorrplot(show.diag=FALSE, type="lower", lab=TRUE, lab_size=2)
## Warning in cor(., use = "pairwise.complete.obs"): the standard deviation is
## zero

# Relationship of Flow Amount ~ Budget Code
par(mfrow = c(2,2), mar = c(5,4,2,1))
ggplot(data=df_treasury_stan, aes(x = `Budget code`, y=Flow_amt_usd, fill=`Budget code`)) +
geom_bar(stat="identity", width=0.8) +
geom_text(aes(y=df_treasury_stan$Flow_amt_usd, label=""), vjust=1, color="black", size=3) +
labs(title="Change of Flow/Account Amount Over Budget Codes") +
theme(legend.title = element_blank(), axis.text.x=element_text(angle=45,hjust=1,vjust=1)) +
theme(legend.position="none")
## Warning: Use of `df_treasury_stan$Flow_amt_usd` is discouraged.
## ℹ Use `Flow_amt_usd` instead.

df_treasury_stan[which(df_treasury_stan$`Budget code`%in% c("-AP_VEND", "-ZBA", "+ZBA", "+ICD", "-ICD") & df_treasury_stan$Flow %in% c("+277", "+275", "+206", "+195", "-577", "-575", "-495")),]
## # A tibble: 4,732 × 25
## Flow `Transaction date` Account `Value date` `Flow cur.`
## <chr> <dttm> <dbl> <dttm> <chr>
## 1 -495 2020-02-04 00:00:00 1 2020-02-04 00:00:00 CAD
## 2 -495 2020-03-10 00:00:00 1 2020-03-10 00:00:00 CAD
## 3 -495 2020-03-26 00:00:00 1 2020-03-26 00:00:00 CAD
## 4 -495 2020-04-15 00:00:00 1 2020-04-15 00:00:00 CAD
## 5 -495 2020-05-06 00:00:00 1 2020-05-06 00:00:00 CAD
## 6 -495 2020-05-06 00:00:00 1 2020-05-06 00:00:00 CAD
## 7 -495 2020-05-06 00:00:00 1 2020-05-06 00:00:00 CAD
## 8 -495 2020-05-13 00:00:00 1 2020-05-13 00:00:00 CAD
## 9 -495 2020-05-22 00:00:00 1 2020-05-22 00:00:00 CAD
## 10 -495 2020-01-15 00:00:00 2 2020-01-15 00:00:00 EUR
## # ℹ 4,722 more rows
## # ℹ 20 more variables: `Flow amount` <dbl>, `Rate against U.S. Dollar` <dbl>,
## # `Acct cur.` <chr>, `Account amount` <dbl>, `Flow status` <fct>,
## # `Budget code` <fct>, `Budget description` <chr>, Description <chr>,
## # Reference <dbl>, Number <dbl>, `Fee amount` <dbl>,
## # `Accounting date` <dttm>, `Update date` <dttm>, `Update time` <dttm>,
## # Origin <chr>, `Account description` <chr>, `Flow description` <chr>, …
fit1 <- lm(Flow_amt_usd_sta ~ as.factor(`Budget code`) *as.factor(Flow) -1, df_treasury_stan_chosen_budget) # With Flow
summary(fit1)$coef
## Estimate Std. Error t value
## as.factor(`Budget code`)-AP_VEND -0.11466412 0.02762923 -4.15010150
## as.factor(`Budget code`)-ICD 3.20659363 0.19627214 16.33748717
## as.factor(`Budget code`)-ZBA 0.64585365 0.11675242 5.53182221
## as.factor(`Budget code`)+ICD 4.05413985 0.42753174 9.48266402
## as.factor(`Budget code`)+ZBA -0.13109628 1.63035887 -0.08040946
## as.factor(Flow)-575 0.42936120 0.14693339 2.92214853
## as.factor(Flow)+195 0.04066614 0.34107424 0.11922959
## as.factor(Flow)+206 -2.72757708 0.66976891 -4.07241518
## as.factor(Flow)+275 1.08909665 1.63248035 0.66714227
## as.factor(Flow)+277 0.78095483 1.63455543 0.47777813
## Pr(>|t|)
## as.factor(`Budget code`)-AP_VEND 0.000033817854686727118044588247247617118773632682859897613525391
## as.factor(`Budget code`)-ICD 0.000000000000000000000000000000000000000000000000000000000208163
## as.factor(`Budget code`)-ZBA 0.000000033402144618755744696122350623213037579262163490056991577
## as.factor(`Budget code`)+ICD 0.000000000000000000003818508387144790880082800832440170779591426
## as.factor(`Budget code`)+ZBA 0.935915007282081878869917090923991054296493530273437500000000000
## as.factor(Flow)-575 0.003492749204348178856949225234984623966738581657409667968750000
## as.factor(Flow)+195 0.905098538921346174035420517611782997846603393554687500000000000
## as.factor(Flow)+206 0.000047290203063972310578225166644017463113414123654365539550781
## as.factor(Flow)+275 0.504713858726509423391348718723747879266738891601562500000000000
## as.factor(Flow)+277 0.632830245313695360920291932416148483753204345703125000000000000
fit2 <- lm(Flow_amt_usd_sta ~ as.factor(`Budget code`) -1, df_treasury_stan_chosen_budget) # without Flow
summary(fit2)$coef
## Estimate Std. Error t value
## as.factor(`Budget code`)-AP_VEND -0.1143973 0.02763028 -4.140286
## as.factor(`Budget code`)-ICD 3.2065936 0.19692680 16.283175
## as.factor(`Budget code`)-ZBA 0.9169437 0.07112160 12.892619
## as.factor(`Budget code`)+ICD 3.5411573 0.23133661 15.307380
## as.factor(`Budget code`)+ZBA 0.8528732 0.06798140 12.545686
## Pr(>|t|)
## as.factor(`Budget code`)-AP_VEND 0.0000352919015092657145388904682548059099644888192415237426757812
## as.factor(`Budget code`)-ICD 0.0000000000000000000000000000000000000000000000000000000004812368
## as.factor(`Budget code`)-ZBA 0.0000000000000000000000000000000000002101283889512836462526088055
## as.factor(`Budget code`)+ICD 0.0000000000000000000000000000000000000000000000000011628865146983
## as.factor(`Budget code`)+ZBA 0.0000000000000000000000000000000000153737318024414360041751681507
# evaluation with ANOVA
anova(fit1, fit2)
## Analysis of Variance Table
##
## Model 1: Flow_amt_usd_sta ~ as.factor(`Budget code`) * as.factor(Flow) -
## 1
## Model 2: Flow_amt_usd_sta ~ as.factor(`Budget code`) - 1
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 4722 12551
## 2 4727 12649 -5 -97.248 7.3172 0.0000007696 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 1.7. Update Patterns
library(dplyr)
library(ggplot2)
library(lubridate)
library(timetk)
## prepare Update Date data
df_treasury_stan_time <- df_treasury_stan[which(df_treasury_stan$`Flow cur.` %in% c("USD", "EUR", "BRL", "AUD", "GBP","ILS", "JPY", "CAD","INR", "SGD") & df_treasury_stan$`Acct cur.` %in% c("USD", "EUR", "BRL", "AUD", "GBP","ILS", "JPY", "CAD","INR", "SGD")),c(3,5,8,18,19,23)]
df_treasury_stan_time$Account <- as.factor(df_treasury_stan_time$Account)
df_treasury_stan_time$month <- lubridate::month(df_treasury_stan_time$`Update date`)
df_treasury_stan_time$year <- lubridate::year(df_treasury_stan_time$`Update date`)
# line chart by month - Full years
df_treasury_stan_time %>%
# count by date
group_by(`Update date`) %>%
mutate(Number_account = n()) %>%
ggplot() +
geom_line(aes(x =`Update date`, y = Number_account, colour = factor(`Acct cur.`))) +
labs(title = "Variation in Number of Accounts Of Most Popular Currencies", colour = "Account Currency") +
theme(legend.position="bottom")

# line chart by month - Year 2020
df_treasury_stan_time[which(df_treasury_stan_time$year == 2020),] %>%
# count by date
group_by(`Update date`) %>%
mutate(Number_account = n()) %>%
ggplot() +
geom_line(aes(x =`Update date`, y = Number_account, colour = factor(`Acct cur.`))) +
labs(title = "Variation in Number of Accounts Of Most Popular Currencies", subtitle = "Year: 2020", colour = "Account Currency") +
theme(legend.position="bottom")

# line chart by month - Full years
df_treasury_stan_time %>%
# count by date
group_by(`Update date`) %>%
mutate(Transaction_Volume = sum(Flow_amt_usd)) %>%
ggplot() +
geom_line(aes(x =`Update date`, y = Transaction_Volume, colour = factor(`Flow cur.`))) +
labs(title = "Variation in Transaction Volume Of Most Popular Currencies", subtitle = "Conversion in U.S Dollars", colour = "Flow Currency") +
theme(legend.position="bottom")

# line chart by month - Year 2020
df_treasury_stan_time[which(df_treasury_stan_time$year == 2020),] %>%
# count by date
group_by(`Update date`) %>%
mutate(Transaction_Volume = sum(Flow_amt_usd)) %>%
ggplot() +
geom_line(aes(x =`Update date`, y = Transaction_Volume, colour = factor(`Flow cur.`))) +
labs(title = "Variation in Transaction Volume Of Most Popular Currencies", subtitle = "Conversion in U.S Dollars, Year: 2020", colour = "Flow Currency") +
theme(legend.position="bottom")

## Count on Accounts and Total Transaction Volume Per Currency
df_treasury_stan_time_cnt <- df_treasury_stan_time %>%
# count by date
group_by(`Update date`) %>%
mutate(Number_account = n())
aggregate(df_treasury_stan_time_cnt$Number_account, list( df_treasury_stan_time_cnt$`Acct cur.`), FUN=sum)
## Group.1 x
## 1 AUD 165120
## 2 BRL 264939
## 3 CAD 84810
## 4 EUR 233267
## 5 GBP 150648
## 6 ILS 98515
## 7 INR 79937
## 8 JPY 99584
## 9 SGD 63354
## 10 USD 1942736
aggregate(df_treasury_stan_time$Flow_amt_usd, list(df_treasury_stan_time$`Flow cur.`), FUN=sum)
## Group.1 x
## 1 AUD 50089718
## 2 BRL 36035919
## 3 CAD 41246313
## 4 EUR 303389517
## 5 GBP 158949365
## 6 ILS 31670313
## 7 INR 32388721
## 8 JPY 42861570
## 9 SGD 11502069
## 10 USD 5080602999
# Update Time for full dataset
## Extract Hour:Minute and timezone AM/PM indicator from datetime
df_treasury_stan$Update_time_HM <- format(as.POSIXct(df_treasury_stan$`Update time`), format = "%H:%M %p")
df_treasury_stan$month <- lubridate::month(df_treasury_stan$`Update date`)
df_treasury_stan$year <- lubridate::year(df_treasury_stan$`Update date`)
extract_am_pm <- function(time_string) {
am_pm <- substr(time_string, nchar(time_string) - 1, nchar(time_string))
return(am_pm)
}
df_treasury_stan$tz_cat <- ifelse(extract_am_pm(df_treasury_stan$Update_time_HM) == "AM", "Morning Hours" , "Afternoon Hours")
## data viz
df_treasury_stan[which(df_treasury_stan$year == 2020),] %>%
group_by(`Update date`) %>%
mutate(Number_account = n()) %>%
ggplot(aes(x = `Update date`, y = Number_account)) +
geom_line(aes(color = tz_cat, linetype = tz_cat)) +
labs(title="Changes in Account Numbers at Different Update Times of the Day", subtitle = "Year: 2020") +
scale_color_manual(values = c("#de2d26", "green", "#de2d26", "#de2d26", "#de2d26", "#de2d26", "#de2d26")) +
geom_smooth(method = "loess") +
theme(legend.position="bottom", plot.title = element_text(size=13))
## `geom_smooth()` using formula = 'y ~ x'

df_treasury_stan[which(df_treasury_stan$year == 2020),] %>%
group_by(`Update date`) %>%
mutate(Transaction_Volumn = sum(Flow_amt_usd)) %>%
ggplot(aes(x = `Update date`, y = Transaction_Volumn)) +
geom_line(aes(color = tz_cat, linetype = tz_cat)) +
labs(title="Changes in Transaction Volume at Different Update Times of the Day",subtitle = "Conversion in U.S Dollars, Year: 2020") +
scale_color_manual(values = c("#de2d26", "green", "#de2d26", "#de2d26", "#de2d26", "#de2d26", "#de2d26")) +
geom_smooth(method = "loess") +
theme(legend.position="bottom", plot.title = element_text(size=13))
## `geom_smooth()` using formula = 'y ~ x'

## Account
aggregate(df_treasury_stan$Flow_amt_usd, list( df_treasury_stan$Account), FUN=sum)
## Group.1 x
## 1 1 20108194.441
## 2 2 12787807.780
## 3 3 288681.306
## 4 4 327540.638
## 5 5 2285190074.000
## 6 6 1122038408.000
## 7 7 594024995.000
## 8 8 28290008.000
## 9 9 656694703.685
## 10 10 17360007.000
## 11 11 21138117.534
## 12 12 24480027.764
## 13 13 2426150.585
## 14 14 4711681.341
## 15 15 627291.639
## 16 16 66777623.310
## 17 17 170889.010
## 18 18 353171.772
## 19 19 46260030.000
## 20 20 5520529.517
## 21 21 158334016.892
## 22 22 68090033.000
## 23 23 23455843.141
## 24 24 340003.000
## 25 25 2250153.403
## 26 26 18074139.820
## 27 27 10853009.203
## 28 28 230004.000
## 29 29 12281210.079
## 30 30 2.312
## 31 31 31670310.292
## 32 32 60011.000
## 33 33 4519566.569
## 34 34 2784725.446
## 35 35 2007566.528
## 36 36 654921.090
## 37 37 1562759.566
## 38 38 441102.973
## 39 39 777024.420
## 40 40 165781371.692
## 41 41 5.232
## 42 42 224750015.000
## 43 43 11572517.823
## 44 44 490003.000
## 45 45 50089717.211
## 46 46 4673100.884
## 47 47 3220028.000
## 48 48 4348373.480
## 49 49 37354270.119
## 50 50 3520023.000
## 51 51 5617200.140
## 52 52 1405720.000
## 53 53 3380005.000
## 54 54 110600.000
## 55 55 31658900.630
## 56 56 2000000.000
## 57 57 975697.565
## 58 58 729820.364
## 59 59 34386684.147
## 60 60 1730476.147
# Flow status
aggregate(df_treasury_stan$Flow_amt_usd, list( df_treasury_stan$`Flow status`), FUN=sum)
## Group.1 x
## 1 Actual 5725803101
## 2 Confirmed forecast 109953764
# Budget code
aggregate(df_treasury_stan$Flow_amt_usd, list(df_treasury_stan$`Budget code`), FUN=sum)
## Group.1 x
## 1 -AP_RENT 459301.31
## 2 -AP_VEND 269032493.82
## 3 -BANKFEE 211571.36
## 4 -BENEFIT 2068713.85
## 5 -DEBT 20000000.00
## 6 -ESPP 16113436.57
## 7 -FX 82045378.98
## 8 -IC_LOAN 11100000.00
## 9 -ICD 542875008.00
## 10 -INT_ICL 382101.11
## 11 -INT_INC 11164.71
## 12 -INTERCO 172465101.41
## 13 -INVEST 8323390.48
## 14 -MISC 1612131.83
## 15 -PAYFEE 60009.00
## 16 -PAYFID 28020007.00
## 17 -PAYROL 159021132.69
## 18 -PAYTAX 187325487.41
## 19 -POOL 2953600.03
## 20 -SALETAX 2757003.00
## 21 -SWEEP 318200.00
## 22 -TAXES 20289916.85
## 23 -TRLEXP 9154248.52
## 24 -VDI 1810000.00
## 25 -ZBA 1250682142.84
## 26 -ZNOCODE 540048.57
## 27 +COLLECT 854498045.85
## 28 +ESPP 16250007.11
## 29 +FX 13811461.89
## 30 +IC_LOAN 14470000.00
## 31 +ICD 412953760.00
## 32 +INT_ICL 32271.11
## 33 +INT_INC 40738.13
## 34 +INTERCO 274570801.40
## 35 +MISC 17099390.00
## 36 +STOCK 36827856.33
## 37 +SWEEP 5744811.92
## 38 +TAXES 376880.74
## 39 +ZBA 1285662588.93
## 40 +ZNOCODE 3832898.20
## Count on Flow
df_treasury_stan <- df_treasury_stan %>%
# count by date
group_by(Flow) %>%
mutate(flow_cnt = n())
# Flow Description
aggregate(df_treasury_stan$flow_cnt, list(df_treasury_stan$`Flow description`), FUN=sum)
## Group.1 x
## 1 ACH Concentration Credit 4
## 2 ACH Credit Received 359862
## 3 ACH Debit Received 543528
## 4 ACH Return Item 25
## 5 ACH Return Item or Adjustment Settlement 49
## 6 ACH Settlement 1639714
## 7 Bank fees 97969
## 8 Bank Originated Debit 10260
## 9 Book Transfer Credit 9702
## 10 Book Transfer Debit 70225
## 11 Cash Flows NOT impacting balance calculation 5066
## 12 Check Deposit 1600
## 13 Credit interests on investment 1
## 14 Credit Reversal 81
## 15 Currency and Coin Deposited 1
## 16 Debit Adjustment 4
## 17 Debit Reversal 100
## 18 Deposited Item Returned 64
## 19 Domestic Letter of Credit 1
## 20 Domestic transfer 116084
## 21 Float Adjustment 16
## 22 Foreign exchange 9
## 23 Foreign Exchange Debit 8649
## 24 Foreign Exchange of Credit 121
## 25 Foreign Remittance Credit 3422
## 26 Interest Adjustment Debit 9
## 27 Interest Credit 361
## 28 Interest Debit 3969
## 29 Interest Received 1
## 30 Internal Money Transfer 225
## 31 International Money Transfer 361
## 32 International Money Transfer Debits 676
## 33 Issued Cheques 4
## 34 Letter of Credit 9
## 35 Loan Interests 16
## 36 Lockbox Deposit 15376
## 37 Miscellaneous ACH Credit 321789
## 38 Miscellaneous ACH Debit 2689340
## 39 Miscellaneous collection 5184
## 40 Miscellaneous Credit 16
## 41 Miscellaneous Debit 2401
## 42 Miscellaneous disbursement 7744
## 43 Miscellaneous Fee Refund 9
## 44 Miscellaneous Fees 27556
## 45 Miscellaneous Security Debit 1024
## 46 Other Deposit 16
## 47 Outgoing Internal Money Transfer 184665
## 48 Paid Check 217863
## 49 Preauthorized ACH Credit 8465719
## 50 Preauthorized ACH Debit 343396
## 51 Return Item 289
## 52 summary total automatic transfer debit 9409
## 53 Sweep Principal Buy 2116
## 54 Sweep Principal Sell 2601
## 55 Total ACh Debits 196
## 56 Wire Transfer 25511983
## 57 ZBA Credit 148710
## 58 ZBA Credit Transfer 42408
## 59 ZBA Debit 111556
## 60 ZBA Debit Transfer 37830