Background

Banks and other financial institutions submit to the FDIC a report of all of the money held for deposit at each of their branches. A compilation of the reports from each bank is available as a single dataset in the FDIC’s website.

This report will detail the steps taken to analyze the FDIC ‘Summary of Deposit’ data for 2015.

The analysis

Loading the needed libraries to run the code

library(scales)
library(data.table)
library(stringr)
library(plyr)
library(ggplot2)
library(maps)
library(ggthemes)
library(ggExtra)

Initial data load

all.sod <- as.data.frame(fread("ALL_2015.csv"))

# The totals of deposits per branch field had commas and was in Thousands
all.sod$DEPSUMBR <- str_replace_all(all.sod$DEPSUMBR, ",","")
all.sod$DEPSUMBR <- sapply(all.sod$DEPSUMBR, as.numeric)
all.sod$DEPSUMBR <- all.sod$DEPSUMBR * 1000 
# The asset size per bank field had commas and was in Thousands
all.sod$ASSET <- str_replace_all(all.sod$ASSET, ",","")
all.sod$ASSET <- sapply(all.sod$ASSET, as.numeric)
all.sod$ASSET <- all.sod$ASSET * 1000
# Combined institutions that belong to the same holding company into the 'institution' field
all.sod$institution <- all.sod$NAMEHCR
all.sod$institution[is.null(all.sod$institution)] <- all.sod$NAMEFULL[is.null(all.sod$institution)]
all.sod$institution[all.sod$institution==""] <- all.sod$NAMEFULL[all.sod$institution==""]
all.sod$institution <- sapply(all.sod$institution, str_to_upper)

1. In 2015, $10 .6 Trillion dollars in total deposits were reported

text.plot <- ggplot() + theme_fivethirtyeight() + removeGrid()
text.plot <- text.plot + theme(axis.title = element_blank())
text.plot <- text.plot + theme(axis.text.x = element_blank())
text.plot <- text.plot + theme(axis.text.y = element_blank())
text.plot <- text.plot + ylim(10,50) + xlim(0,50)
text.plot <- text.plot + geom_text(aes( x=25,y=47,label="Total in deposits at all branch locations"), size=8.5, color="brown")
text.plot <- text.plot + geom_text(aes( x=25,y=40,label=paste("$",comma(sum(all.sod$DEPSUMBR)),sep="")), size=16, color="brown")
text.plot <- text.plot + geom_errorbarh(aes(y=35.5, x=10,xmin=6, xmax=50), size=1, color="black")
text.plot <- text.plot + geom_text(aes( x=27,y=34,label="13 digits"), size=4, color="black")

text.plot <- text.plot + geom_rect(aes(xmin=3, ymin=17, xmax=23, ymax=30), fill="white")

text.plot <- text.plot + geom_text(aes( x=12,y=26.5,label=comma(length(unique(all.sod$institution)))), size=17, color="brown")
text.plot <- text.plot + geom_text(aes( x=12,y=20.5,label="Banks"), size=14.5, color="brown")
text.plot <- text.plot + geom_rect(aes(xmin=23, ymin=24, xmax=49, ymax=30), fill="brown")
text.plot <- text.plot + geom_text(aes( x=36,y=27,label=paste(comma(nrow(all.sod)), "Branches")), size=9, color="white")
text.plot <- text.plot + geom_text(aes( x=29,y=21,label=round(nrow(all.sod) / length(unique(all.sod$institution)), digits=2)), size=10, color="black")
text.plot <- text.plot + geom_text(aes( x=40,y=22,label="Average branches"), size=5, color="black")
text.plot <- text.plot + geom_text(aes( x=37,y=20,label="by Bank"), size=5, color="black")

print(text.plot)

2. The data identifies 93 thousand branches distributed over 5,600

print(paste("Bank count:" , comma(length(unique(all.sod$institution))), "| Branch count:",comma(nrow(all.sod))))
## [1] "Bank count: 5,651 | Branch count: 93,283"

3. One tenth of 1% of the banks have 32% of all deposits and 18% of all of the branches

Creating a dataset for the totals by bank

sod.by.bank.deposits <- aggregate(DEPSUMBR~institution, data=all.sod, FUN="sum")
sod.by.bank.branches <- count(all.sod,c("institution"))
sod.by.bank.assets <- aggregate(ASSET~institution, data=all.sod[all.sod$DEPSUM>0, ], FUN="sum")
sod.by.bank <- merge(x=sod.by.bank.deposits, y=sod.by.bank.branches, by="institution")
sod.by.bank <- merge(x=sod.by.bank, y=sod.by.bank.assets, by="institution")
colnames(sod.by.bank) <- c("institution","deposits","branches","assets")
sod.by.bank$branch_rank <- rank(sod.by.bank$deposits, ties.method = "first")
sod.by.bank$top_branch  <- sod.by.bank$institution
sod.by.bank$top_branch[sod.by.bank$branch_rank< max(sod.by.bank$branch_rank)-4] <- ""

Creting the function to convert large numbers within the plot axis

abbrev_number <-  function(x){
  new_vector <- NULL
  for(j in 1 : length(x)){
    new_number<-"0"
    if(is.na(x[j])){0 ; x[j]<-0}
    if(x[j]>=1000){new_number <- paste(round(x[j] / 1000,1),"K", sep="")}
    if(x[j]>=1000000){new_number <- paste(round(x[j] / 1000000,1),"M", sep="")}
    if(x[j]>=1000000000){new_number <- paste(round(x[j] / 1000000000,1),"B", sep="")}
    if(x[j]>=1000000000000){new_number <- paste(round(x[j] / 1000000000000,1),"T", sep="")}
    new_vector <- c(new_vector,new_number)
  }
return(new_vector)
}
fixed.mid <- 1000000000000
asset.plot <- ggplot() + theme_fivethirtyeight()
asset.plot <- asset.plot + geom_point(aes(x=sod.by.bank$deposits, y=sod.by.bank$branches,color=sod.by.bank$assets), size=sod.by.bank$assets/200000000000)
asset.plot <- asset.plot + geom_text(aes(hjust=1.1,label=sod.by.bank$top_branch, x=sod.by.bank$deposits, y=sod.by.bank$branches), size=2.5)
asset.plot <- asset.plot + theme(plot.title = element_text( face="bold", size=14))
asset.plot <- asset.plot + theme(axis.text.y = element_text( face="bold", size=9))
asset.plot <- asset.plot + theme(axis.text.x = element_text( face="bold", size=9))
asset.plot <- asset.plot + theme(axis.title = element_text(face="bold", size=10))
asset.plot <- asset.plot + theme(legend.text = element_text( face="bold", size=8))
asset.plot <- asset.plot + theme(legend.title = element_text( face="bold", size=8))     
asset.plot <- asset.plot + theme(legend.position="right",legend.direction="vertical",legend.text.align=0)
asset.plot <- asset.plot + scale_x_continuous(labels =abbrev_number)
asset.plot <- asset.plot + scale_y_continuous(labels =abbrev_number)
asset.plot <- asset.plot + scale_colour_gradient2(name="Asset$", breaks=c(min(sod.by.bank$assets), fixed.mid,  max(sod.by.bank$assets)), labels=c(abbrev_number(min(sod.by.bank$assets)),abbrev_number(fixed.mid),  abbrev_number(max(sod.by.bank$assets))), low = "blue", mid="brown" , high = "orange", midpoint = fixed.mid)
asset.plot <- asset.plot + labs(title ="Number of Branches and Amount in Deposits by Bank",  x = "Deposits $", y ="Branches" )
print(asset.plot)

top3 <- sod.by.bank[sod.by.bank$branch_rank>=max(sod.by.bank$branch_rank)-2,c(1,2,3)]
print(paste(top3$institution, comma(top3$deposits), comma(top3$branches), sep=" | "))
## [1] "BANK OF AMERICA CORPORATION | 1,185,875,742,000 | 4,862"
## [2] "JPMORGAN CHASE & CO. | 1,130,503,863,000 | 5,548"       
## [3] "WELLS FARGO & COMPANY | 1,101,883,515,000 | 6,236"
print(paste("Total deposits:", comma(sum(top3$deposits)), "| Total branches:" , comma(sum(top3$branches))))
## [1] "Total deposits: 3,418,263,120,000 | Total branches: 16,646"
print(paste("% of deposits:", round(sum(top3$deposits)/sum(all.sod$DEPSUMBR), digits = 2),"| % of branches", round(sum(top3$branches)/nrow(all.sod),digits=2)))
## [1] "% of deposits: 0.32 | % of branches 0.18"
top5 <- sod.by.bank[sod.by.bank$branch_rank<=max(sod.by.bank$branch_rank)-3 & sod.by.bank$branch_rank>=max(sod.by.bank$branch_rank)-4,c(1,2,3)]
print(paste(top5$institution, comma(top5$deposits), comma(top5$branches), sep=" | "))
## [1] "CITIGROUP INC. | 468,350,590,000 | 812"
## [2] "U.S. BANCORP | 279,550,033,000 | 3,221"

4. 82% of banks reported deposits in 10 branches or less

all.count <- NULL
index.count <-NULL
for(j in 1 : max(10)){
  current.count <- length(sod.by.bank$deposits[sod.by.bank$branches<=j])/ nrow(sod.by.bank)
  all.count <- c(all.count, current.count) 
  index.count <- c(index.count, j)
}
branch.count <- as.data.frame(cbind(index.count, all.count))

bar.labels <- paste((round(all.count,digits = 2))*100, "%", sep="" )
branch.bar <- ggplot(data=branch.count, aes(index.count, all.count)) 
branch.bar <- branch.bar + geom_bar(stat="identity", fill="orange")
branch.bar <- branch.bar + theme_fivethirtyeight() 
branch.bar <- branch.bar + theme(plot.title = element_text( face="bold", size=16))
branch.bar <- branch.bar + theme(axis.title = element_text( face="bold", size=10))
branch.bar <- branch.bar + theme(axis.text.x = element_text( face="bold", size=9))
branch.bar <- branch.bar + theme(legend.text = element_text( face="bold", size=8))
branch.bar <- branch.bar + theme(legend.title = element_text( face="bold", size=8))
branch.bar <- branch.bar + scale_x_continuous(breaks =c(0:10))
branch.bar <- branch.bar + scale_y_continuous(breaks =c(1:10)/10, labels=NULL)
branch.bar <- branch.bar + geom_text(aes(label=bar.labels, x=index.count, y=all.count+0.05))
branch.bar <- branch.bar + labs(title ="Branch count by Bank",  x = "Max number of branches", y =NULL)
print(branch.bar)

5. 3 out of 4 branches are in the eastern half of the US

east <- all.sod[all.sod$SIMS_LONGITUDE<=-70 & all.sod$SIMS_LONGITUDE>-97.25,]
west <- all.sod[all.sod$SIMS_LONGITUDE<=-97.25 & all.sod$SIMS_LONGITUDE>=-124.5,]

map.branch <- ggplot()+ geom_point(data=east, aes(x=SIMS_LONGITUDE, y=SIMS_LATITUDE),color="red", size=0.001)
map.branch <- map.branch + geom_point(data=west, aes(x=SIMS_LONGITUDE, y=SIMS_LATITUDE),color="brown", size=0.001)
map.branch <- map.branch + theme_fivethirtyeight()
map.branch <- map.branch + xlim(-125,-70)+ylim(20,50)
map.branch <- map.branch + theme(plot.title = element_text( face="bold", size=16))
map.branch <- map.branch + removeGrid(x=TRUE, y =TRUE)
map.branch <- map.branch + geom_errorbarh(aes(y=24, x=-80,xmin=-97.25, xmax=-70))
map.branch <- map.branch + geom_errorbarh(aes(y=24, x=-100,xmin=-97.25, xmax=-124.5))
map.branch <- map.branch + geom_text(aes(size=6, y=25, x= -110, label=paste(comma(nrow(west)),"branches")),size=4)
map.branch <- map.branch + geom_text(aes(size=6, y=23, x= -110, label=paste("$",abbrev_number(sum(west$DEPSUMBR, na.rm=TRUE))," deposits", sep="")),size=4)
map.branch <- map.branch + geom_text(aes(size=6, y=25, x= -85, label=paste(comma(nrow(east)),"branches")),size=4)
map.branch <- map.branch + geom_text(aes(size=6, y=23, x= -85, label=paste("$", abbrev_number(sum(east$DEPSUMBR, na.rm=TRUE))," deposits", sep="")),size=4)
map.branch <- map.branch + geom_text(aes( y=23, x= -70, label=paste("Lon", "-70")),size=2)
map.branch <- map.branch + geom_text(aes( y=23, x= -97.25, label=paste("Lon", "-97.25")),size=2)
map.branch <- map.branch + geom_text(aes( y=23, x= -124.5, label=paste("Lon", "-124.5")),size=2)
map.branch <- map.branch + labs(title ="Branch locations in the lower 48 states")
map.branch <- map.branch + theme(axis.ticks = element_blank(), axis.text.x = element_blank(), axis.text.y = element_blank())

print(map.branch)

Here are the branch counts for the top 10 branches

sod.by.state.deposits <- aggregate(DEPSUMBR~STALPBR, data=all.sod, FUN="sum")
sod.by.state.branches <- count(all.sod,c("STALPBR"))
sod.by.state <- merge(x=sod.by.state.deposits, y=sod.by.state.branches, by=c("STALPBR"))
colnames(sod.by.state) <- c("state","deposits","branches")
sod.by.state$branch_rank <- rank(sod.by.state$branches, ties.method = "first")
top10 <- sod.by.state[sod.by.state$branch_rank>=max(sod.by.state$branch_rank)-9,]
print(paste(top10$state, ": " ,top10$branches, sep=""))
##  [1] "CA: 7099" "FL: 5388" "IL: 4629" "MI: 2743" "NC: 2528" "NJ: 3118"
##  [7] "NY: 5270" "OH: 3839" "PA: 4394" "TX: 6730"

Logo for the post

Decided that it would be cool to use ‘ggplot2’ to create the logo using the same branch location plot on item 5 but with a black background with yellow dots.

map.dark <- ggplot()+ geom_point(data=all.sod, aes(x=SIMS_LONGITUDE, y=SIMS_LATITUDE),color="yellow", size=0.001)
map.dark <- map.dark + theme_hc(bgcolor = "darkunica")
map.dark <- map.dark + xlim(-125,-70)+ylim(20,50)
map.dark <- map.dark + removeGrid(x=TRUE, y =TRUE)
map.dark <- map.dark + theme(axis.title = element_blank())
map.dark <- map.dark + geom_text(aes( y=40.3, x= -97.97, label="Open Data & Banking"),size=12, color="black")
map.dark <- map.dark + geom_text(aes( y=40, x= -98, label="Open Data & Banking"),size=12, color="white")
map.dark <- map.dark + labs(title =NULL)
map.dark <- map.dark + theme(axis.ticks = element_blank(), axis.text.x = element_blank(), axis.text.y = element_blank())

print(map.dark)

```