Banks and other financial institutions submit to the FDIC a report of all of the money held for deposit at each of their branches. A compilation of the reports from each bank is available as a single dataset in the FDIC’s website.
This report will detail the steps taken to analyze the FDIC ‘Summary of Deposit’ data for 2015.
Loading the needed libraries to run the code
library(scales)
library(data.table)
library(stringr)
library(plyr)
library(ggplot2)
library(maps)
library(ggthemes)
library(ggExtra)
all.sod <- as.data.frame(fread("ALL_2015.csv"))
# The totals of deposits per branch field had commas and was in Thousands
all.sod$DEPSUMBR <- str_replace_all(all.sod$DEPSUMBR, ",","")
all.sod$DEPSUMBR <- sapply(all.sod$DEPSUMBR, as.numeric)
all.sod$DEPSUMBR <- all.sod$DEPSUMBR * 1000
# The asset size per bank field had commas and was in Thousands
all.sod$ASSET <- str_replace_all(all.sod$ASSET, ",","")
all.sod$ASSET <- sapply(all.sod$ASSET, as.numeric)
all.sod$ASSET <- all.sod$ASSET * 1000
# Combined institutions that belong to the same holding company into the 'institution' field
all.sod$institution <- all.sod$NAMEHCR
all.sod$institution[is.null(all.sod$institution)] <- all.sod$NAMEFULL[is.null(all.sod$institution)]
all.sod$institution[all.sod$institution==""] <- all.sod$NAMEFULL[all.sod$institution==""]
all.sod$institution <- sapply(all.sod$institution, str_to_upper)
text.plot <- ggplot() + theme_fivethirtyeight() + removeGrid()
text.plot <- text.plot + theme(axis.title = element_blank())
text.plot <- text.plot + theme(axis.text.x = element_blank())
text.plot <- text.plot + theme(axis.text.y = element_blank())
text.plot <- text.plot + ylim(10,50) + xlim(0,50)
text.plot <- text.plot + geom_text(aes( x=25,y=47,label="Total in deposits at all branch locations"), size=8.5, color="brown")
text.plot <- text.plot + geom_text(aes( x=25,y=40,label=paste("$",comma(sum(all.sod$DEPSUMBR)),sep="")), size=16, color="brown")
text.plot <- text.plot + geom_errorbarh(aes(y=35.5, x=10,xmin=6, xmax=50), size=1, color="black")
text.plot <- text.plot + geom_text(aes( x=27,y=34,label="13 digits"), size=4, color="black")
text.plot <- text.plot + geom_rect(aes(xmin=3, ymin=17, xmax=23, ymax=30), fill="white")
text.plot <- text.plot + geom_text(aes( x=12,y=26.5,label=comma(length(unique(all.sod$institution)))), size=17, color="brown")
text.plot <- text.plot + geom_text(aes( x=12,y=20.5,label="Banks"), size=14.5, color="brown")
text.plot <- text.plot + geom_rect(aes(xmin=23, ymin=24, xmax=49, ymax=30), fill="brown")
text.plot <- text.plot + geom_text(aes( x=36,y=27,label=paste(comma(nrow(all.sod)), "Branches")), size=9, color="white")
text.plot <- text.plot + geom_text(aes( x=29,y=21,label=round(nrow(all.sod) / length(unique(all.sod$institution)), digits=2)), size=10, color="black")
text.plot <- text.plot + geom_text(aes( x=40,y=22,label="Average branches"), size=5, color="black")
text.plot <- text.plot + geom_text(aes( x=37,y=20,label="by Bank"), size=5, color="black")
print(text.plot)
print(paste("Bank count:" , comma(length(unique(all.sod$institution))), "| Branch count:",comma(nrow(all.sod))))
## [1] "Bank count: 5,651 | Branch count: 93,283"
Creating a dataset for the totals by bank
sod.by.bank.deposits <- aggregate(DEPSUMBR~institution, data=all.sod, FUN="sum")
sod.by.bank.branches <- count(all.sod,c("institution"))
sod.by.bank.assets <- aggregate(ASSET~institution, data=all.sod[all.sod$DEPSUM>0, ], FUN="sum")
sod.by.bank <- merge(x=sod.by.bank.deposits, y=sod.by.bank.branches, by="institution")
sod.by.bank <- merge(x=sod.by.bank, y=sod.by.bank.assets, by="institution")
colnames(sod.by.bank) <- c("institution","deposits","branches","assets")
sod.by.bank$branch_rank <- rank(sod.by.bank$deposits, ties.method = "first")
sod.by.bank$top_branch <- sod.by.bank$institution
sod.by.bank$top_branch[sod.by.bank$branch_rank< max(sod.by.bank$branch_rank)-4] <- ""
Creting the function to convert large numbers within the plot axis
abbrev_number <- function(x){
new_vector <- NULL
for(j in 1 : length(x)){
new_number<-"0"
if(is.na(x[j])){0 ; x[j]<-0}
if(x[j]>=1000){new_number <- paste(round(x[j] / 1000,1),"K", sep="")}
if(x[j]>=1000000){new_number <- paste(round(x[j] / 1000000,1),"M", sep="")}
if(x[j]>=1000000000){new_number <- paste(round(x[j] / 1000000000,1),"B", sep="")}
if(x[j]>=1000000000000){new_number <- paste(round(x[j] / 1000000000000,1),"T", sep="")}
new_vector <- c(new_vector,new_number)
}
return(new_vector)
}
fixed.mid <- 1000000000000
asset.plot <- ggplot() + theme_fivethirtyeight()
asset.plot <- asset.plot + geom_point(aes(x=sod.by.bank$deposits, y=sod.by.bank$branches,color=sod.by.bank$assets), size=sod.by.bank$assets/200000000000)
asset.plot <- asset.plot + geom_text(aes(hjust=1.1,label=sod.by.bank$top_branch, x=sod.by.bank$deposits, y=sod.by.bank$branches), size=2.5)
asset.plot <- asset.plot + theme(plot.title = element_text( face="bold", size=14))
asset.plot <- asset.plot + theme(axis.text.y = element_text( face="bold", size=9))
asset.plot <- asset.plot + theme(axis.text.x = element_text( face="bold", size=9))
asset.plot <- asset.plot + theme(axis.title = element_text(face="bold", size=10))
asset.plot <- asset.plot + theme(legend.text = element_text( face="bold", size=8))
asset.plot <- asset.plot + theme(legend.title = element_text( face="bold", size=8))
asset.plot <- asset.plot + theme(legend.position="right",legend.direction="vertical",legend.text.align=0)
asset.plot <- asset.plot + scale_x_continuous(labels =abbrev_number)
asset.plot <- asset.plot + scale_y_continuous(labels =abbrev_number)
asset.plot <- asset.plot + scale_colour_gradient2(name="Asset$", breaks=c(min(sod.by.bank$assets), fixed.mid, max(sod.by.bank$assets)), labels=c(abbrev_number(min(sod.by.bank$assets)),abbrev_number(fixed.mid), abbrev_number(max(sod.by.bank$assets))), low = "blue", mid="brown" , high = "orange", midpoint = fixed.mid)
asset.plot <- asset.plot + labs(title ="Number of Branches and Amount in Deposits by Bank", x = "Deposits $", y ="Branches" )
print(asset.plot)
top3 <- sod.by.bank[sod.by.bank$branch_rank>=max(sod.by.bank$branch_rank)-2,c(1,2,3)]
print(paste(top3$institution, comma(top3$deposits), comma(top3$branches), sep=" | "))
## [1] "BANK OF AMERICA CORPORATION | 1,185,875,742,000 | 4,862"
## [2] "JPMORGAN CHASE & CO. | 1,130,503,863,000 | 5,548"
## [3] "WELLS FARGO & COMPANY | 1,101,883,515,000 | 6,236"
print(paste("Total deposits:", comma(sum(top3$deposits)), "| Total branches:" , comma(sum(top3$branches))))
## [1] "Total deposits: 3,418,263,120,000 | Total branches: 16,646"
print(paste("% of deposits:", round(sum(top3$deposits)/sum(all.sod$DEPSUMBR), digits = 2),"| % of branches", round(sum(top3$branches)/nrow(all.sod),digits=2)))
## [1] "% of deposits: 0.32 | % of branches 0.18"
top5 <- sod.by.bank[sod.by.bank$branch_rank<=max(sod.by.bank$branch_rank)-3 & sod.by.bank$branch_rank>=max(sod.by.bank$branch_rank)-4,c(1,2,3)]
print(paste(top5$institution, comma(top5$deposits), comma(top5$branches), sep=" | "))
## [1] "CITIGROUP INC. | 468,350,590,000 | 812"
## [2] "U.S. BANCORP | 279,550,033,000 | 3,221"
all.count <- NULL
index.count <-NULL
for(j in 1 : max(10)){
current.count <- length(sod.by.bank$deposits[sod.by.bank$branches<=j])/ nrow(sod.by.bank)
all.count <- c(all.count, current.count)
index.count <- c(index.count, j)
}
branch.count <- as.data.frame(cbind(index.count, all.count))
bar.labels <- paste((round(all.count,digits = 2))*100, "%", sep="" )
branch.bar <- ggplot(data=branch.count, aes(index.count, all.count))
branch.bar <- branch.bar + geom_bar(stat="identity", fill="orange")
branch.bar <- branch.bar + theme_fivethirtyeight()
branch.bar <- branch.bar + theme(plot.title = element_text( face="bold", size=16))
branch.bar <- branch.bar + theme(axis.title = element_text( face="bold", size=10))
branch.bar <- branch.bar + theme(axis.text.x = element_text( face="bold", size=9))
branch.bar <- branch.bar + theme(legend.text = element_text( face="bold", size=8))
branch.bar <- branch.bar + theme(legend.title = element_text( face="bold", size=8))
branch.bar <- branch.bar + scale_x_continuous(breaks =c(0:10))
branch.bar <- branch.bar + scale_y_continuous(breaks =c(1:10)/10, labels=NULL)
branch.bar <- branch.bar + geom_text(aes(label=bar.labels, x=index.count, y=all.count+0.05))
branch.bar <- branch.bar + labs(title ="Branch count by Bank", x = "Max number of branches", y =NULL)
print(branch.bar)
east <- all.sod[all.sod$SIMS_LONGITUDE<=-70 & all.sod$SIMS_LONGITUDE>-97.25,]
west <- all.sod[all.sod$SIMS_LONGITUDE<=-97.25 & all.sod$SIMS_LONGITUDE>=-124.5,]
map.branch <- ggplot()+ geom_point(data=east, aes(x=SIMS_LONGITUDE, y=SIMS_LATITUDE),color="red", size=0.001)
map.branch <- map.branch + geom_point(data=west, aes(x=SIMS_LONGITUDE, y=SIMS_LATITUDE),color="brown", size=0.001)
map.branch <- map.branch + theme_fivethirtyeight()
map.branch <- map.branch + xlim(-125,-70)+ylim(20,50)
map.branch <- map.branch + theme(plot.title = element_text( face="bold", size=16))
map.branch <- map.branch + removeGrid(x=TRUE, y =TRUE)
map.branch <- map.branch + geom_errorbarh(aes(y=24, x=-80,xmin=-97.25, xmax=-70))
map.branch <- map.branch + geom_errorbarh(aes(y=24, x=-100,xmin=-97.25, xmax=-124.5))
map.branch <- map.branch + geom_text(aes(size=6, y=25, x= -110, label=paste(comma(nrow(west)),"branches")),size=4)
map.branch <- map.branch + geom_text(aes(size=6, y=23, x= -110, label=paste("$",abbrev_number(sum(west$DEPSUMBR, na.rm=TRUE))," deposits", sep="")),size=4)
map.branch <- map.branch + geom_text(aes(size=6, y=25, x= -85, label=paste(comma(nrow(east)),"branches")),size=4)
map.branch <- map.branch + geom_text(aes(size=6, y=23, x= -85, label=paste("$", abbrev_number(sum(east$DEPSUMBR, na.rm=TRUE))," deposits", sep="")),size=4)
map.branch <- map.branch + geom_text(aes( y=23, x= -70, label=paste("Lon", "-70")),size=2)
map.branch <- map.branch + geom_text(aes( y=23, x= -97.25, label=paste("Lon", "-97.25")),size=2)
map.branch <- map.branch + geom_text(aes( y=23, x= -124.5, label=paste("Lon", "-124.5")),size=2)
map.branch <- map.branch + labs(title ="Branch locations in the lower 48 states")
map.branch <- map.branch + theme(axis.ticks = element_blank(), axis.text.x = element_blank(), axis.text.y = element_blank())
print(map.branch)
Here are the branch counts for the top 10 branches
sod.by.state.deposits <- aggregate(DEPSUMBR~STALPBR, data=all.sod, FUN="sum")
sod.by.state.branches <- count(all.sod,c("STALPBR"))
sod.by.state <- merge(x=sod.by.state.deposits, y=sod.by.state.branches, by=c("STALPBR"))
colnames(sod.by.state) <- c("state","deposits","branches")
sod.by.state$branch_rank <- rank(sod.by.state$branches, ties.method = "first")
top10 <- sod.by.state[sod.by.state$branch_rank>=max(sod.by.state$branch_rank)-9,]
print(paste(top10$state, ": " ,top10$branches, sep=""))
## [1] "CA: 7099" "FL: 5388" "IL: 4629" "MI: 2743" "NC: 2528" "NJ: 3118"
## [7] "NY: 5270" "OH: 3839" "PA: 4394" "TX: 6730"
Decided that it would be cool to use ‘ggplot2’ to create the logo using the same branch location plot on item 5 but with a black background with yellow dots.
map.dark <- ggplot()+ geom_point(data=all.sod, aes(x=SIMS_LONGITUDE, y=SIMS_LATITUDE),color="yellow", size=0.001)
map.dark <- map.dark + theme_hc(bgcolor = "darkunica")
map.dark <- map.dark + xlim(-125,-70)+ylim(20,50)
map.dark <- map.dark + removeGrid(x=TRUE, y =TRUE)
map.dark <- map.dark + theme(axis.title = element_blank())
map.dark <- map.dark + geom_text(aes( y=40.3, x= -97.97, label="Open Data & Banking"),size=12, color="black")
map.dark <- map.dark + geom_text(aes( y=40, x= -98, label="Open Data & Banking"),size=12, color="white")
map.dark <- map.dark + labs(title =NULL)
map.dark <- map.dark + theme(axis.ticks = element_blank(), axis.text.x = element_blank(), axis.text.y = element_blank())
print(map.dark)
```