library(tidyr)
library(readr)
library(dplyr)
library(tidyverse)
library(tibble)
library(igraph)
library(data.table)
library(plyr)
library(ggplot2)
library(treemapify)
library(treemap)
library(lubridate)
Real Estate is a zero sum game. Real estate companies cannot increase the number of people who want to sell their homes so the only thing they can do is to increase their share of the finite market. There are two sides to a real estate transaction the Listing Side and the Selling side. The Listing Agent gets the home on the market the Selling Agent finds the buyer. There are more Selling Agents than Listing Agents . So to real estate companies it seems an obvious that the thing to do to increase sales is to increaseListing Agents focusing on the most succesful.This is the topic of monthly real estate meetings: How can we get the most succesful Listing Agents to switch to our company?
This project demonstrates that real estate companies are looking at
this question the wrong way. Real Estate companies see their world in Pie Charts when they should really be looking at Network Graphs. Real Estate companies want to recruit the biggest slices of those Listing Agents market share pie charts but this is almost almost futile. It is very difficult to get a top Listing Agent to switch firms. Things are going so very well for them where they are why should they take the gamble? But it turns out that each major Listing Agent has a coterie of agents that sell their listings. And by recruiting these Selling Agents one can replicate having the star Listing Agent in their firm. In fact, becasue of the difference between the commision splits between the star Listing Agent and the typical Selling Agent the company can actually make more money by focusing on gathering-up the coterie of the Selling Agentsthan if they had recruited the Listing Agent themself .
Typical real estate agents work on a 50/50 split. In a 4% commision on a $100,000 sale half goes to the Selling side and the remaining half goes tothe Listing side. Of that 50% the star Listing Agent might get as much as 70% and the firm the remaining 30%. The major Listing agents can get 70% or more of the commsion while the agent selling their gets a more typical 50% commision.
library(xtable)
commision_split<- fread('https://docs.google.com/spreadsheets/d/e/2PACX-1vSuclWJd3GlIRcgIcBHMGcOXI_NgaRMIQ1jqfOlV2BTvD7vGsQnuQhFGn7qp_rrUSP3FtuJSDfNoEEx/pub?output=csv')
print(commision_split)
## V1 Sale _Price Commsion_Split What Agent Nets
## 1: Listing_Agent 100000 70/30 1400
## 2: Selling_Agent 100000 50/50 2000
## What _Company_Nets
## 1: 600
## 2: 2000
library(data.table)
SeaCliff1<- fread('https://docs.google.com/spreadsheets/d/e/2PACX-1vRzU2os2K1_iwC7Gjk2r6TM03DxOyiRB644NLpCGMRpHpU2bGKNi8CUB8U5vG_Mkfc1wR2SQj8ah-c2/pub?output=csv')
head(SeaCliff1)
## Listing Date Listing Agent Selling Agent Sold Price
## 1: 5/23/2012 Sciubba, Terry Gioia, Kim K 490000
## 2: 1/1/2012 Cleary, Christina T Brix, Dee Dee H 585000
## 3: 3/12/2012 Harrigan, Patricia Jenney, Marilyn R 1050000
## 4: 3/26/2012 Moser, Patricia M Mindich, Melisa D 535000
## 5: 2/6/2012 Sciubba, Terry Vigliotti, Kim 1087500
## 6: 3/27/2012 Regolini, Lynn P Burke, Loraine 380000
# A preference
#eliminate scientific notation
options(scipen = 999)
# The names have spaces. That is going to give us grief somewehre down the line. Lets fix that.
names(SeaCliff1)<-c("Listing_Date","Listing_Agent","Selling_Agent","Sold_Price")
#head(SeaCliff1)
#Lets seperate the last and first names of the Listing_Agents.
SeaCliff1<-separate(SeaCliff1,col = "Listing_Agent",into = c("Listing_Last","Listing_First"),sep = ",")
#head(SeaCliff1)
#Now Lets seperate the last and first namse od the Selling_Agents.
SeaCliff1<-separate(SeaCliff1,col = "Selling_Agent",into = c("Selling_Last","Selling_First"),sep = ",")
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 7 rows [62,
## 72, 87, 167, 215, 216, 290].
#head(SeaCliff1)
#Now drop first names
SeaCliff1$Listing_First<- NULL
SeaCliff1$Selling_First<-NULL
#head(SeaCliff1)
names(SeaCliff1)<-c("Date","Listing","Selling","Price")
head(SeaCliff1)
## Date Listing Selling Price
## 1: 5/23/2012 Sciubba Gioia 490000
## 2: 1/1/2012 Cleary Brix 585000
## 3: 3/12/2012 Harrigan Jenney 1050000
## 4: 3/26/2012 Moser Mindich 535000
## 5: 2/6/2012 Sciubba Vigliotti 1087500
## 6: 3/27/2012 Regolini Burke 380000
#Lets make sure there are no rows with missing data.
na.omit(SeaCliff1)
## Date Listing Selling Price
## 1: 5/23/2012 Sciubba Gioia 490000
## 2: 1/1/2012 Cleary Brix 585000
## 3: 3/12/2012 Harrigan Jenney 1050000
## 4: 3/26/2012 Moser Mindich 535000
## 5: 2/6/2012 Sciubba Vigliotti 1087500
## ---
## 329: 6/7/2017 Heimer Parisi 739000
## 330: 1/31/2017 Maxwell Pournaras McAndrew 725000
## 331: 11/10/2017 Ross Ross 590000
## 332: 10/26/2017 Parisi Parisi 592000
## 333: 11/18/2017 Prinz Chase 300000
dim(SeaCliff1) #333,4
## [1] 333 4
head(SeaCliff1)
## Date Listing Selling Price
## 1: 5/23/2012 Sciubba Gioia 490000
## 2: 1/1/2012 Cleary Brix 585000
## 3: 3/12/2012 Harrigan Jenney 1050000
## 4: 3/26/2012 Moser Mindich 535000
## 5: 2/6/2012 Sciubba Vigliotti 1087500
## 6: 3/27/2012 Regolini Burke 380000
# we are only intersted in the year not the month nor date.
SeaCliff1$Date<-format(as.Date(SeaCliff1$Date,format = "%m/%d/%Y"),"%Y")
head(SeaCliff1)
## Date Listing Selling Price
## 1: 2012 Sciubba Gioia 490000
## 2: 2012 Cleary Brix 585000
## 3: 2012 Harrigan Jenney 1050000
## 4: 2012 Moser Mindich 535000
## 5: 2012 Sciubba Vigliotti 1087500
## 6: 2012 Regolini Burke 380000
Lets convert Date from character
SeaCliff1$Date <-as.numeric(SeaCliff1$Date)
head(SeaCliff1)
## Date Listing Selling Price
## 1: 2012 Sciubba Gioia 490000
## 2: 2012 Cleary Brix 585000
## 3: 2012 Harrigan Jenney 1050000
## 4: 2012 Moser Mindich 535000
## 5: 2012 Sciubba Vigliotti 1087500
## 6: 2012 Regolini Burke 380000
I need 2 data sets. One just by listing agents 2nd by each pair of list /sell agent.
SeaCliff2 <- SeaCliff1
head(SeaCliff2)
## Date Listing Selling Price
## 1: 2012 Sciubba Gioia 490000
## 2: 2012 Cleary Brix 585000
## 3: 2012 Harrigan Jenney 1050000
## 4: 2012 Moser Mindich 535000
## 5: 2012 Sciubba Vigliotti 1087500
## 6: 2012 Regolini Burke 380000
Now let us sum up all of the sales of each listing agent.
SC3<- SeaCliff2
Listing_Agents_Total<- ddply(SC3,c("Listing"), summarise,
all = sum(Price))
#Listing_Agents_Total
Listing_Agents_Total2<-Listing_Agents_Total
Listing_Agents_Total3<-arrange(Listing_Agents_Total2 , desc(all))
#Listing_Agents_Total3
blank_theme <- theme_minimal()+
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.border = element_blank(),
panel.grid=element_blank(),
axis.ticks = element_blank(),
plot.title=element_text(size=14, face="bold")
)
SC_Pie_1<- ggplot(Listing_Agents_Total3,aes(x="", y= all, fill= Listing))+
geom_bar(width= 1, stat= "identity")+
theme(legend.position = "none")
SC_Pie2<- SC_Pie_1+
coord_polar("y", start = 0)+
theme(legend.position = "none")
SC_Pie2
SC_treemap<- ggplot(Listing_Agents_Total3,aes(area=all, fill= Listing))+
geom_treemap()+
theme(legend.position = "none")
SC_treemap
First we must group by the Listing Agent and Selling Agent and sum their sales.
B<- SeaCliff2
#head(B)
Team<-ddply(B ,c("Listing","Selling"), summarise,
all= sum(Price))
#Team
SeaCliff_Top_20<- head(arrange(Listing_Agents_Total3, desc(all)),n=20)
#head(SeaCliff_Top_20,20)
graph_one<- graph_from_data_frame(Team, directed = FALSE)
plot(graph_one)
SeaCliff_Top_20<- head(arrange(Listing_Agents_Total3, desc(all)),n=20) #head(SeaCliff_Top_20,20)
Top_20_Teams <- head(arrange(Team, desc(all)), n=20)
#Top_20_Teams
graph_two<- graph_from_data_frame(Top_20_Teams, directed = TRUE)
plot(graph_two)