The primary purpose of this document is to store the code and considerations for the Baseline Model
Folder: Final Documents
# House Price Data
## HPM04 - Monthly Series
HPM04 <- cso_get_data("HPM04")
HPM04 <- HPM04 %>%
pivot_longer(!1:5, names_to = "year_month")
HPM04$Year <-substr(HPM04$year_month,1,4)
HPM04$Month <- sub(".* ", "", HPM04$year_month)
HPM04$Month_NR <- as.integer(factor(HPM04$Month, levels=month.name))
HPM04$Date <- as.yearmon(paste(HPM04$Year, HPM04$Month_NR), "%Y %m")
GEO_Merge_HPM04<- GEO_Merge_HPM04 %>%
rename(Eircode.Output=`Eircode Output`)
HPM04 <- full_join(HPM04, GEO_Merge_HPM04, by = "Eircode.Output")
HPM04<-HPM04 %>%
filter(Statistic == "Mean Sale Price")%>%
filter(Dwelling.Status == "All Dwelling Statuses") %>%
filter(Stamp.Duty.Event == "Executions") %>%
filter(Type.of.Buyer == "All Buyer Types") %>%
filter(Eircode.Output != "All")
HPM04.1 <- HPM04 %>%
group_by(Year,Constituency,Eircode.Output)%>%
summarise(Avg_HP = mean(value, na.rm=TRUE))
## HPA04 - Yearly Series
HPA04 <- cso_get_data("HPA04")
HPA04 <- HPA04 %>%
pivot_longer(!1:5, names_to = "year")
HPA04 <- full_join(HPA04, GEO_Merge_HPM04, by = "Eircode.Output")
HPA04<-HPA04 %>%
filter(Statistic == "Mean Sale Price")%>%
filter(Dwelling.Status == "All Dwelling Statuses") %>%
filter(Stamp.Duty.Event == "Executions") %>%
filter(Type.of.Buyer == "All Buyer Types") %>%
filter(Eircode.Output != "All")
HPA04.1 <- HPA04 %>%
group_by(year,Constituency,Eircode.Output)%>%
summarise(Avg_HP = mean(value, na.rm=TRUE))
# RTB Data
RIA02 <- cso_get_data("RIA02")
RIA02 <- RIA02%>%
pivot_longer(!1:4, names_to = "year")
RIA02 <- merge(GEO_Merge_RIA02, RIA02, by.x = "Location", by.y="Location")
RIA02 <- full_join(RIA02, RTB_Exclude, by = "Location")
# The first iteration to include All Bedrooms, All Property Types
RIA02.1 <- RIA02 %>%
filter(Number.of.Bedrooms=="All bedrooms")%>%
filter(Property.Type=="All property types")%>%
filter(`Excluded (=0)`=="1")
HPM04 contains house price data by Eircode Ouput, the series is monthly. - This was not used for the Thesis baseline model, HPA04 was used as to avoid having to aggregate months to years. However “Model 3B” shows the variance in coefficients if this dataset was used and monthly averages were aggregated to year.
# Calculate average HPI in desired format
HPM04_GE <- merge(HPM04.1, GE_Year, by.x = "Year", by.y = "Year")
HPM04_GE <- HPM04_GE %>%
group_by(Eircode.Output) %>%
dplyr::mutate(lag1 = dplyr::lag(Avg_HP, n = 1, default = NA))
HPM04_GE$HPI <- (HPM04_GE$Avg_HP - HPM04_GE$lag1)/HPM04_GE$lag1
HPM04_GE <- HPM04_GE%>%
filter(GE!="NA")
# Calculate total HPI in desired format
HPM04_GE$Y1 <- ifelse(HPM04_GE$GE == "GE 2016",MINIFS(HPM04_GE$Year,HPM04_GE$GE,"GE 2016"),MINIFS(HPM04_GE$Year,HPM04_GE$GE,"GE 2020"))
HPM04_GE$Yt <- ifelse(HPM04_GE$GE == "GE 2016",MAXIFS(HPM04_GE$Year,HPM04_GE$GE,"GE 2016"),MAXIFS(HPM04_GE$Year,HPM04_GE$GE,"GE 2020"))
HPM04_GE$Find_L <- paste(HPM04_GE$Year,HPM04_GE$Eircode.Output,sep="-")
HPM04_GE$Y1_L <- paste(HPM04_GE$Y1,HPM04_GE$Eircode.Output,sep="-")
HPM04_GE$Yt_L <- paste(HPM04_GE$Yt,HPM04_GE$Eircode.Output,sep="-")
Find_HPM04_L1 <- HPM04_GE %>%
group_by(Year,Y1_L) %>%
select(4,11)
Find_HPM04_L1 <- Find_HPM04_L1 %>%
filter(Year == "2011"|Year == "2016")
HPM04_Merge <- merge(HPM04_GE,Find_HPM04_L1,"Y1_L")
HPM04_Merge <- HPM04_Merge %>%
rename(Avg_HP=Avg_HP.x)
HPM04_Merge<- HPM04_Merge %>%
rename(HP_Y1=Avg_HP.y)
Find_HPM04_L2 <- HPM04_Merge %>%
group_by(Year.x,Yt_L) %>%
select(4,5)
Find_HPM04_L2 <- Find_HPM04_L2 %>%
filter(Year.x == "2015"|Year.x == "2019")
HPM04_Merge <- merge(HPM04_Merge,Find_HPM04_L2,"Yt_L")
HPM04_Merge <- HPM04_Merge %>%
rename(Avg_HP=Avg_HP.x)
HPM04_Merge <- HPM04_Merge %>%
rename(HP_Yt=Avg_HP.y)
HPM04_Merge <- HPM04_Merge %>%
rename(Year=Year.x.x)
HPM04_Final<- HPM04_Merge %>%
select(3:11,14,17)
HPM04_Final$Diff <- HPM04_Final$HP_Yt - HPM04_Final$HP_Y1
HPM04_Final$DiffPc <- HPM04_Final$Diff/HPM04_Final$HP_Y1
# Format the data in "Model" format for Fixed Effects
options(scipen=999)
Model.HPM04 <- HPM04_GE %>%
group_by(Constituency,GE)%>%
summarise(Avg_HPI=mean(HPI,na.rm=TRUE))
Model.HPM04 <- Model.HPM04[order(Model.HPM04$GE),]
Model.HPM04.Tot <- HPM04_Final %>%
group_by(Eircode.Output.x,Constituency,GE)%>%
summarise(Avg_HP_Y1=mean(HP_Y1,na.rm=TRUE),
Avg_HP_Yt=mean(HP_Yt,na.rm=TRUE))
Model.HPM04.Tot <- Model.HPM04.Tot %>%
group_by(Constituency,GE) %>%
summarise(Avg_HP_Y1=mean(Avg_HP_Y1,na.rm=TRUE),
Avg_HP_Yt=mean(Avg_HP_Yt,na.rm=TRUE))
Model.HPM04.Tot$Tot_HPI <- (Model.HPM04.Tot$Avg_HP_Yt-Model.HPM04.Tot$Avg_HP_Y1)/Model.HPM04.Tot$Avg_HP_Y1
Model.HPM04.Tot <- Model.HPM04.Tot[order(Model.HPM04.Tot$GE),]
Model_Vars.HPM04 <- Model.HPM04 %>% right_join(Model.HPM04.Tot,by=c("Constituency","GE"))
TableHPM04 <- Model_Vars.HPM04 %>%
group_by(GE)%>%
summarise(Avg_HPI = mean(Avg_HPI),
Tot_HPI = mean(Tot_HPI))
# Calculate average HPI in desired format
HPA04_GE <- merge(HPA04.1, GE_Year, by.x = "year", by.y = "Year")
HPA04_GE <- HPA04_GE %>%
group_by(Eircode.Output) %>%
dplyr::mutate(lag1 = dplyr::lag(Avg_HP, n = 1, default = NA))
HPA04_GE$HPI <- (HPA04_GE$Avg_HP - HPA04_GE$lag1)/HPA04_GE$lag1
HPA04_GE <- HPA04_GE%>%
filter(GE!="NA")
# Calculate total HPI in desired format
HPA04_GE$Y1 <- ifelse(HPA04_GE$GE == "GE 2016",MINIFS(HPA04_GE$year,HPA04_GE$GE,"GE 2016"),MINIFS(HPA04_GE$year,HPA04_GE$GE,"GE 2020"))
HPA04_GE$Yt <- ifelse(HPA04_GE$GE == "GE 2016",MAXIFS(HPA04_GE$year,HPA04_GE$GE,"GE 2016"),MAXIFS(HPA04_GE$year,HPA04_GE$GE,"GE 2020"))
HPA04_GE$Find_L <- paste(HPA04_GE$year,HPA04_GE$Eircode.Output,sep="-")
HPA04_GE$Y1_L <- paste(HPA04_GE$Y1,HPA04_GE$Eircode.Output,sep="-")
HPA04_GE$Yt_L <- paste(HPA04_GE$Yt,HPA04_GE$Eircode.Output,sep="-")
Find_HPA04_L1 <- HPA04_GE %>%
group_by(year,Y1_L) %>%
select(4,11)
Find_HPA04_L1 <- Find_HPA04_L1 %>%
filter(year == "2011"|year == "2016")
HPA04_Merge <- merge(HPA04_GE,Find_HPA04_L1,"Y1_L")
HPA04_Merge <- HPA04_Merge %>%
rename(Avg_HP=Avg_HP.x)
HPA04_Merge<- HPA04_Merge %>%
rename(HP_Y1=Avg_HP.y)
Find_HPA04_L2 <- HPA04_Merge %>%
group_by(year.x,Yt_L) %>%
select(4,5)
Find_HPA04_L2 <- Find_HPA04_L2 %>%
filter(year.x == "2015"|year.x == "2019")
HPA04_Merge <- merge(HPA04_Merge,Find_HPA04_L2,"Yt_L")
HPA04_Merge <- HPA04_Merge %>%
rename(Avg_HP=Avg_HP.x)
HPA04_Merge <- HPA04_Merge %>%
rename(HP_Yt=Avg_HP.y)
HPA04_Merge <- HPA04_Merge %>%
rename(year=year.x.x)
HPA04_Final<- HPA04_Merge %>%
select(3:11,14,17)
HPA04_Final$Diff <- HPA04_Final$HP_Yt - HPA04_Final$HP_Y1
HPA04_Final$DiffPc <- HPA04_Final$Diff/HPA04_Final$HP_Y1
# Format the data in "Model" format for Fixed Effects
Model.HPA04 <- HPA04_GE %>%
group_by(Constituency,GE)%>%
summarise(Avg_HPI=mean(HPI,na.rm=TRUE))
Model.HPA04 <- Model.HPA04[order(Model.HPA04$GE),]
Model.HPA04.Tot <- HPA04_Final %>%
group_by(Eircode.Output.x,Constituency,GE)%>%
summarise(Avg_HP_Y1=mean(HP_Y1,na.rm=TRUE),
Avg_HP_Yt=mean(HP_Yt,na.rm=TRUE))
Model.HPA04.Tot <- Model.HPA04.Tot %>%
group_by(Constituency,GE) %>%
summarise(Avg_HP_Y1=mean(Avg_HP_Y1,na.rm=TRUE),
Avg_HP_Yt=mean(Avg_HP_Yt,na.rm=TRUE))
Model.HPA04.Tot$Tot_HPI <- (Model.HPA04.Tot$Avg_HP_Yt-Model.HPA04.Tot$Avg_HP_Y1)/Model.HPA04.Tot$Avg_HP_Y1
Model.HPA04.Tot <- Model.HPA04.Tot[order(Model.HPA04.Tot$GE),]
Model_Vars.HPA04 <- Model.HPA04 %>% right_join(Model.HPA04.Tot,by=c("Constituency","GE"))
TableHPA04 <- Model_Vars.HPA04 %>%
group_by(GE)%>%
summarise(Avg_HPI = mean(Avg_HPI),
Tot_HPI = mean(Tot_HPI))
HPA04 is the dataset which was used for the Baseline Model in the Thesis. Model 3A refers to the Basline Model which applied house price data from the HPA04 dataset.
RIA02.1 <- RIA02.1 %>%
filter(year >= "2010")
RTB <- RIA02.1 %>%
group_by(Locations_RIA02,Constituency, year)%>%
summarise(Avg_Value = mean(value,na.rm=TRUE))
# Keep RTB as the comparison to the downloaded file from April, noting the average of -0.20% variation to August upload: See RIA02 Variation.xlsx for details
# Calculate average RPI in desired format
RTB_GE <- merge(RTB, GE_Year, by.x = "year", by.y = "Year")
RTB_GE <- RTB_GE %>%
group_by(Locations_RIA02) %>%
dplyr::mutate(lag1 = dplyr::lag(Avg_Value, n = 1, default = NA))
RTB_GE$RPI <- (RTB_GE$Avg_Value - RTB_GE$lag1)/RTB_GE$lag1
RTB_GE <- RTB_GE%>%
filter(GE!="NA")
# Calculate total RPI in desired format
RTB_GE$Y1 <- ifelse(RTB_GE$GE == "GE 2016",MINIFS(RTB_GE$year,RTB_GE$GE,"GE 2016"),MINIFS(RTB_GE$year,RTB_GE$GE,"GE 2020"))
RTB_GE$Yt <- ifelse(RTB_GE$GE == "GE 2016",MAXIFS(RTB_GE$year,RTB_GE$GE,"GE 2016"),MAXIFS(RTB_GE$year,RTB_GE$GE,"GE 2020"))
RTB_GE$Find_L <- paste(RTB_GE$year,RTB_GE$Locations_RIA02,sep="-")
RTB_GE$Y1_L <- paste(RTB_GE$Y1,RTB_GE$Locations_RIA02,sep="-")
RTB_GE$Yt_L <- paste(RTB_GE$Yt,RTB_GE$Locations_RIA02,sep="-")
Find_RTB_L1 <- RTB_GE %>%
group_by(year,Y1_L) %>%
select(4,12)
Find_RTB_L1 <- Find_RTB_L1 %>%
filter(year == "2011"|year == "2016")
Find_RTB_L1<-Find_RTB_L1 %>%
select(2:3)
RTB_Merge <- merge(RTB_GE,Find_RTB_L1,"Y1_L")
RTB_Merge<- RTB_Merge %>%
rename(Avg_Value=Avg_Value.x)
RTB_Merge<- RTB_Merge %>%
rename(Value_Y1=Avg_Value.y)
Find_RTB_L2 <- RTB_GE %>%
group_by(year,Yt_L) %>%
select(4,12)
Find_RTB_L2 <- Find_RTB_L2 %>%
filter(year == "2015"|year == "2019")
Find_RTB_L2<-Find_RTB_L2 %>%
select(2:3)
RTB_Merge <- merge(RTB_Merge,Find_RTB_L2,"Yt_L")
RTB_Merge<- RTB_Merge %>%
rename(Avg_Value=Avg_Value.x)
RTB_Merge<- RTB_Merge %>%
rename(Value_Yt=Avg_Value.y)
RTB_Merge<- RTB_Merge %>%
rename(Year=year.x)
RTB_Final<- RTB_Merge %>%
select(3:12,14,16)
RTB_Final$Diff <- RTB_Final$Value_Yt - RTB_Final$Value_Y1
RTB_Final$DiffPc <- RTB_Final$Diff/RTB_Final$Value_Y1
# Format the data in "Model" format for Fixed Effects
Model.RTB <- RTB_GE %>%
group_by(Constituency,GE)%>%
summarise(Avg_RPI=mean(RPI,na.rm=TRUE))
Model.RTB <- Model.RTB[order(Model.RTB$GE),]
Model.RTB.Tot <- RTB_Final %>%
group_by(Locations_RIA02,Constituency,GE)%>%
summarise(Value_Y1=mean(Value_Y1,na.rm=TRUE),
Value_Yt=mean(Value_Yt,na.rm=TRUE))
Model.RTB.Tot <- Model.RTB.Tot %>%
group_by(Constituency,GE) %>%
summarise(Value_Y1=mean(Value_Y1,na.rm=TRUE),
Value_Yt=mean(Value_Yt,na.rm=TRUE))
Model.RTB.Tot$Tot_RPI <- (Model.RTB.Tot$Value_Yt-Model.RTB.Tot$Value_Y1)/Model.RTB.Tot$Value_Y1
Model.RTB.Tot <- Model.RTB.Tot[order(Model.RTB.Tot$GE),]
Model_Vars.RTB <- Model.RTB %>% right_join(Model.RTB.Tot,by=c("Constituency","GE"))
TableRTB <- Model_Vars.RTB %>%
group_by(GE)%>%
summarise(Avg_RPI = mean(Avg_RPI),
Tot_RPI = mean(Tot_RPI))
The latest data published for the RTB (RIA02) which includes 2021 data contained revisions to historical data. The data used in the thesis was a download from April 2022. A full comparison of the variations can be seen in the “Final” folder, RIA02 Variaion.csv. The resulting coefficients change slightly, but not materially from the thesis output. At the time of submitting the thesis it was unclear why historical variations would be included in the data, the CSO updated on the data on 10th of August 2022.
The coefficients for Model 3 from the thesis document is -0.184 (0.233) and -1.330 (0.398) for HPI and RPI respectively, which was statistically significant at the 1% level. As can be seen below (Model 3A) this is a small variation.
Candidates<- Candidates %>%
rename(Con=Constituency)
Elec_Res.A <- Candidates %>%
group_by(`Return Constituency`,GE,`Incumbent Party?`) %>%
summarise(Votes = sum(Votes))
Elec_Res.A <- Elec_Res.A %>%
filter(`Incumbent Party?`=="1")
Elec_Res.A <- Elec_Res.A %>%
rename(Incum_Votes=Votes)
Elec_Res.B <- Candidates %>%
group_by(`Return Constituency`,GE) %>%
summarise(Votes = sum(Votes))
Elec.Res <- Elec_Res.A %>% right_join(Elec_Res.B,by=c("Return Constituency","GE"))
Elec.Res$Vote_Share <- Elec.Res$Incum_Votes/Elec.Res$Votes
Elec.Res<- Elec.Res %>%
rename(Constituency=`Return Constituency`)
## SF Vote Share Panel
Parties <- Candidates %>%
group_by(`Return Constituency`,GE,`Party Abbreviation`) %>%
summarise(Votes = sum(Votes))
SF <- Parties %>%
filter(`Party Abbreviation` == "S.F.")
SF<- SF %>%
rename(Constituency=`Return Constituency`)
Elec.Res.Alt <- SF %>% right_join(Elec.Res,by=c("Constituency","GE"))
Elec.Res.Alt <- Elec.Res.Alt %>%
rename(SF_Votes = Votes.x)
Elec.Res.Alt <- Elec.Res.Alt %>%
rename(Tot_Votes = Votes.y)
Elec.Res.Alt$SF_Vote_Share <- Elec.Res.Alt$SF_Votes/Elec.Res.Alt$Tot_Votes
The candidates data was manipulated primarily for incumbent vote share as the independent variable. An alternative specification viewing Sinn Féin vote share was also generated.
CoefPlotComp <- list("Model 3 - Incumbent"=reg_fe1_4A,"Model 3 - SF"=reg_feSF)
ggcoef_compare(CoefPlotComp, variable_labels = c(Tot_HPI = "Total House Price Inflation (HPI)",
Avg_HPI = "Average House Price Inflation (HPI)",
Tot_RPI = "Total Rental Inflation (RPI)",
Avg_RPI = "Average Rental Inflation (RPI)"
))+scale_color_manual(values=c("navy","darkgreen"))
Here, a shift to the left is defined as a constituency which the Sinn Féin Vote Share was higher than the Incumbent.
Elec.Res.Alt$SFvsIncum_VS <- Elec.Res.Alt$SF_Vote_Share - Elec.Res.Alt$Vote_Share
Elec.Res.Alt$Shift <- ifelse(Elec.Res.Alt$SFvsIncum_VS >= 0,"Yes","No")
# merge shapefile
Seat<-GEO_Merge_HPM04%>%
group_by(Constituency,`Seat Tag`)%>%
summarise(nr=n())
Seat <- Seat%>%
select(1,2)
Elec.Res.Alt <- Seat %>% right_join(Elec.Res.Alt,"Constituency")
MapElec<- merge(shp, Elec.Res.Alt, by.x = "CON_SEAT_", by.y="Seat Tag")
MapShift<-MapElec %>%
ggplot()+
geom_sf(aes(fill = Shift))+
theme_void()+
theme(legend.title = element_text(size = 10),
legend.text = element_text(size = 8))+
theme(legend.position = "left")+
labs(title="The Shift to the Left")+
theme(strip.text.x=element_text(vjust=0))+
scale_fill_manual(values = alpha(c("navy", "darkgreen"),0.4))
MapShift + facet_wrap(~GE,ncol=2)