# Part I. Loading the data ----

# NOTE(review): hard-coded absolute working directory; every relative file path
# read or written further down resolves against it.
setwd("d:/my/documents/UvA/Thesis Private Equity/workingFolder")
####My small functions
# Number of distinct values in x.
numUnique <- function(x) {
    distinctValues <- unique(x)
    length(distinctValues)
}
# Number of distinct values that occur in both vectors.
inCommon <- function(vector1, vector2) {
    sharedValues <- intersect(vector1, vector2)
    length(sharedValues)
}
# Winsorise a numeric vector: values below the `fraction` quantile or above the
# (1 - fraction) quantile are replaced by those quantiles.
#   x         numeric vector; NAs are ignored when the quantiles are computed
#             and are returned unchanged.
#   fraction  single number in [0, 0.5]: share of each tail that is clipped.
# Returns x with both tails clipped.
# Stops with "bad value for 'fraction'" unless fraction is a single,
# non-missing number in [0, 0.5].
winsorMy <- function (x, fraction=.05) {
    if(length(fraction) != 1 || !is.numeric(fraction) || is.na(fraction) ||
       fraction < 0 || fraction > 0.5) {
        stop("bad value for 'fraction'")
    }
    limits <- quantile(x, probs=c(fraction, 1-fraction), na.rm = TRUE)
    # Comparisons with NA give NA in the logical index; with a length-one
    # replacement value those positions are simply left untouched.
    x[x < limits[1]] <- limits[1]
    x[x > limits[2]] <- limits[2]
    x
}
# Coefficient table for a fitted model with heteroskedasticity-robust (HC0)
# standard errors: coeftest() evaluated with the vcovHC(..., "HC0") covariance.
getStdErr <- function(moodelTest){
    robustVcov <- vcovHC(moodelTest, "HC0")    # robust; HC0
    coeftest(moodelTest, vcov = robustVcov)
}


###
library(foreign); library(readr); library(psych); library(MatchIt); library(plm)
library(lubridate); library(estout);library(pglm); library(sandwich);library(lmtest)
###

#-------Loading the data
# LBO deals: the two Zephyr exports are stacked into a single table.
dataZ <- rbind(read.csv("Zephyr_dataZ_1.csv", stringsAsFactors = FALSE),
               read.csv("Zephyr_dataZ_2.csv", stringsAsFactors = FALSE))
numUnique(dataZ$Target.BvD.ID.number)#14417
# Distinct target ids are written out to a text file.
write(unique(dataZ$Target.BvD.ID.number), file = "BvD14417.txt")
# Accounting data keyed by idnr.
dataA <- read.dta("dataA_from14417_45142obs.dta")
numUnique(dataA$idnr) #4844
# Keep only the deals whose target appears in the accounting data.
dataZ <- subset(dataZ, Target.BvD.ID.number %in% dataA$idnr)
numUnique(dataZ$Target.BvD.ID.number)#4844
write(unique(dataZ$Target.BvD.ID.number), file = "BvD4844.txt")

#write((cbind(names(dataA),names(dataA2))), file="namesBvd.csv") #check for the names

########################
# Build an "<idnr>-<closdate_year>" key for every row of dataA and keep only the
# first row per key, i.e. one observation per firm-year.
dataTemp <- dataA
# Vectorised paste0 builds the same key as a row-by-row loop, without copying
# the data frame once per row and without failing on a zero-row table.
dataTemp$id_year <- paste0(dataTemp$idnr, "-", dataTemp$closdate_year)
sum(duplicated( dataTemp$id_year))   # number of duplicated firm-years (printed)
dataTemp <- dataTemp[!duplicated( dataTemp$id_year),]
dataA <- dataTemp
#########################


# Secondary-buyout deals and the accounting data of their targets.
dataZ2 <- read.csv("Zephyr_dataZ2.csv", stringsAsFactors = FALSE)#secondary buyouts
inCommon(dataZ$Target.BvD.ID.number, dataZ2$Target.BvD.ID.number)# only 389
# Distinct target ids are written out to a text file.
write(unique(dataZ2$Target.BvD.ID.number), file = "BvD3928.txt")
dataA2 <- read.dta("dataA_from3928_13000obs.dta")
numUnique(dataA2$idnr) #1401
# Keep only the deals whose target appears in the accounting data.
dataZ2 <- subset(dataZ2, Target.BvD.ID.number %in% dataA2$idnr)
numUnique(dataZ2$Target.BvD.ID.number)#1401
write(unique(dataZ2$Target.BvD.ID.number), file = "BvD1401.txt")

########################
# Build an "<idnr>-<closdate_year>" key for every row of dataA2 and keep only
# the first row per key, i.e. one observation per firm-year.
dataTemp <- dataA2
# Vectorised paste0 builds the same key as a row-by-row loop, without copying
# the data frame once per row and without failing on a zero-row table.
dataTemp$id_year <- paste0(dataTemp$idnr, "-", dataTemp$closdate_year)
sum(duplicated( dataTemp$id_year))   # number of duplicated firm-years (printed)
dataTemp <- dataTemp[!duplicated( dataTemp$id_year),]
dataA2 <- dataTemp
#########################

# Control-group accounting data for the LBO sample, reduced to one row per
# firm-year.
#dataAcontrols <- read.dta("controlsZ04_06.dta")
dataAcontrols <- read.dta("controlsZ13_06.dta")
numUnique(dataAcontrols$idnr)#4824

########################
dataTemp <- dataAcontrols
# Vectorised "<idnr>-<closdate_year>" key: same result as a row-by-row loop,
# without copying the data frame once per row or failing on a zero-row table.
dataTemp$id_year <- paste0(dataTemp$idnr, "-", dataTemp$closdate_year)
sum(duplicated( dataTemp$id_year))   # number of duplicated firm-years (printed)
dataTemp <- dataTemp[!duplicated( dataTemp$id_year),]
dataAcontrols <- dataTemp
#########################

# Control-group accounting data for the SBO sample, reduced to one row per
# firm-year.
#dataA2controls <- read.dta("controlsZ2_04_06.dta")
dataA2controls <- read.dta("controlsZ2_13_06.dta")
numUnique(dataA2controls$idnr)#1396

########################
dataTemp <- dataA2controls
# Vectorised "<idnr>-<closdate_year>" key: same result as a row-by-row loop,
# without copying the data frame once per row or failing on a zero-row table.
dataTemp$id_year <- paste0(dataTemp$idnr, "-", dataTemp$closdate_year)
sum(duplicated( dataTemp$id_year))   # number of duplicated firm-years (printed)
dataTemp <- dataTemp[!duplicated( dataTemp$id_year),]
dataA2controls <- dataTemp
#########################
rm(dataTemp)
#---------------

# Convert the deal tables from text to typed columns.
# Completion dates are read as day-month-year.
dataZ$Completed.date <- as.Date(dataZ$Completed.date, format = "%d-%m-%Y")
dataZ2$Completed.date <- as.Date(dataZ2$Completed.date, format = "%d-%m-%Y")

# Columns 6 to 12 are selected by position and parsed as numbers.
# NOTE(review): assumes these positions hold text-formatted numeric fields in
# both tables — confirm against the CSV layout.
dataZ[,6:12] <- lapply((dataZ[,6:12]), parse_number)
dataZ2[,6:12] <- lapply((dataZ2[,6:12]), parse_number)
dataZ$Acquired.stake.... <- parse_number(dataZ$Acquired.stake....)
dataZ2$Acquired.stake.... <- parse_number(dataZ2$Acquired.stake....)

#--Age firm ---
# Firm age and two age-band dummies (age below 4; age 4 to 7) for the two
# treated panels and the two control panels. Each sum() prints the number of
# rows in the band.
# NOTE(review): age is measured against the year in which the script is run
# (year(today())), not against closdate_year, so it is constant per firm and
# changes when the script is re-run in a later year — confirm this is intended.
dataA$ageFirm <- year(today()) - dataA$dateinc_year
dataA$age1_3 <- dataA$ageFirm < 4; sum(dataA$age1_3, na.rm=T)
dataA$age4_7 <- dataA$ageFirm > 3 & dataA$ageFirm < 8; sum(dataA$age4_7, na.rm=T)#
dataA2$ageFirm <- year(today()) - dataA2$dateinc_year
dataA2$age1_3 <- dataA2$ageFirm < 4; sum(dataA2$age1_3, na.rm=T)
dataA2$age4_7 <- dataA2$ageFirm >3 & dataA2$ageFirm < 8; sum(dataA2$age4_7, na.rm=T)# 
hist(dataA$ageFirm)

dataAcontrols$ageFirm <- year(today()) - dataAcontrols$dateinc_year
dataAcontrols$age1_3 <- dataAcontrols$ageFirm < 4; sum(dataAcontrols$age1_3, na.rm=T)
dataAcontrols$age4_7 <- dataAcontrols$ageFirm > 3 & dataAcontrols$ageFirm < 8; sum(dataAcontrols$age4_7, na.rm=T)# to check later
dataA2controls$ageFirm <- year(today()) - dataA2controls$dateinc_year
dataA2controls$age1_3 <- dataA2controls$ageFirm < 4; sum(dataA2controls$age1_3, na.rm=T)
dataA2controls$age4_7 <- dataA2controls$ageFirm > 3 & dataA2controls$ageFirm < 8; sum(dataA2controls$age4_7, na.rm=T)# to check later

##---Experienced PEs

# Flag LBO deals done by "experienced" acquirors (more than four deals in
# dataZ) and keep a top-20 list of their names.
# Deal count per acquiror id.
X<-data.frame(table(dataZ$Acquiror.BvD.ID.number))
# Sort by deal count, then drop rows purely by position: the first three, and
# then the 3rd and 4th of what remains.
# NOTE(review): presumably these positions are placeholder / non-PE acquiror
# ids — confirm; the positions shift whenever the input data changes.
X<-X[order(X[,2], decreasing = T),]; X <- X[4:nrow(X),]; X <- rbind(X[1:2,],X[5:nrow(X),])
X$experienced<- X$Freq > 4                                 
sum(X$experienced)
X<-subset(X, experienced==TRUE)
X$Var1<-as.character(X$Var1)
sum(dataZ$Acquiror.BvD.ID.number %in% X$Var1[1:nrow(X)])
# Deal-level flag: the acquiror id is among the experienced ones.
dataZ$experienced <- dataZ$Acquiror.BvD.ID.number %in% X$Var1; sum(dataZ$experienced)
# Names of experienced acquirors with their deal counts, 20 most frequent.
X<-data.frame(table(subset(dataZ, experienced == TRUE)$Acquiror.name))
X<-X[order(X[,2],decreasing = T),][1:20,]
PEfirmsFistPE <- X


# Same "experienced acquiror" flag for the SBO deals (dataZ2), followed by the
# export of both top-20 name lists side by side.
X<-data.frame(table(dataZ2$Acquiror.BvD.ID.number))
X<-X[order(X[,2], decreasing = T),]; 
# Rows are dropped purely by position: the first three, then the 3rd of what
# remains, then the 8th of what remains after that.
# NOTE(review): presumably placeholder / non-PE acquiror ids — confirm; the
# positions shift whenever the input data changes.
X <- X[4:nrow(X),]; X <- rbind(X[1:2,],X[4:nrow(X),]);X <- rbind(X[1:7,],X[9:nrow(X),])
X$experienced<- X$Freq > 4                                 
sum(X$experienced)
X<-subset(X, experienced==TRUE)
X$Var1<-as.character(X$Var1)
sum(dataZ2$Acquiror.BvD.ID.number %in% X$Var1)
# Deal-level flag: the acquiror id is among the experienced ones.
dataZ2$experienced <- dataZ2$Acquiror.BvD.ID.number %in% X$Var1; sum(dataZ2$experienced)
# Names of experienced acquirors with their deal counts, 20 most frequent.
X<-data.frame(table(subset(dataZ2, experienced == TRUE)$Acquiror.name))
X<-X[order(X[,2],decreasing = T),][1:20,]
PEfirmsSBOs <- X
rm(X)
# Both top-20 tables are bound column-wise and written to one CSV file.
dataPEfirms <- data.frame(cbind(PEfirmsFistPE,PEfirmsSBOs))
rm(PEfirmsFistPE,PEfirmsSBOs)
write.csv(dataPEfirms, file = "dataPEfirms.csv")

# Part II. Plots ----

#------------Plots
# Two side-by-side scatter plots of deal enterprise value against completion
# date (LBOs left, SBOs right); the layout is reset to a single panel afterwards.
par(mfrow=c(1,2), cex = .9)
plot(dataZ$Completed.date, dataZ$Deal.enterprise.value.th.USD, 
     ylab="Enterprise Value", xlab="", 
     main = "Private equity deals measured by EV over time (LBOs)", col="black", pch = 0, cex = 2)

plot(dataZ2$Completed.date, dataZ2$Deal.enterprise.value.th.USD, 
     ylab="Enterprise Value", xlab="",
     main = "Private equity deals measured by EV over time (SBOs)", col="black", pch = 2, cex = 2)
par(mfrow = c(1,1))

# Two side-by-side bar charts with the number of deals per completion year.
par(mfrow=c(1,2), cex = .5)
barplot(table(year(dataZ$Completed.date)), col="grey", ylab = "Number of deals", 
        main ="Private equity deals(1997-2017) LBOs")

barplot(table(year(dataZ2$Completed.date)), col="grey", ylab = "Number of deals", 
        main ="Private equity deals(1997-2017) SBOs")
par(mfrow = c(1,1))


# Convert a position given in margin lines on one side of the current plot
# into the plot's user coordinates.
#   line  number of margin lines away from the plot region edge
#   side  1, 2, 3 or 4 (bottom, left, top, right); any other value stops with
#         "side must be 1, 2, 3, or 4"
# Returns the y user coordinate for sides 1 and 3 and the x user coordinate
# for sides 2 and 4.
line2user <- function(line, side) {
    # Height of one margin line in inches.
    lineHeightIn <- par('cin')[2] * par('cex') * par('lheight')
    # User-coordinate length of one inch along each axis.
    xPerInch <- diff(grconvertX(0:1, 'inches', 'user'))
    yPerInch <- diff(grconvertY(0:1, 'inches', 'user'))
    xShift <- line * xPerInch * lineHeightIn
    yShift <- line * yPerInch * lineHeightIn
    plotEdges <- par('usr')
    switch(side,
           `1` = plotEdges[3] - yShift,
           `2` = plotEdges[1] - xShift,
           `3` = plotEdges[4] + yShift,
           `4` = plotEdges[2] + xShift,
           stop("side must be 1, 2, 3, or 4", call.=FALSE))
}


# 2x2 grid of EV/EBITDA multiples (log scale) against completion date:
# top row LBOs, bottom row SBOs; left pre-deal, right post-deal. A bold row
# title is placed with text() at coordinates computed by line2user().
# NOTE(review): the pre-deal panels plot abs(log(.)) while the post-deal panels
# plot log(.) — confirm the asymmetry is intended.
par(mfrow=c(2,2),cex = .4)
plot(dataZ$Completed.date, abs(log(dataZ$Pre.deal.enterprise.value.multiple.on.EBITDA.Last.avail..yr)), pch = 0, xlab = "", ylab ="EV/EBITDA", main = "Pre-deal")
plot(dataZ$Completed.date, log(dataZ$Post.deal.enterprise.value.multiple.on.EBITDA.First.avail..yr),pch = 2, xlab = "", ylab = "", main = "Post-deal")
#par(mfrow = c(1,1))
# xpd=NA lets the label be drawn outside the plot region.
text(line2user(line=mean(par('mar')[c(2, 4)]), side=2), 
     line2user(line=2, side=3), 'First time LBOs', xpd=NA, cex=1.6, font=2)

#par(mfrow = c(1,2), cex = .5)
plot(dataZ2$Completed.date, abs(log(dataZ2$Pre.deal.enterprise.value.multiple.on.EBITDA.Last.avail..yr)), pch = 0, xlab = "", ylab ="EV/EBITDA", main = "Pre-deal")
plot(dataZ2$Completed.date, log(dataZ2$Post.deal.enterprise.value.multiple.on.EBITDA.First.avail..yr),pch = 2, xlab = "", ylab = "", main = "Post-deal")
text(line2user(line=mean(par('mar')[c(2, 4)]), side=2), 
     line2user(line=2, side=3), 'Secondary buyouts', xpd=NA, cex=1.6, font=2)
par(mfrow = c(1,1))

# Part III. Constructing the dataset for the regression analysis of the first-time PE-backed companies ----

#####regression dataZ

# Build the LBO regression sample: accounting rows of the deal targets joined
# to their deal data (treated, treatControl = 1), stacked on the control firms
# (treatControl = 0).
mergedAZ <- merge(dataA, dataZ, by.x="idnr", by.y="Target.BvD.ID.number",all = F)#look at all=T
# Pad the controls with empty columns named like columns 135:194 of the merged
# table so that rbind() below sees the same set of columns.
# NOTE(review): the positions 135:194 are hard-coded — presumably the columns
# contributed by dataZ; confirm whenever the input layout changes.
dataAcontrols[,135:194] <- NA #completing the dataset for rbind
names(dataAcontrols)[135:194]<-names(mergedAZ)[135:194]

# Years elapsed between the deal and the accounting year (negative = pre-deal).
mergedAZ$yearPE <- year(mergedAZ$Completed.date)
mergedAZ$difYear_pe <- mergedAZ$closdate_year - mergedAZ$yearPE
hist(mergedAZ$difYear_pe)
# Same distance, kept only inside the 0..7 year window. Vectorised: which()
# skips rows whose distance is NA, where a scalar if() inside a row loop would
# stop with "missing value where TRUE/FALSE needed".
mergedAZ$difYear_pePositive <- mergedAZ$difYear_pe 
mergedAZ$difYear_pePositive[which(mergedAZ$difYear_pe > 7 | mergedAZ$difYear_pe < 0)] <- NA
hist(mergedAZ$difYear_pePositive)

# Controls have no deal, so the deal-timing variables are missing for them.
dataAcontrols$yearPE <- NA # 
dataAcontrols$difYear_pe <- NA # 
dataAcontrols$difYear_pePositive <- NA

mergedAZ$treatControl <- 1
dataAcontrols$treatControl <- 0

dataA_all <- rbind(mergedAZ, dataAcontrols)
# pe: accounting year strictly after the deal year (NA for controls).
dataA_all$pe <- dataA_all$difYear_pe > 0
sum(dataA_all$pe, na.rm=T)
dataReg <- dataA_all; rm(dataA_all)

sum(is.na(dataReg$pe))#58923

table(dataReg$pe)

#Zscore
# Weighted sum of working capital, osfd, EBIT and turnover over total assets,
# plus 0.6 * capi over total liabilities (ncli + culi).
dataReg$Zscore <- (1/dataReg$toas)*(1.2*dataReg$wkca +
                                        1.4*dataReg$osfd + 3.3* dataReg$ebit + 0.998*dataReg$turn) +
    0.6*dataReg$capi/(dataReg$ncli + dataReg$culi)

# Inf / NaN (division by zero) become missing. NA is assigned directly: writing
# the string "NA" would turn the whole column into character and the
# conversion back to numeric would keep only 15 significant digits.
dataReg$Zscore[!is.finite(dataReg$Zscore)] <- NA_real_

dataReg$Zscore <- winsorMy(dataReg$Zscore, fraction = .008) # winsorising at .008//could be different also

hist(dataReg$Zscore, col="lightgreen")
hist(subset(dataReg, pe==0)$Zscore, col="lightgreen", main="pe==0")
hist(subset(dataReg, pe==1)$Zscore, col="lightgreen", main="pe==1")
####


####extra variables
# Leverage ratios. Non-finite results (division by zero) are set to NA
# directly: writing the string "NA" would turn the column into character and
# the conversion back to numeric would keep only 15 significant digits.
dataReg$debt_totAssets <- (dataReg$ncli + dataReg$loan)/dataReg$toas
dataReg$debt_totAssets[!is.finite(dataReg$debt_totAssets)] <- NA_real_

dataReg$debt_totAssets <- winsorMy(dataReg$debt_totAssets, .001) # winsorising at .001 could be different
hist((dataReg$debt_totAssets))#to see the debt ratio
dataReg$ic <- winsorMy(dataReg$ic, .05)#winsorising the interest coverage

dataReg$debt_EBITDA <- dataReg$ltdb/dataReg$ebta
dataReg$debt_EBITDA[!is.finite(dataReg$debt_EBITDA)] <- NA_real_

dataReg$debt_EBITDA <- winsorMy(dataReg$debt_EBITDA, .02)# winsorizing at .02
hist(dataReg$debt_EBITDA)

#high yield
# High-yield series: parse the date and value columns, plot the raw series and
# average it per calendar year.
highYieldSpread <- read.csv("highYeldBondIndexMerylLynchBoA_downladedFRED.csv", stringsAsFactors = FALSE)
highYieldSpread$DATE <- parse_date(highYieldSpread$DATE)
highYieldSpread$spread <- parse_number(highYieldSpread$BAMLH0A0HYM2EY)
plot(highYieldSpread$DATE, highYieldSpread$spread, col="red", type = "l")
highYieldSpread$year <- year(highYieldSpread$DATE)
# the annual average high yield spread
highYieldAnnual <- aggregate(spread ~ year, data = highYieldSpread, FUN = mean)
# Attach the annual average to every firm-year; rows whose closdate_year has no
# annual average are dropped by this inner merge.
dataReg <- merge(dataReg, highYieldAnnual, by.x = "closdate_year", by.y = "year")
# favourable: the year's spread lies below the sample median spread.
dataReg$favourable <- dataReg$spread < median(dataReg$spread)
rm(highYieldAnnual, highYieldSpread)
#

# PEbacked, the treatment indicator for the full-sample regressions:
#   1  accounting year is 1 to 7 years after the deal
#   NA row has a deal date but lies outside that window
#   0  no deal timing available (the control firms)
# NOTE(review): treated rows with a missing deal date also end up as 0, i.e.
# are treated like controls — confirm this is intended.
dataReg$PEbacked <- dataReg$difYear_pe > 0 & dataReg$difYear_pe < 8 ## to look later something is not right
sum(dataReg$PEbacked, na.rm=T)
# Vectorised recode (TRUE -> 1, FALSE -> NA, NA -> 0); gives the same numeric
# column as a row-by-row loop with a -1 sentinel, without the per-row
# data-frame copies.
dataReg$PEbacked <- ifelse(is.na(dataReg$PEbacked), 0,
                           ifelse(dataReg$PEbacked, 1, NA_real_))
### 
dataReg$PEbacked2 <- dataReg$difYear_pe > 0 & dataReg$difYear_pe < 8 # for the PEonly analysis



##Panel
# Build the panel index columns "id" and "year" and put them in front of
# dataReg. Keys are "<closdate_year>_<idnr>", made syntactically valid and
# unique by make.names(), then split again at "_".
# NOTE(review): make.names() may alter the key text (and adds a suffix to
# duplicated keys), and an idnr containing "_" would split into more than two
# pieces — verify the resulting id values against idnr.
nams<-paste0(dataReg$closdate_year,"_",dataReg$idnr)
nams<-make.names(nams, unique=T)
s<-do.call(rbind, strsplit(nams, "_"))
# parse_number() extracts the numeric year from the first piece of the key.
dataReg <- cbind(s[,2],parse_number(s[,1]), dataReg)
names(dataReg)[1:2]<-c("id","year")
rm(s,nams)
###

# Snapshot of the prepared data, then restore from it. R names are case
# sensitive: the object is dataRegOriginal, so "DataRegOriginal" would stop
# the script with "object not found".
dataRegOriginal <- dataReg
dataReg <- dataRegOriginal

# Part IV. Regressions for the first-time PE-backed companies ----

### the reg
# Z-score regressions on the full LBO sample (dataReg): one pooled OLS and five
# random-effects panel models indexed by firm id and year. After each plm fit
# the model's stored covariance matrix is overwritten with a group-clustered
# HC0 estimate (presumably so that the exported table reports robust standard
# errors — confirm how esttab() reads the vcov).
modelOls <- lm(Zscore ~ debt_totAssets + I(log(abs(turn)+1))+ etma + quoted+ I(log(ageFirm))+ PEbacked , data = dataReg)

# Baseline random-effects model.
modelPlm <- plm(Zscore ~ I(log(abs(turn)+1)) + etma +debt_totAssets + quoted + I(log(ageFirm))+ PEbacked, 
                data = dataReg, model="random", index=c("id","year"))
vcovTemp =vcovHC(modelPlm, type="HC0",cluster="group") #getting the vcov
modelPlm$vcov <- vcovTemp

# Same specification with time effects instead of individual effects.
modelPlm_1.5 <- plm(Zscore ~ I(log(abs(turn)+1)) + etma +debt_totAssets + quoted + I(log(ageFirm))+ PEbacked, 
                data = dataReg, model="random", effect= "time", index=c("id","year"))
vcovTemp =vcovHC(modelPlm_1.5, type="HC0",cluster="group") #getting the vcov
modelPlm_1.5$vcov <- vcovTemp

# Adds the PEbacked x leverage interaction.
modelPlm_2 <- plm(Zscore ~ I(log(abs(turn)+1)) + etma +debt_totAssets + quoted + I(PEbacked*debt_totAssets) + I(log(ageFirm))+ PEbacked, 
                  data = dataReg, model="random", index=c("id","year"))
vcovTemp =vcovHC(modelPlm_2, type="HC0",cluster="group") #getting the vcov
modelPlm_2$vcov <- vcovTemp

# Interest coverage (ic) replaces the leverage ratio.
modelPlm_3 <- plm(Zscore ~  I(log(abs(turn)+1)) + etma + quoted + I(log(ageFirm))+ ic + PEbacked, 
                  data = dataReg, model="random", index=c("id","year"))
vcovTemp =vcovHC(modelPlm_3, type="HC0",cluster="group") #getting the vcov
modelPlm_3$vcov <- vcovTemp

# Adds the PEbacked x interest-coverage interaction.
modelPlm_4 <- plm(Zscore ~ I(log(abs(turn)+1)) + etma + quoted + ic + I(PEbacked*ic) + I(log(ageFirm))+ PEbacked, 
                  data = dataReg, model="random", index=c("id","year"))
vcovTemp =vcovHC(modelPlm_4, type="HC0",cluster="group") #getting the vcov
modelPlm_4$vcov <- vcovTemp

# Collect the six models and export them as one CSV table.
estclear() # clear all the eststo objects
eststo(modelOls) 
eststo(modelPlm)
eststo(modelPlm_1.5)
eststo(modelPlm_2)
eststo(modelPlm_3)
eststo(modelPlm_4)
esttab(filename = "test20_06",csv=T) # print it

#=== only PE firms
# Z-score regressions restricted to the treated firms (treatControl == 1),
# using the logical PEbacked2 indicator. One pooled OLS plus panel models
# ("within" unless stated otherwise); each plm fit gets its stored covariance
# matrix replaced by a group-clustered HC0 estimate.
modelOlsPE <- lm(Zscore ~ debt_totAssets + I(log(abs(turn)+1))+ etma + PEbacked2, 
                 data = subset(dataReg, treatControl == 1))

modelPlmPE <- plm(Zscore ~ debt_totAssets + I(log(abs(turn)+1))+ etma + PEbacked2, 
                  data = subset(dataReg, treatControl == 1), model= "within", index = c("id","year"))
vcovTemp =vcovHC(modelPlmPE, type="HC0",cluster="group") #getting the vcov
modelPlmPE$vcov <- vcovTemp

# Adds the experienced-acquiror flag.
modelPlmPE_2 <- plm(Zscore ~ debt_totAssets + I(log(abs(turn)+1))+ etma + PEbacked2 + experienced, 
                    data = subset(dataReg, treatControl == 1), model= "within", index = c("id","year"))
vcovTemp =vcovHC(modelPlmPE_2, type="HC0",cluster="group") #getting the vcov
modelPlmPE_2$vcov <- vcovTemp

# Adds the favourable-credit-conditions flag.
modelPlmPE_3 <- plm(Zscore ~ debt_totAssets + I(log(abs(turn)+1))+ etma + PEbacked2 +  favourable, 
                    data = subset(dataReg, treatControl == 1), model= "within", index = c("id","year"))
vcovTemp =vcovHC(modelPlmPE_3, type="HC0",cluster="group") #getting the vcov
modelPlmPE_3$vcov <- vcovTemp

# Adds log firm age; this one is fitted as a random-effects model.
modelPlmPE_3_2 <- plm(Zscore ~ debt_totAssets + I(log(abs(turn)+1))+ etma + PEbacked2 +  I(log(ageFirm)), 
                      data = subset(dataReg, treatControl == 1), model= "random", index = c("id","year"))
vcovTemp =vcovHC(modelPlmPE_3_2, type="HC0",cluster="group") #getting the vcov
modelPlmPE_3_2$vcov <- vcovTemp

# Adds the number of years since the deal (0..7 window).
modelPlmPE_4 <- plm(Zscore ~ debt_totAssets + I(log(abs(turn)+1))+ etma + PEbacked2 + difYear_pePositive, 
                    data = subset(dataReg, treatControl == 1), model= "within", index = c("id","year"))
vcovTemp =vcovHC(modelPlmPE_4, type="HC0",cluster="group") #getting the vcov
modelPlmPE_4$vcov <- vcovTemp

# Years since the deal without the PEbacked2 indicator.
modelPlmPE_5 <- plm(Zscore ~ debt_totAssets + I(log(abs(turn)+1))+ etma + difYear_pePositive, 
                    data = subset(dataReg, treatControl == 1), model= "within", index = c("id","year"))
vcovTemp =vcovHC(modelPlmPE_5, type="HC0",cluster="group") #getting the vcov
modelPlmPE_5$vcov <- vcovTemp

# Collect the seven models and export them as one CSV table.
estclear() # clear all the eststo objects
eststo(modelOlsPE) 
eststo(modelPlmPE)
eststo(modelPlmPE_2)
eststo(modelPlmPE_3)
eststo(modelPlmPE_3_2)
eststo(modelPlmPE_4)
eststo(modelPlmPE_5)
esttab(filename = "test20_06_PE",csv=T) # print it

# Part V. Constructing the dataset for the regression analysis of the SBOs ----

#####regression dataZ2

# Build the SBO regression sample: accounting rows of the deal targets joined
# to their deal data (treated, treatControl = 1), stacked on the control firms
# (treatControl = 0).
mergedAZ2 <- merge(dataA2, dataZ2, by.x="idnr", by.y="Target.BvD.ID.number",all = F)#look at all=T
# Pad the controls with empty columns named like columns 135:194 of the merged
# table so that rbind() below sees the same set of columns.
# NOTE(review): the positions 135:194 are hard-coded — presumably the columns
# contributed by dataZ2; confirm whenever the input layout changes.
dataA2controls[,135:194] <- NA #completeing the dataset for rbind
names(dataA2controls)[135:194]<-names(mergedAZ2)[135:194]

# Years elapsed between the deal and the accounting year (negative = pre-deal).
mergedAZ2$yearPE <- year(mergedAZ2$Completed.date)
mergedAZ2$difYear_pe<-mergedAZ2$closdate_year-mergedAZ2$yearPE
hist(mergedAZ2$difYear_pe)
# Same distance, kept only inside the 0..7 year window. Vectorised: which()
# skips rows whose distance is NA, where a scalar if() inside a row loop would
# stop with "missing value where TRUE/FALSE needed".
mergedAZ2$difYear_pePositive <- mergedAZ2$difYear_pe 
mergedAZ2$difYear_pePositive[which(mergedAZ2$difYear_pe > 7 | mergedAZ2$difYear_pe < 0)] <- NA
hist(mergedAZ2$difYear_pePositive)

# Controls have no deal, so the deal-timing variables are missing for them.
dataA2controls$yearPE <- NA
dataA2controls$difYear_pe <- NA
dataA2controls$difYear_pePositive <- NA

mergedAZ2$treatControl <- 1
dataA2controls$treatControl <- 0

dataA_all <- rbind(mergedAZ2, dataA2controls)
# pe: accounting year strictly after the deal year (NA for controls).
dataA_all$pe <- dataA_all$difYear_pe > 0
sum(dataA_all$pe, na.rm=T)
dataReg2 <- dataA_all; rm(dataA_all)

sum(is.na(dataReg2$pe))#811

table(dataReg2$pe)

#Zscore
# Weighted sum of working capital, osfd, EBIT and turnover over total assets,
# plus 0.6 * capi over total liabilities (ncli + culi).
dataReg2$Zscore <- (1/dataReg2$toas)*(1.2*dataReg2$wkca +
                                          1.4*dataReg2$osfd + 3.3* dataReg2$ebit + 0.998*dataReg2$turn) +
    0.6*dataReg2$capi/(dataReg2$ncli + dataReg2$culi)

# Inf / NaN (division by zero) become missing. NA is assigned directly: writing
# the string "NA" would turn the whole column into character and the
# conversion back to numeric would keep only 15 significant digits.
dataReg2$Zscore[!is.finite(dataReg2$Zscore)] <- NA_real_

dataReg2$Zscore <- winsorMy(dataReg2$Zscore, fraction = .008)

hist(dataReg2$Zscore)
# Zscore is numeric here, so it is handed to winsor() as is; parse_number()
# only accepts character input and would stop on a numeric column.
hist(winsor(dataReg2$Zscore), col="lightgreen")
hist(winsor(subset(dataReg2, pe==0)$Zscore), col="lightgreen", main="pe==0")
hist(winsor(subset(dataReg2, pe==1)$Zscore), col="lightgreen", main="pe==1")
###


####extra variables
# Leverage ratios. Non-finite results (division by zero) are set to NA
# directly: writing the string "NA" would turn the column into character and
# the conversion back to numeric would keep only 15 significant digits.
dataReg2$debt_totAssets <- (dataReg2$ncli + dataReg2$loan)/dataReg2$toas
dataReg2$debt_totAssets[!is.finite(dataReg2$debt_totAssets)] <- NA_real_


dataReg2$debt_totAssets <- winsorMy(dataReg2$debt_totAssets, .001)
hist((dataReg2$debt_totAssets))#to see the debt ratio
dataReg2$ic <- winsorMy(dataReg2$ic, .05)#winsorising the interest coverage


dataReg2$debt_EBITDA <- dataReg2$ltdb/dataReg2$ebta
dataReg2$debt_EBITDA[!is.finite(dataReg2$debt_EBITDA)] <- NA_real_

dataReg2$debt_EBITDA <- winsorMy(dataReg2$debt_EBITDA, .02)# winsorizing at .02
hist(dataReg2$debt_EBITDA)



#high yield
# High-yield series: parse the date and value columns, plot the raw series and
# average it per calendar year.
highYieldSpread <- read.csv("highYeldBondIndexMerylLynchBoA_downladedFRED.csv", stringsAsFactors = FALSE)
highYieldSpread$DATE <- parse_date(highYieldSpread$DATE)
highYieldSpread$spread <- parse_number(highYieldSpread$BAMLH0A0HYM2EY)
plot(highYieldSpread$DATE, highYieldSpread$spread, col="red", type = "l")
highYieldSpread$year <- year(highYieldSpread$DATE)
# the annual average high yield spread
highYieldAnnual <- aggregate(spread ~ year, data = highYieldSpread, FUN = mean)
# Attach the annual average to every firm-year; rows whose closdate_year has no
# annual average are dropped by this inner merge.
dataReg2 <- merge(dataReg2, highYieldAnnual, by.x = "closdate_year", by.y = "year")
# favourable: the year's spread lies below the sample median spread.
dataReg2$favourable <- dataReg2$spread < median(dataReg2$spread)
rm(highYieldAnnual, highYieldSpread)
#

# PEbacked, the treatment indicator for the full-sample regressions:
#   1  accounting year is 1 to 7 years after the deal
#   NA row has a deal date but lies outside that window
#   0  no deal timing available (the control firms)
# NOTE(review): treated rows with a missing deal date also end up as 0, i.e.
# are treated like controls — confirm this is intended.
dataReg2$PEbacked <- dataReg2$difYear_pe > 0 & dataReg2$difYear_pe < 8## to look later something is not right
sum(dataReg2$PEbacked, na.rm=T)

# Vectorised recode (TRUE -> 1, FALSE -> NA, NA -> 0); gives the same numeric
# column as a row-by-row loop with a -1 sentinel, without the per-row
# data-frame copies.
dataReg2$PEbacked <- ifelse(is.na(dataReg2$PEbacked), 0,
                            ifelse(dataReg2$PEbacked, 1, NA_real_))
### 
dataReg2$PEbacked2 <- dataReg2$difYear_pe > 0 & dataReg2$difYear_pe < 8 



##Panel
# Build the panel index columns "id" and "year" and put them in front of
# dataReg2. Keys are "<idnr>_<closdate_year>", made syntactically valid and
# unique by make.names(), then split again at "_".
# NOTE(review): the key order is the reverse of the one used for dataReg, and
# "year" stays a character piece of the key here instead of being parsed to a
# number. make.names() may also alter the key text (and adds a suffix to
# duplicated keys) — verify the resulting id / year values.
nams<-paste0(dataReg2$idnr,"_",dataReg2$closdate_year)
nams<-make.names(nams, unique=T)
s<-do.call(rbind, strsplit(nams, "_"))
dataReg2 <- cbind(s[,1],s[,2], dataReg2)
names(dataReg2)[1:2]<-c("id","year")
rm(s,nams)
###

# Snapshot of the prepared SBO data.
dataReg2Original <- dataReg2

# Part VI. Regressions for the SBOs ----

### the reg
# Z-score regressions on the full SBO sample (dataReg2): one pooled OLS and
# five random-effects panel models indexed by firm id and year. After each plm
# fit the model's stored covariance matrix is overwritten with a
# group-clustered HC0 estimate (presumably so that the exported table reports
# robust standard errors — confirm how esttab() reads the vcov).
modelOls2 <- lm(Zscore ~ debt_totAssets + I(log(abs(turn)+1))+ etma + quoted + I(log(ageFirm)) + PEbacked, data = dataReg2)

# Baseline random-effects model.
modelPlm2 <- plm(Zscore ~ I(log(abs(turn)+1)) + etma +debt_totAssets + quoted + I(log(ageFirm))+ PEbacked, 
                 data = dataReg2, model="random", index=c("id","year"))
vcovTemp =vcovHC(modelPlm2, type="HC0",cluster="group") #getting the vcov
modelPlm2$vcov <- vcovTemp

# Same specification with time effects instead of individual effects.
modelPlm2_1.5 <- plm(Zscore ~ I(log(abs(turn)+1)) + etma +debt_totAssets + quoted + I(log(ageFirm))+ PEbacked, 
                 data = dataReg2, model="random", effect = "time", index=c("id","year"))
vcovTemp =vcovHC(modelPlm2_1.5, type="HC0",cluster="group") #getting the vcov
modelPlm2_1.5$vcov <- vcovTemp

# Adds the PEbacked x leverage interaction.
modelPlm2_2 <- plm(Zscore ~ I(log(abs(turn)+1)) + etma +debt_totAssets + quoted+ I(log(ageFirm)) + I(PEbacked*debt_totAssets) + PEbacked, 
                   data = dataReg2, model="random", index=c("id","year"))
vcovTemp =vcovHC(modelPlm2_2, type="HC0",cluster="group") #getting the vcov
modelPlm2_2$vcov <- vcovTemp

# Interest coverage (ic) replaces the leverage ratio.
modelPlm2_3 <- plm(Zscore ~ I(log(abs(turn)+1)) + etma + quoted + ic + I(log(ageFirm))+ PEbacked, 
                   data = dataReg2, model="random", index=c("id","year"))
vcovTemp =vcovHC(modelPlm2_3, type="HC0",cluster="group") #getting the vcov
modelPlm2_3$vcov <- vcovTemp

# Adds the PEbacked x interest-coverage interaction.
modelPlm2_4 <- plm(Zscore ~ I(log(abs(turn)+1)) + etma + quoted + ic + I(PEbacked*ic) + I(log(ageFirm))+ PEbacked, 
                   data = dataReg2, model="random", index=c("id","year"))
vcovTemp =vcovHC(modelPlm2_4, type="HC0",cluster="group") #getting the vcov
modelPlm2_4$vcov <- vcovTemp

# Collect the six models and export them as one CSV table.
estclear() # clear all the eststo objects
eststo(modelOls2) 
eststo(modelPlm2)
eststo(modelPlm2_1.5)
eststo(modelPlm2_2)
eststo(modelPlm2_3)
eststo(modelPlm2_4)
esttab(filename = "test20_06_2",csv=T) # print it

#===
# Z-score regressions restricted to the treated SBO firms (treatControl == 1),
# using the logical PEbacked2 indicator. One pooled OLS plus panel models
# ("within" unless stated otherwise); each plm fit gets its stored covariance
# matrix replaced by a group-clustered HC0 estimate.
modelOlsPE2 <- lm(Zscore ~ debt_totAssets + I(log(abs(turn)+1))+ etma + PEbacked2, 
                  data = subset(dataReg2, treatControl == 1))

modelPlmPE2 <- plm(Zscore ~ debt_totAssets + I(log(abs(turn)+1))+ etma + PEbacked2, 
                   data = subset(dataReg2, treatControl == 1), model= "within", index = c("id","year"))
vcovTemp =vcovHC(modelPlmPE2, type="HC0",cluster="group") #getting the vcov
modelPlmPE2$vcov <- vcovTemp

# Adds the experienced-acquiror flag.
modelPlmPE2_2 <- plm(Zscore ~ debt_totAssets + I(log(abs(turn)+1))+ etma + PEbacked2 + experienced, 
                     data = subset(dataReg2, treatControl == 1), model= "within", index = c("id","year"))
vcovTemp =vcovHC(modelPlmPE2_2, type="HC0",cluster="group") #getting the vcov
modelPlmPE2_2$vcov <- vcovTemp

# Adds the favourable-credit-conditions flag.
modelPlmPE2_3 <- plm(Zscore ~ debt_totAssets + I(log(abs(turn)+1))+ etma + PEbacked2 +  favourable, 
                     data = subset(dataReg2, treatControl == 1), model= "within", index = c("id","year"))
vcovTemp =vcovHC(modelPlmPE2_3, type="HC0",cluster="group") #getting the vcov
modelPlmPE2_3$vcov <- vcovTemp

# Adds log firm age; this one is fitted as a random-effects model.
modelPlmPE2_3_2 <- plm(Zscore ~ debt_totAssets + I(log(abs(turn)+1))+ etma + PEbacked2 +  I(log(ageFirm)), 
                       data = subset(dataReg2, treatControl == 1), model= "random", index = c("id","year"))
vcovTemp =vcovHC(modelPlmPE2_3_2, type="HC0",cluster="group") #getting the vcov
modelPlmPE2_3_2$vcov <- vcovTemp

# Adds the number of years since the deal (0..7 window).
modelPlmPE2_4 <- plm(Zscore ~ debt_totAssets + I(log(abs(turn)+1))+ etma + PEbacked2 + difYear_pePositive, 
                     data = subset(dataReg2, treatControl == 1), model= "within", index = c("id","year"))
vcovTemp =vcovHC(modelPlmPE2_4, type="HC0",cluster="group") #getting the vcov
modelPlmPE2_4$vcov <- vcovTemp

# Years since the deal without the PEbacked2 indicator.
modelPlmPE2_5 <- plm(Zscore ~ debt_totAssets + I(log(abs(turn)+1))+ etma + difYear_pePositive, 
                     data = subset(dataReg2, treatControl == 1), model= "within", index = c("id","year"))
vcovTemp =vcovHC(modelPlmPE2_5, type="HC0",cluster="group") #getting the vcov
modelPlmPE2_5$vcov <- vcovTemp

# Collect the seven models and export them as one CSV table.
estclear() # clear all the eststo objects
eststo(modelOlsPE2) 
eststo(modelPlmPE2)
eststo(modelPlmPE2_2)
eststo(modelPlmPE2_3)
eststo(modelPlmPE2_3_2)
eststo(modelPlmPE2_4)
eststo(modelPlmPE2_5)
esttab(filename = "test20_06_2PE",csv=T) # print it

# Part VII. Analysis of bankruptcies ----

#====================
##### Bankraptcies
# Export the firm ids of both regression samples to text files and print the
# number of distinct firms in each.
write(unique(dataReg$idnr), file ="dataReg_BvD.txt"); numUnique(dataReg$idnr)
write(unique(dataReg2$idnr), file ="dataReg2_BvD.txt"); numUnique(dataReg2$idnr)


###only data for bankrupt 19_06
# bank: TRUE on every row of a firm whose id appears in the bankruptcy file,
# so the flag is constant across all years of that firm.
dataBankrupt <-read.csv("Orbis_bankrupt_dataReg.csv", stringsAsFactors = F)
dataReg$bank <- (dataReg$idnr %in% dataBankrupt$BvD.ID.number); sum(dataReg$bank)
#
dataBankrupt2 <-read.csv("Orbis_bankrupt_dataReg2.csv", stringsAsFactors = F)
dataReg2$bank <- (dataReg2$idnr %in% dataBankrupt2$BvD.ID.number); sum(dataReg2$bank)
####
table(dataBankrupt$Last.avail..year)

# number of defaults in pe group = 368 the control 325
numUnique(subset(dataReg, treatControl==1)[subset(dataReg, treatControl == 1)$bank ==T,]$idnr)

defaultRates <- read.csv("defaultRates.csv", stringsAsFactors = F)
# NOTE(review): this class() check runs before the merge below, so a
# defaultRate column does not exist yet unless it was created elsewhere.
class(dataReg$defaultRate)
# NOTE(review): all = T is a full outer merge; any year present only in
# defaultRates adds a row whose firm columns are all NA — confirm that
# all.x = TRUE is not what was meant.
dataReg<- merge(dataReg, defaultRates, by.x = "closdate_year", by.y = "year", all = T)# 
dataReg2<- merge(dataReg2, defaultRates, by.x = "closdate_year", by.y = "year", all =T)# 


################bankruptcy
################
# Logit models of the bankruptcy flag on the LBO sample, accounting years after
# 2007. The baseline is fitted first so that the robust coefficient tests
# below operate on an existing object (they would fail with "object not found"
# if run before the glm() call).
simpleLogit <- glm(bank ~ debt_totAssets  +  I(log(abs(toas)+1)) + etma + PEbacked, 
                   family=binomial,  data=subset(dataReg, closdate_year>2007))
coeftest(simpleLogit , vcov = sandwich)                # robust; sandwich
coeftest(simpleLogit, vcov = vcovHC(simpleLogit, "HC0"))    # robust; HC0 
##

# Baseline plus: PEbacked x leverage interaction, Z-score, experienced-acquiror
# flag, favourable-credit-conditions flag (one extra term per model).
simpleLogit1.5 <- glm(bank ~ debt_totAssets  +  I(log(abs(toas)+1)) + etma + PEbacked + I(PEbacked*debt_totAssets), 
                      family=binomial,  data=subset(dataReg, closdate_year>2007))
simpleLogit2 <- glm(bank ~ debt_totAssets  +  I(log(abs(toas)+1)) + etma + PEbacked + Zscore, 
                    family=binomial,  data=subset(dataReg, closdate_year>2007))
simpleLogit3 <- glm(bank ~ debt_totAssets  +  I(log(abs(toas)+1)) + etma + PEbacked + experienced, 
                    family=binomial,  data=subset(dataReg, closdate_year>2007))
simpleLogit4 <- glm(bank ~ debt_totAssets  +  I(log(abs(toas)+1)) + etma + PEbacked + favourable, 
                    family=binomial,  data=subset(dataReg, closdate_year>2007))

# Collect the five models and export them as one CSV table.
estclear() # clear all the eststo objects
eststo(simpleLogit) 
eststo(simpleLogit1.5)
eststo(simpleLogit2)
eststo(simpleLogit3)
eststo(simpleLogit4)

esttab(filename = "test21_06_bank",csv=T) # print it

# Print the HC0-robust coefficient table of every model.
getStdErr(simpleLogit)
getStdErr(simpleLogit1.5)
getStdErr(simpleLogit2)
getStdErr(simpleLogit3)
getStdErr(simpleLogit4)
#######

# Logit models of the bankruptcy flag on the SBO sample, accounting years after
# 2010: baseline, then one extra term per model (PEbacked x leverage
# interaction, Z-score, experienced-acquiror flag, favourable-conditions flag).
simpleLogit_2 <- glm(bank ~ debt_totAssets  +  I(log(abs(toas)+1)) + etma + PEbacked, 
                     family=binomial,  data=subset(dataReg2, closdate_year>2010))
simpleLogit_2_1.5 <- glm(bank ~ debt_totAssets  +  I(log(abs(toas)+1)) + etma + PEbacked + I(debt_totAssets*PEbacked), 
                         family=binomial,  data=subset(dataReg2, closdate_year>2010))
simpleLogit2_2 <- glm(bank ~ debt_totAssets  +  I(log(abs(toas)+1)) + etma + PEbacked + Zscore, 
                      family=binomial,  data=subset(dataReg2, closdate_year>2010))
simpleLogit3_2 <- glm(bank ~ debt_totAssets  +  I(log(abs(toas)+1)) + etma + PEbacked + experienced, 
                      family=binomial,  data=subset(dataReg2, closdate_year>2010))
simpleLogit4_2 <- glm(bank ~ debt_totAssets  +  I(log(abs(toas)+1)) + etma + PEbacked + favourable, 
                      family=binomial,  data=subset(dataReg2, closdate_year>2010))

# Collect the five models and export them as one CSV table.
estclear() # clear all the eststo objects
eststo(simpleLogit_2)
eststo(simpleLogit_2_1.5)
eststo(simpleLogit2_2)
eststo(simpleLogit3_2) # to check about 'experienced' - not working
eststo(simpleLogit4_2)

esttab(filename = "test21_06_bank_2",csv=T) # print it

# Print the HC0-robust coefficient table of every model.
getStdErr(simpleLogit_2)
getStdErr(simpleLogit_2_1.5)
getStdErr(simpleLogit2_2)
getStdErr(simpleLogit3_2)
getStdErr(simpleLogit4_2)


################### Cox Regressions
###################
# Cox proportional-hazards models of bankruptcy for both samples, with the
# same covariate sets as the logit models.
library(survival)
# The survival object uses the accounting year as the time variable and the
# firm-level bankruptcy flag as the event indicator.
# NOTE(review): bank is constant within a firm, so every firm-year of a
# bankrupt firm counts as an event — confirm this is the intended set-up.
dataReg$survObj <- with(dataReg, Surv(closdate_year, bank == 1)) # survival object
dataReg2$survObj <- with(dataReg2, Surv(closdate_year, bank == 1)) # survival object


# LBO sample, accounting years after 2007.
res.cox <- coxph(survObj ~ debt_totAssets  +  I(log(abs(toas)+1)) + etma+ PEbacked, 
                   data=subset(dataReg, closdate_year>2007))
res.cox1.5 <- coxph(survObj ~ debt_totAssets  +  I(log(abs(toas)+1)) + etma+ PEbacked + I(debt_totAssets*PEbacked), 
                    data=subset(dataReg, closdate_year>2007))
res.cox_2 <- coxph(survObj ~ debt_totAssets  +  I(log(abs(toas)+1))+ etma+PEbacked + Zscore, 
                   data=subset(dataReg, closdate_year>2007))

res.cox_3 <- coxph(survObj ~ debt_totAssets  +  I(log(abs(toas)+1)) + etma + PEbacked + experienced, 
                   data=subset(dataReg, closdate_year>2007))

res.cox_4 <- coxph(survObj ~ debt_totAssets  +  I(log(abs(toas)+1)) + etma + PEbacked + favourable, 
                   data=subset(dataReg, closdate_year>2007))

# Print all five fits.
res.cox; res.cox1.5; res.cox_2; res.cox_3; res.cox_4

#####
# SBO sample, accounting years after 2010.
res.cox2 <- coxph(survObj ~ debt_totAssets  +  I(log(abs(toas)+1)) + etma+ PEbacked, 
                 data=subset(dataReg2, closdate_year>2010))
res.cox2_1.5 <- coxph(survObj ~ debt_totAssets  +  I(log(abs(toas)+1)) + etma+ PEbacked + I(debt_totAssets*PEbacked), 
                    data=subset(dataReg2, closdate_year>2010))

res.cox2_2 <- coxph(survObj ~ debt_totAssets  +  I(log(abs(toas)+1))+ etma+PEbacked + Zscore, 
                   data=subset(dataReg2, closdate_year>2010))

res.cox2_3 <- coxph(survObj ~ debt_totAssets  +  I(log(abs(toas)+1)) + etma + PEbacked + experienced, 
                   data=subset(dataReg2, closdate_year>2010))

res.cox2_4 <- coxph(survObj ~ debt_totAssets  +  I(log(abs(toas)+1)) + etma + PEbacked + favourable, 
                   data=subset(dataReg2, closdate_year>2010))

# Print all five fits.
res.cox2; res.cox2_1.5; res.cox2_2; res.cox2_3; res.cox2_4


###############
###############

# Part VIII. Additional calculations for the Zmijewski score (not part of the thesis) ----

######### Calculaions for the Zmijewski score


# Zmijewski-type score for both samples:
#   -4.336 - 4.513 * (income / total assets)
#          + 5.679 * (total liabilities / total assets)
#          + 0.004 * (current assets / current liabilities)
# winsorised at 1.5% in each tail.
# First variant: income built from gros - oope, fire - fiex, taxa and
# exre - exex.
# NOTE(review): the zmij columns computed in this first variant are overwritten
# by the second variant further down; only the histograms and printed
# diagnostics of the first variant survive.
dataReg$zmij <- -4.336 -4.513*(((dataReg$gros-dataReg$oope)+(dataReg$fire-dataReg$fiex)- dataReg$taxa + (dataReg$exre-dataReg$exex))/dataReg$toas) + 
    5.679*((dataReg$ncli + dataReg$culi)/dataReg$toas) + 0.004*(dataReg$cuas/dataReg$culi)

dataReg$zmij <- winsorMy(dataReg$zmij,0.015)

hist(dataReg$zmij)
quantile(dataReg$zmij,.95,na.rm=T)

dataReg2$zmij <- -4.336 -4.513*(((dataReg2$gros-dataReg2$oope)+(dataReg2$fire-dataReg2$fiex)- dataReg2$taxa + (dataReg2$exre-dataReg2$exex))/dataReg2$toas) + 
    5.679*((dataReg2$ncli + dataReg2$culi)/dataReg2$toas) + 0.004*(dataReg2$cuas/dataReg2$culi)
dataReg2$zmij <- winsorMy(dataReg2$zmij,0.015)

hist(dataReg2$zmij)
# NOTE(review): this counts the NAs of dataReg, not dataReg2, right after the
# dataReg2 histogram — confirm which table was meant.
sum(is.na(dataReg$zmij))
####
####
# Second variant: income taken as ebit - inte - taxa. These are the zmij values
# that remain in the data afterwards.
dataReg$zmij <- -4.336 -4.513*(((dataReg$ebit-dataReg$inte)- dataReg$taxa )/dataReg$toas) + 
    5.679*((dataReg$ncli + dataReg$culi)/dataReg$toas) + 0.004*(dataReg$cuas/dataReg$culi)
dataReg$zmij <- winsorMy(dataReg$zmij,0.015)
hist(dataReg$zmij)
sum(is.na(dataReg$zmij))

dataReg2$zmij <- -4.336 -4.513*(((dataReg2$ebit-dataReg2$inte)- dataReg2$taxa )/dataReg2$toas) + 
    5.679*((dataReg2$ncli + dataReg2$culi)/dataReg2$toas) + 0.004*(dataReg2$cuas/dataReg2$culi)
dataReg2$zmij <- winsorMy(dataReg2$zmij,0.015)

hist(dataReg2$zmij)
sum(is.na(dataReg2$zmij))


#####



# Zmijewski-score regressions on dataReg.
# For every plm fit the stored covariance matrix is replaced by the HC0
# estimate clustered by group (presumably so the exported table shows
# robust standard errors -- confirm against estout). vcovTemp keeps the
# most recent matrix.

# Pooled OLS benchmark.
modelOls <- lm(
    zmij ~ debt_totAssets + I(log(abs(turn) + 1)) + etma + quoted + I(log(ageFirm)) + PEbacked,
    data = dataReg
)

# Random effects (default effect).
modelPlm <- plm(
    zmij ~ I(log(abs(turn) + 1)) + etma + debt_totAssets + quoted + I(log(ageFirm)) + PEbacked,
    data = dataReg, model = "random", index = c("id", "year")
)
modelPlm$vcov <- vcovTemp <- vcovHC(modelPlm, type = "HC0", cluster = "group")

# Random effects with effect = "time".
modelPlm_1.5 <- plm(
    zmij ~ I(log(abs(turn) + 1)) + etma + debt_totAssets + quoted + I(log(ageFirm)) + PEbacked,
    data = dataReg, model = "random", effect = "time", index = c("id", "year")
)
modelPlm_1.5$vcov <- vcovTemp <- vcovHC(modelPlm_1.5, type = "HC0", cluster = "group")

# Adds the PEbacked x debt-ratio product term.
modelPlm_2 <- plm(
    zmij ~ I(log(abs(turn) + 1)) + etma + debt_totAssets + quoted +
        I(PEbacked * debt_totAssets) + I(log(ageFirm)) + PEbacked,
    data = dataReg, model = "random", index = c("id", "year")
)
modelPlm_2$vcov <- vcovTemp <- vcovHC(modelPlm_2, type = "HC0", cluster = "group")

# Uses ic in place of the debt ratio.
modelPlm_3 <- plm(
    zmij ~ I(log(abs(turn) + 1)) + etma + quoted + I(log(ageFirm)) + ic + PEbacked,
    data = dataReg, model = "random", index = c("id", "year")
)
modelPlm_3$vcov <- vcovTemp <- vcovHC(modelPlm_3, type = "HC0", cluster = "group")

# ic plus the PEbacked x ic product term.
modelPlm_4 <- plm(
    zmij ~ I(log(abs(turn) + 1)) + etma + quoted + ic + I(PEbacked * ic) +
        I(log(ageFirm)) + PEbacked,
    data = dataReg, model = "random", index = c("id", "year")
)
modelPlm_4$vcov <- vcovTemp <- vcovHC(modelPlm_4, type = "HC0", cluster = "group")

# Export all six models as one table.
estclear()
eststo(modelOls)
eststo(modelPlm)
eststo(modelPlm_1.5)
eststo(modelPlm_2)
eststo(modelPlm_3)
eststo(modelPlm_4)
esttab(filename = "test22_06_3", csv = TRUE)

#####
#####

# Zmijewski-score regressions restricted to the dataReg rows with
# treatControl == 1. As above, each plm fit gets its vcov replaced by the
# HC0 matrix clustered by group; vcovTemp holds the latest one.

# Pooled OLS benchmark.
modelOlsPE <- lm(
    zmij ~ debt_totAssets + I(log(abs(turn) + 1)) + etma + PEbacked2,
    data = subset(dataReg, treatControl == 1)
)

# Within (fixed-effects) baseline.
modelPlmPE <- plm(
    zmij ~ debt_totAssets + I(log(abs(turn) + 1)) + etma + PEbacked2,
    data = subset(dataReg, treatControl == 1),
    model = "within", index = c("id", "year")
)
modelPlmPE$vcov <- vcovTemp <- vcovHC(modelPlmPE, type = "HC0", cluster = "group")

# Baseline + experienced.
modelPlmPE_2 <- plm(
    zmij ~ debt_totAssets + I(log(abs(turn) + 1)) + etma + PEbacked2 + experienced,
    data = subset(dataReg, treatControl == 1),
    model = "within", index = c("id", "year")
)
modelPlmPE_2$vcov <- vcovTemp <- vcovHC(modelPlmPE_2, type = "HC0", cluster = "group")

# Baseline + favourable.
modelPlmPE_3 <- plm(
    zmij ~ debt_totAssets + I(log(abs(turn) + 1)) + etma + PEbacked2 + favourable,
    data = subset(dataReg, treatControl == 1),
    model = "within", index = c("id", "year")
)
modelPlmPE_3$vcov <- vcovTemp <- vcovHC(modelPlmPE_3, type = "HC0", cluster = "group")

# Baseline + log firm age; the only random-effects fit in this group.
modelPlmPE_3_2 <- plm(
    zmij ~ debt_totAssets + I(log(abs(turn) + 1)) + etma + PEbacked2 + I(log(ageFirm)),
    data = subset(dataReg, treatControl == 1),
    model = "random", index = c("id", "year")
)
modelPlmPE_3_2$vcov <- vcovTemp <- vcovHC(modelPlmPE_3_2, type = "HC0", cluster = "group")

# Baseline + difYear_pePositive.
modelPlmPE_4 <- plm(
    zmij ~ debt_totAssets + I(log(abs(turn) + 1)) + etma + PEbacked2 + difYear_pePositive,
    data = subset(dataReg, treatControl == 1),
    model = "within", index = c("id", "year")
)
modelPlmPE_4$vcov <- vcovTemp <- vcovHC(modelPlmPE_4, type = "HC0", cluster = "group")

# difYear_pePositive without PEbacked2.
modelPlmPE_5 <- plm(
    zmij ~ debt_totAssets + I(log(abs(turn) + 1)) + etma + difYear_pePositive,
    data = subset(dataReg, treatControl == 1),
    model = "within", index = c("id", "year")
)
modelPlmPE_5$vcov <- vcovTemp <- vcovHC(modelPlmPE_5, type = "HC0", cluster = "group")

# Export all seven models as one table.
estclear()
eststo(modelOlsPE)
eststo(modelPlmPE)
eststo(modelPlmPE_2)
eststo(modelPlmPE_3)
eststo(modelPlmPE_3_2)
eststo(modelPlmPE_4)
eststo(modelPlmPE_5)
esttab(filename = "test22_06_PE_3", csv = TRUE)

################dataReg2


# Zmijewski-score regressions on dataReg2. Every plm fit has its vcov
# replaced by the HC0 matrix clustered by group; vcovTemp holds the
# latest one.

# Pooled OLS benchmark.
modelOls2 <- lm(
    zmij ~ debt_totAssets + I(log(abs(turn) + 1)) + etma + quoted + I(log(ageFirm)) + PEbacked,
    data = dataReg2
)

# Random effects (default effect).
modelPlm2 <- plm(
    zmij ~ I(log(abs(turn) + 1)) + etma + debt_totAssets + quoted + I(log(ageFirm)) + PEbacked,
    data = dataReg2, model = "random", index = c("id", "year")
)
modelPlm2$vcov <- vcovTemp <- vcovHC(modelPlm2, type = "HC0", cluster = "group")

# Random effects with effect = "time".
modelPlm2_1.5 <- plm(
    zmij ~ I(log(abs(turn) + 1)) + etma + debt_totAssets + quoted + I(log(ageFirm)) + PEbacked,
    data = dataReg2, model = "random", effect = "time", index = c("id", "year")
)
modelPlm2_1.5$vcov <- vcovTemp <- vcovHC(modelPlm2_1.5, type = "HC0", cluster = "group")

# Adds the PEbacked x debt-ratio product term.
modelPlm2_2 <- plm(
    zmij ~ I(log(abs(turn) + 1)) + etma + debt_totAssets + quoted + I(log(ageFirm)) +
        I(PEbacked * debt_totAssets) + PEbacked,
    data = dataReg2, model = "random", index = c("id", "year")
)
modelPlm2_2$vcov <- vcovTemp <- vcovHC(modelPlm2_2, type = "HC0", cluster = "group")

# Uses ic in place of the debt ratio.
modelPlm2_3 <- plm(
    zmij ~ I(log(abs(turn) + 1)) + etma + quoted + ic + I(log(ageFirm)) + PEbacked,
    data = dataReg2, model = "random", index = c("id", "year")
)
modelPlm2_3$vcov <- vcovTemp <- vcovHC(modelPlm2_3, type = "HC0", cluster = "group")

# ic plus the PEbacked x ic product term.
modelPlm2_4 <- plm(
    zmij ~ I(log(abs(turn) + 1)) + etma + quoted + ic + I(PEbacked * ic) +
        I(log(ageFirm)) + PEbacked,
    data = dataReg2, model = "random", index = c("id", "year")
)
modelPlm2_4$vcov <- vcovTemp <- vcovHC(modelPlm2_4, type = "HC0", cluster = "group")

# Export all six models as one table.
estclear()
eststo(modelOls2)
eststo(modelPlm2)
eststo(modelPlm2_1.5)
eststo(modelPlm2_2)
eststo(modelPlm2_3)
eststo(modelPlm2_4)
esttab(filename = "test22_06_3_2", csv = TRUE)

#===
# Zmijewski-score regressions restricted to the dataReg2 rows with
# treatControl == 1. Each plm fit gets its vcov replaced by the HC0
# matrix clustered by group; vcovTemp holds the latest one.

# Pooled OLS benchmark.
modelOlsPE2 <- lm(
    zmij ~ debt_totAssets + I(log(abs(turn) + 1)) + etma + PEbacked2,
    data = subset(dataReg2, treatControl == 1)
)

# Within (fixed-effects) baseline.
modelPlmPE2 <- plm(
    zmij ~ debt_totAssets + I(log(abs(turn) + 1)) + etma + PEbacked2,
    data = subset(dataReg2, treatControl == 1),
    model = "within", index = c("id", "year")
)
modelPlmPE2$vcov <- vcovTemp <- vcovHC(modelPlmPE2, type = "HC0", cluster = "group")

# Baseline + experienced.
modelPlmPE2_2 <- plm(
    zmij ~ debt_totAssets + I(log(abs(turn) + 1)) + etma + PEbacked2 + experienced,
    data = subset(dataReg2, treatControl == 1),
    model = "within", index = c("id", "year")
)
modelPlmPE2_2$vcov <- vcovTemp <- vcovHC(modelPlmPE2_2, type = "HC0", cluster = "group")

# Baseline + favourable.
modelPlmPE2_3 <- plm(
    zmij ~ debt_totAssets + I(log(abs(turn) + 1)) + etma + PEbacked2 + favourable,
    data = subset(dataReg2, treatControl == 1),
    model = "within", index = c("id", "year")
)
modelPlmPE2_3$vcov <- vcovTemp <- vcovHC(modelPlmPE2_3, type = "HC0", cluster = "group")

# Baseline + log firm age; the only random-effects fit in this group.
modelPlmPE2_3_2 <- plm(
    zmij ~ debt_totAssets + I(log(abs(turn) + 1)) + etma + PEbacked2 + I(log(ageFirm)),
    data = subset(dataReg2, treatControl == 1),
    model = "random", index = c("id", "year")
)
modelPlmPE2_3_2$vcov <- vcovTemp <- vcovHC(modelPlmPE2_3_2, type = "HC0", cluster = "group")

# Baseline + difYear_pePositive.
modelPlmPE2_4 <- plm(
    zmij ~ debt_totAssets + I(log(abs(turn) + 1)) + etma + PEbacked2 + difYear_pePositive,
    data = subset(dataReg2, treatControl == 1),
    model = "within", index = c("id", "year")
)
modelPlmPE2_4$vcov <- vcovTemp <- vcovHC(modelPlmPE2_4, type = "HC0", cluster = "group")

# difYear_pePositive without PEbacked2.
modelPlmPE2_5 <- plm(
    zmij ~ debt_totAssets + I(log(abs(turn) + 1)) + etma + difYear_pePositive,
    data = subset(dataReg2, treatControl == 1),
    model = "within", index = c("id", "year")
)
modelPlmPE2_5$vcov <- vcovTemp <- vcovHC(modelPlmPE2_5, type = "HC0", cluster = "group")

# Export all seven models as one table.
estclear()
eststo(modelOlsPE2)
eststo(modelPlmPE2)
eststo(modelPlmPE2_2)
eststo(modelPlmPE2_3)
eststo(modelPlmPE2_3_2)
eststo(modelPlmPE2_4)
eststo(modelPlmPE2_5)
esttab(filename = "test22_06_PE_3_2", csv = TRUE)



##########
##########


######Trial of extra logit regressions plus the panel data fixed effects


# Exploratory logit specifications (author's notes mark parts of this
# section as not working). The objects dA2, dA2_2 and dA2_3 are not
# created in the visible part of the script -- TODO confirm they exist
# before running this section.

# glmmML is only needed for the glmmboot() attempt below. Install it when
# it is missing instead of re-installing it on every run of the script.
if (!requireNamespace("glmmML", quietly = TRUE)) {
    install.packages("glmmML")
}
library(glmmML)

# Fixed-effects logit via glmmboot(), clustered on the BvD id.
logitfemod1 <- glmmboot(bankDis ~ debt_totAssets  +  I(log(Total.assets))+ PEbacked,
                        family=binomial(link="logit"), data=dA2, cluster=BvD.ID.number)#not working
summary(logitfemod1)#not working!!!!


library(pglm)

# Pooled panel logit on dataReg.
po.logit <- pglm(bankDis ~ debt_totAssets  + I(log(toas)) + PEbacked,
                 model=("pooling"), effect=("individual"), index=c("id", "year"),
                 family=binomial(link="logit"),  data=dataReg)

summary(po.logit)#works ??

# NOTE(review): this call uses Total.assets with data = dataReg, while the
# other dataReg models use toas -- verify the column exists.
po.logit2 <- pglm(bankDis ~ debt_totAssets  + I(log(Total.assets)) + PEbacked + Zscore,
                  model=("pooling"), effect=("individual"), index=c("id", "year"),
                  family=binomial(link="logit"),  data=dataReg)
summary(po.logit2)

# Adds interest cover (data = dA2_2).
po.logit3 <- pglm(bankDis ~ debt_totAssets  + I(log(Total.assets)) + PEbacked + Interest.cover..x.,
                  model=("pooling"), effect=("individual"), index=c("id", "year"),
                  family=binomial(link="logit"),  data=dA2_2)
summary(po.logit3)

# Adds interest cover and spread (data = dA2_3).
po.logit4 <- pglm(bankDis ~ debt_totAssets  + I(log(Total.assets)) + PEbacked + Interest.cover..x.+ spread,
                  model=("pooling"), effect=("individual"), index=c("id", "year"),
                  family=binomial(link="logit"),  data=dA2_3)
summary(po.logit4)

# NOTE(review): despite the name "ra.logit" this requests model = "between"
# with an ordinal family -- confirm this is the intended specification.
ra.logit <- pglm(bankDis ~ debt_totAssets  + I(log(Total.assets)) + PEbacked,
                 model=("between"), effect=("individual"), index=c("id", "year"),
                 family=ordinal(link="logit"),  data=dA2_2)

Part IX. Additional tables that are present in the thesis

#=============tables

# Z-score summary statistics for the deal-relative years -1, 0, 1 and 2,
# one row per year. Rows whose difYear_pe is NA are left out, exactly as
# subset() would do.
zscoreByDealYear <- function(panel) {
    perYear <- lapply(c(-1, 0, 1, 2), function(yearOffset) {
        summary(panel$Zscore[which(panel$difYear_pe == yearOffset)], na.rm = TRUE)
    })
    do.call(rbind, perYear)
}

tableZscores <- zscoreByDealYear(dataReg)
tableZscores2 <- zscoreByDealYear(dataReg2)

# Both samples side by side in a single file.
write.csv(cbind(tableZscores, tableZscores2), file = "tableZscoresAll.csv")
#========= to check if I have to use dataReg or dataZ
# Country-of-origin tables: the 15 most frequent target / acquiror country
# codes, first counted on the regression samples (deal year only) ...
countryTar <- as.data.frame(
    table(subset(dataReg, difYear_pe == 0)$Target.country.code)
)
table1 <- countryTar[order(countryTar[[2]], decreasing = TRUE), ][seq_len(15), ]
countryAcq <- as.data.frame(
    table(subset(dataReg, difYear_pe == 0)$Acquiror.country.code)
)
table2 <- countryAcq[order(countryAcq[[2]], decreasing = TRUE), ][seq_len(15), ]
countryTar2 <- as.data.frame(
    table(subset(dataReg2, difYear_pe == 0)$Target.country.code)
)
table1_2 <- countryTar2[order(countryTar2[[2]], decreasing = TRUE), ][seq_len(15), ]
countryAcq2 <- as.data.frame(
    table(subset(dataReg2, difYear_pe == 0)$Acquiror.country.code)
)
table2_2 <- countryAcq2[order(countryAcq2[[2]], decreasing = TRUE), ][seq_len(15), ]
tableAll <- as.data.frame(cbind(table1, table2, table1_2, table2_2))
#===========
# ... and then on the full deal lists dataZ / dataZ2. This second version
# replaces every object built above and is the one written to disk.
countryTar <- as.data.frame(table((dataZ)$Target.country.code))
table1 <- countryTar[order(countryTar[[2]], decreasing = TRUE), ][seq_len(15), ]
countryAcq <- as.data.frame(table((dataZ)$Acquiror.country.code))
table2 <- countryAcq[order(countryAcq[[2]], decreasing = TRUE), ][seq_len(15), ]
countryTar2 <- as.data.frame(table((dataZ2)$Target.country.code))
table1_2 <- countryTar2[order(countryTar2[[2]], decreasing = TRUE), ][seq_len(15), ]
countryAcq2 <- as.data.frame(table((dataZ2)$Acquiror.country.code))
table2_2 <- countryAcq2[order(countryAcq2[[2]], decreasing = TRUE), ][seq_len(15), ]
tableAll <- as.data.frame(cbind(table1, table2, table1_2, table2_2))
write.csv(tableAll, file = "tableAllcountryOrigin.csv")
#===========
# Industry breakdown of the deals: the 12 most frequent industry
# descriptions in dataZ and dataZ2, written side by side.
#
# Fixes relative to the earlier version:
#  * the second table (industrySIC2) is now the one that gets renamed;
#    before, industrySIC was renamed twice and industrySIC2 kept the
#    default Var1/Freq headers;
#  * the NACE tables are labelled as NACE instead of SIC.

# --- US SIC descriptions
industrySIC <- as.data.frame(table(as.factor(dataZ$Target.primary.US.SIC.description)))
industrySIC <- industrySIC[order(industrySIC[, 2], decreasing = TRUE), ][1:12, ]
names(industrySIC) <- c("SIC industry description", "Number of deals")

industrySIC2 <- as.data.frame(table(as.factor(dataZ2$Target.primary.US.SIC.description)))
industrySIC2 <- industrySIC2[order(industrySIC2[, 2], decreasing = TRUE), ][1:12, ]
names(industrySIC2) <- c("SIC industry description", "Number of deals")
cbind(industrySIC, industrySIC2)
write.csv(cbind(industrySIC, industrySIC2), file = "industrySIC_test.csv")
###
# --- NACE Rev. 2 descriptions (the industrySIC* objects are reused)
industrySIC <- as.data.frame(table(as.factor(dataZ$Target.primary.NACE.Rev.2.description)))
industrySIC <- industrySIC[order(industrySIC[, 2], decreasing = TRUE), ][1:12, ]
names(industrySIC) <- c("NACE Rev. 2 industry description", "Number of deals")

industrySIC2 <- as.data.frame(table(as.factor(dataZ2$Target.primary.NACE.Rev.2.description)))
industrySIC2 <- industrySIC2[order(industrySIC2[, 2], decreasing = TRUE), ][1:12, ]
names(industrySIC2) <- c("NACE Rev. 2 industry description", "Number of deals")
cbind(industrySIC, industrySIC2)
write.csv(cbind(industrySIC, industrySIC2), file = "industryNACErev2.csv")

#============

# Descriptive statistics for the year before the deal (difYear_pe == -1)
# plus the acquired stake from the matching deal list. Returns the seven
# summaries as a list so that both samples can be stacked in one rbind().
yearBeforeDealSummaries <- function(panel, deals) {
    preDeal <- subset(panel, difYear_pe == -1)
    list(
        summary(abs(preDeal$toas), na.rm = TRUE),
        summary(preDeal$ebta, na.rm = TRUE),
        summary(preDeal$turn, na.rm = TRUE),
        summary(preDeal$enva, na.rm = TRUE),
        summary(preDeal$ic, na.rm = TRUE),
        summary(preDeal$debt_totAssets, na.rm = TRUE),
        summary(deals$Acquired.stake...., na.rm = TRUE)
    )
}

# Rows 1-7: dataReg / dataZ; rows 8-14: dataReg2 / dataZ2.
tableSum <- data.frame(do.call(
    rbind,
    c(yearBeforeDealSummaries(dataReg, dataZ),
      yearBeforeDealSummaries(dataReg2, dataZ2))
))

write.csv(tableSum, file = "tableSum2.csv")
#===================table treatment and controls

# Treatment-versus-control comparison table.
#
# For each of the four panels (dataA, dataAcontrols, dataA2,
# dataA2controls) every firm is collapsed to its time average of total
# assets, EBITDA, EBITDA margin and debt ratio; the median and mean of
# those firm averages are then tabulated.

# Collapse a firm-year panel to one row per firm.
#
# panel           long data frame with idnr, closdate_year, toas, ebta,
#                 etma and debt_TotalAssets
# winsorFraction  fraction passed to winsorMy() for the debt ratio
#
# Returns a data frame with the columns idnr, aveTotal.assets, ebitda,
# ebitdaMargin and debt_TotalAssets (in that order).
averageFirmPanel <- function(panel, winsorFraction = .001) {
    # Long -> wide for one variable, then average over all year columns.
    firmMean <- function(varName) {
        wide <- reshape(panel[, c("idnr", "closdate_year", varName)],
                        idvar = "idnr", timevar = "closdate_year",
                        direction = "wide")
        # drop = FALSE keeps a data frame even with a single year column
        wide$firmMean <- apply(wide[, 2:ncol(wide), drop = FALSE], 1,
                               function(y) mean(y, na.rm = TRUE))
        wide
    }

    averages <- firmMean("toas")[, c("idnr", "firmMean")]
    names(averages)[2] <- "aveTotal.assets"
    averages$ebitda <- firmMean("ebta")$firmMean
    averages$ebitdaMargin <- firmMean("etma")$firmMean

    debtRatio <- firmMean("debt_TotalAssets")$firmMean
    # Inf / NaN (division by zero, no usable year) become missing. A real
    # NA is used here: assigning the string "NA" turned the whole column
    # into character and the round trip back to numeric lost precision
    # and raised a coercion warning.
    debtRatio[!is.finite(debtRatio)] <- NA_real_
    averages$debt_TotalAssets <- winsorMy(debtRatio, winsorFraction)

    averages
}

# --- LBO targets
dataMatchL <- dataA
dataMatchL$debt_TotalAssets <- (dataMatchL$ncli + dataMatchL$loan) / dataMatchL$toas
dataMatch <- averageFirmPanel(dataMatchL)
dataA_median <- apply(dataMatch[2:5], 2, function(y) median(y, na.rm = TRUE))
dataA_mean <- apply(dataMatch[2:5], 2, function(y) mean(y, na.rm = TRUE))

# --- controls for the LBO targets
dataMatchL <- dataAcontrols
dataMatchL$debt_TotalAssets <- (dataMatchL$ncli + dataMatchL$loan) / dataMatchL$toas
dataMatch <- averageFirmPanel(dataMatchL)
dataAcontrols_median <- apply(dataMatch[2:5], 2, function(y) median(y, na.rm = TRUE))
dataAcontrols_mean <- apply(dataMatch[2:5], 2, function(y) mean(y, na.rm = TRUE))

# --- second target panel (dataA2)
dataMatchL <- dataA2
dataMatchL$debt_TotalAssets <- (dataMatchL$ncli + dataMatchL$loan) / dataMatchL$toas
dataMatch <- averageFirmPanel(dataMatchL)
dataA2_median <- apply(dataMatch[2:5], 2, function(y) median(y, na.rm = TRUE))
dataA2_mean <- apply(dataMatch[2:5], 2, function(y) mean(y, na.rm = TRUE))

# --- controls for the second target panel
dataMatchL <- dataA2controls
dataMatchL$debt_TotalAssets <- (dataMatchL$ncli + dataMatchL$loan) / dataMatchL$toas
dataMatch <- averageFirmPanel(dataMatchL)
dataA2controls_median <- apply(dataMatch[2:5], 2, function(y) median(y, na.rm = TRUE))
dataA2controls_mean <- apply(dataMatch[2:5], 2, function(y) mean(y, na.rm = TRUE))

# Variables in rows, the eight statistics in columns, rounded to 2 digits.
tableTreatControl <- as.data.frame(round(t(rbind(
    dataA_median, dataAcontrols_median, dataA_mean, dataAcontrols_mean,
    dataA2_median, dataA2controls_median, dataA2_mean, dataA2controls_mean
)), 2))

# NOTE(review): the combined table written here is overwritten by the
# next write.csv(), which uses the same file name. The file on disk ends
# up holding only the dataA columns; the combined table survives only in
# the tableTreatControl object. Kept as is -- decide which file is wanted.
write.csv(tableTreatControl, file = "tableTreatControl.csv")
write.csv(
    as.data.frame(round(t(rbind(
        dataA_median, dataAcontrols_median, dataA_mean, dataAcontrols_mean
    )), 2)),
    file = "tableTreatControl.csv"
)
write.csv(
    as.data.frame(round(t(rbind(
        dataA2_median, dataA2controls_median, dataA2_mean, dataA2controls_mean
    )), 2)),
    file = "tableTreatControl2.csv"
)

rm(dataA_median, dataAcontrols_median, dataA_mean, dataAcontrols_mean,
   dataA2_median, dataA2controls_median, dataA2_mean, dataA2controls_mean)

#================

Part X. Constructing the control group

#================
### Constructing the control group // Begin //


#----digit 1
amadeusDigit1 <- read.dta("amadeusDigit1.dta")
amadeusDigit1$debt_TotalAssets<- (amadeusDigit1$ncli +amadeusDigit1$loan)/amadeusDigit1$toas

dataMatchL <- amadeusDigit1; rm(amadeusDigit1)
sum(dataZ$Target.BvD.ID.number %in% dataMatchL$idnr)

## chanhing from long to wide format
x<-reshape(dataMatchL[,1:3] , idvar = "idnr",timevar = "closdate_year", direction="wide")
x$aveTotal.assets<- apply(x[,2:18],1,function(y) mean(y,na.rm=T))
dataMatch <- x[,c("idnr","aveTotal.assets")]
x<-reshape(dataMatchL[,c("idnr","closdate_year","ebta")] , idvar = "idnr",timevar = "closdate_year", direction="wide")
x$ebitda<- apply(x[,2:18],1,function(y) mean(y,na.rm=T))
dataMatch$ebitda <- x$ebitda
x<-reshape(dataMatchL[,c("idnr","closdate_year","etma")] , idvar = "idnr",timevar = "closdate_year", direction="wide")#
x$ebitdaMargin<- apply(x[,2:18],1,function(y) mean(y,na.rm=T))
dataMatch$ebitdaMargin <- x$ebitdaMargin
x<-reshape(dataMatchL[,c("idnr","closdate_year","debt_TotalAssets")] , idvar = "idnr",timevar = "closdate_year", direction="wide")#actually it is working
x$debt_TotalAssets<- apply(x[,2:18],1,function(y) mean(y,na.rm=T))
dataMatch$debt_TotalAssets <- x$debt_TotalAssets

dataMatch[(!is.finite(dataMatch$debt_TotalAssets)), "debt_TotalAssets"]<- "NA"
dataMatch$debt_TotalAssets <-as.numeric(dataMatch$debt_TotalAssets)#making it back to numeric

dataMatch$pe <- dataMatch$idnr %in% dataZ$Target.BvD.ID.number
numUnique(subset(dataMatch, pe== TRUE)$idnr)#204
dataMatch <- na.omit(dataMatch)

model_match <- matchit(pe ~ aveTotal.assets + ebitda + debt_TotalAssets + ebitdaMargin, data = dataMatch,  method = "nearest", ratio = 1.5)
model_match <- matchit(pe ~ aveTotal.assets +I(debt_TotalAssets*aveTotal.assets)+ ebitda + ebitdaMargin, data = dataMatch,  method = "nearest", ratio = 2)
model_match <- matchit(pe ~ aveTotal.assets +I(debt_TotalAssets*aveTotal.assets)+ ebitda, data = dataMatch,  method = "nearest", ratio = 2)

M <-match.data(model_match)
median(subset(M,pe==1)$aveTotal.assets)
median(subset(M,pe==0)$aveTotal.assets)
mean(subset(M,pe==1)$debt_TotalAssets)
mean(subset(M,pe==0)$debt_TotalAssets)
mean(subset(M,pe==1)$ebitda)
mean(subset(M,pe==0)$ebitda)
length(subset(M, pe==0)$idnr)

write(subset(M, pe==0)$idnr, file= "controlsZdigit1.txt")

#same but with dataZ2 -> SBOs
dataMatch$pe <- dataMatch$idnr %in% dataZ$Target.BvD.ID.number
numUnique(subset(dataMatch, pe== TRUE)$idnr)#274

model_match <- matchit(pe ~ aveTotal.assets + ebitda + debt_TotalAssets + ebitdaMargin, data = dataMatch,  method = "nearest", ratio = 1.5)
model_match <- matchit(pe ~ aveTotal.assets +I(debt_TotalAssets*aveTotal.assets)+ ebitda + ebitdaMargin, data = dataMatch,  method = "nearest", ratio = 2)
model_match <- matchit(pe ~ aveTotal.assets +I(debt_TotalAssets*aveTotal.assets)+ ebitda, data = dataMatch,  method = "nearest", ratio = 2)

M <-match.data(model_match)
median(subset(M,pe==1)$aveTotal.assets)
median(subset(M,pe==0)$aveTotal.assets)
mean(subset(M,pe==1)$debt_TotalAssets)
mean(subset(M,pe==0)$debt_TotalAssets)
mean(subset(M,pe==1)$ebitda)
mean(subset(M,pe==0)$ebitda)
mean(subset(M,pe==1)$ebitdaMargin)
mean(subset(M,pe==0)$ebitdaMargin)


write(subset(M, pe==0)$idnr, file = "controlsZ2digit1.txt")

##############Digit 2
amadeusDigit2 <- read.dta("amadeusDigit2.dta")
amadeusDigit2$debt_TotalAssets<- (amadeusDigit2$ncli +amadeusDigit2$loan)/amadeusDigit2$toas

dataMatchL <- amadeusDigit2; rm(amadeusDigit2)
sum(dataZ$Target.BvD.ID.number %in% dataMatchL$idnr)

## chanhing from long to wide format
x<-reshape(dataMatchL[,1:3] , idvar = "idnr",timevar = "closdate_year", direction="wide")#actually it is working
x$aveTotal.assets<- apply(x[,2:18],1,function(y) mean(y,na.rm=T))
dataMatch <- x[,c("idnr","aveTotal.assets")]
x<-reshape(dataMatchL[,c("idnr","closdate_year","ebta")] , idvar = "idnr",timevar = "closdate_year", direction="wide")#actually it is working
x$ebitda<- apply(x[,2:18],1,function(y) mean(y,na.rm=T))
dataMatch$ebitda <- x$ebitda
x<-reshape(dataMatchL[,c("idnr","closdate_year","etma")] , idvar = "idnr",timevar = "closdate_year", direction="wide")#actually it is working
x$ebitdaMargin<- apply(x[,2:18],1,function(y) mean(y,na.rm=T))
dataMatch$ebitdaMargin <- x$ebitdaMargin
x<-reshape(dataMatchL[,c("idnr","closdate_year","debt_TotalAssets")] , idvar = "idnr",timevar = "closdate_year", direction="wide")#actually it is working
x$debt_TotalAssets<- apply(x[,2:18],1,function(y) mean(y,na.rm=T))
dataMatch$debt_TotalAssets <- x$debt_TotalAssets

dataMatch[(!is.finite(dataMatch$debt_TotalAssets)), "debt_TotalAssets"]<- "NA"
dataMatch$debt_TotalAssets <-as.numeric(dataMatch$debt_TotalAssets)#making it back to numeric

dataMatch$pe <- dataMatch$idnr %in% dataZ$Target.BvD.ID.number
numUnique(subset(dataMatch, pe== TRUE)$idnr)#673
dataMatch <- na.omit(dataMatch)

model_match <- matchit(pe ~ aveTotal.assets + ebitda + debt_TotalAssets + ebitdaMargin, data = dataMatch,  method = "nearest", ratio = 1.5)
model_match <- matchit(pe ~ aveTotal.assets +I(debt_TotalAssets*aveTotal.assets)+ ebitda + ebitdaMargin, data = dataMatch,  method = "nearest", ratio = 2)
model_match <- matchit(pe ~ aveTotal.assets +I(debt_TotalAssets*aveTotal.assets)+ ebitda, data = dataMatch,  method = "nearest", ratio = 2)

M <-match.data(model_match)
median(subset(M,pe==1)$aveTotal.assets)
median(subset(M,pe==0)$aveTotal.assets)
mean(subset(M,pe==1)$debt_TotalAssets)
mean(subset(M,pe==0)$debt_TotalAssets)
mean(subset(M,pe==1)$ebitda)
mean(subset(M,pe==0)$ebitda)
length(subset(M, pe==0)$idnr)

write(subset(M, pe==0)$idnr, file= "controlsZdigit2.txt")

#same but with dataZ2 -> SBOs
dataMatch$pe <- dataMatch$idnr %in% dataZ2$Target.BvD.ID.number
numUnique(subset(dataMatch, pe== TRUE)$idnr)#160

model_match <- matchit(pe ~ aveTotal.assets + ebitda + debt_TotalAssets + ebitdaMargin, data = dataMatch,  method = "nearest", ratio = 1.5)
model_match <- matchit(pe ~ aveTotal.assets +I(debt_TotalAssets*aveTotal.assets)+ ebitda + ebitdaMargin, data = dataMatch,  method = "nearest", ratio = 2)
model_match <- matchit(pe ~ aveTotal.assets +I(debt_TotalAssets*aveTotal.assets)+ ebitda, data = dataMatch,  method = "nearest", ratio = 2)

M <-match.data(model_match)
median(subset(M,pe==1)$aveTotal.assets)
median(subset(M,pe==0)$aveTotal.assets)
mean(subset(M,pe==1)$debt_TotalAssets)
mean(subset(M,pe==0)$debt_TotalAssets)
mean(subset(M,pe==1)$ebitda)
mean(subset(M,pe==0)$ebitda)
mean(subset(M,pe==1)$ebitdaMargin)
mean(subset(M,pe==0)$ebitdaMargin)


write(subset(M, pe==0)$idnr, file = "controlsZ2digit2.txt")


############Digit3
amadeusDigit3 <- read.dta("amadeusDigit3.dta")
amadeusDigit3$debt_TotalAssets<- (amadeusDigit3$ncli +amadeusDigit3$loan)/amadeusDigit3$toas

dataMatchL <- amadeusDigit3; rm(amadeusDigit3)

## chanhing from long to wide format
x<-reshape(dataMatchL[,1:3] , idvar = "idnr",timevar = "closdate_year", direction="wide")#actually it is working
x$aveTotal.assets<- apply(x[,2:18],1,function(y) mean(y,na.rm=T))
dataMatch <- x[,c("idnr","aveTotal.assets")]
x<-reshape(dataMatchL[,c("idnr","closdate_year","ebta")] , idvar = "idnr",timevar = "closdate_year", direction="wide")#actually it is working
x$ebitda<- apply(x[,2:18],1,function(y) mean(y,na.rm=T))
dataMatch$ebitda <- x$ebitda
x<-reshape(dataMatchL[,c("idnr","closdate_year","etma")] , idvar = "idnr",timevar = "closdate_year", direction="wide")#actually it is working
x$ebitdaMargin<- apply(x[,2:18],1,function(y) mean(y,na.rm=T))
dataMatch$ebitdaMargin <- x$ebitdaMargin
x<-reshape(dataMatchL[,c("idnr","closdate_year","debt_TotalAssets")] , idvar = "idnr",timevar = "closdate_year", direction="wide")#actually it is working
x$debt_TotalAssets<- apply(x[,2:18],1,function(y) mean(y,na.rm=T))
dataMatch$debt_TotalAssets <- x$debt_TotalAssets

dataMatch[(!is.finite(dataMatch$debt_TotalAssets)), "debt_TotalAssets"]<- "NA"
dataMatch$debt_TotalAssets <-as.numeric(dataMatch$debt_TotalAssets)#making it back to numeric

dataMatch$pe <- dataMatch$idnr %in% dataZ$Target.BvD.ID.number
numUnique(subset(dataMatch, pe== TRUE)$idnr)#180
dataMatch <- na.omit(dataMatch)

model_match <- matchit(pe ~ aveTotal.assets + ebitda + debt_TotalAssets + ebitdaMargin, data = dataMatch,  method = "nearest", ratio = 1.5)
model_match <- matchit(pe ~ aveTotal.assets +I(debt_TotalAssets*aveTotal.assets)+ ebitda + ebitdaMargin, data = dataMatch,  method = "nearest", ratio = 2)
model_match <- matchit(pe ~ aveTotal.assets +I(debt_TotalAssets*aveTotal.assets)+ ebitda, data = dataMatch,  method = "nearest", ratio = 2)

M <-match.data(model_match)
median(subset(M,pe==1)$aveTotal.assets)
median(subset(M,pe==0)$aveTotal.assets)
mean(subset(M,pe==1)$debt_TotalAssets)
mean(subset(M,pe==0)$debt_TotalAssets)
mean(subset(M,pe==1)$ebitda)
mean(subset(M,pe==0)$ebitda)
mean(subset(M,pe==1)$ebitdaMargin)
mean(subset(M,pe==0)$ebitdaMargin)
length(subset(M, pe==0)$idnr)

write(subset(M, pe==0)$idnr, file= "controlsZdigit3.txt")

#same but with dataZ2 -> SBOs
dataMatch$pe <- dataMatch$idnr %in% dataZ2$Target.BvD.ID.number
numUnique(subset(dataMatch, pe== TRUE)$idnr)#49

model_match <- matchit(pe ~ aveTotal.assets + ebitda + debt_TotalAssets + ebitdaMargin, data = dataMatch,  method = "nearest", ratio = 1.5)
model_match <- matchit(pe ~ aveTotal.assets +I(debt_TotalAssets*aveTotal.assets)+ ebitda + ebitdaMargin, data = dataMatch,  method = "nearest", ratio = 2)
model_match <- matchit(pe ~ aveTotal.assets +I(debt_TotalAssets*aveTotal.assets)+ ebitda, data = dataMatch,  method = "nearest", ratio = 2)

M <-match.data(model_match)
median(subset(M,pe==1)$aveTotal.assets)
median(subset(M,pe==0)$aveTotal.assets)
mean(subset(M,pe==1)$debt_TotalAssets)
mean(subset(M,pe==0)$debt_TotalAssets)
mean(subset(M,pe==1)$ebitda)
mean(subset(M,pe==0)$ebitda)
mean(subset(M,pe==1)$ebitdaMargin)
mean(subset(M,pe==0)$ebitdaMargin)


write(subset(M, pe==0)$idnr, file = "controlsZ2digit3.txt")

############Digit4
amadeusDigit4 <- read.dta("amadeusDigit4.dta")
amadeusDigit4$debt_TotalAssets<- (amadeusDigit4$ncli +amadeusDigit4$loan)/amadeusDigit4$toas

dataMatchL <- amadeusDigit4; rm(amadeusDigit4)

## chanhing from long to wide format
x<-reshape(dataMatchL[,1:3] , idvar = "idnr",timevar = "closdate_year", direction="wide")#actually it is working
x$aveTotal.assets<- apply(x[,2:18],1,function(y) mean(y,na.rm=T))
dataMatch <- x[,c("idnr","aveTotal.assets")]
x<-reshape(dataMatchL[,c("idnr","closdate_year","ebta")] , idvar = "idnr",timevar = "closdate_year", direction="wide")#actually it is working
x$ebitda<- apply(x[,2:18],1,function(y) mean(y,na.rm=T))
dataMatch$ebitda <- x$ebitda
x<-reshape(dataMatchL[,c("idnr","closdate_year","etma")] , idvar = "idnr",timevar = "closdate_year", direction="wide")#actually it is working
x$ebitdaMargin<- apply(x[,2:18],1,function(y) mean(y,na.rm=T))
dataMatch$ebitdaMargin <- x$ebitdaMargin
x<-reshape(dataMatchL[,c("idnr","closdate_year","debt_TotalAssets")] , idvar = "idnr",timevar = "closdate_year", direction="wide")#actually it is working
x$debt_TotalAssets<- apply(x[,2:18],1,function(y) mean(y,na.rm=T))
dataMatch$debt_TotalAssets <- x$debt_TotalAssets

dataMatch[(!is.finite(dataMatch$debt_TotalAssets)), "debt_TotalAssets"]<- "NA"
dataMatch$debt_TotalAssets <-as.numeric(dataMatch$debt_TotalAssets)#making it back to numeric

dataMatch$pe <- dataMatch$idnr %in% dataZ$Target.BvD.ID.number
numUnique(subset(dataMatch, pe== TRUE)$idnr)#612
dataMatch <- na.omit(dataMatch)

model_match <- matchit(pe ~ aveTotal.assets + ebitda + debt_TotalAssets + ebitdaMargin, data = dataMatch,  method = "nearest", ratio = 1.5)
model_match <- matchit(pe ~ aveTotal.assets +I(debt_TotalAssets*aveTotal.assets)+ ebitda + ebitdaMargin, data = dataMatch,  method = "nearest", ratio = 2)
model_match <- matchit(pe ~ aveTotal.assets +I(debt_TotalAssets*aveTotal.assets)+ ebitda, data = dataMatch,  method = "nearest", ratio = 2)

M <-match.data(model_match)
median(subset(M,pe==1)$aveTotal.assets)
median(subset(M,pe==0)$aveTotal.assets)
mean(subset(M,pe==1)$debt_TotalAssets)
mean(subset(M,pe==0)$debt_TotalAssets)
mean(subset(M,pe==1)$ebitda)
mean(subset(M,pe==0)$ebitda)
mean(subset(M,pe==1)$ebitdaMargin)
mean(subset(M,pe==0)$ebitdaMargin)
length(subset(M, pe==0)$idnr)

write(subset(M, pe==0)$idnr, file= "controlsZdigit4.txt")

#same but with dataZ2 -> SBOs
# Treatment is re-flagged as membership in dataZ2 on the rows left in dataMatch.
# NOTE(review): firms that are in dataZ but not in dataZ2 now have pe == FALSE,
# so they can be picked as controls here -- confirm this is intended.
dataMatch$pe <- dataMatch$idnr %in% dataZ2$Target.BvD.ID.number
numUnique(subset(dataMatch, pe == TRUE)$idnr)#178

# Two earlier specifications used to be fitted here and were overwritten
# straight away by the one below; they are kept as comments so they are no
# longer re-estimated on every run:
#   pe ~ aveTotal.assets + ebitda + debt_TotalAssets + ebitdaMargin, ratio = 1.5
#   pe ~ aveTotal.assets + I(debt_TotalAssets*aveTotal.assets) + ebitda + ebitdaMargin, ratio = 2
# NOTE(review): ratio = 1.5 is not a whole number; check the MatchIt docs
# before reviving the first specification.
model_match <- matchit(
    pe ~ aveTotal.assets + I(debt_TotalAssets * aveTotal.assets) + ebitda,
    data = dataMatch, method = "nearest", ratio = 2
)

M <- match.data(model_match)
# Balance check: targets (pe == 1) against their matched controls (pe == 0).
median(subset(M, pe == 1)$aveTotal.assets)
median(subset(M, pe == 0)$aveTotal.assets)
mean(subset(M, pe == 1)$debt_TotalAssets)
mean(subset(M, pe == 0)$debt_TotalAssets)
mean(subset(M, pe == 1)$ebitda)
mean(subset(M, pe == 0)$ebitda)
mean(subset(M, pe == 1)$ebitdaMargin)
mean(subset(M, pe == 0)$ebitdaMargin)


write(subset(M, pe == 0)$idnr, file = "controlsZ2digit4.txt")
# The pe column saved here holds the dataZ2 (SBO) flag assigned in this section.
write.csv(dataMatch, file = "dataMatchDigit4.csv")
## For Z2 the debt ratio of the controls is .07 compared to .35 for the targets -- to check later

############Digit5
# Build the firm-level matching data set from the Digit5 Amadeus extract.
amadeusDigit5 <- read.dta("amadeusDigit5.dta")
# Debt ratio per firm-year: (ncli + loan) / toas.
amadeusDigit5$debt_TotalAssets <- (amadeusDigit5$ncli + amadeusDigit5$loan) / amadeusDigit5$toas

dataMatchL <- amadeusDigit5; rm(amadeusDigit5)

## Changing from long to wide format (one column per closdate_year) and
## averaging every variable over the years available for each firm.
## 2:ncol(x) takes every year column (column 1 holds idnr), so the average
## does not depend on the extract containing exactly 17 years.
x <- reshape(dataMatchL[, 1:3], idvar = "idnr", timevar = "closdate_year", direction = "wide")
x$aveTotal.assets <- apply(x[, 2:ncol(x)], 1, function(y) mean(y, na.rm = TRUE))
dataMatch <- x[, c("idnr", "aveTotal.assets")]
x <- reshape(dataMatchL[, c("idnr", "closdate_year", "ebta")], idvar = "idnr", timevar = "closdate_year", direction = "wide")
x$ebitda <- apply(x[, 2:ncol(x)], 1, function(y) mean(y, na.rm = TRUE))
dataMatch$ebitda <- x$ebitda
x <- reshape(dataMatchL[, c("idnr", "closdate_year", "etma")], idvar = "idnr", timevar = "closdate_year", direction = "wide")
x$ebitdaMargin <- apply(x[, 2:ncol(x)], 1, function(y) mean(y, na.rm = TRUE))
dataMatch$ebitdaMargin <- x$ebitdaMargin
x <- reshape(dataMatchL[, c("idnr", "closdate_year", "debt_TotalAssets")], idvar = "idnr", timevar = "closdate_year", direction = "wide")
x$debt_TotalAssets <- apply(x[, 2:ncol(x)], 1, function(y) mean(y, na.rm = TRUE))
dataMatch$debt_TotalAssets <- x$debt_TotalAssets

# Non-finite averages (NaN/Inf) become a numeric NA directly; assigning the
# string "NA" would turn the whole column into character and force a lossy
# round trip through as.numeric().
dataMatch$debt_TotalAssets[!is.finite(dataMatch$debt_TotalAssets)] <- NA_real_

# Treatment flag: the firm is an LBO target listed in dataZ.
dataMatch$pe <- dataMatch$idnr %in% dataZ$Target.BvD.ID.number
numUnique(subset(dataMatch, pe == TRUE)$idnr)#202 (counted before incomplete rows are dropped)
dataMatch <- na.omit(dataMatch)

# Nearest-neighbour matching of LBO targets (pe == TRUE) to control firms.
# Two earlier specifications used to be fitted here and were overwritten
# straight away by the one below; they are kept as comments so they are no
# longer re-estimated on every run:
#   pe ~ aveTotal.assets + ebitda + debt_TotalAssets + ebitdaMargin, ratio = 1.5
#   pe ~ aveTotal.assets + I(debt_TotalAssets*aveTotal.assets) + ebitda + ebitdaMargin, ratio = 2
# NOTE(review): ratio = 1.5 is not a whole number; check the MatchIt docs
# before reviving the first specification.
model_match <- matchit(
    pe ~ aveTotal.assets + I(debt_TotalAssets * aveTotal.assets) + ebitda,
    data = dataMatch, method = "nearest", ratio = 2
)

M <- match.data(model_match)
# Balance check: targets (pe == 1) against their matched controls (pe == 0).
median(subset(M, pe == 1)$aveTotal.assets)
median(subset(M, pe == 0)$aveTotal.assets)
mean(subset(M, pe == 1)$debt_TotalAssets)
mean(subset(M, pe == 0)$debt_TotalAssets)
mean(subset(M, pe == 1)$ebitda)
mean(subset(M, pe == 0)$ebitda)
mean(subset(M, pe == 1)$ebitdaMargin)
mean(subset(M, pe == 0)$ebitdaMargin)
length(subset(M, pe == 0)$idnr)  # number of matched controls

# Ids of the matched controls, written to a text file for this digit.
write(subset(M, pe == 0)$idnr, file = "controlsZdigit5.txt")

#same but with dataZ2 -> SBOs
# Treatment is re-flagged as membership in dataZ2 on the rows left in dataMatch.
# NOTE(review): firms that are in dataZ but not in dataZ2 now have pe == FALSE,
# so they can be picked as controls here -- confirm this is intended.
dataMatch$pe <- dataMatch$idnr %in% dataZ2$Target.BvD.ID.number
numUnique(subset(dataMatch, pe == TRUE)$idnr)#57

# Two earlier specifications used to be fitted here and were overwritten
# straight away by the one below; they are kept as comments so they are no
# longer re-estimated on every run:
#   pe ~ aveTotal.assets + ebitda + debt_TotalAssets + ebitdaMargin, ratio = 1.5
#   pe ~ aveTotal.assets + I(debt_TotalAssets*aveTotal.assets) + ebitda + ebitdaMargin, ratio = 2
# NOTE(review): ratio = 1.5 is not a whole number; check the MatchIt docs
# before reviving the first specification.
model_match <- matchit(
    pe ~ aveTotal.assets + I(debt_TotalAssets * aveTotal.assets) + ebitda,
    data = dataMatch, method = "nearest", ratio = 2
)

M <- match.data(model_match)
# Balance check: targets (pe == 1) against their matched controls (pe == 0).
median(subset(M, pe == 1)$aveTotal.assets)
median(subset(M, pe == 0)$aveTotal.assets)
mean(subset(M, pe == 1)$debt_TotalAssets)
mean(subset(M, pe == 0)$debt_TotalAssets)
mean(subset(M, pe == 1)$ebitda)
mean(subset(M, pe == 0)$ebitda)
mean(subset(M, pe == 1)$ebitdaMargin)
mean(subset(M, pe == 0)$ebitdaMargin)

write(subset(M, pe == 0)$idnr, file = "controlsZ2digit5.txt")
# The pe column saved here holds the dataZ2 (SBO) flag assigned in this section.
write.csv(dataMatch, file = "dataMatchDigit5.csv")
############Digit6
# Build the firm-level matching data set from the Digit6 Amadeus extract.
amadeusDigit6 <- read.dta("amadeusDigit6.dta")
# Debt ratio per firm-year: (ncli + loan) / toas.
amadeusDigit6$debt_TotalAssets <- (amadeusDigit6$ncli + amadeusDigit6$loan) / amadeusDigit6$toas

dataMatchL <- amadeusDigit6; rm(amadeusDigit6)

## Changing from long to wide format (one column per closdate_year) and
## averaging every variable over the years available for each firm.
## 2:ncol(x) takes every year column (column 1 holds idnr), so the average
## does not depend on the extract containing exactly 17 years.
x <- reshape(dataMatchL[, 1:3], idvar = "idnr", timevar = "closdate_year", direction = "wide")
x$aveTotal.assets <- apply(x[, 2:ncol(x)], 1, function(y) mean(y, na.rm = TRUE))
dataMatch <- x[, c("idnr", "aveTotal.assets")]
x <- reshape(dataMatchL[, c("idnr", "closdate_year", "ebta")], idvar = "idnr", timevar = "closdate_year", direction = "wide")
x$ebitda <- apply(x[, 2:ncol(x)], 1, function(y) mean(y, na.rm = TRUE))
dataMatch$ebitda <- x$ebitda
x <- reshape(dataMatchL[, c("idnr", "closdate_year", "etma")], idvar = "idnr", timevar = "closdate_year", direction = "wide")
x$ebitdaMargin <- apply(x[, 2:ncol(x)], 1, function(y) mean(y, na.rm = TRUE))
dataMatch$ebitdaMargin <- x$ebitdaMargin
x <- reshape(dataMatchL[, c("idnr", "closdate_year", "debt_TotalAssets")], idvar = "idnr", timevar = "closdate_year", direction = "wide")
x$debt_TotalAssets <- apply(x[, 2:ncol(x)], 1, function(y) mean(y, na.rm = TRUE))
dataMatch$debt_TotalAssets <- x$debt_TotalAssets

# Non-finite averages (NaN/Inf) become a numeric NA directly; assigning the
# string "NA" would turn the whole column into character and force a lossy
# round trip through as.numeric().
dataMatch$debt_TotalAssets[!is.finite(dataMatch$debt_TotalAssets)] <- NA_real_

# Treatment flag: the firm is an LBO target listed in dataZ.
dataMatch$pe <- dataMatch$idnr %in% dataZ$Target.BvD.ID.number
numUnique(subset(dataMatch, pe == TRUE)$idnr)#546 (counted before incomplete rows are dropped)
dataMatch <- na.omit(dataMatch)

# Nearest-neighbour matching of LBO targets (pe == TRUE) to control firms.
# Two earlier specifications used to be fitted here and were overwritten
# straight away by the one below; they are kept as comments so they are no
# longer re-estimated on every run:
#   pe ~ aveTotal.assets + ebitda + debt_TotalAssets + ebitdaMargin, ratio = 1.5
#   pe ~ aveTotal.assets + I(debt_TotalAssets*aveTotal.assets) + ebitda + ebitdaMargin, ratio = 2
# NOTE(review): ratio = 1.5 is not a whole number; check the MatchIt docs
# before reviving the first specification.
model_match <- matchit(
    pe ~ aveTotal.assets + I(debt_TotalAssets * aveTotal.assets) + ebitda,
    data = dataMatch, method = "nearest", ratio = 2
)

M <- match.data(model_match)
# Balance check: targets (pe == 1) against their matched controls (pe == 0).
median(subset(M, pe == 1)$aveTotal.assets)
median(subset(M, pe == 0)$aveTotal.assets)
mean(subset(M, pe == 1)$debt_TotalAssets)
mean(subset(M, pe == 0)$debt_TotalAssets)
mean(subset(M, pe == 1)$ebitda)
mean(subset(M, pe == 0)$ebitda)
mean(subset(M, pe == 1)$ebitdaMargin)
mean(subset(M, pe == 0)$ebitdaMargin)
length(subset(M, pe == 0)$idnr)  # number of matched controls

# Ids of the matched controls, written to a text file for this digit.
write(subset(M, pe == 0)$idnr, file = "controlsZdigit6.txt")

#same but with dataZ2 -> SBOs
# Treatment is re-flagged as membership in dataZ2 on the rows left in dataMatch.
# NOTE(review): firms that are in dataZ but not in dataZ2 now have pe == FALSE,
# so they can be picked as controls here -- confirm this is intended.
dataMatch$pe <- dataMatch$idnr %in% dataZ2$Target.BvD.ID.number
numUnique(subset(dataMatch, pe == TRUE)$idnr)#209

# Two earlier specifications used to be fitted here and were overwritten
# straight away by the one below; they are kept as comments so they are no
# longer re-estimated on every run:
#   pe ~ aveTotal.assets + ebitda + debt_TotalAssets + ebitdaMargin, ratio = 1.5
#   pe ~ aveTotal.assets + I(debt_TotalAssets*aveTotal.assets) + ebitda + ebitdaMargin, ratio = 2
# NOTE(review): ratio = 1.5 is not a whole number; check the MatchIt docs
# before reviving the first specification.
model_match <- matchit(
    pe ~ aveTotal.assets + I(debt_TotalAssets * aveTotal.assets) + ebitda,
    data = dataMatch, method = "nearest", ratio = 2
)

M <- match.data(model_match)
# Balance check: targets (pe == 1) against their matched controls (pe == 0).
median(subset(M, pe == 1)$aveTotal.assets)
median(subset(M, pe == 0)$aveTotal.assets)
mean(subset(M, pe == 1)$debt_TotalAssets)
mean(subset(M, pe == 0)$debt_TotalAssets)
mean(subset(M, pe == 1)$ebitda)
mean(subset(M, pe == 0)$ebitda)
mean(subset(M, pe == 1)$ebitdaMargin)
mean(subset(M, pe == 0)$ebitdaMargin)


write(subset(M, pe == 0)$idnr, file = "controlsZ2digit6.txt")
# The pe column saved here holds the dataZ2 (SBO) flag assigned in this section.
write.csv(dataMatch, file = "dataMatchDigit6.csv")

############Digit7
# Build the firm-level matching data set from the Digit7 Amadeus extract.
amadeusDigit7 <- read.dta("amadeusDigit7.dta")
# Debt ratio per firm-year: (ncli + loan) / toas.
amadeusDigit7$debt_TotalAssets <- (amadeusDigit7$ncli + amadeusDigit7$loan) / amadeusDigit7$toas

dataMatchL <- amadeusDigit7; rm(amadeusDigit7)

## Changing from long to wide format (one column per closdate_year) and
## averaging every variable over the years available for each firm.
## 2:ncol(x) takes every year column (column 1 holds idnr), so the average
## does not depend on the extract containing exactly 17 years.
x <- reshape(dataMatchL[, 1:3], idvar = "idnr", timevar = "closdate_year", direction = "wide")
x$aveTotal.assets <- apply(x[, 2:ncol(x)], 1, function(y) mean(y, na.rm = TRUE))
dataMatch <- x[, c("idnr", "aveTotal.assets")]
x <- reshape(dataMatchL[, c("idnr", "closdate_year", "ebta")], idvar = "idnr", timevar = "closdate_year", direction = "wide")
x$ebitda <- apply(x[, 2:ncol(x)], 1, function(y) mean(y, na.rm = TRUE))
dataMatch$ebitda <- x$ebitda
x <- reshape(dataMatchL[, c("idnr", "closdate_year", "etma")], idvar = "idnr", timevar = "closdate_year", direction = "wide")
x$ebitdaMargin <- apply(x[, 2:ncol(x)], 1, function(y) mean(y, na.rm = TRUE))
dataMatch$ebitdaMargin <- x$ebitdaMargin
x <- reshape(dataMatchL[, c("idnr", "closdate_year", "debt_TotalAssets")], idvar = "idnr", timevar = "closdate_year", direction = "wide")
x$debt_TotalAssets <- apply(x[, 2:ncol(x)], 1, function(y) mean(y, na.rm = TRUE))
dataMatch$debt_TotalAssets <- x$debt_TotalAssets

# Non-finite averages (NaN/Inf) become a numeric NA directly; assigning the
# string "NA" would turn the whole column into character and force a lossy
# round trip through as.numeric().
dataMatch$debt_TotalAssets[!is.finite(dataMatch$debt_TotalAssets)] <- NA_real_

# Treatment flag: the firm is an LBO target listed in dataZ.
dataMatch$pe <- dataMatch$idnr %in% dataZ$Target.BvD.ID.number
numUnique(subset(dataMatch, pe == TRUE)$idnr)#432 (counted before incomplete rows are dropped)
dataMatch <- na.omit(dataMatch)

# Nearest-neighbour matching of LBO targets (pe == TRUE) to control firms.
# Two earlier specifications used to be fitted here and were overwritten
# straight away by the one below; they are kept as comments so they are no
# longer re-estimated on every run:
#   pe ~ aveTotal.assets + ebitda + debt_TotalAssets + ebitdaMargin, ratio = 1.5
#   pe ~ aveTotal.assets + I(debt_TotalAssets*aveTotal.assets) + ebitda + ebitdaMargin, ratio = 2
# NOTE(review): ratio = 1.5 is not a whole number; check the MatchIt docs
# before reviving the first specification.
model_match <- matchit(
    pe ~ aveTotal.assets + I(debt_TotalAssets * aveTotal.assets) + ebitda,
    data = dataMatch, method = "nearest", ratio = 2
)

M <- match.data(model_match)
# Balance check: targets (pe == 1) against their matched controls (pe == 0).
median(subset(M, pe == 1)$aveTotal.assets)
median(subset(M, pe == 0)$aveTotal.assets)
mean(subset(M, pe == 1)$debt_TotalAssets)
mean(subset(M, pe == 0)$debt_TotalAssets)
mean(subset(M, pe == 1)$ebitda)
mean(subset(M, pe == 0)$ebitda)
mean(subset(M, pe == 1)$ebitdaMargin)
mean(subset(M, pe == 0)$ebitdaMargin)
length(subset(M, pe == 0)$idnr)  # number of matched controls

# Ids of the matched controls, written to a text file for this digit.
write(subset(M, pe == 0)$idnr, file = "controlsZdigit7.txt")

#same but with dataZ2 -> SBOs
# Treatment is re-flagged as membership in dataZ2 on the rows left in dataMatch.
# NOTE(review): firms that are in dataZ but not in dataZ2 now have pe == FALSE,
# so they can be picked as controls here -- confirm this is intended.
dataMatch$pe <- dataMatch$idnr %in% dataZ2$Target.BvD.ID.number
numUnique(subset(dataMatch, pe == TRUE)$idnr)#179

# Two earlier specifications used to be fitted here and were overwritten
# straight away by the one below; they are kept as comments so they are no
# longer re-estimated on every run:
#   pe ~ aveTotal.assets + ebitda + debt_TotalAssets + ebitdaMargin, ratio = 1.5
#   pe ~ aveTotal.assets + I(debt_TotalAssets*aveTotal.assets) + ebitda + ebitdaMargin, ratio = 2
# NOTE(review): ratio = 1.5 is not a whole number; check the MatchIt docs
# before reviving the first specification.
model_match <- matchit(
    pe ~ aveTotal.assets + I(debt_TotalAssets * aveTotal.assets) + ebitda,
    data = dataMatch, method = "nearest", ratio = 2
)

M <- match.data(model_match)
# Balance check: targets (pe == 1) against their matched controls (pe == 0).
median(subset(M, pe == 1)$aveTotal.assets)
median(subset(M, pe == 0)$aveTotal.assets)
mean(subset(M, pe == 1)$debt_TotalAssets)
mean(subset(M, pe == 0)$debt_TotalAssets)
mean(subset(M, pe == 1)$ebitda)
mean(subset(M, pe == 0)$ebitda)
mean(subset(M, pe == 1)$ebitdaMargin)
mean(subset(M, pe == 0)$ebitdaMargin)


write(subset(M, pe == 0)$idnr, file = "controlsZ2digit7.txt")
# The pe column saved here holds the dataZ2 (SBO) flag assigned in this section.
write.csv(dataMatch, file = "dataMatchDigit7.csv")
############Digit8
# Build the firm-level matching data set from the Digit8 Amadeus extract.
amadeusDigit8 <- read.dta("amadeusDigit8.dta")
# Debt ratio per firm-year: (ncli + loan) / toas.
amadeusDigit8$debt_TotalAssets <- (amadeusDigit8$ncli + amadeusDigit8$loan) / amadeusDigit8$toas

dataMatchL <- amadeusDigit8; rm(amadeusDigit8)

## Changing from long to wide format (one column per closdate_year) and
## averaging every variable over the years available for each firm
## (all columns after the first, which holds idnr).
x <- reshape(dataMatchL[, 1:3], idvar = "idnr", timevar = "closdate_year", direction = "wide")
x$aveTotal.assets <- apply(x[, 2:ncol(x)], 1, function(y) mean(y, na.rm = TRUE))
dataMatch <- x[, c("idnr", "aveTotal.assets")]
x <- reshape(dataMatchL[, c("idnr", "closdate_year", "ebta")], idvar = "idnr", timevar = "closdate_year", direction = "wide")
x$ebitda <- apply(x[, 2:ncol(x)], 1, function(y) mean(y, na.rm = TRUE))
dataMatch$ebitda <- x$ebitda
x <- reshape(dataMatchL[, c("idnr", "closdate_year", "etma")], idvar = "idnr", timevar = "closdate_year", direction = "wide")
x$ebitdaMargin <- apply(x[, 2:ncol(x)], 1, function(y) mean(y, na.rm = TRUE))
dataMatch$ebitdaMargin <- x$ebitdaMargin
x <- reshape(dataMatchL[, c("idnr", "closdate_year", "debt_TotalAssets")], idvar = "idnr", timevar = "closdate_year", direction = "wide")
x$debt_TotalAssets <- apply(x[, 2:ncol(x)], 1, function(y) mean(y, na.rm = TRUE))
dataMatch$debt_TotalAssets <- x$debt_TotalAssets

# Non-finite averages (NaN/Inf) become a numeric NA directly; assigning the
# string "NA" would turn the whole column into character and force a lossy
# round trip through as.numeric().
dataMatch$debt_TotalAssets[!is.finite(dataMatch$debt_TotalAssets)] <- NA_real_

# Treatment flag: the firm is an LBO target listed in dataZ.
dataMatch$pe <- dataMatch$idnr %in% dataZ$Target.BvD.ID.number
numUnique(subset(dataMatch, pe == TRUE)$idnr)#243 (counted before incomplete rows are dropped)
dataMatch <- na.omit(dataMatch)

# Nearest-neighbour matching of LBO targets (pe == TRUE) to control firms.
# Two earlier specifications used to be fitted here and were overwritten
# straight away by the one below; they are kept as comments so they are no
# longer re-estimated on every run:
#   pe ~ aveTotal.assets + ebitda + debt_TotalAssets + ebitdaMargin, ratio = 1.5
#   pe ~ aveTotal.assets + I(debt_TotalAssets*aveTotal.assets) + ebitda + ebitdaMargin, ratio = 2
# NOTE(review): ratio = 1.5 is not a whole number; check the MatchIt docs
# before reviving the first specification.
model_match <- matchit(
    pe ~ aveTotal.assets + I(debt_TotalAssets * aveTotal.assets) + ebitda,
    data = dataMatch, method = "nearest", ratio = 2
)

M <- match.data(model_match)
# Balance check: targets (pe == 1) against their matched controls (pe == 0).
median(subset(M, pe == 1)$aveTotal.assets)
median(subset(M, pe == 0)$aveTotal.assets)
mean(subset(M, pe == 1)$debt_TotalAssets)
mean(subset(M, pe == 0)$debt_TotalAssets)
mean(subset(M, pe == 1)$ebitda)
mean(subset(M, pe == 0)$ebitda)
mean(subset(M, pe == 1)$ebitdaMargin)
mean(subset(M, pe == 0)$ebitdaMargin)
length(subset(M, pe == 0)$idnr)  # number of matched controls

# Ids of the matched controls, written to a text file for this digit.
write(subset(M, pe == 0)$idnr, file = "controlsZdigit8.txt")

#same but with dataZ2 -> SBOs
# Treatment is re-flagged as membership in dataZ2 on the rows left in dataMatch.
# NOTE(review): firms that are in dataZ but not in dataZ2 now have pe == FALSE,
# so they can be picked as controls here -- confirm this is intended.
dataMatch$pe <- dataMatch$idnr %in% dataZ2$Target.BvD.ID.number
numUnique(subset(dataMatch, pe == TRUE)$idnr)#80

# Two earlier specifications used to be fitted here and were overwritten
# straight away by the one below; they are kept as comments so they are no
# longer re-estimated on every run:
#   pe ~ aveTotal.assets + ebitda + debt_TotalAssets + ebitdaMargin, ratio = 1.5
#   pe ~ aveTotal.assets + I(debt_TotalAssets*aveTotal.assets) + ebitda + ebitdaMargin, ratio = 2
# NOTE(review): ratio = 1.5 is not a whole number; check the MatchIt docs
# before reviving the first specification.
model_match <- matchit(
    pe ~ aveTotal.assets + I(debt_TotalAssets * aveTotal.assets) + ebitda,
    data = dataMatch, method = "nearest", ratio = 2
)

M <- match.data(model_match)
# Balance check: targets (pe == 1) against their matched controls (pe == 0).
median(subset(M, pe == 1)$aveTotal.assets)
median(subset(M, pe == 0)$aveTotal.assets)
mean(subset(M, pe == 1)$debt_TotalAssets)
mean(subset(M, pe == 0)$debt_TotalAssets)
mean(subset(M, pe == 1)$ebitda)
mean(subset(M, pe == 0)$ebitda)
mean(subset(M, pe == 1)$ebitdaMargin)
mean(subset(M, pe == 0)$ebitdaMargin)

write(subset(M, pe == 0)$idnr, file = "controlsZ2digit8.txt")
# The pe column saved here holds the dataZ2 (SBO) flag assigned in this section.
write.csv(dataMatch, file = "dataMatchDigit8.csv")
############Digit9
# Build the firm-level matching data set from the Digit9 Amadeus extract.
amadeusDigit9 <- read.dta("amadeusDigit9.dta")
# Debt ratio per firm-year: (ncli + loan) / toas.
amadeusDigit9$debt_TotalAssets <- (amadeusDigit9$ncli + amadeusDigit9$loan) / amadeusDigit9$toas

dataMatchL <- amadeusDigit9; rm(amadeusDigit9)

## Changing from long to wide format (one column per closdate_year) and
## averaging every variable over the years available for each firm
## (all columns after the first, which holds idnr).
x <- reshape(dataMatchL[, 1:3], idvar = "idnr", timevar = "closdate_year", direction = "wide")
x$aveTotal.assets <- apply(x[, 2:ncol(x)], 1, function(y) mean(y, na.rm = TRUE))
dataMatch <- x[, c("idnr", "aveTotal.assets")]
x <- reshape(dataMatchL[, c("idnr", "closdate_year", "ebta")], idvar = "idnr", timevar = "closdate_year", direction = "wide")
x$ebitda <- apply(x[, 2:ncol(x)], 1, function(y) mean(y, na.rm = TRUE))
dataMatch$ebitda <- x$ebitda
x <- reshape(dataMatchL[, c("idnr", "closdate_year", "etma")], idvar = "idnr", timevar = "closdate_year", direction = "wide")
x$ebitdaMargin <- apply(x[, 2:ncol(x)], 1, function(y) mean(y, na.rm = TRUE))
dataMatch$ebitdaMargin <- x$ebitdaMargin
x <- reshape(dataMatchL[, c("idnr", "closdate_year", "debt_TotalAssets")], idvar = "idnr", timevar = "closdate_year", direction = "wide")
x$debt_TotalAssets <- apply(x[, 2:ncol(x)], 1, function(y) mean(y, na.rm = TRUE))
dataMatch$debt_TotalAssets <- x$debt_TotalAssets

# Non-finite averages (NaN/Inf) become a numeric NA directly; assigning the
# string "NA" would turn the whole column into character and force a lossy
# round trip through as.numeric().
dataMatch$debt_TotalAssets[!is.finite(dataMatch$debt_TotalAssets)] <- NA_real_

# Treatment flag: the firm is an LBO target listed in dataZ.
dataMatch$pe <- dataMatch$idnr %in% dataZ$Target.BvD.ID.number
numUnique(subset(dataMatch, pe == TRUE)$idnr)#63 (counted before incomplete rows are dropped)
dataMatch <- na.omit(dataMatch)

# Nearest-neighbour matching of LBO targets (pe == TRUE) to control firms.
# Two earlier specifications used to be fitted here and were overwritten
# straight away by the one below; they are kept as comments so they are no
# longer re-estimated on every run:
#   pe ~ aveTotal.assets + ebitda + debt_TotalAssets + ebitdaMargin, ratio = 1.5
#   pe ~ aveTotal.assets + I(debt_TotalAssets*aveTotal.assets) + ebitda + ebitdaMargin, ratio = 2
# NOTE(review): ratio = 1.5 is not a whole number; check the MatchIt docs
# before reviving the first specification.
model_match <- matchit(
    pe ~ aveTotal.assets + I(debt_TotalAssets * aveTotal.assets) + ebitda,
    data = dataMatch, method = "nearest", ratio = 2
)

M <- match.data(model_match)
# Balance check: targets (pe == 1) against their matched controls (pe == 0).
median(subset(M, pe == 1)$aveTotal.assets)
median(subset(M, pe == 0)$aveTotal.assets)
mean(subset(M, pe == 1)$debt_TotalAssets)
mean(subset(M, pe == 0)$debt_TotalAssets)
mean(subset(M, pe == 1)$ebitda)
mean(subset(M, pe == 0)$ebitda)
mean(subset(M, pe == 1)$ebitdaMargin)
mean(subset(M, pe == 0)$ebitdaMargin)
length(subset(M, pe == 0)$idnr)  # number of matched controls

# Ids of the matched controls, written to a text file for this digit.
write(subset(M, pe == 0)$idnr, file = "controlsZdigit9.txt")

#same but with dataZ2 -> SBOs
# Treatment is re-flagged as membership in dataZ2 on the rows left in dataMatch.
# NOTE(review): firms that are in dataZ but not in dataZ2 now have pe == FALSE,
# so they can be picked as controls here -- confirm this is intended.
dataMatch$pe <- dataMatch$idnr %in% dataZ2$Target.BvD.ID.number
numUnique(subset(dataMatch, pe == TRUE)$idnr)#24

# Two earlier specifications used to be fitted here and were overwritten
# straight away by the one below; they are kept as comments so they are no
# longer re-estimated on every run:
#   pe ~ aveTotal.assets + ebitda + debt_TotalAssets + ebitdaMargin, ratio = 1.5
#   pe ~ aveTotal.assets + I(debt_TotalAssets*aveTotal.assets) + ebitda + ebitdaMargin, ratio = 2
# NOTE(review): ratio = 1.5 is not a whole number; check the MatchIt docs
# before reviving the first specification.
model_match <- matchit(
    pe ~ aveTotal.assets + I(debt_TotalAssets * aveTotal.assets) + ebitda,
    data = dataMatch, method = "nearest", ratio = 2
)

M <- match.data(model_match)
# Balance check: targets (pe == 1) against their matched controls (pe == 0).
median(subset(M, pe == 1)$aveTotal.assets)
median(subset(M, pe == 0)$aveTotal.assets)
mean(subset(M, pe == 1)$debt_TotalAssets)
mean(subset(M, pe == 0)$debt_TotalAssets)
mean(subset(M, pe == 1)$ebitda)
mean(subset(M, pe == 0)$ebitda)
mean(subset(M, pe == 1)$ebitdaMargin)
mean(subset(M, pe == 0)$ebitdaMargin)

write(subset(M, pe == 0)$idnr, file = "controlsZ2digit9.txt")
# The pe column saved here holds the dataZ2 (SBO) flag assigned in this section.
write.csv(dataMatch, file = "dataMatchDigit9.csv")

############Digit0
# Build the firm-level matching data set from the Digit0 Amadeus extract.
amadeusDigit0 <- read.dta("amadeusDigit0.dta")
# Debt ratio per firm-year: (ncli + loan) / toas.
amadeusDigit0$debt_TotalAssets <- (amadeusDigit0$ncli + amadeusDigit0$loan) / amadeusDigit0$toas

dataMatchL <- amadeusDigit0; rm(amadeusDigit0)

## Changing from long to wide format (one column per closdate_year) and
## averaging every variable over the years available for each firm
## (all columns after the first, which holds idnr).
x <- reshape(dataMatchL[, 1:3], idvar = "idnr", timevar = "closdate_year", direction = "wide")
x$aveTotal.assets <- apply(x[, 2:ncol(x)], 1, function(y) mean(y, na.rm = TRUE))
dataMatch <- x[, c("idnr", "aveTotal.assets")]
x <- reshape(dataMatchL[, c("idnr", "closdate_year", "ebta")], idvar = "idnr", timevar = "closdate_year", direction = "wide")
x$ebitda <- apply(x[, 2:ncol(x)], 1, function(y) mean(y, na.rm = TRUE))
dataMatch$ebitda <- x$ebitda
x <- reshape(dataMatchL[, c("idnr", "closdate_year", "etma")], idvar = "idnr", timevar = "closdate_year", direction = "wide")
x$ebitdaMargin <- apply(x[, 2:ncol(x)], 1, function(y) mean(y, na.rm = TRUE))
dataMatch$ebitdaMargin <- x$ebitdaMargin
x <- reshape(dataMatchL[, c("idnr", "closdate_year", "debt_TotalAssets")], idvar = "idnr", timevar = "closdate_year", direction = "wide")
x$debt_TotalAssets <- apply(x[, 2:ncol(x)], 1, function(y) mean(y, na.rm = TRUE))
dataMatch$debt_TotalAssets <- x$debt_TotalAssets

# Non-finite averages (NaN/Inf) become a numeric NA directly; assigning the
# string "NA" would turn the whole column into character and force a lossy
# round trip through as.numeric().
dataMatch$debt_TotalAssets[!is.finite(dataMatch$debt_TotalAssets)] <- NA_real_

# Treatment flag: the firm is an LBO target listed in dataZ.
dataMatch$pe <- dataMatch$idnr %in% dataZ$Target.BvD.ID.number
numUnique(subset(dataMatch, pe == TRUE)$idnr)#25 (counted before incomplete rows are dropped)
dataMatch <- na.omit(dataMatch)

# Nearest-neighbour matching of LBO targets (pe == TRUE) to control firms.
# Two earlier specifications used to be fitted here and were overwritten
# straight away by the one below; they are kept as comments so they are no
# longer re-estimated on every run:
#   pe ~ aveTotal.assets + ebitda + debt_TotalAssets + ebitdaMargin, ratio = 1.5
#   pe ~ aveTotal.assets + I(debt_TotalAssets*aveTotal.assets) + ebitda + ebitdaMargin, ratio = 2
# NOTE(review): ratio = 1.5 is not a whole number; check the MatchIt docs
# before reviving the first specification.
model_match <- matchit(
    pe ~ aveTotal.assets + I(debt_TotalAssets * aveTotal.assets) + ebitda,
    data = dataMatch, method = "nearest", ratio = 2
)

M <- match.data(model_match)
# Balance check: targets (pe == 1) against their matched controls (pe == 0).
median(subset(M, pe == 1)$aveTotal.assets)
median(subset(M, pe == 0)$aveTotal.assets)
mean(subset(M, pe == 1)$debt_TotalAssets)
mean(subset(M, pe == 0)$debt_TotalAssets)
mean(subset(M, pe == 1)$ebitda)
mean(subset(M, pe == 0)$ebitda)
mean(subset(M, pe == 1)$ebitdaMargin)
mean(subset(M, pe == 0)$ebitdaMargin)
length(subset(M, pe == 0)$idnr)  # number of matched controls

# Ids of the matched controls, written to a text file for this digit.
write(subset(M, pe == 0)$idnr, file = "controlsZdigit0.txt")

#same but with dataZ2 -> SBOs
# Treatment is re-flagged as membership in dataZ2 on the rows left in dataMatch.
# NOTE(review): firms that are in dataZ but not in dataZ2 now have pe == FALSE,
# so they can be picked as controls here -- confirm this is intended.
dataMatch$pe <- dataMatch$idnr %in% dataZ2$Target.BvD.ID.number
numUnique(subset(dataMatch, pe == TRUE)$idnr)#5

# Two earlier specifications used to be fitted here and were overwritten
# straight away by the one below; they are kept as comments so they are no
# longer re-estimated on every run:
#   pe ~ aveTotal.assets + ebitda + debt_TotalAssets + ebitdaMargin, ratio = 1.5
#   pe ~ aveTotal.assets + I(debt_TotalAssets*aveTotal.assets) + ebitda + ebitdaMargin, ratio = 2
# NOTE(review): ratio = 1.5 is not a whole number; check the MatchIt docs
# before reviving the first specification.
model_match <- matchit(
    pe ~ aveTotal.assets + I(debt_TotalAssets * aveTotal.assets) + ebitda,
    data = dataMatch, method = "nearest", ratio = 2
)

M <- match.data(model_match)
# Balance check: targets (pe == 1) against their matched controls (pe == 0).
median(subset(M, pe == 1)$aveTotal.assets)
median(subset(M, pe == 0)$aveTotal.assets)
mean(subset(M, pe == 1)$debt_TotalAssets)
mean(subset(M, pe == 0)$debt_TotalAssets)
mean(subset(M, pe == 1)$ebitda)
mean(subset(M, pe == 0)$ebitda)
mean(subset(M, pe == 1)$ebitdaMargin)
mean(subset(M, pe == 0)$ebitdaMargin)

write(subset(M, pe == 0)$idnr, file = "controlsZ2digit0.txt")
# The pe column saved here holds the dataZ2 (SBO) flag assigned in this section.
write.csv(dataMatch, file = "dataMatchDigit0.csv")

#####==== End constructing control groups

######## To select countries from the data  #### to check later
# Counts the rows of dataMatchL (the extract loaded last) whose countryCode
# appears among the target country codes in dataZ.
sum(is.element(dataMatchL$countryCode, unique(dataZ$Target.country.code)))
######

## Combining the per-digit control-id text files into one file per deal type.

# Read the id files "<prefix><digit>.txt" for the given digits, stack them in
# that order and return a one-column data frame (column V1).
#   prefix: file name up to, but excluding, the digit.
#   digits: digits to read; the default keeps the order 1-9 followed by 0.
# Ids are read as character so that read.delim's automatic type conversion
# cannot alter them (numeric-looking ids, factors in older R versions).
read_control_ids <- function(prefix, digits = c(1:9, 0)) {
    per_digit <- lapply(digits, function(d) {
        read.delim(file = paste0(prefix, d, ".txt"), header = FALSE,
                   colClasses = "character")
    })
    do.call(rbind, per_digit)
}

# Controls matched to the dataZ (LBO) targets.
controlsZ <- read_control_ids("controlsZdigit")
write(as.character(controlsZ[, 1]), file = "controlsZ04_06.txt")

# Controls matched to the dataZ2 (SBO) targets.
controlsZ2 <- read_control_ids("controlsZ2digit")
write(as.character(controlsZ2[, 1]), file = "controlsZ2_04_06.txt")