Toky registrovaných UoZ

Tak sme si povedali, že sa pozrieme na populáciu UoZ pritekajúcich do evidencie v roku 2017, v strednej vekovej skupine (20-55 rokov), bez zdravotného postihnutia, ktorých evidencia nepresiahla 3 roky (1095 dní).

library(dplyr)
library(tidyr)
library(knitr)
library(networkD3)
library(bigrquery)
library(zoo)
library(kableExtra)
#install.packages("jtools")
library(jtools)
#install.packages("huxtable")
library(huxtable)
library(ggeffects)
library(ggplot2)

# Load the 2021 export and keep only the three objects used below
# (`df`, `almps`, `dfh`); everything else in the workspace is dropped.
load("D:\\Dropbox\\UPSVaR\\Export_2021\\export2021.RData")
rm(list = setdiff(ls(), c("df", "almps", "dfh")))
# Code list of reasons for removal from the register (pipe-separated, UTF-8).
dovod <- read.csv(
  "D:/Dropbox/UPSVaR/Export_2021/CISELNIKY/cDovod_Vyradenia.txt",
  header = TRUE, sep = "|", encoding = "UTF-8"
)


# Sample restrictions on df: age group, observation period, health.
# NOTE(review): `age > 20` excludes 20-year-olds although the text speaks of
# the 20-55 age group -- confirm which boundary is intended.
df <- subset(df, age > 20 & age <= 55)
# Inflow during 2017 only. The upper bound used to be `>= "2017-12-31"`,
# which kept spells starting on or after 31 December 2017 instead of the
# 2017 inflow.
df <- subset(
  df,
  as.Date(entry) >= as.Date("2017-01-01") &
    as.Date(entry) <= as.Date("2017-12-31")
)
df <- subset(df, exit - entry < 1095) # maximum spell length, in days
df <- subset(df, healthy == 1)

# Empty cells become NA. Only character/factor columns are touched: with
# dplyr >= 1.1 na_if(x, "") is an error for non-character x, so the former
# mutate_all(na_if, "") stops on the first numeric or date column.
df <- df %>%
  mutate(across(
    where(~ is.character(.x) || is.factor(.x)),
    ~ replace(.x, which(.x == ""), NA)
  ))


# Frequency of removal reasons, most frequent first. Missing codes are
# recoded to the string "NA" so that table() counts them; they have no match
# in the code list and are labelled "Dôvod neuvedený".
df$dovod_vyradenia_kod[is.na(df$dovod_vyradenia_kod)] <- "NA"
td <- table(df$dovod_vyradenia_kod)
td <- td[order(-td)]
rownames(td) <- dovod[
  match(rownames(td), dovod$DOV_VYRAD_KOD_U1),
  "DOV_VYRAD_NAYOV_U1"
]
rownames(td)[is.na(rownames(td))] <- "Dôvod neuvedený"

Evidencie týchto UoZ boli ukončené na základe nasledujúcich dôvodov.

# Removal reasons: absolute counts and shares in per cent (two decimals,
# no scientific notation), rendered as a hover-enabled kable.
kable_paper(
  kbl(
    cbind(
      td,
      format(round(td / sum(td) * 100, digits = 2), nsmall = 2, scientific = FALSE)
    ),
    col.names = c("N", "%")
  ),
  "hover",
  full_width = FALSE
)
N %
vzniku pracovného pomeru alebo obdobného pracovného vzťahu 320332 46.66
Dôvod neuvedený 148595 21.65
vyradenie dňom zistenia: 54346 7.92
začatia vykonávania zárobkovej činnosti v EÚ a cudzine 38399 5.59
vzniku oprávnenia prevádzkovať alebo vykonávať SZČ 32093 4.68
prestal spĺňať podmienku podľa § 6 ods. 2 písm. a) a b) a ods. 3. 28506 4.15
požiadanie o vyradenie (vlastná žiadosť) 18159 2.65
odchod do členského štátu EÚ na obdobie dlhšie ako 15 kal. dní 12273 1.79
požiadanie o vyradenie z dôvodu: 9478 1.38
narodenie dieťaťa 9451 1.38
nástup na sústavnú prípravu na povolanie 5525 0.80
odchod do cudziny na obdobie dlhšie ako 15 kal. dní 3788 0.55
skončenie pozastavenia SZČ 2484 0.36
úmrtie UoZ 1001 0.15
vznik PPV (dohody mimo PP) alebo podľa osobitného predpisu 733 0.11
vzatie do výkonu väzby 539 0.08
nástup na výkon trestu odňatia slobody 481 0.07
zaradenia do evidencie uchádzačov o zamestnanie 92 0.01
neschopnosť plniť povinnosti UoZ podľa posudku posudkového lekára 86 0.01
vykonával zárobkovú činnosť v pracovnom pomere alebo v inom právnom vzťahu po 31.7.2017 46 0.01
zániku dlhodobého pobytu, ak ide o štátneho príslušníka tretej krajiny 28 0.00
priznanie starobného dôchodku alebo dovŕšenie veku 15 0.00
nadobudnutie právoplatnosti rozsudku o neplatnosti skončenia zamestnania 8 0.00

Z 507 267 uchádzačov o zamestnanie zaregistrovaných počas roka 2017, 236 782 malo trvalý pobyt v obciach s podielom rómskej populácie viac ako 5% (46.7 percent).

# Podiel romov v obci
#df_with_roma <- filter(df, roma_share > 0.05) 
#df_without_roma <- filter(df, roma_share < 0.05)  

Toky UoZ do jednotlivých nástrojov AOTP, vyradenia, či umiestnenia na trh práce. Ide o kohortu pritekajúcu do evidencie počas roka 2017. 6/12/18/24 zobrazujú počty UoZ zotrvávajúcich v databáze aj po uplynutí 6/12/18/24 mesiacov.

Graf 1: Toky UoZ s trvalým bydliskom v obci s podielom rómov do 5% (z evidencie a do opatrení APTP)

##################################
# WITHout ROMA ####
##################################

# Cohort: municipalities with a Roma share below 5 % (rows with a share of
# exactly 0.05 or a missing share fall into neither 5 % cohort).
Sdf <- filter(df, roma_share < 0.05)

# Unique clients of the cohort.
IDs <- unique(Sdf$klient_id)

# ALMP participations starting in 2017 or later, joined to the cohort spells.
Salmps <- almps %>%
  filter(entrya >= "2017-01-01") %>%
  select(klient_id, entrya, exita, nastroj) %>%
  merge(
    select(Sdf, klient_id, entry, exit, dovod_vyradenia_kod),
    by = "klient_id"
  )
# Time-overlap condition between the unemployment spell and the programme:
# the programme starts on or after registration and at most 7 days after exit.
Salmps <- subset(Salmps, entry <= entrya & entrya <= (exit + 7))

# Clients with and without an ALMP participation.
IDsALMPs <- unique(Salmps$klient_id)
IDsNoALMPs <- IDs[!IDs %in% IDsALMPs]

# Counters used later for the links between time nodes: all rows of Salmps,
# and rows whose spell lasted more than 182 / 365 / 548 / 730 days.
# NOTE(review): an earlier computation of the same counters from Sdf was
# overwritten by this one before being used. The accompanying text describes
# them as jobseekers still registered after 6/12/18/24 months, which would
# correspond to Sdf -- confirm which base is intended.
u0 <- nrow(Salmps)
u6 <- nrow(Salmps[as.logical((Salmps$exit - Salmps$entry) > 182), ])
u12 <- nrow(Salmps[(Salmps$exit - Salmps$entry) > 365, ])
u18 <- nrow(Salmps[(Salmps$exit - Salmps$entry) > 548, ])
u24 <- nrow(Salmps[(Salmps$exit - Salmps$entry) > 730, ])

# DOVOD VYRADENIA ####
# Removal-reason codes counted as "found a job".
dovod_vyradenia <- c("V01", "V02", "V03", "V1", "V12", "V15")

# NASTROJE ####
# Programme codes ordered by number of participations; the first ten are
# printed and keep their own node in the diagram.
Tnastroj <- table(Salmps$nastroj)
Tnastroj <- Tnastroj[order(-Tnastroj)]
rownames(Tnastroj[1:10])
##  [1] "P053"  "P54D"  "P54O"  "P54Rp" "P054"  "P54P"  "P54Kp" "P051"  "P52A" 
## [10] "P032"
# "another reason" and "employed" must always stay in this list, otherwise
# they would be pooled with the remaining programmes further below.
nastroj_kod <- c("another reason", "employed", rownames(Tnastroj[1:10]))

# Two sets of rows: cohort members without any ALMP participation (programme
# columns set to NA) and the ALMP participations themselves.
df_no_almps <- Sdf[
  Sdf$klient_id %in% IDsNoALMPs,
  c("klient_id", "entry", "exit", "dovod_vyradenia_kod")
]
df_no_almps[["entrya"]] <- NA
df_no_almps[["exita"]] <- NA
df_no_almps[["nastroj"]] <- NA

df_almps_ <- Salmps[Salmps$klient_id %in% IDsALMPs, ]

# Stack ALMP participants and non-participants into one data frame.
df_almps <- rbind(df_almps_, df_no_almps)
remove(df_almps_, df_no_almps)
# Coefficient of inflation due to multiple ALMP participations: cohort size
# relative to the number of stacked rows.
InfCoef <- nrow(Sdf) / nrow(df_almps)

# Data used for the diagram. `days` runs from registration to programme
# entry, or to the end of the spell for rows without a programme.
df_s <- df_almps %>%
  select(klient_id, entrya, exita, entry, exit, nastroj, dovod_vyradenia_kod) %>%
  relocate(c(entry, exit), .after = klient_id)
df_s$days <- ifelse(
  is.na(df_s$nastroj),
  as.numeric(difftime(df_s$exit, df_s$entry, units = "days")),
  as.numeric(difftime(df_s$entrya, df_s$entry, units = "days"))
)

df_s_almps <- df_s[!is.na(df_s$nastroj), ]
df_s_noalmps <- df_s[is.na(df_s$nastroj), ]

# Rows without a programme are labelled by the way the spell ended.
df_s_noalmps <- mutate(
  df_s_noalmps,
  nastroj = if_else(
    dovod_vyradenia_kod %in% dovod_vyradenia,
    "employed",
    "another reason"
  )
)

df_s <- rbind(df_s_almps, df_s_noalmps)
remove(df_almps, df_s_almps, df_s_noalmps)

df_s$years <- round(df_s$days / 365)

# Data frame used to build the diagram: programme (or exit type) and timing.
Sankey <- select(df_s, nastroj, days)

# Half-year band of the event, labelled by its upper bound in months:
# 0-182 days -> 6, 183-365 -> 12, 366-548 -> 18, 549-730 -> 24, 731+ -> 30.
# cut() replaces the former `days %in% seq(...)` tests: seq(731, max(days), 1)
# stopped with an error whenever max(days) was below 731 or NA, and fractional
# day counts matched no band. Negative or missing days still give NA.
Sankey$time <- 6 * cut(
  Sankey$days,
  breaks = c(0, 182, 365, 548, 730, Inf),
  labels = FALSE,
  include.lowest = TRUE
)

Sankey <- select(Sankey, nastroj, time)

# Every flow starts at the node of the preceding half-year mark. The former
# ifelse() test was true for every possible band, so this is the same value.
Sankey$sources <- Sankey$time - 6


# Number of rows per source node, programme / exit type and time band.
San <- Sankey %>%
  group_by(sources, nastroj, time) %>%
  summarise(num = n(), .groups = "drop")

# Programmes outside `nastroj_kod` are pooled into "OTHER ALMPS" per band.
San_aplmps <- filter(San, nastroj %in% nastroj_kod)
San_other_aplmps <- San %>%
  filter(!nastroj %in% nastroj_kod) %>%
  group_by(sources, time) %>%
  summarise(num = sum(num), .groups = "drop") %>%
  mutate(nastroj = "OTHER ALMPS") %>%
  relocate(nastroj, .after = sources)

San <- rbind(San_aplmps, San_other_aplmps)
remove(San_aplmps, San_other_aplmps)

# Unique nodes of the diagram: every programme / exit type and every
# time mark must be present.
node <- data.frame(
  name = unique(c(as.character(San$nastroj), as.character(San$sources)))
)

# Links between consecutive time marks: how many rows remain at 6/12/18/24
# months (counters u6..u24). The time mark plays the role of the target
# (`nastroj`); its source is the preceding mark, looked up in San.
U <- seq(6, 24, 6)
UN <- c(u6, u12, u18, u24)
velky_df <- data.frame(
  sources = San$sources[match(U, San$time)],
  nastroj = U,
  time = U,
  num = as.numeric(UN)
)

# Disabled alternative: derive the counts from San scaled by InfCoef.
#velky_df <- data.frame()
#for (i in seq(6,30,6)){
#  pocet <- San %>%  group_by( 'sources' = sources >= i) %>% summarise(num = sum(num)*InfCoef, .groups = 'drop') 
#  pocet <- subset(pocet, sources == TRUE)
#  pocet$sources <- i
#  velky_df <- rbind(velky_df, pocet)
#}

San <- rbind(San, velky_df)

# Source and target ids of each link: position of the node name in `node`,
# shifted to start at 0.
San$IDsource <- match(San$sources, node$name) - 1
San$IDtarget <- match(San$nastroj, node$name) - 1

San <- as.data.frame(San)

Sankey_df_without_roma <- sankeyNetwork(
  Links = San, Nodes = node,
  Source = "IDsource", Target = "IDtarget",
  Value = "num", NodeID = "name",
  sinksRight = FALSE, fontSize = 14,
  fontFamily = "sans-serif", nodePadding = 10
)
Sankey_df_without_roma

Graf 2: Toky UoZ s trvalým bydliskom v obci s podielom rómov nad 5% (z evidencie a do opatrení APTP)

##################################
# WITH ROMA ####
##################################

# Cohort: municipalities with a Roma share above 5 % (rows with a share of
# exactly 0.05 or a missing share fall into neither 5 % cohort).
Sdf <- filter(df, roma_share > 0.05)

# Unique clients of the cohort.
IDs <- unique(Sdf$klient_id)

# ALMP participations starting in 2017 or later, joined to the cohort spells.
Salmps <- almps %>%
  filter(entrya >= "2017-01-01") %>%
  select(klient_id, entrya, exita, nastroj) %>%
  merge(
    select(Sdf, klient_id, entry, exit, dovod_vyradenia_kod),
    by = "klient_id"
  )
# Time-overlap condition between the unemployment spell and the programme:
# the programme starts on or after registration and at most 7 days after exit.
Salmps <- subset(Salmps, entry <= entrya & entrya <= (exit + 7))

# Clients with and without an ALMP participation.
IDsALMPs <- unique(Salmps$klient_id)
IDsNoALMPs <- IDs[!IDs %in% IDsALMPs]

# Counters used later for the links between time nodes: all rows of Salmps,
# and rows whose spell lasted more than 182 / 365 / 548 / 730 days.
# NOTE(review): an earlier computation of the same counters from Sdf was
# overwritten by this one before being used. The accompanying text describes
# them as jobseekers still registered after 6/12/18/24 months, which would
# correspond to Sdf -- confirm which base is intended.
u0 <- nrow(Salmps)
u6 <- nrow(Salmps[as.logical((Salmps$exit - Salmps$entry) > 182), ])
u12 <- nrow(Salmps[(Salmps$exit - Salmps$entry) > 365, ])
u18 <- nrow(Salmps[(Salmps$exit - Salmps$entry) > 548, ])
u24 <- nrow(Salmps[(Salmps$exit - Salmps$entry) > 730, ])

# DOVOD VYRADENIA ####
# Removal-reason codes counted as "found a job".
dovod_vyradenia <- c("V01", "V02", "V03", "V1", "V12", "V15")

# NASTROJE ####
# Programme codes ordered by number of participations; the first ten are
# printed and keep their own node in the diagram.
Tnastroj <- table(Salmps$nastroj)
Tnastroj <- Tnastroj[order(-Tnastroj)]
rownames(Tnastroj[1:10])
##  [1] "P54D"  "P054"  "P54O"  "P053"  "P54P"  "P54Rp" "P052"  "P52A"  "P54Kp"
## [10] "P032"
# "another reason" and "employed" must always stay in this list, otherwise
# they would be pooled with the remaining programmes further below.
nastroj_kod <- c("another reason", "employed", rownames(Tnastroj[1:10]))

# Two sets of rows: cohort members without any ALMP participation (programme
# columns set to NA) and the ALMP participations themselves.
df_no_almps <- Sdf[
  Sdf$klient_id %in% IDsNoALMPs,
  c("klient_id", "entry", "exit", "dovod_vyradenia_kod")
]
df_no_almps[["entrya"]] <- NA
df_no_almps[["exita"]] <- NA
df_no_almps[["nastroj"]] <- NA

df_almps_ <- Salmps[Salmps$klient_id %in% IDsALMPs, ]

# Stack ALMP participants and non-participants into one data frame.
df_almps <- rbind(df_almps_, df_no_almps)
remove(df_almps_, df_no_almps)
# Coefficient of inflation due to multiple ALMP participations: cohort size
# relative to the number of stacked rows.
InfCoef <- nrow(Sdf) / nrow(df_almps)

# Data used for the diagram. `days` runs from registration to programme
# entry, or to the end of the spell for rows without a programme.
df_s <- df_almps %>%
  select(klient_id, entrya, exita, entry, exit, nastroj, dovod_vyradenia_kod) %>%
  relocate(c(entry, exit), .after = klient_id)
df_s$days <- ifelse(
  is.na(df_s$nastroj),
  as.numeric(difftime(df_s$exit, df_s$entry, units = "days")),
  as.numeric(difftime(df_s$entrya, df_s$entry, units = "days"))
)

df_s_almps <- df_s[!is.na(df_s$nastroj), ]
df_s_noalmps <- df_s[is.na(df_s$nastroj), ]

# Rows without a programme are labelled by the way the spell ended.
df_s_noalmps <- mutate(
  df_s_noalmps,
  nastroj = if_else(
    dovod_vyradenia_kod %in% dovod_vyradenia,
    "employed",
    "another reason"
  )
)

df_s <- rbind(df_s_almps, df_s_noalmps)
remove(df_almps, df_s_almps, df_s_noalmps)

df_s$years <- round(df_s$days / 365)

# Data frame used to build the diagram: programme (or exit type) and timing.
Sankey <- select(df_s, nastroj, days)

# Half-year band of the event, labelled by its upper bound in months:
# 0-182 days -> 6, 183-365 -> 12, 366-548 -> 18, 549-730 -> 24, 731+ -> 30.
# cut() replaces the former `days %in% seq(...)` tests: seq(731, max(days), 1)
# stopped with an error whenever max(days) was below 731 or NA, and fractional
# day counts matched no band. Negative or missing days still give NA.
Sankey$time <- 6 * cut(
  Sankey$days,
  breaks = c(0, 182, 365, 548, 730, Inf),
  labels = FALSE,
  include.lowest = TRUE
)

Sankey <- select(Sankey, nastroj, time)

# Every flow starts at the node of the preceding half-year mark. The former
# ifelse() test was true for every possible band, so this is the same value.
Sankey$sources <- Sankey$time - 6


# Number of rows per source node, programme / exit type and time band.
San <- Sankey %>%
  group_by(sources, nastroj, time) %>%
  summarise(num = n(), .groups = "drop")

# Programmes outside `nastroj_kod` are pooled into "OTHER ALMPS" per band.
San_aplmps <- filter(San, nastroj %in% nastroj_kod)
San_other_aplmps <- San %>%
  filter(!nastroj %in% nastroj_kod) %>%
  group_by(sources, time) %>%
  summarise(num = sum(num), .groups = "drop") %>%
  mutate(nastroj = "OTHER ALMPS") %>%
  relocate(nastroj, .after = sources)

San <- rbind(San_aplmps, San_other_aplmps)
remove(San_aplmps, San_other_aplmps)

# Unique nodes of the diagram: every programme / exit type and every
# time mark must be present.
node <- data.frame(
  name = unique(c(as.character(San$nastroj), as.character(San$sources)))
)

# Links between consecutive time marks: how many rows remain at 6/12/18/24
# months (counters u6..u24). The time mark plays the role of the target
# (`nastroj`); its source is the preceding mark, looked up in San.
U <- seq(6, 24, 6)
UN <- c(u6, u12, u18, u24)
velky_df <- data.frame(
  sources = San$sources[match(U, San$time)],
  nastroj = U,
  time = U,
  num = as.numeric(UN)
)

# Disabled alternative: derive the counts from San scaled by InfCoef.
#velky_df <- data.frame()
#for (i in seq(6,30,6)){
#  pocet <- San %>%  group_by( 'sources' = sources >= i) %>% summarise(num = sum(num)*InfCoef, .groups = 'drop') 
#  pocet <- subset(pocet, sources == TRUE)
#  pocet$sources <- i
#  velky_df <- rbind(velky_df, pocet)
#}

San <- rbind(San, velky_df)

# Source and target ids of each link: position of the node name in `node`,
# shifted to start at 0.
San$IDsource <- match(San$sources, node$name) - 1
San$IDtarget <- match(San$nastroj, node$name) - 1

San <- as.data.frame(San)

Sankey_df_with_roma <- sankeyNetwork(
  Links = San, Nodes = node,
  Source = "IDsource", Target = "IDtarget",
  Value = "num", NodeID = "name",
  sinksRight = FALSE, fontSize = 14,
  fontFamily = "sans-serif", nodePadding = 10
)
Sankey_df_with_roma

Graf 3: Toky UoZ s trvalým bydliskom v obci s podielom rómov nad 50% (z evidencie a do opatrení APTP)

##################################
# WITH ROMA SHARE over 50%####
##################################

# Cohort: municipalities with a Roma share above 50 %.
Sdf <- filter(df, roma_share > 0.5)

# Unique clients of the cohort.
IDs <- unique(Sdf$klient_id)

# ALMP participations starting in 2017 or later, joined to the cohort spells.
Salmps <- almps %>%
  filter(entrya >= "2017-01-01") %>%
  select(klient_id, entrya, exita, nastroj) %>%
  merge(
    select(Sdf, klient_id, entry, exit, dovod_vyradenia_kod),
    by = "klient_id"
  )
# Time-overlap condition between the unemployment spell and the programme:
# the programme starts on or after registration and at most 7 days after exit.
Salmps <- subset(Salmps, entry <= entrya & entrya <= (exit + 7))

# Clients with and without an ALMP participation.
IDsALMPs <- unique(Salmps$klient_id)
IDsNoALMPs <- IDs[!IDs %in% IDsALMPs]

# Counters used later for the links between time nodes: all rows of Salmps,
# and rows whose spell lasted more than 182 / 365 / 548 / 730 days.
# NOTE(review): an earlier computation of the same counters from Sdf was
# overwritten by this one before being used. The accompanying text describes
# them as jobseekers still registered after 6/12/18/24 months, which would
# correspond to Sdf -- confirm which base is intended.
u0 <- nrow(Salmps)
u6 <- nrow(Salmps[as.logical((Salmps$exit - Salmps$entry) > 182), ])
u12 <- nrow(Salmps[(Salmps$exit - Salmps$entry) > 365, ])
u18 <- nrow(Salmps[(Salmps$exit - Salmps$entry) > 548, ])
u24 <- nrow(Salmps[(Salmps$exit - Salmps$entry) > 730, ])

# DOVOD VYRADENIA ####
# Removal-reason codes counted as "found a job".
dovod_vyradenia <- c("V01", "V02", "V03", "V1", "V12", "V15")

# NASTROJE ####
# Programme codes ordered by number of participations; the first ten are
# printed and keep their own node in the diagram.
Tnastroj <- table(Salmps$nastroj)
Tnastroj <- Tnastroj[order(-Tnastroj)]
rownames(Tnastroj[1:10])
##  [1] "P54D"  "P54O"  "P054"  "P54P"  "P052"  "P54Rp" "P053"  "P52A"  "P54Kp"
## [10] "P50J"
# "another reason" and "employed" must always stay in this list, otherwise
# they would be pooled with the remaining programmes further below.
nastroj_kod <- c("another reason", "employed", rownames(Tnastroj[1:10]))

# Two sets of rows: cohort members without any ALMP participation (programme
# columns set to NA) and the ALMP participations themselves.
df_no_almps <- Sdf[
  Sdf$klient_id %in% IDsNoALMPs,
  c("klient_id", "entry", "exit", "dovod_vyradenia_kod")
]
df_no_almps[["entrya"]] <- NA
df_no_almps[["exita"]] <- NA
df_no_almps[["nastroj"]] <- NA

df_almps_ <- Salmps[Salmps$klient_id %in% IDsALMPs, ]

# Stack ALMP participants and non-participants into one data frame.
df_almps <- rbind(df_almps_, df_no_almps)
remove(df_almps_, df_no_almps)
# Coefficient of inflation due to multiple ALMP participations: cohort size
# relative to the number of stacked rows.
InfCoef <- nrow(Sdf) / nrow(df_almps)

# Data used for the diagram. `days` runs from registration to programme
# entry, or to the end of the spell for rows without a programme.
df_s <- df_almps %>%
  select(klient_id, entrya, exita, entry, exit, nastroj, dovod_vyradenia_kod) %>%
  relocate(c(entry, exit), .after = klient_id)
df_s$days <- ifelse(
  is.na(df_s$nastroj),
  as.numeric(difftime(df_s$exit, df_s$entry, units = "days")),
  as.numeric(difftime(df_s$entrya, df_s$entry, units = "days"))
)

df_s_almps <- df_s[!is.na(df_s$nastroj), ]
df_s_noalmps <- df_s[is.na(df_s$nastroj), ]

# Rows without a programme are labelled by the way the spell ended.
df_s_noalmps <- mutate(
  df_s_noalmps,
  nastroj = if_else(
    dovod_vyradenia_kod %in% dovod_vyradenia,
    "employed",
    "another reason"
  )
)

df_s <- rbind(df_s_almps, df_s_noalmps)
remove(df_almps, df_s_almps, df_s_noalmps)

df_s$years <- round(df_s$days / 365)

# Data frame used to build the diagram: programme (or exit type) and timing.
Sankey <- select(df_s, nastroj, days)

# Half-year band of the event, labelled by its upper bound in months:
# 0-182 days -> 6, 183-365 -> 12, 366-548 -> 18, 549-730 -> 24, 731+ -> 30.
# cut() replaces the former `days %in% seq(...)` tests: seq(731, max(days), 1)
# stopped with an error whenever max(days) was below 731 or NA (a real risk
# for this small cohort), and fractional day counts matched no band.
# Negative or missing days still give NA.
Sankey$time <- 6 * cut(
  Sankey$days,
  breaks = c(0, 182, 365, 548, 730, Inf),
  labels = FALSE,
  include.lowest = TRUE
)

Sankey <- select(Sankey, nastroj, time)

# Every flow starts at the node of the preceding half-year mark. The former
# ifelse() test was true for every possible band, so this is the same value.
Sankey$sources <- Sankey$time - 6


# Number of rows per source node, programme / exit type and time band.
San <- Sankey %>%
  group_by(sources, nastroj, time) %>%
  summarise(num = n(), .groups = "drop")

# Programmes outside `nastroj_kod` are pooled into "OTHER ALMPS" per band.
San_aplmps <- filter(San, nastroj %in% nastroj_kod)
San_other_aplmps <- San %>%
  filter(!nastroj %in% nastroj_kod) %>%
  group_by(sources, time) %>%
  summarise(num = sum(num), .groups = "drop") %>%
  mutate(nastroj = "OTHER ALMPS") %>%
  relocate(nastroj, .after = sources)

San <- rbind(San_aplmps, San_other_aplmps)
remove(San_aplmps, San_other_aplmps)

# Unique nodes of the diagram: every programme / exit type and every
# time mark must be present.
node <- data.frame(
  name = unique(c(as.character(San$nastroj), as.character(San$sources)))
)

# Links between consecutive time marks: how many rows remain at 6/12/18/24
# months (counters u6..u24). The time mark plays the role of the target
# (`nastroj`); its source is the preceding mark, looked up in San.
U <- seq(6, 24, 6)
UN <- c(u6, u12, u18, u24)
velky_df <- data.frame(
  sources = San$sources[match(U, San$time)],
  nastroj = U,
  time = U,
  num = as.numeric(UN)
)

# Disabled alternative: derive the counts from San scaled by InfCoef.
#velky_df <- data.frame()
#for (i in seq(6,30,6)){
#  pocet <- San %>%  group_by( 'sources' = sources >= i) %>% summarise(num = sum(num)*InfCoef, .groups = 'drop') 
#  pocet <- subset(pocet, sources == TRUE)
#  pocet$sources <- i
#  velky_df <- rbind(velky_df, pocet)
#}

San <- rbind(San, velky_df)

# Source and target ids of each link: position of the node name in `node`,
# shifted to start at 0.
San$IDsource <- match(San$sources, node$name) - 1
San$IDtarget <- match(San$nastroj, node$name) - 1

San <- as.data.frame(San)

# NOTE(review): the object is called "...roma10" although this cohort is
# defined by a Roma share above 50 %.
Sankey_df_with_roma10 <- sankeyNetwork(
  Links = San, Nodes = node,
  Source = "IDsource", Target = "IDtarget",
  Value = "num", NodeID = "name",
  sinksRight = FALSE, fontSize = 14,
  fontFamily = "sans-serif", nodePadding = 10
)
Sankey_df_with_roma10

Probability model estimations

Probability of being placed from registered unemployment to a job (in contrast to exiting the register of job seekers for another reason, e.g. non-compliance, …)

We run probability models (probit) predicting the probability of: (i) job placement and (ii) participation in an ALMP programme.

First we look at the probability of being placed into a job after the end of the registration in the register of jobseekers. We split the population of registered jobseekers based on the declared reason of ending the registration. Based on this indication, we estimate the probability of exiting unemployment to a job.

# DOVOD VYRADENIA ####
# Removal-reason codes counted as "found a job".
dovod_vyradenia <- c("V01", "V02", "V03", "V1", "V12", "V15")

# Estimation sample: outcome `part` is 1 when the spell ended with one of the
# codes above; the Roma share is rescaled to per cent (plus its square).
Pdf <- df %>%
  mutate(
    part = as.numeric(dovod_vyradenia_kod %in% dovod_vyradenia),
    roma_share100 = roma_share * 100,
    roma_share100_2 = roma_share100 * roma_share100
  )

# History variables from dfh:
#  cumempl  - row sum over columns 42 to 125 of dfh
#             (presumably periodic employment indicators -- TODO confirm);
#  fulength - length in days of the entry1-exit1 spell when it started
#             before 2017, otherwise 0.
dfh$cumempl <- rowSums(dfh[, 42:125])
dfh$fulength <- ifelse(
  as.Date(dfh$entry1) < as.Date("2017-01-01"),
  as.Date(dfh$exit1) - as.Date(dfh$entry1),
  0
)
# Inner join: clients without a row in dfh drop out of the sample.
Pdf <- merge(
  Pdf,
  dfh[, c("klient_id", "cumempl", "fulength")],
  by = "klient_id", all = FALSE
)

# Estimation
# Four specifications with the Roma share (in %) as regressor of interest:
# alone, with individual controls, with regional controls, with both.
# NOTE(review): the text calls these probit models, but `family = binomial`
# without an explicit link fits a logit in R -- confirm which is intended.
model1 <- part ~ roma_share100
model2 <- part ~ roma_share100 + male + age + noedu + primary + lsec + usec +
  single + kids + roma + hungarian + isco1 + cumempl + fulength
model3 <- part ~ roma_share100 + UR_region_2017 + population + min_urad + min_BA
model4 <- part ~ roma_share100 + male + age + noedu + primary + lsec + usec +
  single + kids + roma + hungarian + isco1 + cumempl + fulength +
  UR_region_2017 + population + min_urad + min_BA
#model6<-as.formula(part ~ roma_share100+male+age+noedu+primary+lsec+usec+single+kids+roma+hungarian+isco1+cumempl+fulength+UR_region_2017+population+min_urad+min_BA+urad)

m1 <- glm(model1, family = binomial, data = Pdf)
m2 <- glm(model2, family = binomial, data = Pdf)
m3 <- glm(model3, family = binomial, data = Pdf)
m4 <- glm(model4, family = binomial, data = Pdf)

# jtools summaries of the individual models.
M1 <- summ(m1)
M2 <- summ(m2)
M3 <- summ(m3)
M4 <- summ(m4)

# Side-by-side table showing only the Roma-share coefficient, with robust
# standard errors.
Rtab_vyradenie <- jtools::export_summs(
  m1, m2, m3, m4,
  model.names = c("Single", "Individual", "Regional", "Full"),
  coefs = c("Share of Roma" = "roma_share100"),
  robust = TRUE
)



# Choice of the polynomial order of the Roma share (disabled): fit orders
# 1 to 10 and compare the nested models with anova().
#for (i in 1:10) {
#  assign(paste("PModel",i, sep=""), 
#         glm(part ~ poly(roma_share100,i)+male+age+noedu+primary+lsec+usec+single+kids+roma+hungarian+isco1+cumempl+fulength+UR_region_2017+population+min_urad+min_BA, family= binomial,data=Pdf))
#  }
#print(anova(PModel1, PModel2, PModel3, PModel4, PModel5, PModel6, PModel7, PModel8, PModel9, PModel10))

# Model with a polynomial of order nP in the Roma share.
# NOTE(review): unlike model4 this specification leaves out cumempl and
# fulength -- confirm that the omission is deliberate.
nP <- 3
PModel <- glm(
  part ~ poly(roma_share100, nP) + male + age + noedu + primary + lsec + usec +
    single + kids + roma + hungarian + isco1 +
    UR_region_2017 + population + min_urad + min_BA,
  family = binomial, data = Pdf
)

# Predictions over all observed values of the Roma share.
predDF <- ggpredict(PModel, terms = "roma_share100 [all]")

# Predicted line with its confidence band.
plot_vyradenie <- ggplot(predDF, aes(x, predicted)) +
  geom_line() +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.1)

First we predict the probability of being placed to a job as a function of the share of the Roma population in the total population of the settlement of the individual's place of residence. While controlling for a set of individual (model Individual) and regional characteristics (model Regional), or altogether (Full model), the probability of the unemployment spell being followed by a job placement declines with the increase in the share of the Roma population in the place of residence. This association appeared to be linear and declining, regardless of the model specification.

Predicted probability of job placement as a function of the share of the Roma population in the municipality/settlement

# Print the job-placement prediction plot with axis titles.
plot_vyradenie +
  labs(
    x = "The share of the Roma population \n in the place of residence (in %)",
    y = "Predicted probability \n of job placement"
  )

Results of the probit model predicting probability of job placement, across model specifications

# Print the regression table built with export_summs().
Rtab_vyradenie
SingleIndividualRegionalFull
Share of Roma-0.01 ***-0.01 ***-0.01 ***-0.01 ***
(0.00)   (0.00)   (0.00)   (0.00)   
N686405       673243       686405       673243       
AIC923968.82    880770.32    923771.66    880648.87    
BIC923991.69    881032.98    923840.30    880957.20    
Pseudo R20.02    0.07    0.02    0.07    
Standard errors are heteroskedasticity robust. *** p < 0.001; ** p < 0.01; * p < 0.05.

Probability of participation in an ALMP programme during the unemployment spell

The share of the Roma population in the settlement of permanent residence was also included as one of the predictors in another model, predicting the ALMP participation. In the case of this model, the association between the share of Roma and the probability of ALMP participation appears to be sensitive to the model specification. It is positive, if we do not control for regional characteristics (such as the regional unemployment rate). Once we include the set of regional variables into the model, the association turns negative, suggesting higher ALMP participation of individuals residing in settlements with a lower share of the Roma population.

When displaying the shape of the association, we have also included the 2nd and 3rd order polynomial of the share of Roma, since the association did not appear to be linear.

Predicted probability of participation in ALMPs as a function of the share of the Roma population in the municipality/settlement

# Print the ALMP-participation prediction plot with axis titles.
# NOTE(review): plot_aptp is not created in the code shown in this document;
# presumably it comes from a hidden chunk -- verify.
plot_aptp +
  labs(
    x = "The share of the Roma population \n in the place of residence (in %)",
    y = "Predicted probability \n of participation in ALMPs"
  )

Heterogeneity potentially driving the association of the share of Roma population and the ALMP participation

We explored the sensitivity of the regression coefficients estimated for the share of Roma population to different model specifications. We can see that adding the regional unemployment rate (or regional LO dummies) twists the association between the share of Roma population and the probability of participation in ALMPs from positive to negative. This suggests that ALMP participation is higher in the regions with higher unemployment, but the share of Roma in the place of residence appears to be increasing ALMP participation. Since these settlements are concentrated in the regions with higher unemployment, the different pattern is revealed only after controlling for the regional unemployment rate.

Predicted probability of participation in ALMPs as a function of the share of the Roma population in the municipality/settlement, in regions with above and under average unemployment rate

# Print the ALMP-participation plot split by regional unemployment rate.
# NOTE(review): plot_aptp_UR is not created in the code shown in this
# document; presumably it comes from a hidden chunk -- verify.
plot_aptp_UR +
  labs(
    x = "The share of the Roma population \n in the place of residence (in %)",
    y = "Predicted probability \n of participation in ALMPs"
  )

Results of the probit model predicting probability of participation in ALMPs

# Print the ALMP-participation regression table.
# NOTE(review): Rtab_aptp is not created in the code shown in this document;
# presumably it comes from a hidden chunk -- verify.
Rtab_aptp
SingleIndividualRegionalFull
Share of Roma0.01 ***0.01 ***-0.00 ***-0.00 ***
(0.00)   (0.00)   (0.00)   (0.00)   
N686405       673243       686405       673243       
AIC737284.36    707354.83    720780.32    692528.82    
BIC737307.24    707617.49    720848.96    692837.16    
Pseudo R20.01    0.04    0.04    0.07    
Standard errors are heteroskedasticity robust. *** p < 0.001; ** p < 0.01; * p < 0.05.

Slovak Public works shelter a substantial part of ALMP participation of Roma (paper Etnologovia) and at the same time often lack the activation element (Duel and Kureková,). In 2017 they were in practice jointly with more intensive (as well as more expensive) programmes of supported employment. Therefore, we explore the differences in the access to ALMP programmes excluding the Public works. This should reveal potential barriers in access to the standard “activating” ALMP measures implemented in Slovakia.

# Unique clients of the whole analysed population.
IDs <- unique(df$klient_id)

# ALMP participations joined to the registration spells.
Salmps <- almps %>%
  select(klient_id, entrya, exita, nastroj) %>%
  merge(
    select(df, klient_id, entry, exit, dovod_vyradenia_kod),
    by = "klient_id"
  )
# Time-overlap condition between the unemployment spell and the programme:
# the programme starts on or after registration and at most 7 days after exit.
Salmps <- subset(Salmps, entry <= entrya & entrya <= (exit + 7))
# Dropping the Public works (programme code P052).
# NOTE(review): rows with a missing `nastroj` turn into all-NA rows here.

Salmps <- Salmps[Salmps$nastroj != "P052", ]


IDsALMPs <- unique(Salmps$klient_id)
IDsNoALMPs <- IDs[!IDs %in% IDsALMPs]


# Two data frames: clients who took part in a remaining ALMP (part = 1)
# and clients who did not (part = 0).
df_almps <- df[df$klient_id %in% IDsALMPs, ]
df_NOalmps <- df[df$klient_id %in% IDsNoALMPs, ]

df_almps[["part"]] <- 1
df_NOalmps[["part"]] <- 0

# Estimation sample: Roma share in per cent (plus its square) and an
# indicator of a regional unemployment rate above the sample mean.
Pdf <- rbind(df_almps, df_NOalmps) %>%
  mutate(
    roma_share100 = roma_share * 100,
    roma_share100_2 = roma_share100 * roma_share100,
    highUR = as.numeric(UR_region_2017 > mean(UR_region_2017))
  )


# History variables from dfh:
#  cumempl  - row sum over columns 42 to 125 of dfh
#             (presumably periodic employment indicators -- TODO confirm);
#  fulength - length in days of the entry1-exit1 spell when it started
#             before 2017, otherwise 0.
dfh$cumempl <- rowSums(dfh[, 42:125])
dfh$fulength <- ifelse(
  as.Date(dfh$entry1) < as.Date("2017-01-01"),
  as.Date(dfh$exit1) - as.Date(dfh$entry1),
  0
)
# Inner join: clients without a row in dfh drop out of the sample.
Pdf <- merge(
  Pdf,
  dfh[, c("klient_id", "cumempl", "fulength")],
  by = "klient_id", all = FALSE
)



# Choice of the polynomial order of the Roma share (disabled): fit orders
# 1 to 10 and compare the nested models with anova().
#for (i in 1:10) {
#  assign(paste("PModel",i, sep=""), 
#         glm(part ~ poly(roma_share100,i)+male+age+noedu+primary+lsec+usec+single+kids+roma+hungarian+isco1+cumempl+fulength+UR_region_2017+population+min_urad+min_BA, family= binomial,data=Pdf))
#  }
#print(anova(PModel1, PModel2, PModel3, PModel4, PModel5, PModel6, PModel7, PModel8, PModel9, PModel10))

# Participation model with a polynomial of order nP in the Roma share.
# NOTE(review): cumempl and fulength are merged into Pdf but not used in this
# specification -- confirm that the omission is deliberate.
nP <- 3
PModel <- glm(
  part ~ poly(roma_share100, nP) + male + age + noedu + primary + lsec + usec +
    single + kids + roma + hungarian + isco1 +
    UR_region_2017 + population + min_urad + min_BA,
  family = binomial, data = Pdf
)

# Predictions over all observed values of the Roma share.
predDF <- ggpredict(PModel, terms = c("roma_share100 [all]"))

# Predicted line with its confidence band.
plot_aptp_noPW <- ggplot(predDF, aes(x, predicted)) +
  geom_line(aes(group = group)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.1)

# Split the sample by the highUR indicator (regional unemployment rate above
# or not above the sample mean).
Pdf_hUR <- Pdf[Pdf$highUR == 1, ]
Pdf_lUR <- Pdf[Pdf$highUR == 0, ]


# Unemployment rate above the average
PModel_hUR <- glm(
  part ~ poly(roma_share100, nP) + male + age + noedu + primary + lsec + usec +
    single + kids + roma + hungarian + isco1 +
    UR_region_2017 + population + min_urad + min_BA,
  family = binomial, data = Pdf_hUR
)

predDF_hUR <- ggpredict(PModel_hUR, terms = c("roma_share100 [all]"))

#plot_aptp_hUR<-ggplot(predDF, aes(x, predicted)) +
#  geom_line(aes(group=group)) +
#  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = .1)

# Unemployment rate below the average
PModel_lUR <- glm(
  part ~ poly(roma_share100, nP) + male + age + noedu + primary + lsec + usec +
    single + kids + roma + hungarian + isco1 +
    UR_region_2017 + population + min_urad + min_BA,
  family = binomial, data = Pdf_lUR
)

predDF_lUR <- ggpredict(PModel_lUR, terms = c("roma_share100 [all]"))

# Both prediction curves in one plot, each with its confidence band; the
# line colour identifies the sub-sample.
plot_aptp_UR_noPW <- ggplot() +
  geom_line(
    data = predDF_lUR,
    aes(x = x, y = predicted, colour = "Under the average")
  ) +
  geom_ribbon(
    data = predDF_lUR,
    aes(x = x, y = predicted, ymin = conf.low, ymax = conf.high),
    alpha = 0.1
  ) +
  geom_line(
    data = predDF_hUR,
    aes(x = x, y = predicted, colour = "Over the average")
  ) +
  geom_ribbon(
    data = predDF_hUR,
    aes(x = x, y = predicted, ymin = conf.low, ymax = conf.high),
    alpha = 0.1
  ) +
  labs(colour = "Regional unemployment rate") +
  theme(legend.position = "top")


# Print the whole-sample figure with its axis titles.
plot_aptp_noPW +
  labs(
    x = "The share of the Roma population \n in the place of residence (in %)",
    y = "Predicted probability \n of participation in ALMPs"
  )

# Print the figure split by regional unemployment rate, same axis titles.
plot_aptp_UR_noPW +
  labs(
    x = "The share of the Roma population \n in the place of residence (in %)",
    y = "Predicted probability \n of participation in ALMPs"
  )

Predicted probability of participation in ALMPs other than Public works as a function of the share of the Roma population in the municipality/settlement, in regions with above-average and below-average unemployment rates

# Print the whole-sample figure with its axis titles.
plot_aptp_noPW +
  labs(
    x = "The share of the Roma population \n in the place of residence (in %)",
    y = "Predicted probability \n of participation in ALMPs"
  )

# Print the figure split by regional unemployment rate, same axis titles.
plot_aptp_UR_noPW +
  labs(
    x = "The share of the Roma population \n in the place of residence (in %)",
    y = "Predicted probability \n of participation in ALMPs"
  )

Annexe: Complete results of the logit models used in the analysis

Complete results of the logit model predicting probability of job placement

Complete results of the logit model predicting probability of job placement

Rtab_vyradenie
                 Single        Individual    Regional      Full
Share of Roma    -0.01 ***     -0.01 ***     -0.01 ***     -0.01 ***
                 (0.00)        (0.00)        (0.00)        (0.00)
N                686405        673243        686405        673243
AIC              923968.82     880770.32     923771.66     880648.87
BIC              923991.69     881032.98     923840.30     880957.20
Pseudo R2        0.02          0.07          0.02          0.07
Standard errors are heteroskedasticity robust. *** p < 0.001; ** p < 0.01; * p < 0.05.
M1
Observations 686405
Dependent variable part
Type Generalized linear model
Family binomial
Link logit
χ2(1) 12494.32
Pseudo-R2 (Cragg-Uhler) 0.02
Pseudo-R2 (McFadden) 0.01
AIC 923968.82
BIC 923991.69
Est. S.E. z val. p
(Intercept) 0.45 0.00 159.97 0.00
roma_share100 -0.01 0.00 -108.90 0.00
Standard errors: MLE
M2
Observations 673243 (13162 missing obs. deleted)
Dependent variable part
Type Generalized linear model
Family binomial
Link logit
χ2(22) 35516.67
Pseudo-R2 (Cragg-Uhler) 0.07
Pseudo-R2 (McFadden) 0.04
AIC 880770.32
BIC 881032.98
Est. S.E. z val. p
(Intercept) 0.19 0.02 8.86 0.00
roma_share100 -0.01 0.00 -40.05 0.00
male 0.06 0.01 10.24 0.00
age -0.01 0.00 -32.03 0.00
noedu -0.70 0.03 -21.83 0.00
primary -0.71 0.01 -72.42 0.00
lsec -0.23 0.01 -28.41 0.00
usec -0.10 0.01 -13.67 0.00
single -0.19 0.01 -29.68 0.00
kids -0.37 0.01 -49.43 0.00
roma -0.41 0.06 -6.34 0.00
hungarian -0.07 0.01 -7.01 0.00
isco11 0.59 0.02 32.11 0.00
isco12 0.72 0.01 54.99 0.00
isco13 0.63 0.01 50.39 0.00
isco14 0.56 0.01 49.19 0.00
isco15 0.48 0.01 48.89 0.00
isco16 0.38 0.03 13.67 0.00
isco17 0.54 0.01 45.59 0.00
isco18 0.66 0.01 59.77 0.00
isco19 0.36 0.01 36.55 0.00
cumempl 0.01 0.00 39.70 0.00
fulength -0.00 0.00 -8.52 0.00
Standard errors: MLE
M3
Observations 686405
Dependent variable part
Type Generalized linear model
Family binomial
Link logit
χ2(5) 12699.47
Pseudo-R2 (Cragg-Uhler) 0.02
Pseudo-R2 (McFadden) 0.01
AIC 923771.66
BIC 923840.30
Est. S.E. z val. p
(Intercept) 0.51 0.01 73.50 0.00
roma_share100 -0.01 0.00 -85.40 0.00
UR_region_2017 -0.00 0.00 -4.33 0.00
population 0.00 0.00 1.52 0.13
min_urad -0.00 0.00 -4.98 0.00
min_BA -0.00 0.00 -5.16 0.00
Standard errors: MLE
M4
Observations 673243 (13162 missing obs. deleted)
Dependent variable part
Type Generalized linear model
Family binomial
Link logit
χ2(26) 35646.12
Pseudo-R2 (Cragg-Uhler) 0.07
Pseudo-R2 (McFadden) 0.04
AIC 880648.87
BIC 880957.20
Est. S.E. z val. p
(Intercept) 0.18 0.02 7.53 0.00
roma_share100 -0.01 0.00 -39.33 0.00
male 0.06 0.01 10.35 0.00
age -0.01 0.00 -31.22 0.00
noedu -0.70 0.03 -21.71 0.00
primary -0.72 0.01 -72.53 0.00
lsec -0.24 0.01 -28.91 0.00
usec -0.10 0.01 -14.00 0.00
single -0.19 0.01 -29.27 0.00
kids -0.37 0.01 -49.62 0.00
roma -0.41 0.06 -6.25 0.00
hungarian -0.09 0.01 -8.95 0.00
isco11 0.59 0.02 32.23 0.00
isco12 0.72 0.01 55.21 0.00
isco13 0.63 0.01 50.62 0.00
isco14 0.56 0.01 49.34 0.00
isco15 0.48 0.01 48.97 0.00
isco16 0.37 0.03 13.27 0.00
isco17 0.54 0.01 45.23 0.00
isco18 0.66 0.01 59.45 0.00
isco19 0.35 0.01 36.12 0.00
cumempl 0.01 0.00 40.01 0.00
fulength -0.00 0.00 -8.84 0.00
UR_region_2017 0.01 0.00 8.22 0.00
population -0.00 0.00 -5.25 0.00
min_urad 0.00 0.00 1.00 0.32
min_BA -0.00 0.00 -5.36 0.00
Standard errors: MLE

Complete results of the logit model predicting probability of ALMP participation

Complete results of the logit model predicting probability of ALMP participation

Rtab_aptp
                 Single        Individual    Regional      Full
Share of Roma    0.01 ***      0.01 ***      -0.00 ***     -0.00 ***
                 (0.00)        (0.00)        (0.00)        (0.00)
N                686405        673243        686405        673243
AIC              737284.36     707354.83     720780.32     692528.82
BIC              737307.24     707617.49     720848.96     692837.16
Pseudo R2        0.01          0.04          0.04          0.07
Standard errors are heteroskedasticity robust. *** p < 0.001; ** p < 0.01; * p < 0.05.
M1_almp
Observations 686405
Dependent variable part
Type Generalized linear model
Family binomial
Link logit
χ2(1) 3547.95
Pseudo-R2 (Cragg-Uhler) 0.01
Pseudo-R2 (McFadden) 0.00
AIC 737284.36
BIC 737307.24
Est. S.E. z val. p
(Intercept) -1.30 0.00 -391.63 0.00
roma_share100 0.01 0.00 61.01 0.00
Standard errors: MLE
M2_almp
Observations 673243 (13162 missing obs. deleted)
Dependent variable part
Type Generalized linear model
Family binomial
Link logit
χ2(22) 16211.90
Pseudo-R2 (Cragg-Uhler) 0.04
Pseudo-R2 (McFadden) 0.02
AIC 707354.83
BIC 707617.49
Est. S.E. z val. p
(Intercept) 0.49 0.03 19.38 0.00
roma_share100 0.01 0.00 41.55 0.00
male -0.30 0.01 -46.12 0.00
age -0.02 0.00 -61.95 0.00
noedu -0.49 0.03 -14.64 0.00
primary -0.40 0.01 -34.72 0.00
lsec -0.21 0.01 -22.17 0.00
usec -0.03 0.01 -4.06 0.00
single -0.03 0.01 -4.01 0.00
kids -0.10 0.01 -11.20 0.00
roma -0.13 0.07 -1.85 0.06
hungarian -0.06 0.01 -5.76 0.00
isco11 -0.36 0.02 -15.99 0.00
isco12 -0.13 0.01 -8.75 0.00
isco13 -0.12 0.01 -8.56 0.00
isco14 -0.06 0.01 -4.61 0.00
isco15 -0.23 0.01 -20.57 0.00
isco16 -0.17 0.03 -4.92 0.00
isco17 -0.16 0.01 -11.65 0.00
isco18 -0.07 0.01 -5.80 0.00
isco19 -0.06 0.01 -5.92 0.00
cumempl -0.01 0.00 -41.08 0.00
fulength 0.00 0.00 12.61 0.00
Standard errors: MLE
M3_almp
Observations 686405
Dependent variable part
Type Generalized linear model
Family binomial
Link logit
χ2(5) 20059.98
Pseudo-R2 (Cragg-Uhler) 0.04
Pseudo-R2 (McFadden) 0.03
AIC 720780.32
BIC 720848.96
Est. S.E. z val. p
(Intercept) -1.72 0.01 -205.04 0.00
roma_share100 -0.00 0.00 -14.14 0.00
UR_region_2017 0.06 0.00 57.02 0.00
population -0.00 0.00 -37.23 0.00
min_urad -0.00 0.00 -13.10 0.00
min_BA 0.00 0.00 40.89 0.00
Standard errors: MLE
M4_almp
Observations 673243 (13162 missing obs. deleted)
Dependent variable part
Type Generalized linear model
Family binomial
Link logit
χ2(26) 31045.91
Pseudo-R2 (Cragg-Uhler) 0.07
Pseudo-R2 (McFadden) 0.04
AIC 692528.82
BIC 692837.16
Est. S.E. z val. p
(Intercept) -0.26 0.03 -9.41 0.00
roma_share100 -0.00 0.00 -13.86 0.00
male -0.32 0.01 -48.62 0.00
age -0.02 0.00 -49.88 0.00
noedu -0.44 0.03 -13.08 0.00
primary -0.38 0.01 -33.09 0.00
lsec -0.26 0.01 -26.79 0.00
usec -0.09 0.01 -10.71 0.00
single 0.03 0.01 3.76 0.00
kids -0.14 0.01 -15.22 0.00
roma -0.10 0.07 -1.42 0.15
hungarian -0.06 0.01 -5.35 0.00
isco11 -0.23 0.02 -9.90 0.00
isco12 -0.04 0.01 -2.98 0.00
isco13 -0.02 0.01 -1.60 0.11
isco14 0.02 0.01 1.54 0.12
isco15 -0.21 0.01 -18.84 0.00
isco16 -0.25 0.03 -7.38 0.00
isco17 -0.19 0.01 -13.71 0.00
isco18 -0.07 0.01 -5.58 0.00
isco19 -0.10 0.01 -8.77 0.00
cumempl -0.01 0.00 -27.73 0.00
fulength 0.00 0.00 10.64 0.00
UR_region_2017 0.06 0.00 54.30 0.00
population -0.00 0.00 -39.17 0.00
min_urad -0.00 0.00 -12.12 0.00
min_BA 0.00 0.00 36.24 0.00
Standard errors: MLE