Toky registrovaných UoZ

Tak sme si povedali, že sa pozrieme na populáciu UoZ pritekajúcich do evidencie v roku 2017, v strednej vekovej skupine (20-55 rokov), bez zdravotného postihnutia, ktorých evidencia nepresiahla 3 roky (1095 dní).

library(dplyr)
library(tidyr)
library(knitr)
library(networkD3)
library(bigrquery)
library(zoo)
library(kableExtra)
#install.packages("jtools")
library(jtools)
#install.packages("huxtable")
library(huxtable)
library(ggeffects)
library(ggplot2)

# Load the 2021 export; only the objects df, almps and dfh are kept in the
# workspace, everything else is removed.
load("D:\\Dropbox\\UPSVaR\\Export_2021\\export2021.RData")
rm(list = setdiff(ls(), c('df', 'almps', 'dfh')))

# Codebook of exit reasons (pipe-separated text file with a header row).
dovod <- read.csv(
  file = "D:/Dropbox/UPSVaR/Export_2021/CISELNIKY/cDovod_Vyradenia.txt",
  header = TRUE,
  sep = "|",
  encoding = "UTF-8"
)


# Sample restrictions on df: age group, inflow window, spell length, health.
# NOTE(review): the text describes the age group as 20-55, while the filter
# below excludes age 20 itself (age > 20) - confirm which bound is intended.
df <- subset(df, age > 20 & age <= 55)

# Inflow during 2017 only. The upper bound has to be `<=`: with `>=` on both
# sides the filter kept only spells starting on or after 31 Dec 2017.
df <- subset(
  df,
  as.Date(entry) >= as.Date("2017-01-01") &
    as.Date(entry) <= as.Date("2017-12-31")
)

# Maximum length of the registration spell.
# NOTE(review): the threshold is meant in days; this assumes exit - entry is
# expressed in days - confirm the class/units of the two columns.
df <- subset(df, exit - entry < 1095)

# Only jobseekers flagged as healthy.
df <- subset(df, healthy == 1)

# Empty cells are turned into NA.
df <- df %>% mutate_all(na_if, "")

# Frequency table of exit reasons, most frequent first. Missing codes are
# first recoded to the string "NA" so that table() counts them as well.
df$dovod_vyradenia_kod[is.na(df$dovod_vyradenia_kod)] <- "NA"
td <- table(df$dovod_vyradenia_kod)
td <- td[order(-td)]

# Replace the codes by their labels from the codebook; codes without a match
# in the codebook (including the "NA" placeholder) get the label below.
rownames(td) <- dovod[match(rownames(td), dovod$DOV_VYRAD_KOD_U1), "DOV_VYRAD_NAYOV_U1"]
rownames(td)[is.na(rownames(td))] <- "Dôvod neuvedený"

Evidencie týchto UoZ boli ukončené na základe nasledujúcich dôvodov.

# Exit reasons: absolute counts and percentage shares (two decimals).
share_pct <- format(
  round(td / sum(td) * 100, digits = 2),
  nsmall = 2,
  scientific = FALSE
)
kable_paper(
  kbl(cbind(td, share_pct), col.names = c("N", "%")),
  "hover",
  full_width = FALSE
)
N %
vzniku pracovného pomeru alebo obdobného pracovného vzťahu 320332 46.66
Dôvod neuvedený 148595 21.65
vyradenie dňom zistenia: 54346 7.92
začatia vykonávania zárobkovej činnosti v EÚ a cudzine 38399 5.59
vzniku oprávnenia prevádzkovať alebo vykonávať SZČ 32093 4.68
prestal spĺňať podmienku podľa § 6 ods. 2 písm. a) a b) a ods. 3. 28506 4.15
požiadanie o vyradenie (vlastná žiadosť) 18159 2.65
odchod do členského štátu EÚ na obdobie dlhšie ako 15 kal. dní 12273 1.79
požiadanie o vyradenie z dôvodu: 9478 1.38
narodenie dieťaťa 9451 1.38
nástup na sústavnú prípravu na povolanie 5525 0.80
odchod do cudziny na obdobie dlhšie ako 15 kal. dní 3788 0.55
skončenie pozastavenia SZČ 2484 0.36
úmrtie UoZ 1001 0.15
vznik PPV (dohody mimo PP) alebo podľa osobitného predpisu 733 0.11
vzatie do výkonu väzby 539 0.08
nástup na výkon trestu odňatia slobody 481 0.07
zaradenia do evidencie uchádzačov o zamestnanie 92 0.01
neschopnosť plniť povinnosti UoZ podľa posudku posudkového lekára 86 0.01
vykonával zárobkovú činnosť v pracovnom pomere alebo v inom právnom vzťahu po 31.7.2017 46 0.01
zániku dlhodobého pobytu, ak ide o štátneho príslušníka tretej krajiny 28 0.00
priznanie starobného dôchodku alebo dovŕšenie veku 15 0.00
nadobudnutie právoplatnosti rozsudku o neplatnosti skončenia zamestnania 8 0.00

Z 507 267 uchádzačov o zamestnanie zaregistrovaných počas roka 2017, 236 782 malo trvalý pobyt v obciach s podielom rómskej populácie viac ako 5% (46.7 percent).

# Podiel romov v obci
#df_with_roma <- filter(df, roma_share > 0.05) 
#df_without_roma <- filter(df, roma_share < 0.05)  

Toky UoZ do jednotlivých nástrojov AOTP, vyradenia či umiestnenia na trh práce. Ide o kohortu pritekajúcu do evidencie počas roka 2017. Uzly 6/12/18/24 zobrazujú počty UoZ zotrvávajúcich v databáze aj po uplynutí 6/12/18/24 mesiacov.

Graf 1: Toky UoZ s trvalým bydliskom v obci s podielom Rómov do 5% (z evidencie a do opatrení AOTP)

##################################
# WITHout ROMA ####
##################################

# Sankey diagram of the flows out of the register (into ALMP programmes,
# employment or another exit) for jobseekers living in municipalities where
# the Roma share is below 5 %.

# --- Cohort and register stocks ----------------------------------------------
Sdf <- filter(df, roma_share < 0.05)

# Spell length in days, computed the same way as `days` further down.
spell_days <- as.numeric(difftime(Sdf$exit, Sdf$entry, units = "days"))

# Jobseekers still registered after 6/12/18/24 months; these counts become the
# links between the time nodes. They are taken from the cohort itself.
# Previously they were overwritten by row counts of the ALMP participation
# table, which leaves out everybody without a participation record and counts
# a person once per participation.
u0 <- nrow(Sdf)
u6 <- sum(spell_days > 182, na.rm = TRUE)
u12 <- sum(spell_days > 365, na.rm = TRUE)
u18 <- sum(spell_days > 548, na.rm = TRUE)
u24 <- sum(spell_days > 730, na.rm = TRUE)

# --- ALMP participations of the cohort ---------------------------------------
IDs <- unique(Sdf$klient_id)

Salmps <- filter(almps, entrya >= "2017-01-01")
Salmps <- merge(
  select(Salmps, klient_id, entrya, exita, nastroj),
  select(Sdf, klient_id, entry, exit, dovod_vyradenia_kod),
  by = "klient_id"
)
# Keep a participation only if it started within the unemployment spell
# (or at most 7 days after its end).
Salmps <- subset(Salmps, entry <= entrya & entrya <= (exit + 7))

IDsALMPs <- unique(Salmps$klient_id)
IDsNoALMPs <- IDs[!IDs %in% IDsALMPs]

# EXIT REASONS ####
# Exit-reason codes counted as "found a job".
dovod_vyradenia <- c("V01", "V02", "V03", "V1", "V12", "V15")

# PROGRAMMES ####
# The ten most frequent programmes get their own node. head() is used so that
# fewer than ten observed programmes do not produce NA names.
Tnastroj <- table(Salmps$nastroj)
Tnastroj <- Tnastroj[order(-Tnastroj)]
top_nastroj <- head(names(Tnastroj), 10)
top_nastroj
##  [1] "P053"  "P54D"  "P54O"  "P54Rp" "P054"  "P54P"  "P54Kp" "P051"  "P52A" 
## [10] "P032"
# "another reason" and "employed" always have to stay in this list, otherwise
# they would be pooled together with the remaining programmes below.
nastroj_kod <- c("another reason", "employed", top_nastroj)

# --- One row per participation plus one row per spell without participation --
df_no_almps <- Sdf[
  Sdf$klient_id %in% IDsNoALMPs,
  c("klient_id", "entry", "exit", "dovod_vyradenia_kod")
]
df_no_almps$entrya <- NA
df_no_almps$exita <- NA
df_no_almps$nastroj <- NA

df_almps <- rbind(Salmps, df_no_almps)
remove(df_no_almps)

# Inflation coefficient due to multiple ALMP participations (only used by the
# disabled variant of velky_df below).
InfCoef <- nrow(Sdf) / nrow(df_almps)

# days: time from registration to programme entry, or - for rows without a
# programme - to the exit from the register. Rows without a programme are then
# labelled by their exit reason.
df_s <- df_almps %>%
  select(klient_id, entry, exit, entrya, exita, nastroj, dovod_vyradenia_kod) %>%
  mutate(
    days = ifelse(
      is.na(nastroj),
      as.numeric(difftime(exit, entry, units = "days")),
      as.numeric(difftime(entrya, entry, units = "days"))
    ),
    nastroj = case_when(
      !is.na(nastroj) ~ as.character(nastroj),
      dovod_vyradenia_kod %in% dovod_vyradenia ~ "employed",
      TRUE ~ "another reason"
    ),
    years = round(days / 365)
  )
remove(df_almps)

# --- Half-year bins ----------------------------------------------------------
# time is the upper end of the half-year in which the event happened (in
# months), sources the node the flow starts from (time - 6). Range comparisons
# are used instead of `%in% seq(...)`: they also work for fractional days and
# when no duration exceeds 730 days. Negative or missing durations stay NA.
Sankey <- df_s %>%
  mutate(
    time = case_when(
      is.na(days) | days < 0 ~ NA_real_,
      days <= 182 ~ 6,
      days <= 365 ~ 12,
      days <= 548 ~ 18,
      days <= 730 ~ 24,
      TRUE ~ 30
    ),
    sources = time - 6
  ) %>%
  select(nastroj, time, sources)

# Number of flows per start node, target and half-year.
San <- Sankey %>%
  group_by(sources, nastroj, time) %>%
  summarise(num = n(), .groups = "drop")

# Programmes outside nastroj_kod are pooled into "OTHER ALMPS".
San_aplmps <- filter(San, nastroj %in% nastroj_kod)
San_other_aplmps <- San %>%
  filter(!nastroj %in% nastroj_kod) %>%
  group_by(sources, time) %>%
  summarise(num = sum(num), .groups = "drop") %>%
  mutate(nastroj = "OTHER ALMPS") %>%
  relocate(nastroj, .after = sources)

San <- rbind(San_aplmps, San_other_aplmps)
remove(San_aplmps, San_other_aplmps)

# Unique nodes of the diagram: every target and every time node.
node <- data.frame(
  name = unique(c(as.character(San$nastroj), as.character(San$sources)))
)

# --- Links between consecutive time nodes ------------------------------------
# Each time node (6, 12, 18, 24) is the target of a link from the preceding
# time node; its value is the number of jobseekers still registered then.
U <- seq(6, 24, 6)
UN <- c(u6, u12, u18, u24)
velky_df <- data.frame(
  sources = San$sources[match(U, San$time)],
  nastroj = as.character(U),
  time = U,
  num = UN
)

# Disabled variant: stocks derived from the flows, scaled by InfCoef.
#velky_df <- data.frame()
#for (i in seq(6,30,6)){
#  pocet <- San %>%  group_by( 'sources' = sources >= i) %>% summarise(num = sum(num)*InfCoef, .groups = 'drop') 
#  pocet <- subset(pocet, sources == TRUE)
#  pocet$sources <- i
#  velky_df <- rbind(velky_df, pocet)
#}

San <- rbind(San, velky_df)

# Zero-based node indices of the link ends, as passed to sankeyNetwork().
San$IDsource <- match(San$sources, node$name) - 1
San$IDtarget <- match(San$nastroj, node$name) - 1

San <- as.data.frame(San)

Sankey_df_without_roma <- sankeyNetwork(
  Links = San, Nodes = node,
  Source = "IDsource", Target = "IDtarget",
  Value = "num", NodeID = "name",
  sinksRight = FALSE, fontSize = 14,
  fontFamily = "sans-serif", nodePadding = 10
)
Sankey_df_without_roma
0 → employed
156,518 
0 → another reason
86,760 
6 → another reason
39,988 
0 → 6
39,711 
6 → employed
26,512 
6 → 12
14,452 
0 → P54D
13,053 
12 → another reason
11,131 
0 → P053
9,954 
6 → P053
6,806 
12 → 18
6,789 
0 → P54Rp
5,222 
18 → another reason
3,997 
0 → P054
3,818 
0 → P54O
3,782 
18 → 24
3,519 
6 → P54D
3,263 
0 → P54P
3,226 
0 → OTHER ALMPS
3,034 
0 → P051
2,922 
12 → employed
2,917 
0 → P54Kp
2,630 
6 → OTHER ALMPS
2,615 
12 → P54O
1,977 
0 → P52A
1,954 
24 → another reason
1,854 
6 → P054
1,716 
0 → P032
1,513 
6 → P54O
1,312 
6 → P54Rp
1,243 
12 → OTHER ALMPS
1,014 
12 → P053
979 
12 → P54P
937 
12 → P054
575 
12 → P54D
518 
18 → employed
462 
6 → P52A
457 
6 → P54Kp
409 
6 → P032
390 
12 → P54Rp
319 
18 → OTHER ALMPS
250 
6 → P54P
227 
18 → P54O
226 
18 → P054
196 
18 → P053
188 
12 → P52A
163 
6 → P051
150 
24 → employed
102 
24 → OTHER ALMPS
101 
18 → P54D
100 
12 → P54Kp
96 
18 → P54Rp
90 
18 → P54P
81 
12 → P032
67 
24 → P054
59 
18 → P54Kp
44 
24 → P053
32 
18 → P52A
29 
24 → P54D
29 
24 → P54Rp
15 
12 → P051
14 
18 → P032
12 
24 → P54O
8 
24 → P54Kp
6 
24 → P54P
6 
24 → P52A
3 
another reason
143,730
another reason
employed
186,511
employed
P032
1,982
P032
P051
3,086
P051
P053
17,959
P053
P054
6,364
P054
P52A
2,606
P52A
P54D
16,963
P54D
P54Kp
3,185
P54Kp
P54O
7,305
P54O
P54P
4,477
P54P
P54Rp
6,889
P54Rp
OTHER ALMPS
7,014
OTHER ALMPS
0
334,097
0
6
99,540
6
12
27,496
12
18
9,194
18
24
3,519
24

Graf 2: Toky UoZ s trvalým bydliskom v obci s podielom Rómov nad 5% (z evidencie a do opatrení AOTP)

##################################
# WITH ROMA ####
##################################

# Sankey diagram of the flows out of the register (into ALMP programmes,
# employment or another exit) for jobseekers living in municipalities where
# the Roma share is above 5 %.

# --- Cohort and register stocks ----------------------------------------------
Sdf <- filter(df, roma_share > 0.05)

# Spell length in days, computed the same way as `days` further down.
spell_days <- as.numeric(difftime(Sdf$exit, Sdf$entry, units = "days"))

# Jobseekers still registered after 6/12/18/24 months; these counts become the
# links between the time nodes. They are taken from the cohort itself.
# Previously they were overwritten by row counts of the ALMP participation
# table, which leaves out everybody without a participation record and counts
# a person once per participation.
u0 <- nrow(Sdf)
u6 <- sum(spell_days > 182, na.rm = TRUE)
u12 <- sum(spell_days > 365, na.rm = TRUE)
u18 <- sum(spell_days > 548, na.rm = TRUE)
u24 <- sum(spell_days > 730, na.rm = TRUE)

# --- ALMP participations of the cohort ---------------------------------------
IDs <- unique(Sdf$klient_id)

Salmps <- filter(almps, entrya >= "2017-01-01")
Salmps <- merge(
  select(Salmps, klient_id, entrya, exita, nastroj),
  select(Sdf, klient_id, entry, exit, dovod_vyradenia_kod),
  by = "klient_id"
)
# Keep a participation only if it started within the unemployment spell
# (or at most 7 days after its end).
Salmps <- subset(Salmps, entry <= entrya & entrya <= (exit + 7))

IDsALMPs <- unique(Salmps$klient_id)
IDsNoALMPs <- IDs[!IDs %in% IDsALMPs]

# EXIT REASONS ####
# Exit-reason codes counted as "found a job".
dovod_vyradenia <- c("V01", "V02", "V03", "V1", "V12", "V15")

# PROGRAMMES ####
# The ten most frequent programmes get their own node. head() is used so that
# fewer than ten observed programmes do not produce NA names.
Tnastroj <- table(Salmps$nastroj)
Tnastroj <- Tnastroj[order(-Tnastroj)]
top_nastroj <- head(names(Tnastroj), 10)
top_nastroj
##  [1] "P54D"  "P054"  "P54O"  "P053"  "P54P"  "P54Rp" "P052"  "P52A"  "P54Kp"
## [10] "P032"
# "another reason" and "employed" always have to stay in this list, otherwise
# they would be pooled together with the remaining programmes below.
nastroj_kod <- c("another reason", "employed", top_nastroj)

# --- One row per participation plus one row per spell without participation --
df_no_almps <- Sdf[
  Sdf$klient_id %in% IDsNoALMPs,
  c("klient_id", "entry", "exit", "dovod_vyradenia_kod")
]
df_no_almps$entrya <- NA
df_no_almps$exita <- NA
df_no_almps$nastroj <- NA

df_almps <- rbind(Salmps, df_no_almps)
remove(df_no_almps)

# Inflation coefficient due to multiple ALMP participations (only used by the
# disabled variant of velky_df below).
InfCoef <- nrow(Sdf) / nrow(df_almps)

# days: time from registration to programme entry, or - for rows without a
# programme - to the exit from the register. Rows without a programme are then
# labelled by their exit reason.
df_s <- df_almps %>%
  select(klient_id, entry, exit, entrya, exita, nastroj, dovod_vyradenia_kod) %>%
  mutate(
    days = ifelse(
      is.na(nastroj),
      as.numeric(difftime(exit, entry, units = "days")),
      as.numeric(difftime(entrya, entry, units = "days"))
    ),
    nastroj = case_when(
      !is.na(nastroj) ~ as.character(nastroj),
      dovod_vyradenia_kod %in% dovod_vyradenia ~ "employed",
      TRUE ~ "another reason"
    ),
    years = round(days / 365)
  )
remove(df_almps)

# --- Half-year bins ----------------------------------------------------------
# time is the upper end of the half-year in which the event happened (in
# months), sources the node the flow starts from (time - 6). Range comparisons
# are used instead of `%in% seq(...)`: they also work for fractional days and
# when no duration exceeds 730 days. Negative or missing durations stay NA.
Sankey <- df_s %>%
  mutate(
    time = case_when(
      is.na(days) | days < 0 ~ NA_real_,
      days <= 182 ~ 6,
      days <= 365 ~ 12,
      days <= 548 ~ 18,
      days <= 730 ~ 24,
      TRUE ~ 30
    ),
    sources = time - 6
  ) %>%
  select(nastroj, time, sources)

# Number of flows per start node, target and half-year.
San <- Sankey %>%
  group_by(sources, nastroj, time) %>%
  summarise(num = n(), .groups = "drop")

# Programmes outside nastroj_kod are pooled into "OTHER ALMPS".
San_aplmps <- filter(San, nastroj %in% nastroj_kod)
San_other_aplmps <- San %>%
  filter(!nastroj %in% nastroj_kod) %>%
  group_by(sources, time) %>%
  summarise(num = sum(num), .groups = "drop") %>%
  mutate(nastroj = "OTHER ALMPS") %>%
  relocate(nastroj, .after = sources)

San <- rbind(San_aplmps, San_other_aplmps)
remove(San_aplmps, San_other_aplmps)

# Unique nodes of the diagram: every target and every time node.
node <- data.frame(
  name = unique(c(as.character(San$nastroj), as.character(San$sources)))
)

# --- Links between consecutive time nodes ------------------------------------
# Each time node (6, 12, 18, 24) is the target of a link from the preceding
# time node; its value is the number of jobseekers still registered then.
U <- seq(6, 24, 6)
UN <- c(u6, u12, u18, u24)
velky_df <- data.frame(
  sources = San$sources[match(U, San$time)],
  nastroj = as.character(U),
  time = U,
  num = UN
)

# Disabled variant: stocks derived from the flows, scaled by InfCoef.
#velky_df <- data.frame()
#for (i in seq(6,30,6)){
#  pocet <- San %>%  group_by( 'sources' = sources >= i) %>% summarise(num = sum(num)*InfCoef, .groups = 'drop') 
#  pocet <- subset(pocet, sources == TRUE)
#  pocet$sources <- i
#  velky_df <- rbind(velky_df, pocet)
#}

San <- rbind(San, velky_df)

# Zero-based node indices of the link ends, as passed to sankeyNetwork().
San$IDsource <- match(San$sources, node$name) - 1
San$IDtarget <- match(San$nastroj, node$name) - 1

San <- as.data.frame(San)

Sankey_df_with_roma <- sankeyNetwork(
  Links = San, Nodes = node,
  Source = "IDsource", Target = "IDtarget",
  Value = "num", NodeID = "name",
  sinksRight = FALSE, fontSize = 14,
  fontFamily = "sans-serif", nodePadding = 10
)
Sankey_df_with_roma
0 → employed
63,466 
0 → another reason
51,831 
0 → 6
35,687 
6 → another reason
24,581 
6 → 12
19,586 
12 → 18
12,333 
6 → employed
11,500 
12 → another reason
10,254 
0 → P54D
9,554 
18 → 24
7,667 
0 → P054
4,710 
18 → another reason
4,096 
0 → P54Rp
3,727 
0 → OTHER ALMPS
3,599 
0 → P54P
3,408 
0 → P053
3,306 
6 → P54D
3,060 
0 → P54O
2,900 
12 → P54O
2,511 
24 → another reason
2,327 
6 → P053
2,185 
12 → employed
1,806 
0 → P52A
1,781 
6 → OTHER ALMPS
1,781 
6 → P054
1,746 
6 → P54O
1,440 
0 → P54Kp
1,311 
12 → P052
1,305 
6 → P54P
1,078 
0 → P032
1,077 
6 → P54Rp
1,036 
12 → P054
1,015 
12 → P54P
952 
18 → P052
770 
12 → OTHER ALMPS
716 
12 → P54D
622 
18 → P054
501 
6 → P52A
439 
12 → P053
380 
18 → P54O
353 
12 → P54Rp
343 
18 → employed
338 
24 → P054
316 
6 → P032
307 
6 → P54Kp
271 
24 → P052
215 
18 → P54D
181 
18 → P54P
176 
18 → OTHER ALMPS
150 
12 → P52A
132 
0 → P052
102 
24 → employed
95 
18 → P54Rp
94 
12 → P54Kp
83 
12 → P032
68 
18 → P053
60 
24 → OTHER ALMPS
54 
24 → P54D
53 
18 → P52A
34 
6 → P052
28 
18 → P032
26 
18 → P54Kp
21 
24 → P54Rp
21 
24 → P54P
14 
24 → P053
9 
24 → P52A
9 
24 → P54O
9 
24 → P032
2 
24 → P54Kp
1 
another reason
93,089
another reason
employed
77,205
employed
P032
1,480
P032
P052
2,420
P052
P053
5,940
P053
P054
8,288
P054
P52A
2,395
P52A
P54D
13,470
P54D
P54Kp
1,687
P54Kp
P54O
7,213
P54O
P54P
5,628
P54P
P54Rp
5,221
P54Rp
OTHER ALMPS
6,300
OTHER ALMPS
0
186,459
0
6
69,038
6
12
32,520
12
18
14,467
18
24
7,667
24

Graf 3: Toky UoZ s trvalým bydliskom v obci s podielom Rómov nad 50% (z evidencie a do opatrení AOTP)

##################################
# WITH ROMA SHARE over 50%####
##################################

# Sankey diagram of the flows out of the register (into ALMP programmes,
# employment or another exit) for jobseekers living in municipalities where
# the Roma share is above 50 %.

# --- Cohort and register stocks ----------------------------------------------
Sdf <- filter(df, roma_share > 0.5)

# Spell length in days, computed the same way as `days` further down.
spell_days <- as.numeric(difftime(Sdf$exit, Sdf$entry, units = "days"))

# Jobseekers still registered after 6/12/18/24 months; these counts become the
# links between the time nodes. They are taken from the cohort itself.
# Previously they were overwritten by row counts of the ALMP participation
# table, which leaves out everybody without a participation record and counts
# a person once per participation.
u0 <- nrow(Sdf)
u6 <- sum(spell_days > 182, na.rm = TRUE)
u12 <- sum(spell_days > 365, na.rm = TRUE)
u18 <- sum(spell_days > 548, na.rm = TRUE)
u24 <- sum(spell_days > 730, na.rm = TRUE)

# --- ALMP participations of the cohort ---------------------------------------
IDs <- unique(Sdf$klient_id)

Salmps <- filter(almps, entrya >= "2017-01-01")
Salmps <- merge(
  select(Salmps, klient_id, entrya, exita, nastroj),
  select(Sdf, klient_id, entry, exit, dovod_vyradenia_kod),
  by = "klient_id"
)
# Keep a participation only if it started within the unemployment spell
# (or at most 7 days after its end).
Salmps <- subset(Salmps, entry <= entrya & entrya <= (exit + 7))

IDsALMPs <- unique(Salmps$klient_id)
IDsNoALMPs <- IDs[!IDs %in% IDsALMPs]

# EXIT REASONS ####
# Exit-reason codes counted as "found a job".
dovod_vyradenia <- c("V01", "V02", "V03", "V1", "V12", "V15")

# PROGRAMMES ####
# The ten most frequent programmes get their own node. head() is used so that
# fewer than ten observed programmes do not produce NA names.
Tnastroj <- table(Salmps$nastroj)
Tnastroj <- Tnastroj[order(-Tnastroj)]
top_nastroj <- head(names(Tnastroj), 10)
top_nastroj
##  [1] "P54D"  "P54O"  "P054"  "P54P"  "P052"  "P54Rp" "P053"  "P52A"  "P54Kp"
## [10] "P50J"
# "another reason" and "employed" always have to stay in this list, otherwise
# they would be pooled together with the remaining programmes below.
nastroj_kod <- c("another reason", "employed", top_nastroj)

# --- One row per participation plus one row per spell without participation --
df_no_almps <- Sdf[
  Sdf$klient_id %in% IDsNoALMPs,
  c("klient_id", "entry", "exit", "dovod_vyradenia_kod")
]
df_no_almps$entrya <- NA
df_no_almps$exita <- NA
df_no_almps$nastroj <- NA

df_almps <- rbind(Salmps, df_no_almps)
remove(df_no_almps)

# Inflation coefficient due to multiple ALMP participations (only used by the
# disabled variant of velky_df below).
InfCoef <- nrow(Sdf) / nrow(df_almps)

# days: time from registration to programme entry, or - for rows without a
# programme - to the exit from the register. Rows without a programme are then
# labelled by their exit reason.
df_s <- df_almps %>%
  select(klient_id, entry, exit, entrya, exita, nastroj, dovod_vyradenia_kod) %>%
  mutate(
    days = ifelse(
      is.na(nastroj),
      as.numeric(difftime(exit, entry, units = "days")),
      as.numeric(difftime(entrya, entry, units = "days"))
    ),
    nastroj = case_when(
      !is.na(nastroj) ~ as.character(nastroj),
      dovod_vyradenia_kod %in% dovod_vyradenia ~ "employed",
      TRUE ~ "another reason"
    ),
    years = round(days / 365)
  )
remove(df_almps)

# --- Half-year bins ----------------------------------------------------------
# time is the upper end of the half-year in which the event happened (in
# months), sources the node the flow starts from (time - 6). Range comparisons
# are used instead of `%in% seq(...)`: they also work for fractional days and
# when no duration exceeds 730 days. Negative or missing durations stay NA.
Sankey <- df_s %>%
  mutate(
    time = case_when(
      is.na(days) | days < 0 ~ NA_real_,
      days <= 182 ~ 6,
      days <= 365 ~ 12,
      days <= 548 ~ 18,
      days <= 730 ~ 24,
      TRUE ~ 30
    ),
    sources = time - 6
  ) %>%
  select(nastroj, time, sources)

# Number of flows per start node, target and half-year.
San <- Sankey %>%
  group_by(sources, nastroj, time) %>%
  summarise(num = n(), .groups = "drop")

# Programmes outside nastroj_kod are pooled into "OTHER ALMPS".
San_aplmps <- filter(San, nastroj %in% nastroj_kod)
San_other_aplmps <- San %>%
  filter(!nastroj %in% nastroj_kod) %>%
  group_by(sources, time) %>%
  summarise(num = sum(num), .groups = "drop") %>%
  mutate(nastroj = "OTHER ALMPS") %>%
  relocate(nastroj, .after = sources)

San <- rbind(San_aplmps, San_other_aplmps)
remove(San_aplmps, San_other_aplmps)

# Unique nodes of the diagram: every target and every time node.
node <- data.frame(
  name = unique(c(as.character(San$nastroj), as.character(San$sources)))
)

# --- Links between consecutive time nodes ------------------------------------
# Each time node (6, 12, 18, 24) is the target of a link from the preceding
# time node; its value is the number of jobseekers still registered then.
U <- seq(6, 24, 6)
UN <- c(u6, u12, u18, u24)
velky_df <- data.frame(
  sources = San$sources[match(U, San$time)],
  nastroj = as.character(U),
  time = U,
  num = UN
)

# Disabled variant: stocks derived from the flows, scaled by InfCoef.
#velky_df <- data.frame()
#for (i in seq(6,30,6)){
#  pocet <- San %>%  group_by( 'sources' = sources >= i) %>% summarise(num = sum(num)*InfCoef, .groups = 'drop') 
#  pocet <- subset(pocet, sources == TRUE)
#  pocet$sources <- i
#  velky_df <- rbind(velky_df, pocet)
#}

San <- rbind(San, velky_df)

# Zero-based node indices of the link ends, as passed to sankeyNetwork().
San$IDsource <- match(San$sources, node$name) - 1
San$IDtarget <- match(San$nastroj, node$name) - 1

San <- as.data.frame(San)

# NOTE(review): the object name ends in "10" although the cohort above is
# defined by a Roma share above 50 %; the name is kept unchanged.
Sankey_df_with_roma10 <- sankeyNetwork(
  Links = San, Nodes = node,
  Source = "IDsource", Target = "IDtarget",
  Value = "num", NodeID = "name",
  sinksRight = FALSE, fontSize = 14,
  fontFamily = "sans-serif", nodePadding = 10
)
Sankey_df_with_roma10
0 → another reason
9,692 
0 → 6
7,670 
0 → employed
6,755 
6 → 12
5,459 
6 → another reason
5,221 
12 → 18
3,967 
12 → another reason
2,871 
18 → 24
2,750 
6 → employed
1,483 
18 → another reason
1,193 
0 → P54D
1,136 
24 → another reason
781 
0 → P54P
744 
0 → P054
721 
12 → P54O
686 
12 → P052
618 
0 → P54O
594 
0 → P54Rp
550 
6 → P54D
498 
18 → P052
368 
6 → P54P
342 
6 → P54O
332 
12 → employed
310 
0 → P52A
294 
0 → OTHER ALMPS
290 
12 → P054
272 
0 → P053
270 
6 → P054
268 
0 → P54Kp
258 
12 → P54P
226 
6 → P54Rp
191 
6 → P053
177 
18 → P054
175 
24 → P054
145 
6 → OTHER ALMPS
141 
0 → P50J
131 
12 → P54D
130 
18 → P54O
106 
12 → P54Rp
99 
6 → P52A
97 
6 → P50J
95 
24 → P052
92 
18 → employed
68 
12 → P50J
59 
6 → P54Kp
58 
0 → P052
57 
12 → OTHER ALMPS
54 
12 → P053
53 
18 → P54P
48 
18 → P54D
47 
18 → P54Rp
35 
12 → P52A
33 
24 → employed
26 
24 → P54D
20 
6 → P052
17 
18 → P50J
13 
12 → P54Kp
12 
24 → P54Rp
12 
18 → OTHER ALMPS
10 
18 → P053
9 
18 → P52A
7 
24 → P54O
4 
24 → OTHER ALMPS
4 
24 → P053
3 
24 → P50J
2 
24 → P52A
2 
24 → P54P
2 
18 → P54Kp
1 
another reason
19,758
another reason
employed
8,642
employed
P052
1,152
P052
P053
512
P053
P054
1,581
P054
P50J
300
P50J
P52A
433
P52A
P54D
1,831
P54D
P54Kp
329
P54Kp
P54O
1,722
P54O
P54P
1,362
P54P
P54Rp
887
P54Rp
OTHER ALMPS
499
OTHER ALMPS
0
29,162
0
6
14,379
6
12
9,390
12
18
4,830
18
24
2,750
24

Probability model estimations

Probability of being placed from registered unemployment into a job (in contrast to exiting the register of jobseekers for another reason, e.g. non-compliance, …)

We run probability models (probit) predicting the probability of: (i) job placement and (ii) participation in an ALMP programme.

First we look at the probability of being placed into a job after the end of the registration in the register of jobseekers. We split the population of registered jobseekers based on the declared reason of ending the registration. Based on this indication, we estimate the probability of exiting unemployment to a job.

# EXIT REASONS ####
# Exit-reason codes counted as job placement.
dovod_vyradenia <- c('V01', 'V02', 'V03', 'V1', 'V12', 'V15')

# Estimation sample: outcome `part` (1 = exit code is one of the job-placement
# codes) and the Roma share rescaled to percent, plus its square.
Pdf <- df %>%
  mutate(
    part = as.numeric(dovod_vyradenia_kod %in% dovod_vyradenia),
    roma_share100 = roma_share * 100,
    roma_share100_2 = roma_share100 * roma_share100
  )

# History variables from dfh.
# NOTE(review): columns 42:125 are summed by position - presumably the
# employment-history columns; confirm against the layout of dfh.
dfh$cumempl <- rowSums(dfh[, 42:125])
# Length (in days) of the spell entry1-exit1 if it started before 2017,
# otherwise 0.
dfh$fulength <- ifelse(
  as.Date(dfh$entry1) < as.Date("2017-01-01"),
  as.Date(dfh$exit1) - as.Date(dfh$entry1),
  0
)
# Inner join: only clients present in both tables are kept.
Pdf <- merge(
  Pdf,
  dfh[, c("klient_id", "cumempl", "fulength")],
  by = "klient_id",
  all = FALSE
)

# Estimation
# Four specifications: Roma share only, + individual controls, + regional
# controls, and all controls together.
model1 <- part ~ roma_share100
model2 <- part ~ roma_share100 + male + age + noedu + primary + lsec + usec +
  single + kids + roma + hungarian + isco1 + cumempl + fulength
model3 <- part ~ roma_share100 + UR_region_2017 + population + min_urad + min_BA
model4 <- part ~ roma_share100 + male + age + noedu + primary + lsec + usec +
  single + kids + roma + hungarian + isco1 + cumempl + fulength +
  UR_region_2017 + population + min_urad + min_BA
#model6<-as.formula(part ~ roma_share100+male+age+noedu+primary+lsec+usec+single+kids+roma+hungarian+isco1+cumempl+fulength+UR_region_2017+population+min_urad+min_BA+urad)

# NOTE(review): the surrounding text calls these probit models, but the
# binomial family is used with its default link - confirm which is intended.
m1 <- glm(model1, family = binomial(), data = Pdf)
m2 <- glm(model2, family = binomial(), data = Pdf)
m3 <- glm(model3, family = binomial(), data = Pdf)
m4 <- glm(model4, family = binomial(), data = Pdf)

M1 <- summ(m1)
M2 <- summ(m2)
M3 <- summ(m3)
M4 <- summ(m4)

# Regression table reporting only the Roma-share coefficient, with robust
# standard errors.
Rtab_vyradenie <- jtools::export_summs(
  m1, m2, m3, m4,
  model.names = c("Single", "Individual", "Regional", "Full"),
  coefs = c("Share of Roma" = "roma_share100"),
  robust = TRUE
)



# Search for the polynomial order of the Roma share (disabled)
#for (i in 1:10) {
#  assign(paste("PModel",i, sep=""), 
#         glm(part ~ poly(roma_share100,i)+male+age+noedu+primary+lsec+usec+single+kids+roma+hungarian+isco1+cumempl+fulength+UR_region_2017+population+min_urad+min_BA, family= binomial,data=Pdf))
#  }
#print(anova(PModel1, PModel2, PModel3, PModel4, PModel5, PModel6, PModel7, PModel8, PModel9, PModel10))

# Model used for the prediction plot: polynomial of order nP in the Roma share.
# NOTE(review): unlike model4, this formula does not contain cumempl and
# fulength - confirm that this is intended.
nP <- 3
PModel <- glm(
  part ~ poly(roma_share100, nP) + male + age + noedu + primary + lsec + usec +
    single + kids + roma + hungarian + isco1 + UR_region_2017 + population +
    min_urad + min_BA,
  family = binomial(),
  data = Pdf
)

# Predicted probabilities over all observed values of the Roma share.
predDF <- ggpredict(PModel, terms = "roma_share100 [all]")

plot_vyradenie <- ggplot(predDF, aes(x = x, y = predicted)) +
  geom_line() +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.1)

First we predict the probability of being placed into a job as a function of the share of the Roma population in the total population of the settlement of the individual's place of residence. Whether we control for a set of individual characteristics (model Individual), regional characteristics (model Regional), or both together (Full model), the probability of the unemployment spell being followed by a job placement declines as the share of the Roma population in the place of residence increases. This association appeared to be linear and declining, regardless of the model specification.

Predicted probability of job placement as a function of the share of the Roma population in the municipality/settlement

# Add axis labels to the job-placement prediction plot and print it.
plot_vyradenie +
  labs(
    x = "The share of the Roma population \n in the place of residence (in %)",
    y = "Predicted probability \n of job placement"
  )

Results of the probit model predicting the probability of job placement, across model specifications

# Print the regression table built with export_summs() above.
Rtab_vyradenie
| | Single | Individual | Regional | Full |
|---|---|---|---|---|
| Share of Roma | -0.01 *** | -0.01 *** | -0.01 *** | -0.01 *** |
| | (0.00) | (0.00) | (0.00) | (0.00) |
| N | 686405 | 673243 | 686405 | 673243 |
| AIC | 923968.82 | 880770.32 | 923771.66 | 880648.87 |
| BIC | 923991.69 | 881032.98 | 923840.30 | 880957.20 |
| Pseudo R2 | 0.02 | 0.07 | 0.02 | 0.07 |
Standard errors are heteroskedasticity robust. *** p < 0.001; ** p < 0.01; * p < 0.05.

Probability of participation in an ALMP programme during the unemployment spell

The share of the Roma population in the settlement of permanent residence was also included as one of the predictors in another model, predicting ALMP participation. In the case of this model, the association between the share of Roma and the probability of ALMP participation appears to be sensitive to the model specification. It is positive if we do not control for regional characteristics (such as the regional unemployment rate). Once we include the set of regional variables in the model, the association turns negative, suggesting higher ALMP participation of individuals residing in settlements with a lower share of the Roma population.

When displaying the shape of the association, we have also included the 2nd and 3rd order polynomial of the share of Roma, since the association did not appear to be linear.

Predicted probability of participation in ALMPs as a function of the share of the Roma population in the municipality/settlement

# Add axis labels to the ALMP-participation prediction plot and print it.
plot_aptp +
  labs(
    x = "The share of the Roma population \n in the place of residence (in %)",
    y = "Predicted probability \n of participation in ALMPs"
  )

Heterogeneity potentially driving the association of the share of Roma population and the ALMP participation

We explored the sensitivity of the regression coefficients estimated for the share of the Roma population to different model specifications. We can see that adding the regional unemployment rate (or regional LO dummies) reverses the association between the share of the Roma population and the probability of participation in ALMPs from positive to negative. This suggests that ALMP participation is higher in the regions with higher unemployment, so that the share of Roma in the place of residence only appears to be increasing ALMP participation. Since these settlements are concentrated in the regions with higher unemployment, the different pattern is revealed only after controlling for the regional unemployment rate.

Predicted probability of participation in ALMPs as a function of the share of the Roma population in the municipality/settlement, in regions with above and under average unemployment rate

# Add axis labels to the ALMP-participation plot split by regional
# unemployment rate and print it.
plot_aptp_UR +
  labs(
    x = "The share of the Roma population \n in the place of residence (in %)",
    y = "Predicted probability \n of participation in ALMPs"
  )

Results of the probit model predicting probability of participation in ALMPs

# Print the ALMP-participation regression table (Rtab_aptp is created outside
# the code shown here).
Rtab_aptp
| | Single | Individual | Regional | Full |
|---|---|---|---|---|
| Share of Roma | 0.01 *** | 0.01 *** | -0.00 *** | -0.00 *** |
| | (0.00) | (0.00) | (0.00) | (0.00) |
| N | 686405 | 673243 | 686405 | 673243 |
| AIC | 737284.36 | 707354.83 | 720780.32 | 692528.82 |
| BIC | 737307.24 | 707617.49 | 720848.96 | 692837.16 |
| Pseudo R2 | 0.01 | 0.04 | 0.04 | 0.07 |
Standard errors are heteroskedasticity robust. *** p < 0.001; ** p < 0.01; * p < 0.05.

Slovak Public works shelter a substantial part of the ALMP participation of Roma (paper Etnologovia) and at the same time often lack the activation element (Duel and Kureková). In 2017 they were, in practice, implemented jointly with more intensive (as well as more expensive) programmes of supported employment. Therefore, we explore the differences in access to ALMP programmes excluding the Public works. This should reveal potential barriers in access to the standard “activating” ALMP measures implemented in Slovakia.

# Identify the clients who entered an ALMP other than the Public works during
# (or shortly after) one of their registration spells in `df`.
IDs <- unique(df$klient_id)

# Pair every programme record with every registration spell of the same client.
Salmps <- merge(
  select(almps, klient_id, entrya, exita, nastroj),
  select(df, klient_id, entry, exit, dovod_vyradenia_kod),
  by = "klient_id"
)

# Overlap condition between the unemployment spell and the programme: the
# programme must start within the spell, or at most 7 days after its end.
Salmps <- subset(Salmps, entry <= entrya & entrya <= (exit + 7))

# Dropping the Public works (tool code "P052").
# `%in%` never returns NA, so a record with a missing tool code is kept as it
# is. With `Salmps$nastroj != "P052"` such a record would become an all-NA row
# and the client's ID would silently disappear from the participant list.
Salmps <- Salmps[!(Salmps$nastroj %in% "P052"), ]

IDsALMPs <- unique(Salmps$klient_id)
IDsNoALMPs <- IDs[!IDs %in% IDsALMPs]


# Build two data frames: clients who took part in an ALMP and those who did
# not, then stack them with a 0/1 participation indicator `part`.
df_almps <- df[df$klient_id %in% IDsALMPs, ]
df_NOalmps <- df[df$klient_id %in% IDsNoALMPs, ]

df_almps$part <- 1
df_NOalmps$part <- 0

Pdf <- rbind(df_almps, df_NOalmps)

# Share of Roma expressed in percent, and its square.
Pdf$roma_share100 <- Pdf$roma_share * 100
Pdf$roma_share100_2 <- Pdf$roma_share100 * Pdf$roma_share100

# Indicator of a regional unemployment rate above the sample mean.
# na.rm = TRUE keeps one missing rate from turning the mean, and therefore the
# whole indicator, into NA.
Pdf$highUR <- as.numeric(
  Pdf$UR_region_2017 > mean(Pdf$UR_region_2017, na.rm = TRUE)
)

# Row-wise sum over columns 42 to 125 of `dfh`.
# NOTE(review): the columns are selected by position; presumably these are the
# employment-history indicators. Confirm, and prefer selecting them by name.
dfh$cumempl <- rowSums(dfh[, 42:125])

# Length in days of the spell (entry1, exit1) when it started before 2017,
# otherwise 0. The difference is converted to a plain number of days
# explicitly instead of relying on ifelse() to strip the difftime class.
dfh$fulength <- ifelse(
  as.Date(dfh$entry1) < as.Date("2017-01-01"),
  as.numeric(difftime(as.Date(dfh$exit1), as.Date(dfh$entry1), units = "days")),
  0
)

# Inner join: rows without a matching klient_id in `dfh` are dropped.
Pdf <- merge(
  Pdf,
  dfh[, c("klient_id", "cumempl", "fulength")],
  by = "klient_id",
  all = FALSE
)



# Choice of the polynomial degree of roma_share100 (originally labelled
# "Lag identification"); kept commented out for reference. Note that this
# exploratory specification also contains cumempl and fulength, which the
# models fitted below do not.
#for (i in 1:10) {
#  assign(paste("PModel",i, sep=""), 
#         glm(part ~ poly(roma_share100,i)+male+age+noedu+primary+lsec+usec+single+kids+roma+hungarian+isco1+cumempl+fulength+UR_region_2017+population+min_urad+min_BA, family= binomial,data=Pdf))
#  }
#print(anova(PModel1, PModel2, PModel3, PModel4, PModel5, PModel6, PModel7, PModel8, PModel9, PModel10))

# Degree of the polynomial in the share of Roma.
nP <- 3

# One specification shared by the pooled model and both sub-sample models, so
# the three fits cannot drift apart when the covariate list is edited.
almp_formula <- part ~ poly(roma_share100, nP) + male + age + noedu + primary +
  lsec + usec + single + kids + roma + hungarian + isco1 + UR_region_2017 +
  population + min_urad + min_BA

# Pooled model (binomial family, default logit link) and its predictions over
# all observed values of roma_share100.
PModel <- glm(almp_formula, family = binomial, data = Pdf)

predDF <- ggpredict(PModel, terms = c("roma_share100 [all]"))

plot_aptp_noPW <- ggplot(predDF, aes(x, predicted)) +
  geom_line(aes(group = group)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = .1)

# Split by the unemployment indicator. which() drops rows whose highUR is NA
# instead of returning them as all-NA rows.
Pdf_hUR <- Pdf[which(Pdf$highUR == 1), ]
Pdf_lUR <- Pdf[which(Pdf$highUR == 0), ]

# Unemployment rate above the average
PModel_hUR <- glm(almp_formula, family = binomial, data = Pdf_hUR)

predDF_hUR <- ggpredict(PModel_hUR, terms = c("roma_share100 [all]"))

# Stand-alone plot for the high-unemployment sub-sample (disabled). The
# original draft plotted the pooled predDF here; predDF_hUR is the matching
# prediction frame.
#plot_aptp_hUR<-ggplot(predDF_hUR, aes(x, predicted)) +
#  geom_line(aes(group=group)) +
#  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = .1)

# Unemployment rate below the average
PModel_lUR <- glm(almp_formula, family = binomial, data = Pdf_lUR)

predDF_lUR <- ggpredict(PModel_lUR, terms = c("roma_share100 [all]"))

# Both sub-sample curves in one panel. The ribbons only need x, ymin and ymax,
# so the unused y aesthetic is not mapped.
plot_aptp_UR_noPW <- ggplot() +
  geom_line(
    data = predDF_lUR,
    aes(x = x, y = predicted, colour = "Under the average")
  ) +
  geom_ribbon(
    data = predDF_lUR,
    aes(x = x, ymin = conf.low, ymax = conf.high),
    alpha = .1
  ) +
  geom_line(
    data = predDF_hUR,
    aes(x = x, y = predicted, colour = "Over the average")
  ) +
  geom_ribbon(
    data = predDF_hUR,
    aes(x = x, ymin = conf.low, ymax = conf.high),
    alpha = .1
  ) +
  labs(colour = "Regional unemployment rate") +
  theme(legend.position = "top")


plot_aptp_noPW+xlab("The share of the Roma population \n in the place of residence (in %)")+ylab("Predicted probability \n of participation in ALMPs")

plot_aptp_UR_noPW+xlab("The share of the Roma population \n in the place of residence (in %)")+ylab("Predicted probability \n of participation in ALMPs")

Predicted probability of participation in ALMPs other than the Public works as a function of the share of the Roma population in the municipality/settlement, in regions with above-average and below-average unemployment rates

# Pooled prediction curve (Public works excluded) with axis titles.
plot_aptp_noPW +
  labs(
    x = "The share of the Roma population \n in the place of residence (in %)",
    y = "Predicted probability \n of participation in ALMPs"
  )

# Curves by regional unemployment level (Public works excluded) with axis titles.
plot_aptp_UR_noPW +
  labs(
    x = "The share of the Roma population \n in the place of residence (in %)",
    y = "Predicted probability \n of participation in ALMPs"
  )

Annexe: Complete results of the logit models used in the analysis

Complete results of the logit model predicting probability of job placement

Complete results of the logit model predicting probability of job placement

# Display the model comparison table `Rtab_vyradenie` (built outside this section).
Rtab_vyradenie
|               | Single    | Individual | Regional  | Full      |
|---------------|-----------|------------|-----------|-----------|
| Share of Roma | -0.01 *** | -0.01 ***  | -0.01 *** | -0.01 *** |
|               | (0.00)    | (0.00)     | (0.00)    | (0.00)    |
| N             | 686405    | 673243     | 686405    | 673243    |
| AIC           | 923968.82 | 880770.32  | 923771.66 | 880648.87 |
| BIC           | 923991.69 | 881032.98  | 923840.30 | 880957.20 |
| Pseudo R2     | 0.02      | 0.07       | 0.02      | 0.07      |

Standard errors are heteroskedasticity robust. *** p < 0.001; ** p < 0.01; * p < 0.05.
M1
Observations 686405
Dependent variable part
Type Generalized linear model
Family binomial
Link logit
χ2(1) 12494.32
Pseudo-R2 (Cragg-Uhler) 0.02
Pseudo-R2 (McFadden) 0.01
AIC 923968.82
BIC 923991.69
Est. S.E. z val. p
(Intercept) 0.45 0.00 159.97 0.00
roma_share100 -0.01 0.00 -108.90 0.00
Standard errors: MLE
M2
Observations 673243 (13162 missing obs. deleted)
Dependent variable part
Type Generalized linear model
Family binomial
Link logit
χ2(22) 35516.67
Pseudo-R2 (Cragg-Uhler) 0.07
Pseudo-R2 (McFadden) 0.04
AIC 880770.32
BIC 881032.98
Est. S.E. z val. p
(Intercept) 0.19 0.02 8.86 0.00
roma_share100 -0.01 0.00 -40.05 0.00
male 0.06 0.01 10.24 0.00
age -0.01 0.00 -32.03 0.00
noedu -0.70 0.03 -21.83 0.00
primary -0.71 0.01 -72.42 0.00
lsec -0.23 0.01 -28.41 0.00
usec -0.10 0.01 -13.67 0.00
single -0.19 0.01 -29.68 0.00
kids -0.37 0.01 -49.43 0.00
roma -0.41 0.06 -6.34 0.00
hungarian -0.07 0.01 -7.01 0.00
isco11 0.59 0.02 32.11 0.00
isco12 0.72 0.01 54.99 0.00
isco13 0.63 0.01 50.39 0.00
isco14 0.56 0.01 49.19 0.00
isco15 0.48 0.01 48.89 0.00
isco16 0.38 0.03 13.67 0.00
isco17 0.54 0.01 45.59 0.00
isco18 0.66 0.01 59.77 0.00
isco19 0.36 0.01 36.55 0.00
cumempl 0.01 0.00 39.70 0.00
fulength -0.00 0.00 -8.52 0.00
Standard errors: MLE
M3
Observations 686405
Dependent variable part
Type Generalized linear model
Family binomial
Link logit
χ2(5) 12699.47
Pseudo-R2 (Cragg-Uhler) 0.02
Pseudo-R2 (McFadden) 0.01
AIC 923771.66
BIC 923840.30
Est. S.E. z val. p
(Intercept) 0.51 0.01 73.50 0.00
roma_share100 -0.01 0.00 -85.40 0.00
UR_region_2017 -0.00 0.00 -4.33 0.00
population 0.00 0.00 1.52 0.13
min_urad -0.00 0.00 -4.98 0.00
min_BA -0.00 0.00 -5.16 0.00
Standard errors: MLE
M4
Observations 673243 (13162 missing obs. deleted)
Dependent variable part
Type Generalized linear model
Family binomial
Link logit
χ2(26) 35646.12
Pseudo-R2 (Cragg-Uhler) 0.07
Pseudo-R2 (McFadden) 0.04
AIC 880648.87
BIC 880957.20
Est. S.E. z val. p
(Intercept) 0.18 0.02 7.53 0.00
roma_share100 -0.01 0.00 -39.33 0.00
male 0.06 0.01 10.35 0.00
age -0.01 0.00 -31.22 0.00
noedu -0.70 0.03 -21.71 0.00
primary -0.72 0.01 -72.53 0.00
lsec -0.24 0.01 -28.91 0.00
usec -0.10 0.01 -14.00 0.00
single -0.19 0.01 -29.27 0.00
kids -0.37 0.01 -49.62 0.00
roma -0.41 0.06 -6.25 0.00
hungarian -0.09 0.01 -8.95 0.00
isco11 0.59 0.02 32.23 0.00
isco12 0.72 0.01 55.21 0.00
isco13 0.63 0.01 50.62 0.00
isco14 0.56 0.01 49.34 0.00
isco15 0.48 0.01 48.97 0.00
isco16 0.37 0.03 13.27 0.00
isco17 0.54 0.01 45.23 0.00
isco18 0.66 0.01 59.45 0.00
isco19 0.35 0.01 36.12 0.00
cumempl 0.01 0.00 40.01 0.00
fulength -0.00 0.00 -8.84 0.00
UR_region_2017 0.01 0.00 8.22 0.00
population -0.00 0.00 -5.25 0.00
min_urad 0.00 0.00 1.00 0.32
min_BA -0.00 0.00 -5.36 0.00
Standard errors: MLE

Complete results of the logit model predicting probability of ALMP participation

Complete results of the logit model predicting probability of ALMP participation

# Display the model comparison table `Rtab_aptp` (built outside this section).
Rtab_aptp
|               | Single    | Individual | Regional  | Full      |
|---------------|-----------|------------|-----------|-----------|
| Share of Roma | 0.01 ***  | 0.01 ***   | -0.00 *** | -0.00 *** |
|               | (0.00)    | (0.00)     | (0.00)    | (0.00)    |
| N             | 686405    | 673243     | 686405    | 673243    |
| AIC           | 737284.36 | 707354.83  | 720780.32 | 692528.82 |
| BIC           | 737307.24 | 707617.49  | 720848.96 | 692837.16 |
| Pseudo R2     | 0.01      | 0.04       | 0.04      | 0.07      |

Standard errors are heteroskedasticity robust. *** p < 0.001; ** p < 0.01; * p < 0.05.
M1_almp
Observations 686405
Dependent variable part
Type Generalized linear model
Family binomial
Link logit
χ2(1) 3547.95
Pseudo-R2 (Cragg-Uhler) 0.01
Pseudo-R2 (McFadden) 0.00
AIC 737284.36
BIC 737307.24
Est. S.E. z val. p
(Intercept) -1.30 0.00 -391.63 0.00
roma_share100 0.01 0.00 61.01 0.00
Standard errors: MLE
M2_almp
Observations 673243 (13162 missing obs. deleted)
Dependent variable part
Type Generalized linear model
Family binomial
Link logit
χ2(22) 16211.90
Pseudo-R2 (Cragg-Uhler) 0.04
Pseudo-R2 (McFadden) 0.02
AIC 707354.83
BIC 707617.49
Est. S.E. z val. p
(Intercept) 0.49 0.03 19.38 0.00
roma_share100 0.01 0.00 41.55 0.00
male -0.30 0.01 -46.12 0.00
age -0.02 0.00 -61.95 0.00
noedu -0.49 0.03 -14.64 0.00
primary -0.40 0.01 -34.72 0.00
lsec -0.21 0.01 -22.17 0.00
usec -0.03 0.01 -4.06 0.00
single -0.03 0.01 -4.01 0.00
kids -0.10 0.01 -11.20 0.00
roma -0.13 0.07 -1.85 0.06
hungarian -0.06 0.01 -5.76 0.00
isco11 -0.36 0.02 -15.99 0.00
isco12 -0.13 0.01 -8.75 0.00
isco13 -0.12 0.01 -8.56 0.00
isco14 -0.06 0.01 -4.61 0.00
isco15 -0.23 0.01 -20.57 0.00
isco16 -0.17 0.03 -4.92 0.00
isco17 -0.16 0.01 -11.65 0.00
isco18 -0.07 0.01 -5.80 0.00
isco19 -0.06 0.01 -5.92 0.00
cumempl -0.01 0.00 -41.08 0.00
fulength 0.00 0.00 12.61 0.00
Standard errors: MLE
M3_almp
Observations 686405
Dependent variable part
Type Generalized linear model
Family binomial
Link logit
χ2(5) 20059.98
Pseudo-R2 (Cragg-Uhler) 0.04
Pseudo-R2 (McFadden) 0.03
AIC 720780.32
BIC 720848.96
Est. S.E. z val. p
(Intercept) -1.72 0.01 -205.04 0.00
roma_share100 -0.00 0.00 -14.14 0.00
UR_region_2017 0.06 0.00 57.02 0.00
population -0.00 0.00 -37.23 0.00
min_urad -0.00 0.00 -13.10 0.00
min_BA 0.00 0.00 40.89 0.00
Standard errors: MLE
M4_almp
Observations 673243 (13162 missing obs. deleted)
Dependent variable part
Type Generalized linear model
Family binomial
Link logit
χ2(26) 31045.91
Pseudo-R2 (Cragg-Uhler) 0.07
Pseudo-R2 (McFadden) 0.04
AIC 692528.82
BIC 692837.16
Est. S.E. z val. p
(Intercept) -0.26 0.03 -9.41 0.00
roma_share100 -0.00 0.00 -13.86 0.00
male -0.32 0.01 -48.62 0.00
age -0.02 0.00 -49.88 0.00
noedu -0.44 0.03 -13.08 0.00
primary -0.38 0.01 -33.09 0.00
lsec -0.26 0.01 -26.79 0.00
usec -0.09 0.01 -10.71 0.00
single 0.03 0.01 3.76 0.00
kids -0.14 0.01 -15.22 0.00
roma -0.10 0.07 -1.42 0.15
hungarian -0.06 0.01 -5.35 0.00
isco11 -0.23 0.02 -9.90 0.00
isco12 -0.04 0.01 -2.98 0.00
isco13 -0.02 0.01 -1.60 0.11
isco14 0.02 0.01 1.54 0.12
isco15 -0.21 0.01 -18.84 0.00
isco16 -0.25 0.03 -7.38 0.00
isco17 -0.19 0.01 -13.71 0.00
isco18 -0.07 0.01 -5.58 0.00
isco19 -0.10 0.01 -8.77 0.00
cumempl -0.01 0.00 -27.73 0.00
fulength 0.00 0.00 10.64 0.00
UR_region_2017 0.06 0.00 54.30 0.00
population -0.00 0.00 -39.17 0.00
min_urad -0.00 0.00 -12.12 0.00
min_BA 0.00 0.00 36.24 0.00
Standard errors: MLE