This script was used for the paper “The cost of late payments in public procurement”, presented in the Open Data Research Symposium 2018. The datasets used for this analysis can be downloaded from this link: Datasets. The full paper can be found here: Paper. This paper was possible thanks to the support by Hivos.
library(dplyr)
library(lubridate)
library(ggplot2)
library(readxl)
library(gmodels)
library(Hmisc)
library(ggthemes)
library(readr)
library(survminer)
library(survival)
library(kableExtra)
library(tidyr)
library(stringr)
library(flexsurv)
library(xts)
library(scales)
options(scipen = 999)
theme_Publication <- function(base_size = 14, base_family = "Helvetica") {
library(grid)
library(ggthemes)
(theme_foundation(base_size = base_size, base_family = base_family) + theme(plot.title = element_text(face = "bold",
size = rel(1.2), hjust = 0.5), text = element_text(), panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA), panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold", size = rel(1)), axis.title.y = element_text(angle = 90,
vjust = 2), axis.title.x = element_text(vjust = -0.2), axis.text = element_text(),
axis.line = element_line(colour = "black"), axis.ticks = element_line(),
panel.grid.major = element_line(colour = "#f0f0f0"), panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA), legend.position = "bottom",
legend.direction = "horizontal", legend.title = element_text(face = "italic"),
legend.key.size = unit(0.5, "cm"), legend.margin = unit(0, "cm"), plot.margin = unit(c(10,
5, 5, 5), "mm"), strip.background = element_rect(colour = "#f0f0f0",
fill = "#f0f0f0"), strip.text = element_text(face = "bold")))
}
scale_fill_Publication <- function(...) {
library(scales)
discrete_scale("fill", "Publication", manual_pal(values = c("#386cb0", "#fdb462",
"#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")),
...)
}
scale_colour_Publication <- function(...) {
library(scales)
discrete_scale("colour", "Publication", manual_pal(values = c("#386cb0",
"#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3",
"#ffff33")), ...)
}
# Import files
base2 <- read.csv("finaldataset.csv", stringsAsFactors = FALSE, encoding = "UTF-8")
# Transform dates
dates_vector <- c("obl_fecha_elaboracion", "obl_fecha_aprobacion", "obl_fecha_factura",
"str_fecha_ingreso", "str_fecha_aprobacion", "str_fecha_recepcion_tesoro",
"ot_fecha_generacion", "ot_fecha_deposito", "contr_fecha_firma_contrato")
for (i in dates_vector) {
base2[, i] <- parse_date_time(base2[, i], orders = c("dmy", "mdy", "ymd"))
}
# Leave contracts in Guaranies (delete 10 obs), Delete observations without
# STR and OT
base2 <- base2 %>% filter(obl_moneda_descripcion == "GUARANIES", !is.na(str_numero_str),
!is.na(ot_numero_ot)) %>% # Delete obs where contract date>invoice date
mutate(prueba2 = ifelse(obl_fecha_factura < contr_fecha_firma_contrato, 1, 0)) %>%
filter(prueba2 == 0 | is.na(prueba2)) %>% # Create new variables
mutate(total = difftime(ot_fecha_deposito, obl_fecha_factura, units = c("days")),
total2 = difftime(ot_fecha_deposito, obl_fecha_aprobacion, units = c("days")),
r_factura = difftime(obl_fecha_aprobacion, obl_fecha_factura, units = c("days")),
r_str = difftime(str_fecha_recepcion_tesoro, obl_fecha_aprobacion, units = c("days")),
r_ot = difftime(ot_fecha_deposito, str_fecha_recepcion_tesoro, units = c("days")),
r_carga = difftime(obl_fecha_elaboracion, obl_fecha_factura, units = c("days")),
r_apro = difftime(obl_fecha_aprobacion, obl_fecha_elaboracion, units = c("days")),
r_carga_str = difftime(str_fecha_ingreso, obl_fecha_aprobacion, units = c("days")),
r_apro_str = difftime(str_fecha_aprobacion, str_fecha_ingreso, units = c("days")),
r_tesoro_str = difftime(str_fecha_recepcion_tesoro, str_fecha_aprobacion,
units = c("days")), r_carga_ot = difftime(ot_fecha_generacion, str_fecha_recepcion_tesoro,
units = c("days")), r_pago_ot = difftime(ot_fecha_deposito, ot_fecha_generacion,
units = c("days")), r_entidad = difftime(str_fecha_recepcion_tesoro,
obl_fecha_factura, units = c("days")), r_hacienda = difftime(ot_fecha_deposito,
str_fecha_recepcion_tesoro, units = c("days")), total_cat = ifelse(total <
30, "< 30", ifelse(total < 45, "30-<45", ifelse(total < 60, "45-<60",
ifelse(total < 75, "60-<75", "+ 75")))))
base2[, 29:42] <- lapply(base2[, 29:42], as.numeric)
base2$obl_entidad_descripcion <- gsub("PBLICA", "PUBLICA", base2$obl_entidad_descripcion,
fixed = T)
base2$obl_entidad_descripcion <- gsub("TRNSITO", "TRANSITO", base2$obl_entidad_descripcion,
fixed = T)
base2$obl_entidad_descripcion <- gsub("ESPRITU", "ESPIRITU", base2$obl_entidad_descripcion,
fixed = T)
# Identify extreme values
base_limpia <- base2 %>% filter(total > 10, total2 > 0, r_factura >= 0, r_str >=
0, r_ot >= 0, r_carga >= 0, r_apro >= 0, r_carga_str >= 0, r_apro_str >=
0, r_tesoro_str >= 0, r_carga_ot >= 0, r_pago_ot >= 0, r_entidad > 0, r_hacienda >=
0, obl_monto_obligado > 50000) %>% mutate(dataset = ifelse((r_factura >
110 | r_str > 37 | r_ot > 47), "B", "A"))
## Exploratory Data Analysis
indicadores <- read_excel("indicadores.xlsx")
datos <- base_limpia %>% left_join(indicadores, by = c(obl_anho_obligacion = "anio")) %>%
mutate(monto_real = obl_monto_obligado/deflator * 100, monto_dolar = (monto_real/5618.933)/1000000)
CrossTable(datos$total_cat, datos$dataset, prop.chisq = F, prop.t = F)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Row Total |
## | N / Col Total |
## |-------------------------|
##
##
## Total Observations in Table: 377520
##
##
## | datos$dataset
## datos$total_cat | A | B | Row Total |
## ----------------|-----------|-----------|-----------|
## + 75 | 78056 | 59199 | 137255 |
## | 0.569 | 0.431 | 0.364 |
## | 0.247 | 0.962 | |
## ----------------|-----------|-----------|-----------|
## < 30 | 70402 | 0 | 70402 |
## | 1.000 | 0.000 | 0.186 |
## | 0.223 | 0.000 | |
## ----------------|-----------|-----------|-----------|
## 30-<45 | 67553 | 1 | 67554 |
## | 1.000 | 0.000 | 0.179 |
## | 0.214 | 0.000 | |
## ----------------|-----------|-----------|-----------|
## 45-<60 | 57418 | 423 | 57841 |
## | 0.993 | 0.007 | 0.153 |
## | 0.182 | 0.007 | |
## ----------------|-----------|-----------|-----------|
## 60-<75 | 42554 | 1914 | 44468 |
## | 0.957 | 0.043 | 0.118 |
## | 0.135 | 0.031 | |
## ----------------|-----------|-----------|-----------|
## Column Total | 315983 | 61537 | 377520 |
## | 0.837 | 0.163 | |
## ----------------|-----------|-----------|-----------|
##
##
summary(datos$total)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 11.00 35.00 57.00 76.22 97.00 1533.00
suma <- datos %>% group_by(total_cat) %>% summarise(sum(monto_real))
# Density plots
datos %>% filter(dataset == "A") %>% ggplot(aes(x = total)) + theme_Publication() +
labs(title = "Payment duration", subtitle = "Dataset A", x = "Total duration (days)",
y = "Number of invoices", caption = "Based on data from datos.hacienda.gov.py") +
geom_histogram(alpha = 0.8, fill = "#b2b2b2") + geom_vline(aes(xintercept = 30)) +
geom_vline(aes(xintercept = 60)) + annotate("text", x = 28, y = 8000, label = "optimal deadline",
angle = 90, size = 4) + annotate("text", x = 58, y = 8000, label = "local regulation deadline",
angle = 90, size = 4) + scale_x_continuous(breaks = seq(0, 185, 10))
datos %>% filter(dataset == "B") %>% ggplot(aes(x = total)) + theme_Publication() +
labs(title = "Payment duration", subtitle = "Dataset B", x = "Total duration (days)",
y = "Number of invoices", caption = "Based on data from datos.hacienda.gov.py") +
geom_histogram(alpha = 0.8, fill = "#b2b2b2") + geom_vline(aes(xintercept = 60)) +
annotate("text", x = 50, y = 8000, label = "local regulation deadline",
angle = 90, size = 4) + scale_x_continuous(breaks = seq(0, 1550, 50))
# By year
anio <- datos %>% filter((dataset == "A")) %>% group_by(obl_anho_obligacion) %>%
summarise(total2 = n(), duracion = median(total), monto = sum(obl_monto_obligado)) %>%
left_join(indicadores, by = c(obl_anho_obligacion = "anio")) %>% mutate(monto2 = monto/deflator *
100)
anio
## # A tibble: 7 x 7
## obl_anho_obligacion total2 duracion monto interes deflator monto2
## <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2011 33894 34 1.05e12 0.174 81.0 1.29e12
## 2 2012 50309 37 1.96e12 0.172 84.8 2.31e12
## 3 2013 31970 58 1.90e12 0.193 85.5 2.22e12
## 4 2014 44632 57 2.67e12 0.212 89.9 2.97e12
## 5 2015 50868 56 3.61e12 0.197 90.0 4.01e12
## 6 2016 49941 56 3.75e12 0.181 94.7 3.95e12
## 7 2017 54369 52 4.22e12 0.181 100 4.22e12
## Stages of the process
etapas <- datos %>% filter((dataset == "A")) %>% select(obl_anho_obligacion,
obl_fuente_financiamiento_descripcion, r_factura, r_str, r_ot) %>% rename(`Invoice delay` = r_factura,
`TR delay` = r_str, `TO delay` = r_ot) %>% gather(etapa, duracion, -obl_anho_obligacion,
-obl_fuente_financiamiento_descripcion)
etapas$etapa <- factor(etapas$etapa, levels = c("Invoice delay", "TR delay",
"TO delay"))
# Stages and year
ggplot(etapas, aes(x = as.factor(obl_anho_obligacion), y = duracion, fill = etapa)) +
labs(title = "Payment duration by stage and year", subtitle = "Dataset A",
y = "Total duration (days)", x = "Year", caption = "Based on data from datos.hacienda.gov.py") +
theme_Publication() + geom_boxplot() + theme(text = element_text(size = 10)) +
scale_y_continuous(breaks = seq(0, 200, 10)) + facet_wrap(~etapa, ncol = 1) +
scale_fill_manual(" ", values = c(`Invoice delay` = "#559171", `TR delay` = "#b5c62d",
`TO delay` = "#FCA32B"))
etapas_detalle <- datos %>% filter((dataset == "A")) %>% select(obl_anho_obligacion,
r_carga, r_carga_str, r_apro_str, r_tesoro_str, r_carga_ot, r_pago_ot) %>%
group_by(obl_anho_obligacion) %>% summarise(`Invoice approval` = mean(r_carga),
`TR creation` = mean(r_carga_str), `TR approval` = mean(r_apro_str), `Transfer to Treasury` = median(r_tesoro_str),
`TO creation` = mean(r_carga_ot), Payment = mean(r_pago_ot)) %>% gather(etapa,
duracion, -obl_anho_obligacion)
etapas_detalle$etapa <- factor(etapas_detalle$etapa, levels = c("Payment", "TO creation",
"Transfer to Treasury", "TR approval", "TR creation", "Invoice approval"))
ggplot(etapas_detalle, aes(fill = etapa, y = duracion, x = obl_anho_obligacion)) +
geom_bar(stat = "identity") + theme_Publication() + scale_fill_manual(" ",
values = c(`Invoice approval` = "#559171", `TR creation` = "#b5c62d", `TR approval` = "#ffef96",
`Transfer to Treasury` = "#FCA32B", `TO creation` = "#5b5b5f", Payment = "#b2b2b2")) +
labs(title = "Average stage duration", subtitle = "Dataset A", y = "Total duration (days)",
x = "Year", caption = "Based on data from datos.hacienda.gov.py") +
scale_x_continuous(breaks = seq(2011, 2017, 1))
# Stages and funds
etapas$obl_fuente_financiamiento_descripcion <- gsub("10-RECURSOS DEL TESORO",
"Treasury funds", etapas$obl_fuente_financiamiento_descripcion, fixed = T)
etapas$obl_fuente_financiamiento_descripcion <- gsub("20-RECURSOS DEL CREDITO PUBLICO",
"Public credit funds", etapas$obl_fuente_financiamiento_descripcion, fixed = T)
etapas$obl_fuente_financiamiento_descripcion <- gsub("30-RECURSOS INSTITUCIONALES",
"Institutional funds", etapas$obl_fuente_financiamiento_descripcion, fixed = T)
etapas$obl_fuente_financiamiento_descripcion <- factor(etapas$obl_fuente_financiamiento_descripcion,
levels = c("Treasury funds", "Public credit funds", "Institutional funds"))
ggplot(etapas, aes(x = etapa, y = duracion, fill = etapa)) + labs(title = "Payment duration by type of funds",
subtitle = "Dataset A", y = "Total duration (days)", x = "", caption = "Based on data from datos.hacienda.gov.py") +
theme_Publication() + geom_boxplot() + theme(text = element_text(size = 10)) +
scale_y_continuous(breaks = seq(0, 200, 10)) + coord_flip() + facet_wrap(~obl_fuente_financiamiento_descripcion,
ncol = 1) + scale_fill_manual(" ", values = c(`Invoice delay` = "#559171",
`TR delay` = "#b5c62d", `TO delay` = "#FCA32B"))
# Top 10 entities with higher spending
etapas3 <- datos %>% select(obl_entidad_descripcion, r_carga, r_carga_str, r_apro_str,
r_tesoro_str, r_carga_ot, r_pago_ot) %>% group_by(obl_entidad_descripcion) %>%
summarise(`Invoice approval` = mean(r_carga), `TR creation` = mean(r_carga_str),
`TR approval` = mean(r_apro_str), `Transfer to Treasury` = median(r_tesoro_str),
`TO creation` = mean(r_carga_ot), Payment = mean(r_pago_ot)) %>% gather(stage,
duration, -obl_entidad_descripcion) %>% filter(obl_entidad_descripcion ==
"013-MINISTERIO DE OBRAS PUBLICAS Y COMUNICACIONES" | obl_entidad_descripcion ==
"008-MINISTERIO DE SALUD PUBLICA Y BIENESTAR SOCIAL" | obl_entidad_descripcion ==
"007-MINISTERIO DE EDUCACION Y CIENCIAS" | obl_entidad_descripcion == "005-MINISTERIO DE DEFENSA NACIONAL" |
obl_entidad_descripcion == "003-MINISTERIO DEL INTERIOR" | obl_entidad_descripcion ==
"001-PRESIDENCIA DE LA REPUBLICA" | obl_entidad_descripcion == "001-UNIVERSIDAD NACIONAL DE ASUNCIN" |
obl_entidad_descripcion == "006-MINISTERIO DE HACIENDA" | obl_entidad_descripcion ==
"023-SECRETARIA NACIONAL DE LA VIVIENDA Y EL HABITAT" | obl_entidad_descripcion ==
"002-JUSTICIA ELECTORAL")
etapas3$obl_entidad_descripcion <- gsub("013-MINISTERIO DE OBRAS PUBLICAS Y COMUNICACIONES",
"Ministry of Public Works", etapas3$obl_entidad_descripcion, fixed = T)
etapas3$obl_entidad_descripcion <- gsub("008-MINISTERIO DE SALUD PUBLICA Y BIENESTAR SOCIAL",
"Ministry of Health", etapas3$obl_entidad_descripcion, fixed = T)
etapas3$obl_entidad_descripcion <- gsub("007-MINISTERIO DE EDUCACION Y CIENCIAS",
"Ministry of Education", etapas3$obl_entidad_descripcion, fixed = T)
etapas3$obl_entidad_descripcion <- gsub("005-MINISTERIO DE DEFENSA NACIONAL",
"Ministry of Defense", etapas3$obl_entidad_descripcion, fixed = T)
etapas3$obl_entidad_descripcion <- gsub("003-MINISTERIO DEL INTERIOR", "Ministry of Interior",
etapas3$obl_entidad_descripcion, fixed = T)
etapas3$obl_entidad_descripcion <- gsub("001-PRESIDENCIA DE LA REPUBLICA", "Presidency",
etapas3$obl_entidad_descripcion, fixed = T)
etapas3$obl_entidad_descripcion <- gsub("001-UNIVERSIDAD NACIONAL DE ASUNCIN",
"National University of Asuncion", etapas3$obl_entidad_descripcion, fixed = T)
etapas3$obl_entidad_descripcion <- gsub("006-MINISTERIO DE HACIENDA", "Treasury",
etapas3$obl_entidad_descripcion, fixed = T)
etapas3$obl_entidad_descripcion <- gsub("023-SECRETARIA NACIONAL DE LA VIVIENDA Y EL HABITAT",
"Secretary of Housing", etapas3$obl_entidad_descripcion, fixed = T)
etapas3$obl_entidad_descripcion <- gsub("002-JUSTICIA ELECTORAL", "Electoral Justice",
etapas3$obl_entidad_descripcion, fixed = T)
etapas3$stage <- factor(etapas3$stage, levels = c("Payment", "TO creation",
"Transfer to Treasury", "TR approval", "TR creation", "Invoice approval"))
ggplot(etapas3, aes(fill = stage, y = duration, x = reorder(obl_entidad_descripcion,
duration))) + geom_bar(stat = "identity") + theme_Publication() + coord_flip() +
scale_fill_manual(values = c(`Invoice approval` = "#559171", `TR creation` = "#b5c62d",
`TR approval` = "#ffef96", `Transfer to Treasury` = "#FCA32B", `TO creation` = "#5b5b5f",
Payment = "#b2b2b2")) + labs(title = "Average payment duration by stage",
subtitle = "Top 10 entities with higher spending (full dataset)", y = "Total duration (days)",
x = "", caption = "Based on data from datos.hacienda.gov.py")
# By entity
instituciones <- datos %>% filter(obl_entidad_descripcion != "002-MECANISMO NACIONAL DE PREVENCIN DE LA TORTURA",
obl_entidad_descripcion != "002-COM. NAC. DE PREV. CONTRA LA TORTURA Y OTROS TRATOS O PENAS.",
obl_entidad_descripcion != "003-CRDITO AGRCOLA DE HABILITACIN", obl_entidad_descripcion !=
"004-DIRECCION DE BENEFICENCIA Y AYUDA SOCIAL", obl_entidad_descripcion !=
"007-SINDICATURA GENERAL DE QUIEBRAS", obl_entidad_descripcion != "013-ENTE REGULADOR DE SERVICIOS SANITARIOS",
obl_entidad_descripcion != "014-INSTITUTO NACIONAL DE COOPERATIVISMO", obl_entidad_descripcion !=
"014-MINISTERIO DE LA MUJER", obl_entidad_descripcion != "016-SERVICIO NACIONAL DE CALIDAD Y SALUD ANIMAL",
obl_entidad_descripcion != "020-INSTITUTO FORESTAL NACIONAL", obl_entidad_descripcion !=
"021-SECRETARA DEL AMBIENTE", obl_entidad_descripcion != "026-SECRETARIA DE DEFENSA DEL CONSUMIDOR Y EL USUARIO",
obl_entidad_descripcion != "027-COMISIN NACIONAL DE LA COMPETENCIA", obl_entidad_descripcion !=
"028-AGENCIA NACIONAL DE TRANSITO Y SEGURIDAD VIAL", obl_entidad_descripcion !=
"029-CONSEJO NACIONAL DE EDUCACION SUPERIOR", obl_entidad_descripcion !=
"030-AGENCIA NACIONAL DE EVAL. Y ACRED. DE LA EDUCACION SUPERIOR", obl_entidad_descripcion !=
"030-AGENCIA NACIONAL DE EVAL. Y ACRED. DE LA EDUCACION SUPERIOR", obl_entidad_descripcion !=
"031-AUTORIDAD REGULADORA RADIOLGICA Y NUCLEAR") %>% group_by(obl_entidad_descripcion,
dataset) %>% summarise(facturas = n(), monto = sum(obl_monto_obligado, na.rm = TRUE),
montodolar = sum(monto_dolar), duracion_media = median(total), duracion_prom = mean(total),
duracion_max = max(total), invoice = median(r_factura), str = median(r_str),
ot = median(r_ot))
instituciones
## # A tibble: 84 x 11
## # Groups: obl_entidad_descripcion [?]
## obl_entidad_des… dataset facturas monto montodolar duracion_media
## <chr> <chr> <int> <dbl> <dbl> <dbl>
## 1 001-CONGRESO NA… A 5223 1.13e11 22.6 60
## 2 001-CONGRESO NA… B 580 2.00e10 3.97 97
## 3 001-CONTRALORA … A 1376 3.78e10 7.27 35
## 4 001-CONTRALORA … B 101 7.95e 9 1.56 130
## 5 001-CORTE SUPRE… A 6448 2.16e11 40.8 50
## 6 001-CORTE SUPRE… B 952 3.18e10 6.16 156
## 7 001-DEFENSORA D… A 282 5.91e 9 1.17 42
## 8 001-DEFENSORA D… B 17 3.11e 8 0.0628 165
## 9 001-INSTITUTO N… A 123 6.14e 9 1.22 27
## 10 001-INSTITUTO N… B 8 1.26e 9 0.239 98.5
## # … with 74 more rows, and 5 more variables: duracion_prom <dbl>,
## # duracion_max <dbl>, invoice <dbl>, str <dbl>, ot <dbl>
salud_obras2 <- datos %>% filter((dataset == "A")) %>% select(obl_entidad_descripcion,
obl_fuente_financiamiento_descripcion, r_carga, r_carga_str, r_apro_str,
r_tesoro_str, r_carga_ot, r_pago_ot) %>% group_by(obl_entidad_descripcion,
obl_fuente_financiamiento_descripcion) %>% summarise(`Invoice approval` = mean(r_carga),
`TR creation` = mean(r_carga_str), `TR approval` = mean(r_apro_str), `Transfer to Treasury` = median(r_tesoro_str),
`TO creation` = mean(r_carga_ot), Payment = mean(r_pago_ot)) %>% gather(stage,
duration, -obl_entidad_descripcion, -obl_fuente_financiamiento_descripcion) %>%
filter(obl_entidad_descripcion == "013-MINISTERIO DE OBRAS PUBLICAS Y COMUNICACIONES" |
obl_entidad_descripcion == "008-MINISTERIO DE SALUD PUBLICA Y BIENESTAR SOCIAL")
salud_obras2$obl_entidad_descripcion <- gsub("013-MINISTERIO DE OBRAS PUBLICAS Y COMUNICACIONES",
"Ministry of Public Works", salud_obras2$obl_entidad_descripcion, fixed = T)
salud_obras2$obl_entidad_descripcion <- gsub("008-MINISTERIO DE SALUD PUBLICA Y BIENESTAR SOCIAL",
"Ministry of Health", salud_obras2$obl_entidad_descripcion, fixed = T)
salud_obras2$obl_fuente_financiamiento_descripcion <- gsub("10-RECURSOS DEL TESORO",
"Treasury funds", salud_obras2$obl_fuente_financiamiento_descripcion, fixed = T)
salud_obras2$obl_fuente_financiamiento_descripcion <- gsub("20-RECURSOS DEL CREDITO PUBLICO",
"Public credit funds", salud_obras2$obl_fuente_financiamiento_descripcion,
fixed = T)
salud_obras2$obl_fuente_financiamiento_descripcion <- gsub("30-RECURSOS INSTITUCIONALES",
"Institutional funds", salud_obras2$obl_fuente_financiamiento_descripcion,
fixed = T)
salud_obras2$stage <- factor(salud_obras2$stage, levels = c("Payment", "TO creation",
"Transfer to Treasury", "TR approval", "TR creation", "Invoice approval"))
ggplot(salud_obras2, aes(fill = stage, y = duration, x = obl_fuente_financiamiento_descripcion)) +
geom_bar(stat = "identity") + theme_Publication() + scale_fill_manual(values = c(`Invoice approval` = "#559171",
`TR creation` = "#b5c62d", `TR approval` = "#ffef96", `Transfer to Treasury` = "#FCA32B",
`TO creation` = "#5b5b5f", Payment = "#b2b2b2")) + labs(title = "Average payment duration by stage and funds",
subtitle = "Dataset A", y = "Total duration (days)", x = "", caption = "Based on data from datos.hacienda.gov.py") +
facet_wrap(~obl_entidad_descripcion, ncol = 2) + coord_flip()
salud_obras <- datos %>% filter((dataset == "A")) %>% filter(obl_entidad_descripcion ==
"013-MINISTERIO DE OBRAS PUBLICAS Y COMUNICACIONES" | obl_entidad_descripcion ==
"008-MINISTERIO DE SALUD PUBLICA Y BIENESTAR SOCIAL") %>% select(obl_entidad_descripcion,
obl_fuente_financiamiento_descripcion, r_factura, r_str, r_ot) %>% rename(`Invoice delay` = r_factura,
`TR delay` = r_str, `TO delay` = r_ot) %>% gather(etapa, duracion, -obl_entidad_descripcion,
-obl_fuente_financiamiento_descripcion)
salud_obras$etapa <- factor(salud_obras$etapa, levels = c("Invoice delay", "TR delay",
"TO delay"))
salud_obras$obl_fuente_financiamiento_descripcion <- gsub("10-RECURSOS DEL TESORO",
"Treasury funds", salud_obras$obl_fuente_financiamiento_descripcion, fixed = T)
salud_obras$obl_fuente_financiamiento_descripcion <- gsub("20-RECURSOS DEL CREDITO PUBLICO",
"Public credit funds", salud_obras$obl_fuente_financiamiento_descripcion,
fixed = T)
salud_obras$obl_fuente_financiamiento_descripcion <- gsub("30-RECURSOS INSTITUCIONALES",
"Institutional funds", salud_obras$obl_fuente_financiamiento_descripcion,
fixed = T)
salud_obras$obl_fuente_financiamiento_descripcion <- factor(salud_obras$obl_fuente_financiamiento_descripcion,
levels = c("Treasury funds", "Public credit funds", "Institutional funds"))
salud_obras$obl_entidad_descripcion <- gsub("013-MINISTERIO DE OBRAS PUBLICAS Y COMUNICACIONES",
"Ministry of Public Works", salud_obras$obl_entidad_descripcion, fixed = T)
salud_obras$obl_entidad_descripcion <- gsub("008-MINISTERIO DE SALUD PUBLICA Y BIENESTAR SOCIAL",
"Ministry of Health", salud_obras$obl_entidad_descripcion, fixed = T)
ggplot(salud_obras, aes(x = etapa, y = duracion, fill = obl_entidad_descripcion)) +
labs(title = "Payment duration by type of funds", subtitle = "Dataset A",
y = "Total duration (days)", x = "", caption = "Based on data from datos.hacienda.gov.py") +
theme_Publication() + geom_boxplot() + theme(text = element_text(size = 10)) +
scale_y_continuous(breaks = seq(0, 200, 10)) + facet_wrap(~obl_fuente_financiamiento_descripcion,
ncol = 3) + scale_fill_manual(" ", values = c(`Ministry of Public Works` = "#559171",
`Ministry of Health` = "#FCA32B"))
# Providers
proveedores2 <- base_limpia %>% mutate(retraso30 = ifelse(total >= 30, 1, 0)) %>%
group_by(prov_ruc) %>% summarise(monto = sum(obl_monto_obligado), contratos = n_distinct(obl_codigo_contratacion),
instituciones = n_distinct(obl_entidad_descripcion), duracion = median(total),
anios = n_distinct(obl_anho_obligacion), facturas = n(), retrasos = sum(retraso30)) %>%
mutate(dolar = monto/5618.933, dolar_cat = ifelse(dolar > 1000000, 1, 0),
porc_retraso = (round(retrasos/facturas * 100, 1)), porc_cat = ifelse(porc_retraso <
25, "<25", ifelse(porc_retraso < 50, "<50", ifelse(porc_retraso <
75, "<75", "75-100"))))
CrossTable(proveedores2$porc_cat)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 4742
##
##
## | <25 | <50 | <75 | 75-100 |
## |-----------|-----------|-----------|-----------|
## | 548 | 390 | 935 | 2869 |
## | 0.116 | 0.082 | 0.197 | 0.605 |
## |-----------|-----------|-----------|-----------|
##
##
##
##
# Type of purchase
gasto <- datos %>% filter(dataset == "A") %>% group_by(obl_grupo_descripcion) %>%
summarise(n = n(), duracion_media = median(total), duracion_prom = mean(total),
duracion_max = max(total))
# Seasonal trends
datos_st <- datos %>% filter(dataset == "A") %>% mutate(mesf = month(obl_fecha_factura),
mesp = month(ot_fecha_deposito), mes_aniof = format(as.Date(obl_fecha_factura),
"%Y-%m"), mes_aniop = format(as.Date(ot_fecha_deposito), "%Y-%m"))
datos_st$mes_aniof <- as.Date(as.yearmon(datos_st$mes_aniof))
datos_st$mes_aniop <- as.Date(as.yearmon(datos_st$mes_aniop))
facturas <- datos_st %>% filter(dataset == "A") %>% group_by(mes_aniof) %>%
summarise(Invoices = n())
pagos <- datos_st %>% filter(dataset == "A") %>% group_by(mes_aniop) %>% summarise(Payments = n())
tiempo <- left_join(facturas, pagos, by = c(mes_aniof = "mes_aniop"))
tiempo <- gather(tiempo, Series, cantidad, -mes_aniof)
ggplot(tiempo, aes(x = mes_aniof, y = cantidad, group = Series)) + geom_line(aes(color = Series),
na.rm = TRUE) + theme_Publication() + theme(legend.title = element_blank()) +
scale_color_manual(values = c("#559171", "#FCA32B")) + scale_y_continuous(labels = comma) +
scale_x_date(breaks = date_breaks("1 year"), labels = date_format("%b %y")) +
labs(title = "Invoices billed and paid per month 2011-2017", subtitle = "Dataset A",
y = "Number of invoices", x = "Month-Year", caption = "Based on data from datos.hacienda.gov.py")
# Facturas todas las instituciones
facturas_elab1 <- datos %>% filter(dataset == "A") %>% group_by(obl_fecha_factura) %>%
summarise(facturas_creadas = n()) %>% rename(fecha = obl_fecha_factura)
facturas_pago <- datos %>% filter(dataset == "A") %>% group_by(ot_fecha_deposito) %>%
summarise(facturas_pago = n()) %>% rename(fecha = ot_fecha_deposito)
facturas_acumuladas2 <- full_join(facturas_elab1, facturas_pago, by = "fecha")
facturas_acumuladas2[is.na(facturas_acumuladas2)] <- 0
facturas_acumuladas2 <- facturas_acumuladas2 %>% arrange(fecha) %>% mutate(acum_elab = cumsum(facturas_creadas),
acum_pago = cumsum(facturas_pago), pendientes_total = acum_elab - acum_pago)
ggplot(facturas_acumuladas2, aes(x = fecha, y = pendientes_total)) + geom_line() +
theme(legend.title = element_blank()) + theme_Publication() + scale_y_continuous(labels = comma) +
scale_x_datetime(breaks = date_breaks("1 year"), labels = date_format("%y")) +
labs(title = "Unpaid bills", y = "Number of invoices", subtitle = "Dataset A",
x = "Year", caption = "Based on data from datos.hacienda.gov.py")
# By quarter
trimestre <- datos %>% filter(dataset == "A") %>% mutate(quarter = quarter(obl_fecha_factura)) %>%
group_by(quarter) %>% summarise(duracionprom = mean(total), dur_media = median(total))
nomina <- read_csv("nomina.csv")
nomina <- nomina %>% mutate(size = ifelse(funcionarios < 1000, "small", ifelse(funcionarios <
10000, "median", "large")))
nomina$size <- factor(nomina$size, levels = c("small", "median", "large"))
surv <- datos %>% filter(dataset == "A") %>% select(obl_id, obl_monto_obligado,
obl_fuente_financiamiento_descripcion, obl_nivel_descripcion, obl_entidad_descripcion,
total, r_factura, r_str, r_ot, prov_ruc, contr_monto_adjudicado, prov_ruc,
obl_grupo_descripcion, obl_subgrupo_descripcion) %>% left_join(nomina, by = c(obl_entidad_descripcion = "descripcionEntidad")) %>%
filter(!is.na(size)) %>% rename(type = obl_nivel_descripcion, funds = obl_fuente_financiamiento_descripcion,
official = funcionarios, purchase = obl_grupo_descripcion)
surv$type <- gsub("14-CONTRALORA GENERAL DE LA REPBLICA", "Legislative Branch",
surv$type, fixed = T)
surv$type <- gsub("11-PODER LEGISLATIVO", "Legislative Branch", surv$type, fixed = T)
surv$type <- gsub("15-DEFENSORA DEL PUEBLO", "Legislative Branch", surv$type,
fixed = T)
surv$type <- gsub("15-OTROS ORGANISMOS DEL ESTADO", "Other", surv$type, fixed = T)
surv$type <- gsub("23-ENTES AUTNOMOS Y AUTRQUICOS", "Other", surv$type, fixed = T)
surv$type <- gsub("27-ENTIDADES FINANCIERAS OFICIALES", "Other", surv$type,
fixed = T)
surv$type <- gsub("28-UNIVERSIDADES NACIONALES", "Universities", surv$type,
fixed = T)
surv$type <- gsub("12-PODER EJECUTIVO", "Executive Branch", surv$type, fixed = T)
surv$type <- gsub("13-PODER JUDICIAL", "Judiciary Branch", surv$type, fixed = T)
surv$funds <- gsub("10-RECURSOS DEL TESORO", "Treasury funds", surv$funds, fixed = T)
surv$funds <- gsub("20-RECURSOS DEL CREDITO PUBLICO", "Public credit funds",
surv$funds, fixed = T)
surv$funds <- gsub("30-RECURSOS INSTITUCIONALES", "Institutional funds", surv$funds,
fixed = T)
surv$funds <- factor(surv$funds, levels = c("Institutional funds", "Treasury funds",
"Public credit funds"))
surv$type <- factor(surv$type, levels = c("Executive Branch", "Judiciary Branch",
"Legislative Branch", "Universities", "Other"))
surv$purchase <- gsub("200-SERVICIOS NO PERSONALES", "services", surv$purchase,
fixed = T)
surv$purchase <- gsub("300-BIENES DE CONSUMO E INSUMOS", "goods and materials",
surv$purchase, fixed = T)
surv$purchase <- gsub("500-INVERSION FSICA", "investment", surv$purchase,
fixed = T)
surv$purchase <- gsub("800-TRANSFERENCIAS", "other", surv$purchase, fixed = T)
surv$purchase <- gsub("400-BIENES DE CAMBIO", "other", surv$purchase, fixed = T)
surv$purchase <- factor(surv$purchase, levels = c("services", "goods and materials",
"investment", "other"))
surv <- surv %>% mutate(cens = 1, log_amount = log(obl_monto_obligado), log_contrato = log(contr_monto_adjudicado),
log_total = log(total), funds2 = ifelse(funds == "Treasury funds", 1, 0))
## Survival object
surv_object <- Surv(time = surv$total, event = surv$cens)
# Kaplan-Meyer curves
fit <- survfit(surv_object ~ 1, data = surv)
ggsurvplot(fit, data = surv, fun = "pct", palette = c("#E7B800"), ggtheme = theme_minimal(),
title = "Invoices payment duration", legend = "none")
fit1 <- survfit(surv_object ~ funds, data = surv)
plot1 <- ggsurvplot(fit1, data = surv, pval = F, fun = "pct", palette = c("#559171",
"#b5c62d", "#FCA32B"), legend.title = "Type of funds", ggtheme = theme_minimal())
plot1$plot + geom_vline(aes(xintercept = median(datos$total[datos$dataset ==
"A"]))) + annotate("text", x = 46, y = 80, label = "Median duration", angle = 90,
size = 3)
fit2 <- survfit(surv_object ~ type, data = surv)
ggsurvplot(fit2, data = surv, pval = F, fun = "pct", legend.title = "Type of institution",
ggtheme = theme_minimal(), legend = "right", palette = c("#E7B800", "#2E9FDF",
"#ff7373", "#7fc97f", "#a6cee3"), legend.labs = c("Executive Branch",
"Judiciary Branch", "Legislative Branch", "Universities", "Other"))
fit3 <- survfit(surv_object ~ size, data = surv)
plot2 <- ggsurvplot(fit3, data = surv, pval = F, fun = "pct", legend.title = "Size of institution",
palette = c("#559171", "#b5c62d", "#FCA32B"), ggtheme = theme_minimal(),
legend.labs = c("small", "median", "large"))
plot2$plot + geom_vline(aes(xintercept = median(datos$total[datos$dataset ==
"A"]))) + annotate("text", x = 46, y = 80, label = "Median duration", angle = 90,
size = 3)
fit4 <- survfit(surv_object ~ purchase, data = surv)
ggsurvplot(fit4, data = surv, pval = TRUE, fun = "pct", legend.title = "Type of purchase",
palette = c("#E7B800", "#2E9FDF", "#ff7373", "#7fc97f"))
## Parametric function tests
fit_exp <- flexsurvreg(surv_object ~ 1, dist = "exponential")
fit_weibull <- flexsurvreg(surv_object ~ 1, dist = "weibull")
fit_lognormal <- flexsurvreg(surv_object ~ 1, dist = "lnorm")
fit_loglogis <- flexsurvreg(surv_object ~ 1, dist = "llogis")
plot(fit_weibull)
plot(fit_exp)
plot(fit_lognormal)
plot(fit_loglogis)
# Log-test
fit_weibull$loglik
## [1] -1499671
fit_exp$loglik
## [1] -1584678
fit_loglogis$loglik
## [1] -1506679
fit_lognormal$loglik
## [1] -1499352
fit_weibull$AIC
## [1] 2999346
fit_exp$AIC
## [1] 3169358
fit_loglogis$AIC
## [1] 3013362
fit_lognormal$AIC
## [1] 2998708
## lognormal isn't statistically different from Weibull, we choose Weibull
qchisq(0.95, 2)
## [1] 5.991465
2 * (fit_weibull$loglik - fit_lognormal$loglik) #2 df
## [1] -638.2443
# Models
weib <- flexsurvreg(Surv(total, cens) ~ log_amount + funds2 + size + ejecucion +
log_contrato + purchase, data = surv, dist = "weibull")
weib
## Call:
## flexsurvreg(formula = Surv(total, cens) ~ log_amount + funds2 +
## size + ejecucion + log_contrato + purchase, data = surv,
## dist = "weibull")
##
## Estimates:
## data mean est L95%
## shape NA 2.065248 2.059635
## scale NA 115.517403 112.221653
## log_amount 15.422829 -0.029945 -0.030926
## funds2 0.707471 0.274678 0.270700
## sizemedian 0.487889 0.198292 0.190504
## sizelarge 0.447956 0.357829 0.349664
## ejecucion 0.813933 -1.532200 -1.557684
## log_contrato 19.849689 0.030648 0.029783
## purchasegoods and materials 0.421230 0.045439 0.041354
## purchaseinvestment 0.096565 0.036023 0.029218
## purchaseother 0.002234 0.151268 0.113943
## U95% se exp(est)
## shape 2.070876 0.002868 NA
## scale 118.909944 1.705990 NA
## log_amount -0.028965 0.000500 0.970499
## funds2 0.278656 0.002030 1.316107
## sizemedian 0.206080 0.003974 1.219319
## sizelarge 0.365993 0.004166 1.430220
## ejecucion -1.506716 0.013002 0.216060
## log_contrato 0.031513 0.000441 1.031122
## purchasegoods and materials 0.049524 0.002084 1.046487
## purchaseinvestment 0.042829 0.003472 1.036680
## purchaseother 0.188593 0.019044 1.163308
## L95% U95%
## shape NA NA
## scale NA NA
## log_amount 0.969547 0.971451
## funds2 1.310881 1.321353
## sizemedian 1.209859 1.228852
## sizelarge 1.418591 1.441946
## ejecucion 0.210623 0.221637
## log_contrato 1.030231 1.032014
## purchasegoods and materials 1.042221 1.050771
## purchaseinvestment 1.029649 1.043760
## purchaseother 1.120688 1.207550
##
## N = 296299, Events: 296299, Censored: 0
## Total time at risk: 16546174
## Log-likelihood = -1385706, df = 11
## AIC = 2771433
weib1 <- survreg(Surv(total, cens) ~ log_amount + funds2 + size + ejecucion +
log_contrato + purchase, data = surv, dist = "weibull")
weib1
## Call:
## survreg(formula = Surv(total, cens) ~ log_amount + funds2 + size +
## ejecucion + log_contrato + purchase, data = surv, dist = "weibull")
##
## Coefficients:
## (Intercept) log_amount
## 4.74942120 -0.02994534
## funds2 sizemedian
## 0.27467804 0.19829231
## sizelarge ejecucion
## 0.35782862 -1.53219995
## log_contrato purchasegoods and materials
## 0.03064767 0.04543896
## purchaseinvestment purchaseother
## 0.03602345 0.15126805
##
## Scale= 0.4842034
##
## Loglik(model)= -1385706 Loglik(intercept only)= -1408384
## Chisq= 45357.25 on 9 degrees of freedom, p= <0.0000000000000002
## n=296299 (19455 observations deleted due to missingness)
summary(weib1)
##
## Call:
## survreg(formula = Surv(total, cens) ~ log_amount + funds2 + size +
## ejecucion + log_contrato + purchase, data = surv, dist = "weibull")
## Value Std. Error z
## (Intercept) 4.749421 0.014770 321.56
## log_amount -0.029945 0.000500 -59.84
## funds2 0.274678 0.002030 135.32
## sizemedian 0.198292 0.003974 49.90
## sizelarge 0.357829 0.004166 85.90
## ejecucion -1.532200 0.013002 -117.84
## log_contrato 0.030648 0.000441 69.42
## purchasegoods and materials 0.045439 0.002084 21.80
## purchaseinvestment 0.036023 0.003472 10.37
## purchaseother 0.151268 0.019044 7.94
## Log(scale) -0.725250 0.001389 -522.26
## p
## (Intercept) < 0.0000000000000002
## log_amount < 0.0000000000000002
## funds2 < 0.0000000000000002
## sizemedian < 0.0000000000000002
## sizelarge < 0.0000000000000002
## ejecucion < 0.0000000000000002
## log_contrato < 0.0000000000000002
## purchasegoods and materials < 0.0000000000000002
## purchaseinvestment < 0.0000000000000002
## purchaseother 0.000000000000002
## Log(scale) < 0.0000000000000002
##
## Scale= 0.484
##
## Weibull distribution
## Loglik(model)= -1385706 Loglik(intercept only)= -1408384
## Chisq= 45357.25 on 9 degrees of freedom, p= 0
## Number of Newton-Raphson Iterations: 5
## n=296299 (19455 observations deleted due to missingness)
costos <- datos %>% mutate(quarter = quarter(obl_fecha_factura)) %>% select(dataset,
obl_monto_obligado, obl_anho_obligacion, obl_entidad_descripcion, quarter,
total, total2, r_factura, r_str, r_ot, r_carga_str, r_apro_str, r_tesoro_str,
r_carga_ot, r_pago_ot, interes, deflator) %>% mutate(totalf = total/365,
retraso30 = total - 30, retraso30 = ifelse(retraso30 < 0, 0, retraso30),
retraso30f = retraso30/365, retraso75 = total - 75, retraso75 = ifelse(retraso75 <
0, 0, retraso75), retraso75f = retraso75/365, retrasofac = r_factura -
15, retrasofac = ifelse(retrasofac < 0, 0, retrasofac), retrasofacf = retrasofac/365,
retraso45 = total - 45, retraso45 = ifelse(retraso45 < 0, 0, retraso45),
retraso45f = retraso45/365, retraso60 = total - 60, retraso60 = ifelse(retraso60 <
0, 0, retraso60), retraso60f = retraso60/365, rfactura = r_factura/365,
rstr = r_str/365, rot = r_ot/365, rcargastr = r_carga_str/365, raprobstr = r_apro_str/365,
rtesorostr = r_tesoro_str/365, rcargaot = r_carga_ot/365, rpago = r_pago_ot/365,
costo30 = ((obl_monto_obligado * retraso30f * interes)/deflator) * 100,
costo_total = ((obl_monto_obligado * totalf * interes)/deflator) * 100,
costo_75 = ((obl_monto_obligado * retraso75f * interes)/deflator) * 100,
costo_fac = ((obl_monto_obligado * retrasofacf * interes)/deflator) * 100,
costo_45 = ((obl_monto_obligado * retraso45f * interes)/deflator) * 100,
costo_60 = ((obl_monto_obligado * retraso60f * interes)/deflator) * 100,
costo_total_nominal = (obl_monto_obligado * totalf * interes), costo_fac = ((obl_monto_obligado *
retrasofacf * interes)/deflator) * 100, costo_factura = ((obl_monto_obligado *
rfactura * interes)/deflator) * 100, costo_str = ((obl_monto_obligado *
rstr * interes)/deflator) * 100, costo_ot = ((obl_monto_obligado * rot *
interes)/deflator) * 100, costo_cargastr = ((obl_monto_obligado * rcargastr *
interes)/deflator) * 100, costo_aprovstr = ((obl_monto_obligado * raprobstr *
interes)/deflator) * 100, costo_tesorostr = ((obl_monto_obligado * rtesorostr *
interes)/deflator) * 100, costo_cargaot = ((obl_monto_obligado * rcargaot *
interes)/deflator) * 100, costo_pago = ((obl_monto_obligado * rpago *
interes)/deflator) * 100)
costo_total <- costos %>% group_by(dataset) %>% select(starts_with("costo")) %>%
summarise_all(sum) %>% gather(costo, monto, -dataset) %>% mutate(costo_dolar = (monto/5618.933)/1000000) %>%
ungroup() %>% group_by(costo) %>% summarise(costo_dolar = sum(costo_dolar))
costo_gdp17 <- costos %>% select(starts_with("costo")) %>% summarise_all(sum) %>%
gather(costo, monto) %>% mutate(costo_dolar = (monto/5618.933)/1000000,
GDP_2017 = 165393694205500, perc_gdp = round((monto/GDP_2017 * 100), 2))
# PIB Percentage
costo_gdp2 <- costos %>% select(obl_anho_obligacion, costo30, costo_total) %>%
rename(`Total payment duration` = costo_total, `Late payments` = costo30) %>%
group_by(obl_anho_obligacion) %>% summarise_all(sum) %>% gather(costo, monto,
-obl_anho_obligacion) %>% mutate(costo_dolar = (monto/5618.933)/1000000,
GDP_2017 = 165393694205500, perc_gdp = round((monto/GDP_2017 * 100), 3))
ggplot(costo_gdp2, aes(x = obl_anho_obligacion, y = perc_gdp, colour = costo)) +
geom_line() + theme(legend.title = element_blank()) + theme_Publication() +
scale_color_manual(" ", values = c("#559171", "#FCA32B")) + labs(title = "Cost as a percentage of GDP",
subtitle = "Full dataset", y = "% of GDP", x = "Year", caption = "Based on data from datos.hacienda.gov.py") +
scale_x_continuous(breaks = seq(2011, 2017, 1))
# Year
year_cost <- costos %>% group_by(obl_anho_obligacion, dataset) %>% rename(Year = obl_anho_obligacion) %>%
summarise(`Total cost` = (sum(costo30)/1000000)/5618.933) %>% gather(Type,
cost, -Year, -dataset)
year_cost$dataset <- factor(year_cost$dataset, levels = c("B", "A"))
ggplot(year_cost, aes(x = Year, y = cost, fill = dataset)) + geom_bar(stat = "identity") +
scale_fill_manual(" ", values = c(A = "#559171", B = "#FCA32B")) + theme(legend.title = element_blank()) +
theme_Publication() + labs(title = "Cost of delayed payments by year", subtitle = "30-day deadline",
y = "Millions of dollars", x = "Year", caption = "Based on data from datos.hacienda.gov.py") +
scale_y_continuous(labels = comma) + scale_x_continuous(breaks = seq(2011,
2017, 1))
# Cost by stages
costo_etapa <- costo_total %>% group_by(costo) %>% summarise(costo_dolar = sum(costo_dolar)) %>%
filter(costo == "costo_factura" | costo == "costo_cargastr" | costo == "costo_aprovstr" |
costo == "costo_tesorostr" | costo == "costo_cargaot" | costo == "costo_pago") %>%
mutate(stage = ifelse(costo == "costo_factura", "Invoice", ifelse(costo ==
"costo_cargastr", "TR stage", ifelse(costo == "costo_aprovstr", "TR stage",
ifelse(costo == "costo_tesorostr", "TR stage", "TO stage")))))
costo_etapa$costo <- gsub("costo_factura", "Invoice approval", costo_etapa$costo,
fixed = T)
costo_etapa$costo <- gsub("costo_cargastr", "TR creation", costo_etapa$costo,
fixed = T)
costo_etapa$costo <- gsub("costo_aprovstr", "TR approval", costo_etapa$costo,
fixed = T)
costo_etapa$costo <- gsub("costo_tesorostr", "Transfer to Treasury", costo_etapa$costo,
fixed = T)
costo_etapa$costo <- gsub("costo_cargaot", "TO creation", costo_etapa$costo,
fixed = T)
costo_etapa$costo <- gsub("costo_pago", "Payment", costo_etapa$costo, fixed = T)
costo_etapa$stage <- factor(costo_etapa$stage, levels = c("Invoice", "TR stage",
"TO stage"))
costo_etapa$costo <- factor(costo_etapa$costo, levels = c("Payment", "TO creation",
"Transfer to Treasury", "TR approval", "TR creation", "Invoice approval"))
ggplot(costo_etapa, aes(x = stage, y = costo_dolar, fill = costo)) + geom_bar(stat = "identity") +
theme_Publication() + coord_flip() + labs(title = "Cost of payment duration by stages",
subtitle = "Full dataset", y = "Millions of dollars", x = "", caption = "Based on data from datos.hacienda.gov.py") +
scale_y_continuous(breaks = seq(0, 150, 10)) + scale_fill_manual(" ", values = c(`Invoice approval` = "#559171",
`TR creation` = "#b5c62d", `TR approval` = "#ffef96", `Transfer to Treasury` = "#FCA32B",
`TO creation` = "#5b5b5f", Payment = "#b2b2b2"))
# Cost by institution
costo_inst <- costos %>% filter(obl_entidad_descripcion != "002-MECANISMO NACIONAL DE PREVENCIN DE LA TORTURA",
obl_entidad_descripcion != "002-COM. NAC. DE PREV. CONTRA LA TORTURA Y OTROS TRATOS O PENAS.",
obl_entidad_descripcion != "003-CRDITO AGRCOLA DE HABILITACIN", obl_entidad_descripcion !=
"004-DIRECCION DE BENEFICENCIA Y AYUDA SOCIAL", obl_entidad_descripcion !=
"007-SINDICATURA GENERAL DE QUIEBRAS", obl_entidad_descripcion != "013-ENTE REGULADOR DE SERVICIOS SANITARIOS",
obl_entidad_descripcion != "014-INSTITUTO NACIONAL DE COOPERATIVISMO", obl_entidad_descripcion !=
"014-MINISTERIO DE LA MUJER", obl_entidad_descripcion != "016-SERVICIO NACIONAL DE CALIDAD Y SALUD ANIMAL",
obl_entidad_descripcion != "020-INSTITUTO FORESTAL NACIONAL", obl_entidad_descripcion !=
"021-SECRETARA DEL AMBIENTE", obl_entidad_descripcion != "026-SECRETARIA DE DEFENSA DEL CONSUMIDOR Y EL USUARIO",
obl_entidad_descripcion != "027-COMISIN NACIONAL DE LA COMPETENCIA", obl_entidad_descripcion !=
"028-AGENCIA NACIONAL DE TRANSITO Y SEGURIDAD VIAL", obl_entidad_descripcion !=
"029-CONSEJO NACIONAL DE EDUCACION SUPERIOR", obl_entidad_descripcion !=
"030-AGENCIA NACIONAL DE EVAL. Y ACRED. DE LA EDUCACION SUPERIOR", obl_entidad_descripcion !=
"030-AGENCIA NACIONAL DE EVAL. Y ACRED. DE LA EDUCACION SUPERIOR", obl_entidad_descripcion !=
"031-AUTORIDAD REGULADORA RADIOLGICA Y NUCLEAR") %>% group_by(obl_entidad_descripcion,
dataset) %>% select(starts_with("costo")) %>% summarise_all(sum) %>% arrange(desc(costo_total)) %>%
mutate(acumulado = cumsum(costo_total)/sum(costo_total) * 100)
costo_inst2 <- costo_inst %>% filter(obl_entidad_descripcion == "013-MINISTERIO DE OBRAS PUBLICAS Y COMUNICACIONES" |
obl_entidad_descripcion == "008-MINISTERIO DE SALUD PUBLICA Y BIENESTAR SOCIAL" |
obl_entidad_descripcion == "007-MINISTERIO DE EDUCACION Y CIENCIAS" | obl_entidad_descripcion ==
"005-MINISTERIO DE DEFENSA NACIONAL" | obl_entidad_descripcion == "003-MINISTERIO DEL INTERIOR") %>%
mutate(costo_total_dolar = (costo_total/5618.933)/1000000)
costo_inst2$obl_entidad_descripcion <- gsub("013-MINISTERIO DE OBRAS PUBLICAS Y COMUNICACIONES",
"Ministry of Public Works", costo_inst2$obl_entidad_descripcion, fixed = T)
costo_inst2$obl_entidad_descripcion <- gsub("008-MINISTERIO DE SALUD PUBLICA Y BIENESTAR SOCIAL",
"Ministry of Health", costo_inst2$obl_entidad_descripcion, fixed = T)
costo_inst2$obl_entidad_descripcion <- gsub("007-MINISTERIO DE EDUCACION Y CIENCIAS",
"Ministry of Education", costo_inst2$obl_entidad_descripcion, fixed = T)
costo_inst2$obl_entidad_descripcion <- gsub("005-MINISTERIO DE DEFENSA NACIONAL",
"Ministry of Defense", costo_inst2$obl_entidad_descripcion, fixed = T)
costo_inst2$obl_entidad_descripcion <- gsub("003-MINISTERIO DEL INTERIOR", "Ministry of Interior",
costo_inst2$obl_entidad_descripcion, fixed = T)
costo_inst2$obl_entidad_descripcion <- factor(costo_inst2$obl_entidad_descripcion,
levels = c("Ministry of Interior", "Ministry of Defense", "Ministry of Education",
"Ministry of Health", "Ministry of Public Works"))
costo_inst2$dataset <- factor(costo_inst2$dataset, levels = c("B", "A"))
ggplot(costo_inst2, aes(x = obl_entidad_descripcion, y = costo_total_dolar,
fill = dataset)) + geom_bar(stat = "identity") + scale_fill_manual("Dataset",
values = c(A = "#559171", B = "#FCA32B")) + theme(legend.title = element_blank()) +
theme_Publication() + coord_flip() + labs(title = "Total cost by institution",
y = "Cost in millions of dollars", x = "Institution", caption = "Based on data from datos.hacienda.gov.py") +
scale_y_continuous(breaks = seq(0, 90, 5))